Addition of autofilling excel forms. Improved pydantic validation.

This commit is contained in:
Landon Wark
2023-07-19 14:33:15 -05:00
parent 1c804bfc6a
commit ba35696055
21 changed files with 774 additions and 961 deletions

View File

@@ -5,14 +5,12 @@ Convenience functions for interacting with the database.
from . import models
from .models.kits import reagenttypes_kittypes
from .models.submissions import reagents_submissions
# from .models.samples import WWSample
import pandas as pd
import sqlalchemy.exc
import sqlite3
import logging
from datetime import date, datetime, timedelta
from sqlalchemy import and_
import uuid
from sqlalchemy import JSON, event
from sqlalchemy.engine import Engine
import json
@@ -22,6 +20,7 @@ import yaml
from pathlib import Path
logger = logging.getLogger(f"submissions.{__name__}")
# The below _should_ allow automatic creation of foreign keys in the database
@@ -111,12 +110,12 @@ def construct_submission_info(ctx:dict, info_dict:dict) -> models.BasicSubmissio
# convert submission type into model name
query = info_dict['submission_type'].replace(" ", "")
# Ensure an rsl plate number exists for the plate
# if info_dict["rsl_plate_num"] == 'nan' or info_dict["rsl_plate_num"] == None or not check_not_nan(info_dict["rsl_plate_num"]):
if not check_regex_match("^RSL", info_dict["rsl_plate_num"]):
instance = None
msg = "A proper RSL plate number is required."
return instance, {'code': 2, 'message': "A proper RSL plate number is required."}
else:
# enforce conventions on the rsl plate number from the form
info_dict['rsl_plate_num'] = RSLNamer(info_dict["rsl_plate_num"]).parsed_name
# check database for existing object
instance = ctx['database_session'].query(models.BasicSubmission).filter(models.BasicSubmission.rsl_plate_num==info_dict['rsl_plate_num']).first()
@@ -160,10 +159,11 @@ def construct_submission_info(ctx:dict, info_dict:dict) -> models.BasicSubmissio
case "submitter_plate_num":
# Because of unique constraint, there will be problems with
# multiple submissions named 'None', so...
# Should be deprecated with use of pydantic validator
logger.debug(f"Submitter plate id: {info_dict[item]}")
if info_dict[item] == None or info_dict[item] == "None" or info_dict[item] == "":
logger.debug(f"Got None as a submitter plate number, inserting random string to preserve database unique constraint.")
info_dict[item] = uuid.uuid4().hex.upper()
# if info_dict[item] == None or info_dict[item] == "None" or info_dict[item] == "":
# logger.debug(f"Got None as a submitter plate number, inserting random string to preserve database unique constraint.")
# info_dict[item] = uuid.uuid4().hex.upper()
field_value = info_dict[item]
case _:
field_value = info_dict[item]
@@ -233,20 +233,6 @@ def construct_reagent(ctx:dict, info_dict:dict) -> models.Reagent:
# pass
return reagent
# def lookup_reagent(ctx:dict, reagent_lot:str) -> models.Reagent:
# """
# Query db for reagent based on lot number
# Args:
# ctx (dict): settings passed down from gui
# reagent_lot (str): lot number to query
# Returns:
# models.Reagent: looked up reagent
# """
# lookedup = ctx['database_session'].query(models.Reagent).filter(models.Reagent.lot==reagent_lot).first()
# return lookedup
def get_all_reagenttype_names(ctx:dict) -> list[str]:
"""
Lookup all reagent types and get names
@@ -276,7 +262,7 @@ def lookup_reagenttype_by_name(ctx:dict, rt_name:str) -> models.ReagentType:
logger.debug(f"Found ReagentType: {lookedup}")
return lookedup
def lookup_kittype_by_use(ctx:dict, used_by:str) -> list[models.KitType]:
def lookup_kittype_by_use(ctx:dict, used_by:str|None=None) -> list[models.KitType]:
"""
Look up kits by the sample type they are used for
@@ -287,7 +273,10 @@ def lookup_kittype_by_use(ctx:dict, used_by:str) -> list[models.KitType]:
Returns:
list[models.KitType]: list of kittypes that have that sample type in their uses
"""
return ctx['database_session'].query(models.KitType).filter(models.KitType.used_for.contains(used_by)).all()
if used_by != None:
return ctx['database_session'].query(models.KitType).filter(models.KitType.used_for.contains(used_by)).all()
else:
return ctx['database_session'].query(models.KitType).all()
def lookup_kittype_by_name(ctx:dict, name:str) -> models.KitType:
"""
@@ -872,19 +861,34 @@ def platemap_plate(submission:models.BasicSubmission) -> list:
# image = make_plate_map(plate_dicto)
return plate_dicto
def lookup_reagent(ctx:dict, reagent_lot:str|None=None, type_name:str|None=None) -> models.Reagent:
def lookup_reagent(ctx:dict, reagent_lot:str, type_name:str|None=None) -> models.Reagent:
"""
Query db for reagent based on lot number
Query db for reagent based on lot number, with optional reagent type to enforce
Args:
ctx (dict): settings passed down from gui
reagent_lot (str): lot number to query
type_name (str | None, optional): name of reagent type. Defaults to None.
Returns:
models.Reagent: looked up reagent
"""
if reagent_lot != None and type_name != None:
return ctx['database_session'].query(models.Reagent).join(models.Reagent.type, aliased=True).filter(models.ReagentType.name==type_name).filter(models.Reagent.lot==reagent_lot).all()
return ctx['database_session'].query(models.Reagent).join(models.Reagent.type, aliased=True).filter(models.ReagentType.name==type_name).filter(models.Reagent.lot==reagent_lot).first()
elif type_name == None:
return ctx['database_session'].query(models.Reagent).filter(models.Reagent.lot==reagent_lot).first()
return ctx['database_session'].query(models.Reagent).filter(models.Reagent.lot==reagent_lot).first()
def lookup_last_used_reagenttype_lot(ctx:dict, type_name:str) -> models.Reagent|None:
    """
    Look up the last used reagent of the reagent type

    Args:
        ctx (dict): Settings passed down from gui
        type_name (str): Name of reagent type

    Returns:
        models.Reagent|None: Reagent object with last used lot, or None if no
            reagent type with the given name exists.
    """
    rt = ctx['database_session'].query(models.ReagentType).filter(models.ReagentType.name==type_name).first()
    logger.debug(f"Reagent type looked up for {type_name}: {rt.__str__()}")
    # .first() returns None when no ReagentType matches the name; return None
    # rather than failing with AttributeError on rt.last_used.
    if rt is None:
        return None
    return lookup_reagent(ctx=ctx, reagent_lot=rt.last_used, type_name=type_name)

View File

@@ -55,11 +55,8 @@ class ReagentType(Base):
instances = relationship("Reagent", back_populates="type") #: concrete instances of this reagent type
eol_ext = Column(Interval()) #: extension of life interval
required = Column(INTEGER, server_default="1") #: sqlite boolean to determine if reagent type is essential for the kit
# __table_args__ = (
# CheckConstraint(required >= 0, name='check_required_positive'),
# CheckConstraint(required < 2, name='check_required_less_2'),
# {})
last_used = Column(String(32)) #: last used lot number of this type of reagent
@validates('required')
def validate_age(self, key, value):
if not 0 <= value < 2:
@@ -125,6 +122,13 @@ class Reagent(Base):
"expiry": place_holder.strftime("%Y-%m-%d")
}
def to_reagent_dict(self) -> dict:
    """
    Gather the basic details of this reagent into a dictionary.

    Returns:
        dict: reagent type name ("type"), lot number ("lot") and expiry date
            formatted as YYYY-MM-DD ("expiry"). "type" and "expiry" are None
            when the corresponding attribute is not set on this reagent.
    """
    # Guard against an unset type relationship or expiry so that building the
    # dictionary never raises AttributeError on None.
    type_name = self.type.name if self.type is not None else None
    expiry = self.expiry.strftime("%Y-%m-%d") if self.expiry is not None else None
    return {
        "type": type_name,
        "lot": self.lot,
        "expiry": expiry
    }
class Discount(Base):
"""

View File

@@ -6,6 +6,7 @@ from sqlalchemy import Column, String, TIMESTAMP, INTEGER, ForeignKey, FLOAT, BO
from sqlalchemy.orm import relationship
import logging
logger = logging.getLogger(f"submissions.{__name__}")
@@ -22,7 +23,7 @@ class WWSample(Base):
rsl_plate = relationship("Wastewater", back_populates="samples") #: relationship to parent plate
rsl_plate_id = Column(INTEGER, ForeignKey("_submissions.id", ondelete="SET NULL", name="fk_WWS_submission_id"))
collection_date = Column(TIMESTAMP) #: Date submission received
well_number = Column(String(8)) #: location on 24 well plate
well_number = Column(String(8)) #: location on 96 well plate
# The following are fields from the sample tracking excel sheet Ruth put together.
# I have no idea when they will be implemented or how.
testing_type = Column(String(64))
@@ -36,7 +37,7 @@ class WWSample(Base):
ww_seq_run_id = Column(String(64))
sample_type = Column(String(8))
pcr_results = Column(JSON)
elution_well = Column(String(8)) #: location on 96 well plate
well_24 = Column(String(8)) #: location on 24 well plate
artic_rsl_plate = relationship("WastewaterArtic", back_populates="samples")
artic_well_number = Column(String(8))
@@ -57,10 +58,6 @@ class WWSample(Base):
Returns:
dict: well location and id NOTE: keys must sync with BCSample to_sub_dict below
"""
# well_col = self.well_number[1:]
# well_row = self.well_number[0]
# if well_col > 4:
# well
if self.ct_n1 != None and self.ct_n2 != None:
# logger.debug(f"Using well info in name.")
name = f"{self.ww_sample_full_id}\n\t- ct N1: {'{:.2f}'.format(self.ct_n1)} ({self.n1_status})\n\t- ct N2: {'{:.2f}'.format(self.ct_n2)} ({self.n2_status})"
@@ -87,8 +84,8 @@ class WWSample(Base):
except TypeError as e:
logger.error(f"Couldn't check positives for {self.rsl_number}. Looks like there isn't PCR data.")
return None
well_row = row_dict[self.elution_well[0]]
well_col = self.elution_well[1:]
well_row = row_dict[self.well_number[0]]
well_col = self.well_number[1:]
# if positive:
# try:
# # The first character of the elution well is the row

View File

@@ -5,7 +5,6 @@ import math
from . import Base
from sqlalchemy import Column, String, TIMESTAMP, INTEGER, ForeignKey, Table, JSON, FLOAT
from sqlalchemy.orm import relationship
from datetime import datetime as dt
import logging
import json
from json.decoder import JSONDecodeError
@@ -164,7 +163,8 @@ class BasicSubmission(Base):
def calculate_base_cost(self):
try:
cols_count_96 = ceil(int(self.sample_count) / 8)
# cols_count_96 = ceil(int(self.sample_count) / 8)
cols_count_96 = self.calculate_column_count()
except Exception as e:
logger.error(f"Column count error: {e}")
# cols_count_24 = ceil(int(self.sample_count) / 3)
@@ -173,6 +173,11 @@ class BasicSubmission(Base):
except Exception as e:
logger.error(f"Calculation error: {e}")
def calculate_column_count(self) -> int:
    """
    Calculate the number of plate columns used by this submission.

    Returns:
        int: highest column number occupied by any of this submission's samples

    Raises:
        ValueError: if the submission has no samples, or a well number does not
            end in a numeric column
    """
    # The first character of a well number is the row letter and the remainder
    # is the column, so slice off the row instead of taking the last two
    # characters: this handles both padded ("A01") and unpadded ("A1") wells.
    columns = [int(sample.well_number[1:]) for sample in self.samples]
    logger.debug(f"Here are the columns for {self.rsl_plate_num}: {columns}")
    if not columns:
        # max() of an empty list raises an opaque ValueError; be explicit instead.
        raise ValueError(f"No samples found for {self.rsl_plate_num}, cannot calculate column count.")
    return max(columns)
# Below are the custom submission types
class BacterialCulture(BasicSubmission):

View File

@@ -4,47 +4,3 @@ Contains pandas convenience functions for interacting with excel workbooks
from .reports import *
from .parser import *
# from pandas import DataFrame
# import re
# def get_unique_values_in_df_column(df: DataFrame, column_name: str) -> list:
# """
# get all unique values in a dataframe column by name
# Args:
# df (DataFrame): input dataframe
# column_name (str): name of column of interest
# Returns:
# list: sorted list of unique values
# """
# return sorted(df[column_name].unique())
# def drop_reruns_from_df(ctx:dict, df: DataFrame) -> DataFrame:
# """
# Removes semi-duplicates from dataframe after finding sequencing repeats.
# Args:
# settings (dict): settings passed from gui
# df (DataFrame): initial dataframe
# Returns:
# DataFrame: dataframe with originals removed in favour of repeats.
# """
# sample_names = get_unique_values_in_df_column(df, column_name="name")
# if 'rerun_regex' in ctx:
# # logger.debug(f"Compiling regex from: {settings['rerun_regex']}")
# rerun_regex = re.compile(fr"{ctx['rerun_regex']}")
# for sample in sample_names:
# # logger.debug(f'Running search on {sample}')
# if rerun_regex.search(sample):
# # logger.debug(f'Match on {sample}')
# first_run = re.sub(rerun_regex, "", sample)
# # logger.debug(f"First run: {first_run}")
# df = df.drop(df[df.name == first_run].index)
# return df
# else:
# return None

View File

@@ -8,14 +8,14 @@ import pandas as pd
from pathlib import Path
from backend.db.models import WWSample, BCSample
from backend.db import lookup_ww_sample_by_ww_sample_num
from backend.pydant import PydSubmission
from backend.pydant import PydSubmission, PydReagent
import logging
from collections import OrderedDict
import re
import numpy as np
from datetime import date, datetime
import uuid
from tools import check_not_nan, RSLNamer, massage_common_reagents
from tools import check_not_nan, RSLNamer, massage_common_reagents, convert_nans_to_nones
logger = logging.getLogger(f"submissions.{__name__}")
@@ -26,31 +26,29 @@ class SheetParser(object):
def __init__(self, ctx:dict, filepath:Path|None = None):
"""
Args:
ctx (dict): Settings passed down from gui
filepath (Path | None, optional): file path to excel sheet. Defaults to None.
"""
"""
self.ctx = ctx
logger.debug(f"Parsing {filepath.__str__()}")
# set attributes based on kwargs from gui ctx
# for kwarg in kwargs:
# setattr(self, f"_{kwarg}", kwargs[kwarg])
# self.__dict__.update(kwargs)
if filepath == None:
logger.error(f"No filepath given.")
self.xl = None
else:
self.filepath = filepath
# Open excel file
try:
self.xl = pd.ExcelFile(filepath.__str__())
except ValueError as e:
logger.error(f"Incorrect value: {e}")
self.xl = None
# TODO: replace OrderedDict with pydantic BaseModel
self.sub = OrderedDict()
# make decision about type of sample we have
self.sub['submission_type'] = self.type_decider()
# select proper parser based on sample type
parse_sub = getattr(self, f"parse_{self.sub['submission_type'].lower()}")
parse_sub()
# self.calculate_column_count()
def type_decider(self) -> str:
"""
@@ -65,7 +63,7 @@ class SheetParser(object):
return categories[0].replace(" ", "_")
else:
# This code is going to be deprecated once there is full adoption of the client sheets
# with updated metadata
# with updated metadata... but how will it work for Artic?
try:
for type in self.ctx['submission_types']:
# This gets the *first* submission type that matches the sheet names in the workbook
@@ -76,7 +74,6 @@ class SheetParser(object):
logger.warning(f"We were unable to parse the submission type due to: {e}")
return "Unknown"
def parse_unknown(self) -> None:
"""
Dummy function to handle unknown excel structures
@@ -84,7 +81,6 @@ class SheetParser(object):
logger.error(f"Unknown excel workbook structure. Cannot parse.")
self.sub = None
def parse_generic(self, sheet_name:str) -> pd.DataFrame:
"""
Pulls information common to all wastewater/bacterial culture types and passes on dataframe
@@ -98,14 +94,17 @@ class SheetParser(object):
# self.xl is a pd.ExcelFile so we need to parse it into a df
submission_info = self.xl.parse(sheet_name=sheet_name, dtype=object)
self.sub['submitter_plate_num'] = submission_info.iloc[0][1]
self.sub['rsl_plate_num'] = RSLNamer(submission_info.iloc[10][1]).parsed_name
if check_not_nan(submission_info.iloc[10][1]):
self.sub['rsl_plate_num'] = RSLNamer(submission_info.iloc[10][1]).parsed_name
else:
# self.sub['rsl_plate_num'] = RSLNamer(self.filepath).parsed_name
self.sub['rsl_plate_num'] = None
self.sub['submitted_date'] = submission_info.iloc[1][1]
self.sub['submitting_lab'] = submission_info.iloc[0][3]
self.sub['sample_count'] = submission_info.iloc[2][3]
self.sub['extraction_kit'] = submission_info.iloc[3][3]
return submission_info
def parse_bacterial_culture(self) -> None:
"""
pulls info specific to bacterial culture sample type
@@ -121,22 +120,27 @@ class SheetParser(object):
for ii, row in df.iterrows():
# skip positive control
logger.debug(f"Running reagent parse for {row[1]} with type {type(row[1])} and value: {row[2]} with type {type(row[2])}")
if not isinstance(row[2], float) and check_not_nan(row[1]):
# if the lot number isn't a float and the reagent type isn't blank
# if not isinstance(row[2], float) and check_not_nan(row[1]):
if check_not_nan(row[1]):
# must be prefixed with 'lot_' to be recognized by gui
# This is no longer true since reagents are loaded into their own key in dictionary
try:
reagent_type = row[1].replace(' ', '_').lower().strip()
except AttributeError:
pass
# If there is a double slash in the type field, such as ethanol/iso
# Use the cell to the left for reagent type.
if reagent_type == "//":
if check_not_nan(row[2]):
reagent_type = row[0].replace(' ', '_').lower().strip()
else:
continue
try:
output_var = row[2].upper()
output_var = convert_nans_to_nones(str(row[2]).upper())
except AttributeError:
logger.debug(f"Couldn't upperize {row[2]}, must be a number")
output_var = row[2]
output_var = convert_nans_to_nones(str(row[2]))
logger.debug(f"Output variable is {output_var}")
logger.debug(f"Expiry date for imported reagent: {row[3]}")
if check_not_nan(row[3]):
@@ -149,22 +153,17 @@ class SheetParser(object):
expiry = datetime.fromordinal(datetime(1900, 1, 1).toordinal() + row[3] - 2)
else:
logger.debug(f"Date: {row[3]}")
expiry = date.today()
# expiry = date.today()
expiry = date(year=1970, month=1, day=1)
# self.sub[f"lot_{reagent_type}"] = {'lot':output_var, 'exp':expiry}
self.sub['reagents'].append(dict(type=reagent_type, lot=output_var, exp=expiry))
# self.sub['reagents'].append(dict(type=reagent_type, lot=output_var, exp=expiry))
self.sub['reagents'].append(PydReagent(type=reagent_type, lot=output_var, exp=expiry))
submission_info = self.parse_generic("Sample List")
# iloc is [row][column] and the first row is set as header row so -2
tech = str(submission_info.iloc[11][1])
# moved to pydantic model
# if tech == "nan":
# tech = "Unknown"
# elif len(tech.split(",")) > 1:
# tech_reg = re.compile(r"[A-Z]{2}")
# tech = ", ".join(tech_reg.findall(tech))
self.sub['technician'] = tech
self.sub['technician'] = str(submission_info.iloc[11][1])
# reagents
# must be prefixed with 'lot_' to be recognized by gui
# TODO: find a more adaptable way to read reagents.
# This is no longer true since the creation of self.sub['reagents']
self.sub['reagents'] = []
reagent_range = submission_info.iloc[1:14, 4:8]
logger.debug(reagent_range)
@@ -175,7 +174,6 @@ class SheetParser(object):
logger.debug(f"Parser result: {self.sub}")
self.sample_result, self.sub['samples'] = sample_parse()
def parse_wastewater(self) -> None:
"""
pulls info specific to wastewater sample type
@@ -196,17 +194,18 @@ class SheetParser(object):
"""
# iterate through sub-df rows
for ii, row in df.iterrows():
if not isinstance(row[5], float) and check_not_nan(row[5]):
logger.debug(f"Parsing this row for reagents: {row}")
if check_not_nan(row[5]):
# must be prefixed with 'lot_' to be recognized by gui
# regex below will remove 80% from 80% ethanol in the Wastewater kit.
output_key = re.sub(r"^\d{1,3}%\s?", "", row[0].lower().strip().replace(' ', '_'))
output_key = output_key.strip("_")
# output_var is the lot number
try:
output_var = row[5].upper()
output_var = convert_nans_to_nones(str(row[5].upper()))
except AttributeError:
logger.debug(f"Couldn't upperize {row[5]}, must be a number")
output_var = row[5]
output_var = convert_nans_to_nones(str(row[5]))
if check_not_nan(row[7]):
try:
expiry = row[7].date()
@@ -214,8 +213,12 @@ class SheetParser(object):
expiry = date.today()
else:
expiry = date.today()
logger.debug(f"Expiry date for {output_key}: {expiry} of type {type(expiry)}")
# self.sub[f"lot_{output_key}"] = {'lot':output_var, 'exp':expiry}
self.sub['reagents'].append(dict(type=output_key, lot=output_var, exp=expiry))
# self.sub['reagents'].append(dict(type=output_key, lot=output_var, exp=expiry))
reagent = PydReagent(type=output_key, lot=output_var, exp=expiry)
logger.debug(f"Here is the created reagent: {reagent}")
self.sub['reagents'].append(reagent)
# parse submission sheet
submission_info = self.parse_generic("WW Submissions (ENTER HERE)")
# parse enrichment sheet
@@ -230,7 +233,7 @@ class SheetParser(object):
qprc_info = self.xl.parse("qPCR Worksheet", dtype=object)
# set qpcr reagent range
pcr_reagent_range = qprc_info.iloc[0:5, 9:20]
# compile technician info
# compile technician info from all sheets
self.sub['technician'] = f"Enr: {enrichment_info.columns[2]}, Ext: {extraction_info.columns[2]}, PCR: {qprc_info.columns[2]}"
self.sub['reagents'] = []
parse_reagents(enr_reagent_range)
@@ -242,7 +245,6 @@ class SheetParser(object):
self.sample_result, self.sub['samples'] = sample_parse()
self.sub['csv'] = self.xl.parse("Copy to import file", dtype=object)
def parse_wastewater_artic(self) -> None:
"""
pulls info specific to wastewater_artic submission type
@@ -258,10 +260,10 @@ class SheetParser(object):
output_key = output_key.strip("_")
output_key = massage_common_reagents(output_key)
try:
output_var = row[1].upper()
output_var = convert_nans_to_nones(str(row[1].upper()))
except AttributeError:
logger.debug(f"Couldn't upperize {row[1]}, must be a number")
output_var = row[1]
output_var = convert_nans_to_nones(str(row[1]))
logger.debug(f"Output variable is {output_var}")
logger.debug(f"Expiry date for imported reagent: {row[2]}")
if check_not_nan(row[2]):
@@ -277,7 +279,8 @@ class SheetParser(object):
else:
logger.debug(f"Date: {row[2]}")
expiry = date.today()
self.sub['reagents'].append(dict(type=output_key, lot=output_var, exp=expiry))
# self.sub['reagents'].append(dict(type=output_key, lot=output_var, exp=expiry))
self.sub['reagents'].append(PydReagent(type=output_key, lot=output_var, exp=expiry))
else:
continue
def massage_samples(df:pd.DataFrame) -> pd.DataFrame:
@@ -317,20 +320,19 @@ class SheetParser(object):
sample_parse = getattr(sample_parser, f"parse_{self.sub['submission_type'].lower()}_samples")
self.sample_result, self.sub['samples'] = sample_parse()
def to_pydantic(self) -> PydSubmission:
"""
Generates a pydantic model of scraped data for validation
Returns:
PydSubmission: output pydantic model
"""
psm = PydSubmission(filepath=self.filepath, **self.sub)
"""
logger.debug(f"Submission dictionary coming into 'to_pydantic':\n{pprint.pformat(self.sub)}")
psm = PydSubmission(ctx=self.ctx, filepath=self.filepath, **self.sub)
delattr(psm, "filepath")
return psm
class SampleParser(object):
"""
object to pull data for samples in excel sheet and construct individual sample objects
@@ -385,7 +387,7 @@ class SampleParser(object):
list[WWSample]: list of sample objects
"""
def search_df_for_sample(sample_rsl:str):
# logger.debug(f"Attempting to find sample {sample_rsl} in \n {self.elution_map}")
logger.debug(f"Attempting to find sample {sample_rsl} in \n {self.elution_map}")
well = self.elution_map.where(self.elution_map==sample_rsl)
# logger.debug(f"Well: {well}")
well = well.dropna(how='all').dropna(axis=1, how="all")
@@ -394,9 +396,9 @@ class SampleParser(object):
logger.debug(f"well {sample_rsl} post processing: {well.size}: {type(well)}, {well.index[0]}, {well.columns[0]}")
self.elution_map.at[well.index[0], well.columns[0]] = np.nan
try:
col = str(int(well.columns[0]))
col = str(int(well.columns[0])).zfill(2)
except ValueError:
col = str(well.columns[0])
col = str(well.columns[0]).zfill(2)
except TypeError as e:
logger.error(f"Problem parsing out column number for {well}:\n {e}")
return f"{well.index[0]}{col}"
@@ -424,10 +426,12 @@ class SampleParser(object):
# new.testing_type = sample['Unnamed: 6']
# new.site_status = sample['Unnamed: 7']
new.notes = str(sample['Unnamed: 6']) # previously Unnamed: 8
new.well_number = sample['Unnamed: 1']
new.well_24 = sample['Unnamed: 1']
elu_well = search_df_for_sample(new.rsl_number)
if elu_well != None:
new.elution_well = elu_well
row = elu_well[0]
col = elu_well[1:].zfill(2)
new.well_number = f"{row}{col}"
else:
# try:
return_val += f"{new.rsl_number}\n"
@@ -455,12 +459,14 @@ class SampleParser(object):
missed_samples.append(sample['sample_name'])
continue
logger.debug(f"Got instance: {instance.ww_sample_full_id}")
if sample['well'] != None:
row = sample['well'][0]
col = sample['well'][1:].zfill(2)
sample['well'] = f"{row}{col}"
instance.artic_well_number = sample['well']
new_list.append(instance)
missed_str = "\n\t".join(missed_samples)
return f"Could not find matches for the following samples:\n\t {missed_str}", new_list
class PCRParser(object):
@@ -590,5 +596,5 @@ class PCRParser(object):
self.samples.append(sample_obj)

View File

@@ -3,24 +3,14 @@ Contains functions for generating summary reports
'''
from pandas import DataFrame
import logging
from jinja2 import Environment, FileSystemLoader
from datetime import date, timedelta
import sys
from pathlib import Path
import re
from tools import check_if_app
from typing import Tuple
from configure import jinja_template_loading
logger = logging.getLogger(f"submissions.{__name__}")
# set path of templates depending on pyinstaller/raw python
# if getattr(sys, 'frozen', False):
if check_if_app():
loader_path = Path(sys._MEIPASS).joinpath("files", "templates")
else:
loader_path = Path(__file__).parents[2].joinpath('templates').absolute().__str__()
loader = FileSystemLoader(loader_path)
env = Environment(loader=loader)
env = jinja_template_loading()
logger = logging.getLogger(f"submissions.{__name__}")
@@ -115,7 +105,6 @@ def convert_data_list_to_df(ctx:dict, input:list[dict], subtype:str|None=None) -
# logger.debug(df)
# move date of sample submitted on same date as previous ahead one.
df = displace_date(df)
# df.sort_values('submitted_date').to_excel("controls.xlsx", engine="openpyxl")
# ad hoc method to make data labels more accurate.
df = df_column_renamer(df=df)
return df
@@ -156,46 +145,33 @@ def displace_date(df:DataFrame) -> DataFrame:
dict_list = [dict(name=item, date=df[df.name == item].iloc[0]['submitted_date']) for item in sorted(df['name'].unique())]
previous_dates = []
for _, item in enumerate(dict_list):
# try:
# # check = item['date'] == dict_list[ii-1]['date']
# check = item['date'] in previous_dates
# except IndexError:
# check = False
# if check:
# # occurences = previous_dates.count(item['date'])
# logger.debug(f"We found one! Increment date!\n\t{item['date']} to {item['date'] + timedelta(days=1)}")
# # get df locations where name == item name
# mask = df['name'] == item['name']
# # increment date in dataframe
# df.loc[mask, 'submitted_date'] = df.loc[mask, 'submitted_date'].apply(lambda x: x + timedelta(days=1))
# outdate = item['date'] + timedelta(days=1)
# # previous_dates.append(item['date'] + timedelta(days=1))
# else:
# outdate = item['date']
# previous_dates.append(outdate)
# logger.debug(f"\n\tCurrent date: {outdate}\n\tPrevious dates:{previous_dates}")
# logger.debug(type(item))
df, previous_dates = check_date(df=df, item=item, previous_dates=previous_dates)
return df
def check_date(df:DataFrame, item:dict, previous_dates:list) -> Tuple[DataFrame, list]:
"""
Checks if an item's date is already present in df and adjusts df accordingly
Args:
df (DataFrame): input dataframe
item (dict): control for checking
previous_dates (list): list of dates found in previous controls
Returns:
Tuple[DataFrame, list]: Output dataframe and appended list of previous dates
"""
try:
# check = item['date'] == dict_list[ii-1]['date']
check = item['date'] in previous_dates
except IndexError:
check = False
previous_dates.append(item['date'])
if check:
# occurences = previous_dates.count(item['date'])
logger.debug(f"We found one! Increment date!\n\t{item['date']} to {item['date'] + timedelta(days=1)}")
# get df locations where name == item name
mask = df['name'] == item['name']
# increment date in dataframe
df.loc[mask, 'submitted_date'] = df.loc[mask, 'submitted_date'].apply(lambda x: x + timedelta(days=1))
item['date'] += timedelta(days=1)
# previous_dates.append(item['date'] + timedelta(days=1))
passed = False
else:
passed = True
@@ -249,8 +225,7 @@ def drop_reruns_from_df(ctx:dict, df: DataFrame) -> DataFrame:
# logger.debug(f"First run: {first_run}")
df = df.drop(df[df.name == first_run].index)
return df
# else:
# return df
def make_hitpicks(input:list) -> DataFrame:

View File

@@ -1,65 +1,153 @@
import uuid
from pydantic import BaseModel, validator
from datetime import date
from pydantic import BaseModel, field_validator, model_validator, Extra
from datetime import date, datetime
from typing import List, Any
from tools import RSLNamer
from pathlib import Path
import re
import logging
from tools import check_not_nan, convert_nans_to_nones
import numpy as np
logger = logging.getLogger(f"submissions.{__name__}")
class PydSubmission(BaseModel):
class PydReagent(BaseModel):
    """
    Pydantic model of a single reagent scraped from a submission sheet.
    """
    type: str|None  # reagent type name; set to None for undesired types
    lot: str|None   # lot number, always coerced to a string when given
    exp: date|None  # expiry date

    @field_validator("type", mode='before')
    @classmethod
    def remove_undesired_types(cls, value):
        """
        Blank out reagent types that should not be kept.

        Args:
            value: raw reagent type

        Returns:
            None if the type is "atcc", otherwise the value unchanged
        """
        if value == "atcc":
            return None
        return value

    @field_validator("lot", mode='before')
    @classmethod
    def enforce_lot_string(cls, value):
        """
        Coerce the lot number to a string before validation.

        Args:
            value: raw lot number

        Returns:
            None if no lot was given, otherwise the result of passing the
            stringified lot through convert_nans_to_nones
        """
        if value is not None:
            return convert_nans_to_nones(str(value))
        return value

    @field_validator("exp", mode="before")
    @classmethod
    def enforce_date(cls, value):
        """
        Reject float expiry values before date validation.

        Args:
            value: raw expiry value

        Returns:
            the value unchanged when it is not a float

        Raises:
            ValueError: if the expiry is a float (this includes NaN)
        """
        # NaN is itself a float, so the isinstance check already rejects it;
        # an equality test against np.nan is always False and was dropped.
        if isinstance(value, float):
            raise ValueError(f"Date cannot be a float: {value}")
        return value
class PydSubmission(BaseModel, extra=Extra.allow):
ctx: dict
filepath: Path
submission_type: str
submitter_plate_num: str|None
rsl_plate_num: str
rsl_plate_num: str|dict|None
submitted_date: date
submitting_lab: str
submitting_lab: str|None
sample_count: int
extraction_kit: str
technician: str
reagents: List[dict]
extraction_kit: str|dict|None
technician: str|None
reagents: List[PydReagent] = []
samples: List[Any]
@validator("submitted_date", pre=True)
# missing_fields: List[str] = []
@field_validator("submitted_date", mode="before")
@classmethod
def strip_datetime_string(cls, value):
if isinstance(value, datetime):
return value
if isinstance(value, date):
return value
return re.sub(r"_\d$", "", value)
@validator("submitter_plate_num")
@field_validator("submitter_plate_num")
@classmethod
def enforce_with_uuid(cls, value):
if value == None or value == "" or value == "None":
return uuid.uuid4().hex.upper()
@validator("rsl_plate_num", pre=True)
@classmethod
def rsl_from_file(cls, value, values):
if value == None:
logger.debug(f"Pydant values:\n{values}")
return RSLNamer(values['filepath'].__str__()).parsed_name
else:
return value
@validator("technician")
@field_validator("submitting_lab", mode="before")
@classmethod
def transform_nan(cls, value):
return convert_nans_to_nones(value)
@field_validator("rsl_plate_num", mode='before')
@classmethod
def rsl_from_file(cls, value, values):
logger.debug(f"RSL-plate initial value: {value}")
if check_not_nan(value):
if isinstance(value, str):
return dict(value=value, parsed=True)
else:
return value
else:
logger.debug(f"Pydant values:{type(values)}\n{values}")
return dict(value=RSLNamer(values.data['filepath'].__str__()).parsed_name, parsed=False)
@field_validator("technician")
@classmethod
def enforce_tech(cls, value):
if value == "nan" or value == "None":
value = "Unknown"
# elif len(value.split(",")) > 1:
# tech_reg = re.compile(r"\b[A-Z]{2}\b")
# value = ", ".join(tech_reg.findall(value))
return value
@validator("reagents")
@field_validator("reagents")
@classmethod
def remove_atcc(cls, value):
return_val = []
for reagent in value:
match reagent['type']:
case 'atcc':
continue
case _:
return_val.append(reagent)
logger.debug(f"Pydantic reagent: {reagent}")
# match reagent.type.lower():
# case 'atcc':
# continue
# case _:
# return_val.append(reagent)
if reagent.type == None:
continue
else:
return_val.append(reagent)
return return_val
@field_validator("sample_count", mode='before')
@classmethod
def enforce_sample_count(cls, value):
if check_not_nan(value):
return int(value)
else:
# raise ValueError(f"{value} could not be used to create an integer.")
return convert_nans_to_nones(value)
@field_validator("extraction_kit", mode='before')
@classmethod
def get_kit_if_none(cls, value, values):
from frontend.custom_widgets.pop_ups import KitSelector
if check_not_nan(value):
return dict(value=value, parsed=True)
else:
# logger.debug(values.data)
dlg = KitSelector(ctx=values.data['ctx'], title="Kit Needed", message="At minimum a kit is needed. Please select one.")
if dlg.exec():
return dict(value=dlg.getValues(), parsed=False)
else:
raise ValueError("Extraction kit needed.")
# @model_validator(mode="after")
# def ensure_kit(cls, values):
# logger.debug(f"Model values: {values}")
# missing_fields = [k for k,v in values if v == None]
# if len(missing_fields) > 0:
# logger.debug(f"Missing fields: {missing_fields}")
# values['missing_fields'] = missing_fields
# return values