Added autofilling of Excel forms. Improved pydantic validation.
@@ -5,14 +5,12 @@ Convenience functions for interacting with the database.
from . import models
from .models.kits import reagenttypes_kittypes
from .models.submissions import reagents_submissions
# from .models.samples import WWSample
import pandas as pd
import sqlalchemy.exc
import sqlite3
import logging
from datetime import date, datetime, timedelta
from sqlalchemy import and_
import uuid
from sqlalchemy import JSON, event
from sqlalchemy.engine import Engine
import json
@@ -22,6 +20,7 @@ import yaml
from pathlib import Path



logger = logging.getLogger(f"submissions.{__name__}")

# The below _should_ allow automatic creation of foreign keys in the database
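The listener itself sits outside this hunk; a minimal sketch of the standard SQLAlchemy recipe the comment refers to, using the `event` and `Engine` imports above:

@event.listens_for(Engine, "connect")
def set_sqlite_pragma(dbapi_connection, connection_record):
    # Ask SQLite to enforce foreign keys on every new connection;
    # SQLite ships with enforcement switched off by default.
    cursor = dbapi_connection.cursor()
    cursor.execute("PRAGMA foreign_keys=ON")
    cursor.close()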
@@ -111,12 +110,12 @@ def construct_submission_info(ctx:dict, info_dict:dict) -> models.BasicSubmissio
    # convert submission type into model name
    query = info_dict['submission_type'].replace(" ", "")
    # Ensure an rsl plate number exists for the plate
    # if info_dict["rsl_plate_num"] == 'nan' or info_dict["rsl_plate_num"] == None or not check_not_nan(info_dict["rsl_plate_num"]):
    if not check_regex_match("^RSL", info_dict["rsl_plate_num"]):
        instance = None
        msg = "A proper RSL plate number is required."
        return instance, {'code': 2, 'message': "A proper RSL plate number is required."}
    else:
        # enforce conventions on the rsl plate number from the form
        info_dict['rsl_plate_num'] = RSLNamer(info_dict["rsl_plate_num"]).parsed_name
    # check database for existing object
    instance = ctx['database_session'].query(models.BasicSubmission).filter(models.BasicSubmission.rsl_plate_num==info_dict['rsl_plate_num']).first()
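check_regex_match comes from tools and is not shown in this diff; a plausible sketch of the behaviour the call above relies on, assuming it simply guards re.match against non-string input:

import re

def check_regex_match(pattern:str, check:str) -> bool:
    # Hypothetical sketch of tools.check_regex_match: True only when the
    # value is a string matching the pattern (here, names starting "RSL").
    try:
        return bool(re.match(pattern, check))
    except TypeError:
        # NaN/None plate numbers fail the check rather than raising
        return False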
@@ -160,10 +159,11 @@ def construct_submission_info(ctx:dict, info_dict:dict) -> models.BasicSubmissio
        case "submitter_plate_num":
            # Because of unique constraint, there will be problems with
            # multiple submissions named 'None', so...
            # Should be deprecated with use of pydantic validator
            logger.debug(f"Submitter plate id: {info_dict[item]}")
            if info_dict[item] == None or info_dict[item] == "None" or info_dict[item] == "":
                logger.debug(f"Got None as a submitter plate number, inserting random string to preserve database unique constraint.")
                info_dict[item] = uuid.uuid4().hex.upper()
            # if info_dict[item] == None or info_dict[item] == "None" or info_dict[item] == "":
            #     logger.debug(f"Got None as a submitter plate number, inserting random string to preserve database unique constraint.")
            #     info_dict[item] = uuid.uuid4().hex.upper()
            field_value = info_dict[item]
        case _:
            field_value = info_dict[item]
@@ -233,20 +233,6 @@ def construct_reagent(ctx:dict, info_dict:dict) -> models.Reagent:
        # pass
    return reagent

# def lookup_reagent(ctx:dict, reagent_lot:str) -> models.Reagent:
#     """
#     Query db for reagent based on lot number

#     Args:
#         ctx (dict): settings passed down from gui
#         reagent_lot (str): lot number to query

#     Returns:
#         models.Reagent: looked up reagent
#     """
#     lookedup = ctx['database_session'].query(models.Reagent).filter(models.Reagent.lot==reagent_lot).first()
#     return lookedup

def get_all_reagenttype_names(ctx:dict) -> list[str]:
    """
    Lookup all reagent types and get names
@@ -276,7 +262,7 @@ def lookup_reagenttype_by_name(ctx:dict, rt_name:str) -> models.ReagentType:
    logger.debug(f"Found ReagentType: {lookedup}")
    return lookedup

def lookup_kittype_by_use(ctx:dict, used_by:str) -> list[models.KitType]:
def lookup_kittype_by_use(ctx:dict, used_by:str|None=None) -> list[models.KitType]:
    """
    Lookup kits by a sample type it's used for

@@ -287,7 +273,10 @@ def lookup_kittype_by_use(ctx:dict, used_by:str) -> list[models.KitType]:
    Returns:
        list[models.KitType]: list of kittypes that have that sample type in their uses
    """
    return ctx['database_session'].query(models.KitType).filter(models.KitType.used_for.contains(used_by)).all()
    if used_by != None:
        return ctx['database_session'].query(models.KitType).filter(models.KitType.used_for.contains(used_by)).all()
    else:
        return ctx['database_session'].query(models.KitType).all()

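With used_by now optional, one helper covers both lookups; a usage sketch (argument value assumed):

ww_kits = lookup_kittype_by_use(ctx, used_by="Wastewater")  # filtered by use
all_kits = lookup_kittype_by_use(ctx)                       # every kit type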
def lookup_kittype_by_name(ctx:dict, name:str) -> models.KitType:
    """
@@ -872,19 +861,34 @@ def platemap_plate(submission:models.BasicSubmission) -> list:
    # image = make_plate_map(plate_dicto)
    return plate_dicto


def lookup_reagent(ctx:dict, reagent_lot:str|None=None, type_name:str|None=None) -> models.Reagent:
def lookup_reagent(ctx:dict, reagent_lot:str, type_name:str|None=None) -> models.Reagent:
    """
    Query db for reagent based on lot number
    Query db for reagent based on lot number, with optional reagent type to enforce

    Args:
        ctx (dict): settings passed down from gui
        reagent_lot (str): lot number to query
        type_name (str | None, optional): name of reagent type. Defaults to None.

    Returns:
        models.Reagent: looked up reagent
    """
    if reagent_lot != None and type_name != None:
        return ctx['database_session'].query(models.Reagent).join(models.Reagent.type, aliased=True).filter(models.ReagentType.name==type_name).filter(models.Reagent.lot==reagent_lot).all()
        return ctx['database_session'].query(models.Reagent).join(models.Reagent.type, aliased=True).filter(models.ReagentType.name==type_name).filter(models.Reagent.lot==reagent_lot).first()
    elif type_name == None:
        return ctx['database_session'].query(models.Reagent).filter(models.Reagent.lot==reagent_lot).first()
    return ctx['database_session'].query(models.Reagent).filter(models.Reagent.lot==reagent_lot).first()

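A usage sketch of the reworked lookup (lot and type names hypothetical); the aliased join is what lets the second filter constrain the reagent's type relationship rather than the reagent itself:

specific = lookup_reagent(ctx, reagent_lot="210456A", type_name="wash_buffer")
by_lot_only = lookup_reagent(ctx, reagent_lot="210456A")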
def lookup_last_used_reagenttype_lot(ctx:dict, type_name:str) -> models.Reagent:
    """
    Look up the last used reagent of the reagent type

    Args:
        ctx (dict): Settings passed down from gui
        type_name (str): Name of reagent type

    Returns:
        models.Reagent: Reagent object with last used lot.
    """
    rt = ctx['database_session'].query(models.ReagentType).filter(models.ReagentType.name==type_name).first()
    logger.debug(f"Reagent type looked up for {type_name}: {rt.__str__()}")
    return lookup_reagent(ctx=ctx, reagent_lot=rt.last_used, type_name=type_name)
@@ -55,11 +55,8 @@ class ReagentType(Base):
    instances = relationship("Reagent", back_populates="type") #: concrete instances of this reagent type
    eol_ext = Column(Interval()) #: extension of life interval
    required = Column(INTEGER, server_default="1") #: sqlite boolean to determine if reagent type is essential for the kit
    # __table_args__ = (
    #     CheckConstraint(required >= 0, name='check_required_positive'),
    #     CheckConstraint(required < 2, name='check_required_less_2'),
    #     {})

    last_used = Column(String(32)) #: last used lot number of this type of reagent

    @validates('required')
    def validate_age(self, key, value):
        if not 0 <= value < 2:
@@ -125,6 +122,13 @@ class Reagent(Base):
            "expiry": place_holder.strftime("%Y-%m-%d")
        }

    def to_reagent_dict(self) -> dict:
        return {
            "type": self.type.name,
            "lot": self.lot,
            "expiry": self.expiry.strftime("%Y-%m-%d")
        }


class Discount(Base):
    """

@@ -6,6 +6,7 @@ from sqlalchemy import Column, String, TIMESTAMP, INTEGER, ForeignKey, FLOAT, BO
from sqlalchemy.orm import relationship
import logging


logger = logging.getLogger(f"submissions.{__name__}")


@@ -22,7 +23,7 @@ class WWSample(Base):
    rsl_plate = relationship("Wastewater", back_populates="samples") #: relationship to parent plate
    rsl_plate_id = Column(INTEGER, ForeignKey("_submissions.id", ondelete="SET NULL", name="fk_WWS_submission_id"))
    collection_date = Column(TIMESTAMP) #: Date submission received
    well_number = Column(String(8)) #: location on 24 well plate
    well_number = Column(String(8)) #: location on 96 well plate
    # The following are fields from the sample tracking excel sheet Ruth put together.
    # I have no idea when they will be implemented or how.
    testing_type = Column(String(64))
@@ -36,7 +37,7 @@ class WWSample(Base):
    ww_seq_run_id = Column(String(64))
    sample_type = Column(String(8))
    pcr_results = Column(JSON)
    elution_well = Column(String(8)) #: location on 96 well plate
    well_24 = Column(String(8)) #: location on 24 well plate
    artic_rsl_plate = relationship("WastewaterArtic", back_populates="samples")
    artic_well_number = Column(String(8))

@@ -57,10 +58,6 @@ class WWSample(Base):
        Returns:
            dict: well location and id NOTE: keys must sync with BCSample to_sub_dict below
        """
        # well_col = self.well_number[1:]
        # well_row = self.well_number[0]
        # if well_col > 4:
        #     well
        if self.ct_n1 != None and self.ct_n2 != None:
            # logger.debug(f"Using well info in name.")
            name = f"{self.ww_sample_full_id}\n\t- ct N1: {'{:.2f}'.format(self.ct_n1)} ({self.n1_status})\n\t- ct N2: {'{:.2f}'.format(self.ct_n2)} ({self.n2_status})"
@@ -87,8 +84,8 @@ class WWSample(Base):
        except TypeError as e:
            logger.error(f"Couldn't check positives for {self.rsl_number}. Looks like there isn't PCR data.")
            return None
        well_row = row_dict[self.elution_well[0]]
        well_col = self.elution_well[1:]
        well_row = row_dict[self.well_number[0]]
        well_col = self.well_number[1:]
        # if positive:
        #     try:
        #         # The first character of the elution well is the row

@@ -5,7 +5,6 @@ import math
from . import Base
from sqlalchemy import Column, String, TIMESTAMP, INTEGER, ForeignKey, Table, JSON, FLOAT
from sqlalchemy.orm import relationship
from datetime import datetime as dt
import logging
import json
from json.decoder import JSONDecodeError
@@ -164,7 +163,8 @@ class BasicSubmission(Base):

    def calculate_base_cost(self):
        try:
            cols_count_96 = ceil(int(self.sample_count) / 8)
            # cols_count_96 = ceil(int(self.sample_count) / 8)
            cols_count_96 = self.calculate_column_count()
        except Exception as e:
            logger.error(f"Column count error: {e}")
        # cols_count_24 = ceil(int(self.sample_count) / 3)
@@ -173,6 +173,11 @@ class BasicSubmission(Base):
        except Exception as e:
            logger.error(f"Calculation error: {e}")

    def calculate_column_count(self):
        columns = [int(sample.well_number[-2:]) for sample in self.samples]
        logger.debug(f"Here are the columns for {self.rsl_plate_num}: {columns}")
        return max(columns)

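To illustrate what calculate_column_count changes (well numbers hypothetical): the base cost now follows the right-most occupied column rather than a count derived from sample_count:

wells = ["A01", "B07", "C12"]
columns = [int(w[-2:]) for w in wells]   # [1, 7, 12]
assert max(columns) == 12                # vs ceil(3 / 8) == 1 under the old math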
# Below are the custom submission types

class BacterialCulture(BasicSubmission):

@@ -4,47 +4,3 @@ Contains pandas convenience functions for interacting with excel workbooks

from .reports import *
from .parser import *

# from pandas import DataFrame
# import re


# def get_unique_values_in_df_column(df: DataFrame, column_name: str) -> list:
#     """
#     get all unique values in a dataframe column by name

#     Args:
#         df (DataFrame): input dataframe
#         column_name (str): name of column of interest

#     Returns:
#         list: sorted list of unique values
#     """
#     return sorted(df[column_name].unique())


# def drop_reruns_from_df(ctx:dict, df: DataFrame) -> DataFrame:
#     """
#     Removes semi-duplicates from dataframe after finding sequencing repeats.

#     Args:
#         settings (dict): settings passed from gui
#         df (DataFrame): initial dataframe

#     Returns:
#         DataFrame: dataframe with originals removed in favour of repeats.
#     """
#     sample_names = get_unique_values_in_df_column(df, column_name="name")
#     if 'rerun_regex' in ctx:
#         # logger.debug(f"Compiling regex from: {settings['rerun_regex']}")
#         rerun_regex = re.compile(fr"{ctx['rerun_regex']}")
#         for sample in sample_names:
#             # logger.debug(f'Running search on {sample}')
#             if rerun_regex.search(sample):
#                 # logger.debug(f'Match on {sample}')
#                 first_run = re.sub(rerun_regex, "", sample)
#                 # logger.debug(f"First run: {first_run}")
#                 df = df.drop(df[df.name == first_run].index)
#         return df
#     else:
#         return None

@@ -8,14 +8,14 @@ import pandas as pd
from pathlib import Path
from backend.db.models import WWSample, BCSample
from backend.db import lookup_ww_sample_by_ww_sample_num
from backend.pydant import PydSubmission
from backend.pydant import PydSubmission, PydReagent
import logging
from collections import OrderedDict
import re
import numpy as np
from datetime import date, datetime
import uuid
from tools import check_not_nan, RSLNamer, massage_common_reagents
from tools import check_not_nan, RSLNamer, massage_common_reagents, convert_nans_to_nones

logger = logging.getLogger(f"submissions.{__name__}")

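convert_nans_to_nones lives in tools and isn't shown in this diff; a hedged sketch of what the parser relies on it to do:

def convert_nans_to_nones(value):
    # Hypothetical sketch of tools.convert_nans_to_nones: pandas reads
    # empty cells as float('nan'), which str() turns into "nan"; both
    # collapse to None so downstream pydantic validators see a real null.
    if value is None or str(value).lower() == "nan":
        return None
    return value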
@@ -26,31 +26,29 @@ class SheetParser(object):
    def __init__(self, ctx:dict, filepath:Path|None = None):
        """
        Args:
            ctx (dict): Settings passed down from gui
            filepath (Path | None, optional): file path to excel sheet. Defaults to None.
        """
        """
        self.ctx = ctx
        logger.debug(f"Parsing {filepath.__str__()}")
        # set attributes based on kwargs from gui ctx
        # for kwarg in kwargs:
        #     setattr(self, f"_{kwarg}", kwargs[kwarg])
        # self.__dict__.update(kwargs)
        if filepath == None:
            logger.error(f"No filepath given.")
            self.xl = None
        else:
            self.filepath = filepath
            # Open excel file
            try:
                self.xl = pd.ExcelFile(filepath.__str__())
            except ValueError as e:
                logger.error(f"Incorrect value: {e}")
                self.xl = None
        # TODO: replace OrderedDict with pydantic BaseModel
        self.sub = OrderedDict()
        # make decision about type of sample we have
        self.sub['submission_type'] = self.type_decider()
        # select proper parser based on sample type
        parse_sub = getattr(self, f"parse_{self.sub['submission_type'].lower()}")
        parse_sub()
        # self.calculate_column_count()

    def type_decider(self) -> str:
        """
@@ -65,7 +63,7 @@ class SheetParser(object):
            return categories[0].replace(" ", "_")
        else:
            # This code is going to be deprecated once there is full adoption of the client sheets
            # with updated metadata
            # with updated metadata... but how will it work for Artic?
            try:
                for type in self.ctx['submission_types']:
                    # This gets the *first* submission type that matches the sheet names in the workbook
@@ -76,7 +74,6 @@ class SheetParser(object):
                logger.warning(f"We were unable to parse the submission type due to: {e}")
                return "Unknown"


    def parse_unknown(self) -> None:
        """
        Dummy function to handle unknown excel structures
@@ -84,7 +81,6 @@ class SheetParser(object):
        logger.error(f"Unknown excel workbook structure. Cannot parse.")
        self.sub = None


    def parse_generic(self, sheet_name:str) -> pd.DataFrame:
        """
        Pulls information common to all wastewater/bacterial culture types and passes on dataframe
@@ -98,14 +94,17 @@ class SheetParser(object):
        # self.xl is a pd.ExcelFile so we need to parse it into a df
        submission_info = self.xl.parse(sheet_name=sheet_name, dtype=object)
        self.sub['submitter_plate_num'] = submission_info.iloc[0][1]
        self.sub['rsl_plate_num'] = RSLNamer(submission_info.iloc[10][1]).parsed_name
        if check_not_nan(submission_info.iloc[10][1]):
            self.sub['rsl_plate_num'] = RSLNamer(submission_info.iloc[10][1]).parsed_name
        else:
            # self.sub['rsl_plate_num'] = RSLNamer(self.filepath).parsed_name
            self.sub['rsl_plate_num'] = None
        self.sub['submitted_date'] = submission_info.iloc[1][1]
        self.sub['submitting_lab'] = submission_info.iloc[0][3]
        self.sub['sample_count'] = submission_info.iloc[2][3]
        self.sub['extraction_kit'] = submission_info.iloc[3][3]
        return submission_info


    def parse_bacterial_culture(self) -> None:
        """
        pulls info specific to bacterial culture sample type
@@ -121,22 +120,27 @@ class SheetParser(object):
            for ii, row in df.iterrows():
                # skip positive control
                logger.debug(f"Running reagent parse for {row[1]} with type {type(row[1])} and value: {row[2]} with type {type(row[2])}")
                if not isinstance(row[2], float) and check_not_nan(row[1]):
                    # if the lot number isn't a float and the reagent type isn't blank
                # if not isinstance(row[2], float) and check_not_nan(row[1]):
                if check_not_nan(row[1]):
                    # must be prefixed with 'lot_' to be recognized by gui
                    # This is no longer true since reagents are loaded into their own key in dictionary
                    try:
                        reagent_type = row[1].replace(' ', '_').lower().strip()
                    except AttributeError:
                        pass
                    # If there is a double slash in the type field, such as ethanol/iso
                    # Use the cell to the left for reagent type.
                    if reagent_type == "//":
                        if check_not_nan(row[2]):
                            reagent_type = row[0].replace(' ', '_').lower().strip()
                        else:
                            continue
                    try:
                        output_var = row[2].upper()
                        output_var = convert_nans_to_nones(str(row[2]).upper())
                    except AttributeError:
                        logger.debug(f"Couldn't upperize {row[2]}, must be a number")
                        output_var = row[2]
                        output_var = convert_nans_to_nones(str(row[2]))
                    logger.debug(f"Output variable is {output_var}")
                    logger.debug(f"Expiry date for imported reagent: {row[3]}")
                    if check_not_nan(row[3]):
@@ -149,22 +153,17 @@ class SheetParser(object):
                        expiry = datetime.fromordinal(datetime(1900, 1, 1).toordinal() + row[3] - 2)
                    else:
                        logger.debug(f"Date: {row[3]}")
                        expiry = date.today()
                        # expiry = date.today()
                        expiry = date(year=1970, month=1, day=1)
                    # self.sub[f"lot_{reagent_type}"] = {'lot':output_var, 'exp':expiry}
                    self.sub['reagents'].append(dict(type=reagent_type, lot=output_var, exp=expiry))
                    # self.sub['reagents'].append(dict(type=reagent_type, lot=output_var, exp=expiry))
                    self.sub['reagents'].append(PydReagent(type=reagent_type, lot=output_var, exp=expiry))
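        # Why the "- 2" in the serial-date conversion above: Excel's 1900
        # date system counts 1900-01-01 as day 1 *and* wrongly treats 1900
        # as a leap year, so two days must be subtracted. Quick check with
        # an assumed serial value:
        #     serial = 44927  # Excel's 2023-01-01
        #     datetime.fromordinal(datetime(1900, 1, 1).toordinal() + serial - 2)
        #     # -> datetime(2023, 1, 1, 0, 0)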
        submission_info = self.parse_generic("Sample List")
        # iloc is [row][column] and the first row is set as header row so -2
        tech = str(submission_info.iloc[11][1])
        # moved to pydantic model
        # if tech == "nan":
        #     tech = "Unknown"
        # elif len(tech.split(",")) > 1:
        #     tech_reg = re.compile(r"[A-Z]{2}")
        #     tech = ", ".join(tech_reg.findall(tech))
        self.sub['technician'] = tech
        self.sub['technician'] = str(submission_info.iloc[11][1])
        # reagents
        # must be prefixed with 'lot_' to be recognized by gui
        # TODO: find a more adaptable way to read reagents.
        # This is no longer true since the creation of self.sub['reagents']
        self.sub['reagents'] = []
        reagent_range = submission_info.iloc[1:14, 4:8]
        logger.debug(reagent_range)
@@ -175,7 +174,6 @@ class SheetParser(object):
        logger.debug(f"Parser result: {self.sub}")
        self.sample_result, self.sub['samples'] = sample_parse()


    def parse_wastewater(self) -> None:
        """
        pulls info specific to wastewater sample type
@@ -196,17 +194,18 @@ class SheetParser(object):
            """
            # iterate through sub-df rows
            for ii, row in df.iterrows():
                if not isinstance(row[5], float) and check_not_nan(row[5]):
                logger.debug(f"Parsing this row for reagents: {row}")
                if check_not_nan(row[5]):
                    # must be prefixed with 'lot_' to be recognized by gui
                    # regex below will remove 80% from 80% ethanol in the Wastewater kit.
                    output_key = re.sub(r"^\d{1,3}%\s?", "", row[0].lower().strip().replace(' ', '_'))
                    output_key = output_key.strip("_")
                    # output_var is the lot number
                    try:
                        output_var = row[5].upper()
                        output_var = convert_nans_to_nones(str(row[5].upper()))
                    except AttributeError:
                        logger.debug(f"Couldn't upperize {row[5]}, must be a number")
                        output_var = row[5]
                        output_var = convert_nans_to_nones(str(row[5]))
                    if check_not_nan(row[7]):
                        try:
                            expiry = row[7].date()
@@ -214,8 +213,12 @@ class SheetParser(object):
                            expiry = date.today()
                    else:
                        expiry = date.today()
                    logger.debug(f"Expiry date for {output_key}: {expiry} of type {type(expiry)}")
                    # self.sub[f"lot_{output_key}"] = {'lot':output_var, 'exp':expiry}
                    self.sub['reagents'].append(dict(type=output_key, lot=output_var, exp=expiry))
                    # self.sub['reagents'].append(dict(type=output_key, lot=output_var, exp=expiry))
                    reagent = PydReagent(type=output_key, lot=output_var, exp=expiry)
                    logger.debug(f"Here is the created reagent: {reagent}")
                    self.sub['reagents'].append(reagent)
        # parse submission sheet
        submission_info = self.parse_generic("WW Submissions (ENTER HERE)")
        # parse enrichment sheet
@@ -230,7 +233,7 @@ class SheetParser(object):
        qprc_info = self.xl.parse("qPCR Worksheet", dtype=object)
        # set qpcr reagent range
        pcr_reagent_range = qprc_info.iloc[0:5, 9:20]
        # compile technician info
        # compile technician info from all sheets
        self.sub['technician'] = f"Enr: {enrichment_info.columns[2]}, Ext: {extraction_info.columns[2]}, PCR: {qprc_info.columns[2]}"
        self.sub['reagents'] = []
        parse_reagents(enr_reagent_range)
@@ -242,7 +245,6 @@ class SheetParser(object):
        self.sample_result, self.sub['samples'] = sample_parse()
        self.sub['csv'] = self.xl.parse("Copy to import file", dtype=object)


    def parse_wastewater_artic(self) -> None:
        """
        pulls info specific to wastewater_artic submission type
@@ -258,10 +260,10 @@ class SheetParser(object):
                output_key = output_key.strip("_")
                output_key = massage_common_reagents(output_key)
                try:
                    output_var = row[1].upper()
                    output_var = convert_nans_to_nones(str(row[1].upper()))
                except AttributeError:
                    logger.debug(f"Couldn't upperize {row[1]}, must be a number")
                    output_var = row[1]
                    output_var = convert_nans_to_nones(str(row[1]))
                logger.debug(f"Output variable is {output_var}")
                logger.debug(f"Expiry date for imported reagent: {row[2]}")
                if check_not_nan(row[2]):
@@ -277,7 +279,8 @@ class SheetParser(object):
                else:
                    logger.debug(f"Date: {row[2]}")
                    expiry = date.today()
                self.sub['reagents'].append(dict(type=output_key, lot=output_var, exp=expiry))
                # self.sub['reagents'].append(dict(type=output_key, lot=output_var, exp=expiry))
                self.sub['reagents'].append(PydReagent(type=output_key, lot=output_var, exp=expiry))
            else:
                continue
        def massage_samples(df:pd.DataFrame) -> pd.DataFrame:
@@ -317,20 +320,19 @@ class SheetParser(object):
        sample_parse = getattr(sample_parser, f"parse_{self.sub['submission_type'].lower()}_samples")
        self.sample_result, self.sub['samples'] = sample_parse()


    def to_pydantic(self) -> PydSubmission:
        """
        Generates a pydantic model of scraped data for validation

        Returns:
            PydSubmission: output pydantic model
        """
        psm = PydSubmission(filepath=self.filepath, **self.sub)
        """
        logger.debug(f"Submission dictionary coming into 'to_pydantic':\n{pprint.pformat(self.sub)}")
        psm = PydSubmission(ctx=self.ctx, filepath=self.filepath, **self.sub)
        delattr(psm, "filepath")
        return psm




class SampleParser(object):
    """
    object to pull data for samples in excel sheet and construct individual sample objects
@@ -385,7 +387,7 @@ class SampleParser(object):
            list[WWSample]: list of sample objects
        """
        def search_df_for_sample(sample_rsl:str):
            # logger.debug(f"Attempting to find sample {sample_rsl} in \n {self.elution_map}")
            logger.debug(f"Attempting to find sample {sample_rsl} in \n {self.elution_map}")
            well = self.elution_map.where(self.elution_map==sample_rsl)
            # logger.debug(f"Well: {well}")
            well = well.dropna(how='all').dropna(axis=1, how="all")
@@ -394,9 +396,9 @@ class SampleParser(object):
            logger.debug(f"well {sample_rsl} post processing: {well.size}: {type(well)}, {well.index[0]}, {well.columns[0]}")
            self.elution_map.at[well.index[0], well.columns[0]] = np.nan
            try:
                col = str(int(well.columns[0]))
                col = str(int(well.columns[0])).zfill(2)
            except ValueError:
                col = str(well.columns[0])
                col = str(well.columns[0]).zfill(2)
            except TypeError as e:
                logger.error(f"Problem parsing out column number for {well}:\n {e}")
            return f"{well.index[0]}{col}"
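The new zfill(2) calls normalize well coordinates to two-digit columns so that string comparison and sorting line up with plate order across sheets; for example (values assumed):

assert "A" + "9".zfill(2) == "A09"   # single-digit column padded
assert "A" + "11".zfill(2) == "A11"  # already two digits, unchanged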
@@ -424,10 +426,12 @@ class SampleParser(object):
            # new.testing_type = sample['Unnamed: 6']
            # new.site_status = sample['Unnamed: 7']
            new.notes = str(sample['Unnamed: 6']) # previously Unnamed: 8
            new.well_number = sample['Unnamed: 1']
            new.well_24 = sample['Unnamed: 1']
            elu_well = search_df_for_sample(new.rsl_number)
            if elu_well != None:
                new.elution_well = elu_well
                row = elu_well[0]
                col = elu_well[1:].zfill(2)
                new.well_number = f"{row}{col}"
            else:
                # try:
                return_val += f"{new.rsl_number}\n"
@@ -455,12 +459,14 @@ class SampleParser(object):
                missed_samples.append(sample['sample_name'])
                continue
            logger.debug(f"Got instance: {instance.ww_sample_full_id}")
            if sample['well'] != None:
                row = sample['well'][0]
                col = sample['well'][1:].zfill(2)
                sample['well'] = f"{row}{col}"
            instance.artic_well_number = sample['well']
            new_list.append(instance)
        missed_str = "\n\t".join(missed_samples)
        return f"Could not find matches for the following samples:\n\t {missed_str}", new_list




class PCRParser(object):
@@ -590,5 +596,5 @@ class PCRParser(object):
        self.samples.append(sample_obj)




@@ -3,24 +3,14 @@ Contains functions for generating summary reports
'''
from pandas import DataFrame
import logging
from jinja2 import Environment, FileSystemLoader
from datetime import date, timedelta
import sys
from pathlib import Path
import re
from tools import check_if_app
from typing import Tuple
from configure import jinja_template_loading

logger = logging.getLogger(f"submissions.{__name__}")

# set path of templates depending on pyinstaller/raw python
# if getattr(sys, 'frozen', False):
if check_if_app():
    loader_path = Path(sys._MEIPASS).joinpath("files", "templates")
else:
    loader_path = Path(__file__).parents[2].joinpath('templates').absolute().__str__()
loader = FileSystemLoader(loader_path)
env = Environment(loader=loader)
env = jinja_template_loading()

logger = logging.getLogger(f"submissions.{__name__}")

@@ -115,7 +105,6 @@ def convert_data_list_to_df(ctx:dict, input:list[dict], subtype:str|None=None) -
    # logger.debug(df)
    # move date of sample submitted on same date as previous ahead one.
    df = displace_date(df)
    # df.sort_values('submitted_date').to_excel("controls.xlsx", engine="openpyxl")
    # ad hoc method to make data labels more accurate.
    df = df_column_renamer(df=df)
    return df
@@ -156,46 +145,33 @@ def displace_date(df:DataFrame) -> DataFrame:
    dict_list = [dict(name=item, date=df[df.name == item].iloc[0]['submitted_date']) for item in sorted(df['name'].unique())]
    previous_dates = []
    for _, item in enumerate(dict_list):
        # try:
        #     # check = item['date'] == dict_list[ii-1]['date']
        #     check = item['date'] in previous_dates
        # except IndexError:
        #     check = False
        # if check:
        #     # occurrences = previous_dates.count(item['date'])
        #     logger.debug(f"We found one! Increment date!\n\t{item['date']} to {item['date'] + timedelta(days=1)}")
        #     # get df locations where name == item name
        #     mask = df['name'] == item['name']
        #     # increment date in dataframe
        #     df.loc[mask, 'submitted_date'] = df.loc[mask, 'submitted_date'].apply(lambda x: x + timedelta(days=1))
        #     outdate = item['date'] + timedelta(days=1)
        #     # previous_dates.append(item['date'] + timedelta(days=1))
        # else:
        #     outdate = item['date']
        # previous_dates.append(outdate)
        # logger.debug(f"\n\tCurrent date: {outdate}\n\tPrevious dates:{previous_dates}")
        # logger.debug(type(item))
        df, previous_dates = check_date(df=df, item=item, previous_dates=previous_dates)
    return df

def check_date(df:DataFrame, item:dict, previous_dates:list) -> Tuple[DataFrame, list]:

    """
    Checks if an item's date is already present in df and adjusts df accordingly

    Args:
        df (DataFrame): input dataframe
        item (dict): control for checking
        previous_dates (list): list of dates found in previous controls

    Returns:
        Tuple[DataFrame, list]: Output dataframe and appended list of previous dates
    """
    try:
        # check = item['date'] == dict_list[ii-1]['date']
        check = item['date'] in previous_dates
    except IndexError:
        check = False
    previous_dates.append(item['date'])
    if check:
        # occurrences = previous_dates.count(item['date'])
        logger.debug(f"We found one! Increment date!\n\t{item['date']} to {item['date'] + timedelta(days=1)}")
        # get df locations where name == item name
        mask = df['name'] == item['name']
        # increment date in dataframe
        df.loc[mask, 'submitted_date'] = df.loc[mask, 'submitted_date'].apply(lambda x: x + timedelta(days=1))

        item['date'] += timedelta(days=1)
        # previous_dates.append(item['date'] + timedelta(days=1))
        passed = False
    else:
        passed = True
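A hypothetical demo of the helper extracted above: a control whose date collides with an already-seen date gets nudged forward a day so both points survive on a shared date axis (names and dates assumed):

demo = DataFrame({"name": ["EN-002"], "submitted_date": [date(2023, 1, 2)]})
demo, seen = check_date(df=demo,
                        item={"name": "EN-002", "date": date(2023, 1, 2)},
                        previous_dates=[date(2023, 1, 2)])
# demo["submitted_date"] is now [2023-01-03]; seen also records the original date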
@@ -249,8 +225,7 @@ def drop_reruns_from_df(ctx:dict, df: DataFrame) -> DataFrame:
                # logger.debug(f"First run: {first_run}")
                df = df.drop(df[df.name == first_run].index)
    return df
    # else:
    #     return df



def make_hitpicks(input:list) -> DataFrame:

@@ -1,65 +1,153 @@
import uuid
from pydantic import BaseModel, validator
from datetime import date
from pydantic import BaseModel, field_validator, model_validator, Extra
from datetime import date, datetime
from typing import List, Any
from tools import RSLNamer
from pathlib import Path
import re
import logging
from tools import check_not_nan, convert_nans_to_nones
import numpy as np
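The decorator churn in this file tracks the pydantic v2 API: @validator(..., pre=True) becomes @field_validator(..., mode="before"), and validators are declared as classmethods. A minimal self-contained example of the pattern used throughout:

from pydantic import BaseModel, field_validator

class Example(BaseModel):
    lot: str|None

    @field_validator("lot", mode="before")
    @classmethod
    def stringify(cls, value):
        # mode="before" sees the raw parsed cell, so ints/floats can be
        # coerced before pydantic enforces the str|None annotation
        return None if value is None else str(value)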



logger = logging.getLogger(f"submissions.{__name__}")

class PydSubmission(BaseModel):
class PydReagent(BaseModel):
    type: str|None
    lot: str|None
    exp: date|None

    @field_validator("type", mode='before')
    @classmethod
    def remove_undesired_types(cls, value):
        match value:
            case "atcc":
                return None
            case _:
                return value

    @field_validator("lot", mode='before')
    @classmethod
    def enforce_lot_string(cls, value):
        if value != None:
            return convert_nans_to_nones(str(value))
        return value

    @field_validator("exp", mode="before")
    @classmethod
    def enforce_date(cls, value):
        if isinstance(value, float) or value == np.nan:
            raise ValueError(f"Date cannot be a float: {value}")
        else:
            return value



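How the parser now hands reagents over (field values assumed): the "before" validators above coerce lots to strings, null out the 'atcc' type, and refuse float expiry dates:

r = PydReagent(type="wash_buffer", lot=210456, exp=date(2023, 11, 30))
# r.lot == "210456" (coerced to str by enforce_lot_string)
assert PydReagent(type="atcc", lot="X", exp=date(2023, 11, 30)).type is None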
class PydSubmission(BaseModel, extra=Extra.allow):
    ctx: dict
    filepath: Path
    submission_type: str
    submitter_plate_num: str|None
    rsl_plate_num: str
    rsl_plate_num: str|dict|None
    submitted_date: date
    submitting_lab: str
    submitting_lab: str|None
    sample_count: int
    extraction_kit: str
    technician: str
    reagents: List[dict]
    extraction_kit: str|dict|None
    technician: str|None
    reagents: List[PydReagent] = []
    samples: List[Any]

    @validator("submitted_date", pre=True)
    # missing_fields: List[str] = []

    @field_validator("submitted_date", mode="before")
    @classmethod
    def strip_datetime_string(cls, value):
        if isinstance(value, datetime):
            return value
        if isinstance(value, date):
            return value
        return re.sub(r"_\d$", "", value)

    @validator("submitter_plate_num")
    @field_validator("submitter_plate_num")
    @classmethod
    def enforce_with_uuid(cls, value):
        if value == None or value == "" or value == "None":
            return uuid.uuid4().hex.upper()

    @validator("rsl_plate_num", pre=True)
    @classmethod
    def rsl_from_file(cls, value, values):
        if value == None:
            logger.debug(f"Pydant values:\n{values}")
            return RSLNamer(values['filepath'].__str__()).parsed_name
        else:
            return value

    @validator("technician")
    @field_validator("submitting_lab", mode="before")
    @classmethod
    def transform_nan(cls, value):
        return convert_nans_to_nones(value)

    @field_validator("rsl_plate_num", mode='before')
    @classmethod
    def rsl_from_file(cls, value, values):
        logger.debug(f"RSL-plate initial value: {value}")
        if check_not_nan(value):
            if isinstance(value, str):
                return dict(value=value, parsed=True)
            else:
                return value
        else:
            logger.debug(f"Pydant values:{type(values)}\n{values}")
            return dict(value=RSLNamer(values.data['filepath'].__str__()).parsed_name, parsed=False)

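    # The dict(value=..., parsed=...) shape returned above (and again for
    # extraction_kit below) seems intended to let the GUI flag values it had
    # to infer rather than read; e.g. a consumer might do (hypothetical hook):
    #     if isinstance(sub.rsl_plate_num, dict) and not sub.rsl_plate_num["parsed"]:
    #         mark_field_for_review("rsl_plate_num")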
@field_validator("technician")
|
||||
@classmethod
|
||||
def enforce_tech(cls, value):
|
||||
if value == "nan" or value == "None":
|
||||
value = "Unknown"
|
||||
# elif len(value.split(",")) > 1:
|
||||
# tech_reg = re.compile(r"\b[A-Z]{2}\b")
|
||||
# value = ", ".join(tech_reg.findall(value))
|
||||
return value
|
||||
|
||||
@validator("reagents")
|
||||
@field_validator("reagents")
|
||||
@classmethod
|
||||
def remove_atcc(cls, value):
|
||||
return_val = []
|
||||
for reagent in value:
|
||||
match reagent['type']:
|
||||
case 'atcc':
|
||||
continue
|
||||
case _:
|
||||
return_val.append(reagent)
|
||||
logger.debug(f"Pydantic reagent: {reagent}")
|
||||
# match reagent.type.lower():
|
||||
# case 'atcc':
|
||||
# continue
|
||||
# case _:
|
||||
# return_val.append(reagent)
|
||||
if reagent.type == None:
|
||||
continue
|
||||
else:
|
||||
return_val.append(reagent)
|
||||
return return_val
|
||||
|
||||
@field_validator("sample_count", mode='before')
|
||||
@classmethod
|
||||
def enforce_sample_count(cls, value):
|
||||
if check_not_nan(value):
|
||||
return int(value)
|
||||
else:
|
||||
# raise ValueError(f"{value} could not be used to create an integer.")
|
||||
return convert_nans_to_nones(value)
|
||||
|
||||
@field_validator("extraction_kit", mode='before')
|
||||
@classmethod
|
||||
def get_kit_if_none(cls, value, values):
|
||||
from frontend.custom_widgets.pop_ups import KitSelector
|
||||
if check_not_nan(value):
|
||||
return dict(value=value, parsed=True)
|
||||
else:
|
||||
# logger.debug(values.data)
|
||||
dlg = KitSelector(ctx=values.data['ctx'], title="Kit Needed", message="At minimum a kit is needed. Please select one.")
|
||||
if dlg.exec():
|
||||
return dict(value=dlg.getValues(), parsed=False)
|
||||
else:
|
||||
raise ValueError("Extraction kit needed.")
|
||||
|
||||
# @model_validator(mode="after")
|
||||
# def ensure_kit(cls, values):
|
||||
# logger.debug(f"Model values: {values}")
|
||||
# missing_fields = [k for k,v in values if v == None]
|
||||
# if len(missing_fields) > 0:
|
||||
# logger.debug(f"Missing fields: {missing_fields}")
|
||||
# values['missing_fields'] = missing_fields
|
||||
# return values
|
||||
|
||||
|
||||
|
||||