Big update, semi-functional

This commit is contained in:
Landon Wark
2023-08-28 13:20:30 -05:00
parent b6de159631
commit bc7a3b8f5f
13 changed files with 824 additions and 466 deletions

View File

@@ -150,7 +150,7 @@ def construct_submission_info(ctx:Settings, info_dict:dict) -> models.BasicSubmi
logger.debug(f"Looking at models for submission type: {query}")
model = getattr(models, query)
logger.debug(f"We've got the model: {type(model)}")
info_dict['submission_type'] = info_dict['submission_type'].replace(" ", "_").lower()
# info_dict['submission_type'] = info_dict['submission_type'].replace(" ", "_").lower()
# if query return nothing, ie doesn't already exist in db
if instance == None:
instance = model()
@@ -224,6 +224,9 @@ def construct_submission_info(ctx:Settings, info_dict:dict) -> models.BasicSubmi
logger.critical(e)
continue
continue
case "submission_type":
# item = "submission_type"
field_value = lookup_submissiontype_by_name(ctx=ctx, type_name=value)
case _:
field_value = value
# insert into field
@@ -276,7 +279,7 @@ def construct_reagent(ctx:Settings, info_dict:dict) -> models.Reagent:
"""
reagent = models.Reagent()
for item in info_dict:
logger.debug(f"Reagent info item: {item}")
logger.debug(f"Reagent info item for {item}: {info_dict[item]}")
# set fields based on keys in dictionary
match item:
case "lot":
@@ -284,7 +287,12 @@ def construct_reagent(ctx:Settings, info_dict:dict) -> models.Reagent:
case "expiry":
reagent.expiry = info_dict[item]
case "type":
reagent.type = lookup_reagenttype_by_name(ctx=ctx, rt_name=info_dict[item].replace(" ", "_").lower())
reagent.type = lookup_reagenttype_by_name(ctx=ctx, rt_name=info_dict[item])
case "name":
if item == None:
reagent.name = reagent.type.name
else:
reagent.name = info_dict[item]
# add end-of-life extension from reagent type to expiry date
# NOTE: this will now be done only in the reporting phase to account for potential changes in end-of-life extensions
# try:
@@ -320,7 +328,7 @@ def lookup_reagenttype_by_name(ctx:Settings, rt_name:str) -> models.ReagentType:
Returns:
models.ReagentType: looked up reagent type
"""
logger.debug(f"Looking up ReagentType by name: {rt_name}")
logger.debug(f"Looking up ReagentType by name: {rt_name.title()}")
# lookedup = ctx['database_session'].query(models.ReagentType).filter(models.ReagentType.name==rt_name).first()
lookedup = ctx.database_session.query(models.ReagentType).filter(models.ReagentType.name==rt_name).first()
logger.debug(f"Found ReagentType: {lookedup}")
@@ -339,12 +347,13 @@ def lookup_kittype_by_use(ctx:Settings, used_by:str|None=None) -> list[models.Ki
"""
if used_by != None:
# return ctx['database_session'].query(models.KitType).filter(models.KitType.used_for.contains(used_by)).all()
return ctx.database_session.query(models.KitType).filter(models.KitType.used_for.contains(used_by)).all()
# return ctx.database_session.query(models.KitType).filter(models.KitType.used_for.contains(used_by)).all()
return ctx.database_session.query(models.KitType).filter(models.KitType.used_for.any(name=used_by)).all()
else:
# return ctx['database_session'].query(models.KitType).all()
return ctx.database_session.query(models.KitType).all()
def lookup_kittype_by_name(ctx:Settings, name:str) -> models.KitType:
def lookup_kittype_by_name(ctx:Settings, name:str|dict) -> models.KitType:
"""
Lookup a kit type by name
@@ -359,7 +368,8 @@ def lookup_kittype_by_name(ctx:Settings, name:str) -> models.KitType:
name = name['value']
logger.debug(f"Querying kittype: {name}")
# return ctx['database_session'].query(models.KitType).filter(models.KitType.name==name).first()
return ctx.database_session.query(models.KitType).filter(models.KitType.name==name).first()
with ctx.database_session.no_autoflush:
return ctx.database_session.query(models.KitType).filter(models.KitType.name==name).first()
def lookup_kittype_by_id(ctx:Settings, id:int) -> models.KitType:
    """
    Fetch a single KitType by its primary key.

    Args:
        ctx (Settings): settings object holding the database session
        id (int): primary key of the kit type

    Returns:
        models.KitType: matching kit type, or None if no row has that id
    """
    query = ctx.database_session.query(models.KitType)
    return query.filter(models.KitType.id == id).first()
@@ -559,12 +569,17 @@ def create_kit_from_yaml(ctx:Settings, exp:dict) -> dict:
# continue
# A submission type may use multiple kits.
for kt in exp[type]['kits']:
submission_type = lookup_submissiontype_by_name(ctx=ctx, type_name=type)
kit = models.KitType(name=kt,
used_for=[type.replace("_", " ").title()],
constant_cost=exp[type]["kits"][kt]["constant_cost"],
mutable_cost_column=exp[type]["kits"][kt]["mutable_cost_column"],
mutable_cost_sample=exp[type]["kits"][kt]["mutable_cost_sample"]
# constant_cost=exp[type]["kits"][kt]["constant_cost"],
# mutable_cost_column=exp[type]["kits"][kt]["mutable_cost_column"],
# mutable_cost_sample=exp[type]["kits"][kt]["mutable_cost_sample"]
)
kt_st_assoc = models.SubmissionTypeKitTypeAssociation(kit_type=kit, submission_type=submission_type)
kt_st_assoc.constant_cost = exp[type]["kits"][kt]["constant_cost"]
kt_st_assoc.mutable_cost_column = exp[type]["kits"][kt]["mutable_cost_column"]
kt_st_assoc.mutable_cost_sample = exp[type]["kits"][kt]["mutable_cost_sample"]
kit.kit_submissiontype_associations.append(kt_st_assoc)
# A kit contains multiple reagent types.
for r in exp[type]['kits'][kt]['reagenttypes']:
# check if reagent type already exists.
@@ -573,7 +588,7 @@ def create_kit_from_yaml(ctx:Settings, exp:dict) -> dict:
look_up = ctx.database_session.query(models.ReagentType).filter(models.ReagentType.name==r).first()
if look_up == None:
# rt = models.ReagentType(name=r.replace(" ", "_").lower(), eol_ext=timedelta(30*exp[type]['kits'][kt]['reagenttypes'][r]['eol_ext']), kits=[kit], required=1)
rt = models.ReagentType(name=r.replace(" ", "_").lower(), eol_ext=timedelta(30*exp[type]['kits'][kt]['reagenttypes'][r]['eol_ext']), last_used="")
rt = models.ReagentType(name=r.replace(" ", "_").lower().strip(), eol_ext=timedelta(30*exp[type]['kits'][kt]['reagenttypes'][r]['eol_ext']), last_used="")
else:
rt = look_up
# rt.kits.append(kit)
@@ -583,7 +598,7 @@ def create_kit_from_yaml(ctx:Settings, exp:dict) -> dict:
# except AttributeError as e:
# logger.error(f"Error appending reagent id to kit.reagent_types_id: {e}, creating new.")
# kit.reagent_types_id = [rt.id]
assoc = models.KitTypeReagentTypeAssociation(kit_type=kit, reagent_type=rt, uses=kit.used_for)
assoc = models.KitTypeReagentTypeAssociation(kit_type=kit, reagent_type=rt, uses={})
# ctx['database_session'].add(rt)
ctx.database_session.add(rt)
kit.kit_reagenttype_associations.append(assoc)
@@ -646,10 +661,11 @@ def lookup_all_sample_types(ctx:Settings) -> list[str]:
list[str]: list of sample type names
"""
# uses = [item.used_for for item in ctx['database_session'].query(models.KitType).all()]
uses = [item.used_for for item in ctx.database_session.query(models.KitType).all()]
# uses = [item.used_for for item in ctx.database_session.query(models.KitType).all()]
# flattened list of lists
uses = list(set([item for sublist in uses for item in sublist]))
return uses
# uses = list(set([item for sublist in uses for item in sublist]))
return [item.name for item in ctx.database_session.query(models.SubmissionType).all()]
def get_all_available_modes(ctx:Settings) -> list[str]:
"""
@@ -1084,7 +1100,8 @@ def lookup_sample_by_submitter_id(ctx:Settings, submitter_id:str) -> models.Basi
Returns:
BasicSample: _description_
"""
return ctx.database_session.query(models.BasicSample).filter(models.BasicSample.submitter_id==submitter_id).first()
with ctx.database_session.no_autoflush:
return ctx.database_session.query(models.BasicSample).filter(models.BasicSample.submitter_id==submitter_id).first()
def get_all_submission_types(ctx:Settings) -> List[str]:
"""
@@ -1150,4 +1167,18 @@ def lookup_all_reagent_names_by_role(ctx:Settings, role_name:str) -> List[str]:
try:
return [reagent.name for reagent in role.instances]
except AttributeError:
return []
return []
def lookup_submissiontype_by_name(ctx:Settings, type_name:str) -> models.SubmissionType:
    """
    Fetch a single SubmissionType whose name matches exactly.

    Args:
        ctx (Settings): settings object holding the database session
        type_name (str): name of the submission type to look up

    Returns:
        models.SubmissionType: matching submission type, or None if not found
    """
    query = ctx.database_session.query(models.SubmissionType)
    return query.filter(models.SubmissionType.name == type_name).first()

View File

@@ -7,7 +7,7 @@ Base = declarative_base()
metadata = Base.metadata
from .controls import Control, ControlType
from .kits import KitType, ReagentType, Reagent, Discount, KitTypeReagentTypeAssociation
from .kits import KitType, ReagentType, Reagent, Discount, KitTypeReagentTypeAssociation, SubmissionType, SubmissionTypeKitTypeAssociation
from .organizations import Organization, Contact
# from .samples import WWSample, BCSample, BasicSample
from .submissions import BasicSubmission, BacterialCulture, Wastewater, WastewaterArtic, WastewaterSample, BacterialCultureSample, BasicSample, SubmissionSampleAssociation, WastewaterAssociation

View File

@@ -32,11 +32,9 @@ class KitType(Base):
id = Column(INTEGER, primary_key=True) #: primary key
name = Column(String(64), unique=True) #: name of kit
submissions = relationship("BasicSubmission", back_populates="extraction_kit") #: submissions this kit was used for
used_for = Column(JSON) #: list of names of sample types this kit can process
cost_per_run = Column(FLOAT(2)) #: dollar amount for each full run of this kit NOTE: depreciated, use the constant and mutable costs instead
mutable_cost_column = Column(FLOAT(2)) #: dollar amount per 96 well plate that can change with number of columns (reagents, tips, etc)
mutable_cost_sample = Column(FLOAT(2)) #: dollar amount that can change with number of samples (reagents, tips, etc)
constant_cost = Column(FLOAT(2)) #: dollar amount per plate that will remain constant (plates, man hours, etc)
# used_for = Column(JSON) #: list of names of sample types this kit can process
# used_for = relationship("SubmissionType", back_populates="extraction_kits", uselist=True, secondary=submissiontype_kittypes)
# cost_per_run = Column(FLOAT(2)) #: dollar amount for each full run of this kit NOTE: depreciated, use the constant and mutable costs instead
# reagent_types = relationship("ReagentType", back_populates="kits", uselist=True, secondary=reagenttypes_kittypes) #: reagent types this kit contains
# reagent_types_id = Column(INTEGER, ForeignKey("_reagent_types.id", ondelete='SET NULL', use_alter=True, name="fk_KT_reagentstype_id")) #: joined reagent type id
# kit_reagenttype_association =
@@ -46,12 +44,23 @@ class KitType(Base):
back_populates="kit_type",
cascade="all, delete-orphan",
)
# association proxy of "user_keyword_associations" collection
# to "keyword" attribute
reagent_types = association_proxy("kit_reagenttype_associations", "reagenttype")
kit_submissiontype_associations = relationship(
"SubmissionTypeKitTypeAssociation",
back_populates="kit_type",
cascade="all, delete-orphan",
)
used_for = association_proxy("kit_submissiontype_associations", "submission_type")
def __repr__(self) -> str:
return f"KitType({self.name})"
return f"<KitType({self.name})>"
def __str__(self) -> str:
"""
@@ -64,9 +73,9 @@ class KitType(Base):
def get_reagents(self, required:bool=False) -> list:
if required:
return [item.reagenttype for item in self.kit_reagenttype_associations if item.required == 1]
return [item.reagent_type for item in self.kit_reagenttype_associations if item.required == 1]
else:
return [item.reagenttype for item in self.kit_reagenttype_associations]
return [item.reagent_type for item in self.kit_reagenttype_associations]
def construct_xl_map_for_use(self, use:str) -> dict:
@@ -75,12 +84,16 @@ class KitType(Base):
assocs = [item for item in self.kit_reagenttype_associations if use in item.uses]
for assoc in assocs:
try:
map[assoc.reagenttype.name] = assoc.uses[use]
map[assoc.reagent_type.name] = assoc.uses[use]
except TypeError:
continue
try:
st_assoc = [item for item in self.used_for if use == item.name][0]
map['info'] = st_assoc.info_map
except IndexError as e:
map['info'] = {}
return map
class KitTypeReagentTypeAssociation(Base):
"""
table containing reagenttype/kittype associations
@@ -96,11 +109,11 @@ class KitTypeReagentTypeAssociation(Base):
kit_type = relationship(KitType, back_populates="kit_reagenttype_associations")
# reference to the "ReagentType" object
reagenttype = relationship("ReagentType")
reagent_type = relationship("ReagentType")
def __init__(self, kit_type=None, reagent_type=None, uses=None, required=1):
self.kit = kit_type
self.reagenttype = reagent_type
self.kit_type = kit_type
self.reagent_type = reagent_type
self.uses = uses
self.required = required
@@ -116,8 +129,6 @@ class KitTypeReagentTypeAssociation(Base):
raise ValueError(f'{value} is not a reagenttype')
return value
class ReagentType(Base):
"""
Base of reagent type abstract
@@ -151,7 +162,6 @@ class ReagentType(Base):
def __repr__(self):
return f"ReagentType({self.name})"
class Reagent(Base):
"""
Concrete reagent instance
@@ -215,7 +225,6 @@ class Reagent(Base):
"expiry": self.expiry.strftime("%Y-%m-%d")
}
class Discount(Base):
"""
Relationship table for client labs for certain kits.
@@ -230,4 +239,44 @@ class Discount(Base):
name = Column(String(128))
amount = Column(FLOAT(2))
class SubmissionType(Base):
    """
    Type of submission (e.g. Wastewater, Bacterial Culture), with an info map
    describing where its fields live in the client excel sheet.
    """
    __tablename__ = "_submission_types"

    id = Column(INTEGER, primary_key=True) #: primary key
    name = Column(String(128), unique=True) #: name of submission type
    info_map = Column(JSON) #: map of submission fields to excel cell locations
    instances = relationship("BasicSubmission", backref="submission_type") #: submissions of this type
    submissiontype_kit_associations = relationship(
        "SubmissionTypeKitTypeAssociation",
        back_populates="submission_type",
        cascade="all, delete-orphan",
    )
    # FIX: the proxy must traverse the relationship defined on *this* class
    # ("submissiontype_kit_associations"); "kit_submissiontype_associations"
    # only exists on KitType, so the old proxy raised AttributeError on access.
    kit_types = association_proxy("submissiontype_kit_associations", "kit_type")

    def __repr__(self) -> str:
        return f"<SubmissionType({self.name})>"
class SubmissionTypeKitTypeAssociation(Base):
    """
    Association object linking a SubmissionType to a KitType, carrying the
    per-pairing run costs (so the same kit can cost differently per submission type).
    """
    __tablename__ = "_submissiontypes_kittypes"
    submission_types_id = Column(INTEGER, ForeignKey("_submission_types.id"), primary_key=True) #: composite PK: submission type id
    kits_id = Column(INTEGER, ForeignKey("_kits.id"), primary_key=True) #: composite PK: kit type id
    mutable_cost_column = Column(FLOAT(2)) #: dollar amount per 96 well plate that can change with number of columns (reagents, tips, etc)
    mutable_cost_sample = Column(FLOAT(2)) #: dollar amount that can change with number of samples (reagents, tips, etc)
    constant_cost = Column(FLOAT(2)) #: dollar amount per plate that will remain constant (plates, man hours, etc)
    # reagent_type_name = Column(INTEGER, ForeignKey("_reagent_types.name"))
    # reference to the "KitType" object
    kit_type = relationship(KitType, back_populates="kit_submissiontype_associations")
    # reference to the "SubmissionType" object
    submission_type = relationship(SubmissionType, back_populates="submissiontype_kit_associations")
    def __init__(self, kit_type=None, submission_type=None):
        # All costs default to 0.00; callers overwrite them after construction
        # (see create_kit_from_yaml).
        self.kit_type = kit_type
        self.submission_type = submission_type
        self.mutable_cost_column = 0.00
        self.mutable_cost_sample = 0.00
        self.constant_cost = 0.00

View File

@@ -31,6 +31,9 @@ class Organization(Base):
str: string representing organization name
"""
return self.name.replace("_", " ").title()
def __repr__(self) -> str:
    """Return a debug representation containing the raw organization name."""
    return f"<Organization({self.name})>"
class Contact(Base):

View File

@@ -12,6 +12,9 @@ from math import ceil
from sqlalchemy.ext.associationproxy import association_proxy
import uuid
from . import Base
from pandas import Timestamp
from dateutil.parser import parse
import pprint
logger = logging.getLogger(f"submissions.{__name__}")
@@ -33,7 +36,8 @@ class BasicSubmission(Base):
sample_count = Column(INTEGER) #: Number of samples in the submission
extraction_kit = relationship("KitType", back_populates="submissions") #: The extraction kit used
extraction_kit_id = Column(INTEGER, ForeignKey("_kits.id", ondelete="SET NULL", name="fk_BS_extkit_id"))
submission_type = Column(String(32)) #: submission type (should be string in D3 of excel sheet)
# submission_type = Column(String(32)) #: submission type (should be string in D3 of excel sheet)
submission_type_name = Column(String, ForeignKey("_submission_types.name", ondelete="SET NULL", name="fk_BS_subtype_name"))
technician = Column(String(64)) #: initials of processing tech(s)
# Move this into custom types?
reagents = relationship("Reagent", back_populates="submissions", secondary=reagents_submissions) #: relationship to reagents
@@ -55,7 +59,7 @@ class BasicSubmission(Base):
# Allows for subclassing into ex. BacterialCulture, Wastewater, etc.
__mapper_args__ = {
"polymorphic_identity": "basic_submission",
"polymorphic_on": submission_type,
"polymorphic_on": submission_type_name,
"with_polymorphic": "*",
}
@@ -128,7 +132,7 @@ class BasicSubmission(Base):
output = {
"id": self.id,
"Plate Number": self.rsl_plate_num,
"Submission Type": self.submission_type.replace("_", " ").title(),
"Submission Type": self.submission_type_name,
"Submitter Plate Number": self.submitter_plate_num,
"Submitted Date": self.submitted_date.strftime("%Y-%m-%d"),
"Submitting Lab": sub_lab,
@@ -184,14 +188,18 @@ class BasicSubmission(Base):
except Exception as e:
logger.error(f"Column count error: {e}")
# cols_count_24 = ceil(int(self.sample_count) / 3)
if all(item == 0.0 for item in [self.extraction_kit.constant_cost, self.extraction_kit.mutable_cost_column, self.extraction_kit.mutable_cost_sample]):
logger.debug(f"Pre-association check. {pprint.pformat(self.__dict__)}")
assoc = [item for item in self.extraction_kit.kit_submissiontype_associations if item.submission_type == self.submission_type][0]
logger.debug(f"Came up with association: {assoc}")
# if all(item == 0.0 for item in [self.extraction_kit.constant_cost, self.extraction_kit.mutable_cost_column, self.extraction_kit.mutable_cost_sample]):
if all(item == 0.0 for item in [assoc.constant_cost, assoc.mutable_cost_column, assoc.mutable_cost_sample]):
try:
self.run_cost = self.extraction_kit.cost_per_run
except Exception as e:
logger.error(f"Calculation error: {e}")
else:
try:
self.run_cost = self.extraction_kit.constant_cost + (self.extraction_kit.mutable_cost_column * cols_count_96) + (self.extraction_kit.mutable_cost_sample * int(self.sample_count))
self.run_cost = assoc.constant_cost + (assoc.mutable_cost_column * cols_count_96) + (assoc.mutable_cost_sample * int(self.sample_count))
except Exception as e:
logger.error(f"Calculation error: {e}")
@@ -225,7 +233,7 @@ class BacterialCulture(BasicSubmission):
"""
controls = relationship("Control", back_populates="submission", uselist=True) #: A control sample added to submission
# samples = relationship("BCSample", back_populates="rsl_plate", uselist=True)
__mapper_args__ = {"polymorphic_identity": "bacterial_culture", "polymorphic_load": "inline"}
__mapper_args__ = {"polymorphic_identity": "Bacterial Culture", "polymorphic_load": "inline"}
def to_dict(self) -> dict:
"""
@@ -245,7 +253,7 @@ class Wastewater(BasicSubmission):
# samples = relationship("WWSample", back_populates="rsl_plate", uselist=True)
pcr_info = Column(JSON)
# ww_sample_id = Column(String, ForeignKey("_ww_samples.id", ondelete="SET NULL", name="fk_WW_sample_id"))
__mapper_args__ = {"polymorphic_identity": "wastewater", "polymorphic_load": "inline"}
__mapper_args__ = {"polymorphic_identity": "Wastewater", "polymorphic_load": "inline"}
def to_dict(self) -> dict:
"""
@@ -315,14 +323,14 @@ class BasicSample(Base):
@validates('submitter_id')
def create_id(self, key, value):
logger.debug(f"validating sample_id of: {value}")
# logger.debug(f"validating sample_id of: {value}")
if value == None:
return uuid.uuid4().hex.upper()
else:
return value
def __repr__(self) -> str:
return f"{self.sample_type}Sample({self.submitter_id})"
return f"<{self.sample_type.replace('_', ' ').title(). replace(' ', '')}({self.submitter_id})>"
def to_sub_dict(self, submission_rsl:str) -> dict:
row_map = {1:"A", 2:"B", 3:"C", 4:"D", 5:"E", 6:"F", 7:"G", 8:"H"}
@@ -363,30 +371,31 @@ class WastewaterSample(BasicSample):
# id = Column(INTEGER, primary_key=True) #: primary key
ww_processing_num = Column(String(64)) #: wastewater processing number
# ww_sample_full_id = Column(String(64), nullable=False, unique=True)
ww_sample_full_id = Column(String(64))
rsl_number = Column(String(64)) #: rsl plate identification number
# rsl_plate = relationship("Wastewater", back_populates="samples") #: relationship to parent plate
# rsl_plate_id = Column(INTEGER, ForeignKey("_submissions.id", ondelete="SET NULL", name="fk_WWS_submission_id"))
collection_date = Column(TIMESTAMP) #: Date submission received
collection_date = Column(TIMESTAMP) #: Date sample collected
received_date = Column(TIMESTAMP) #: Date sample received
# well_number = Column(String(8)) #: location on 96 well plate
# The following are fields from the sample tracking excel sheet Ruth put together.
# I have no idea when they will be implemented or how.
testing_type = Column(String(64))
site_status = Column(String(64))
# testing_type = Column(String(64))
# site_status = Column(String(64))
notes = Column(String(2000))
# ct_n1 = Column(FLOAT(2)) #: AKA ct for N1
# ct_n2 = Column(FLOAT(2)) #: AKA ct for N2
# n1_status = Column(String(32))
# n2_status = Column(String(32))
seq_submitted = Column(BOOLEAN())
ww_seq_run_id = Column(String(64))
# seq_submitted = Column(BOOLEAN())
# ww_seq_run_id = Column(String(64))
# sample_type = Column(String(16))
# pcr_results = Column(JSON)
well_24 = Column(String(8)) #: location on 24 well plate
sample_location = Column(String(8)) #: location on 24 well plate
# artic_rsl_plate = relationship("WastewaterArtic", back_populates="samples")
# artic_well_number = Column(String(8))
__mapper_args__ = {"polymorphic_identity": "wastewater_sample", "polymorphic_load": "inline"}
__mapper_args__ = {"polymorphic_identity": "Wastewater Sample", "polymorphic_load": "inline"}
# def to_string(self) -> str:
# """
@@ -397,6 +406,42 @@ class WastewaterSample(BasicSample):
# """
# return f"{self.well_number}: {self.ww_sample_full_id}"
# @validates("received-date")
# def convert_rdate_time(self, key, value):
# if isinstance(value, Timestamp):
# return value.date()
# return value
@validates("collection_date")
def convert_cdate_time(self, key, value):
    """
    Normalize an incoming collection date before it is stored on the column.

    Args:
        key (str): attribute name being set (always "collection_date")
        value: raw value — may be a pandas Timestamp, a date string, or an
            already-parsed date/datetime

    Returns:
        a date/datetime suitable for the TIMESTAMP column
    """
    # FIX: validator was registered for "collected-date", which is not an
    # attribute of this model, so SQLAlchemy never invoked it (the manual
    # parsing in __init__ was compensating for that). The column is
    # collection_date.
    logger.debug(f"Validating {key}: {value}")
    if isinstance(value, Timestamp):
        # pandas Timestamps come from excel parsing; reduce to a plain date
        return value.date()
    if isinstance(value, str):
        # free-text dates are parsed leniently with dateutil
        return parse(value)
    return value
# @collection_date.setter
# def collection_date(self, value):
# match value:
# case Timestamp():
# self.collection_date = value.date()
# case str():
# self.collection_date = parse(value)
# case _:
# self.collection_date = value
def __init__(self, **kwargs):
    """
    Construct a WastewaterSample, coercing a string collection_date into a
    datetime before handing off to the ORM constructor.
    """
    if 'collection_date' in kwargs:
        logger.debug(f"Got collection_date: {kwargs['collection_date']}. Attempting parse.")
        raw_date = kwargs['collection_date']
        if isinstance(raw_date, str):
            logger.debug(f"collection_date is a string...")
            kwargs['collection_date'] = parse(raw_date)
            logger.debug(f"output is {kwargs['collection_date']}")
    super().__init__(**kwargs)
def to_sub_dict(self, submission_rsl:str) -> dict:
"""
Gui friendly dictionary. Inherited from BasicSample
@@ -451,7 +496,6 @@ class WastewaterSample(BasicSample):
# return None
return sample
class BacterialCultureSample(BasicSample):
"""
base of bacterial culture sample
@@ -493,8 +537,6 @@ class BacterialCultureSample(BasicSample):
# }
return sample
class SubmissionSampleAssociation(Base):
"""
table containing submission/sample associations

View File

@@ -3,17 +3,17 @@ contains parser object for pulling values from client generated submission sheet
'''
from getpass import getuser
import pprint
from typing import Tuple
from typing import List, Tuple
import pandas as pd
from pathlib import Path
from backend.db.models import WastewaterSample, BacterialCultureSample
from backend.db import lookup_ww_sample_by_ww_sample_num, lookup_sample_by_submitter_id, get_reagents_in_extkit, lookup_kittype_by_name, lookup_kittype_by_use
from backend.db import lookup_ww_sample_by_ww_sample_num, lookup_sample_by_submitter_id, get_reagents_in_extkit, lookup_kittype_by_name, lookup_submissiontype_by_name, models
from backend.pydant import PydSubmission, PydReagent
import logging
from collections import OrderedDict
import re
import numpy as np
from datetime import date, datetime
from dateutil.parser import parse, ParserError
import uuid
# from submissions.backend.db.functions import
from tools import check_not_nan, RSLNamer, massage_common_reagents, convert_nans_to_nones, Settings
@@ -49,13 +49,13 @@ class SheetParser(object):
self.sub = OrderedDict()
# make decision about type of sample we have
self.sub['submission_type'] = self.type_decider()
# select proper parser based on sample type
parse_sub = getattr(self, f"parse_{self.sub['submission_type'].replace(' ', '_').lower()}")
parse_sub()
# self.calculate_column_count()
# # grab the info map from the submission type in database
# self.info_map = self.fetch_kit_info_map()
self.parse_info()
self.import_kit_validation_check()
self.parse_reagents()
self.import_reagent_validation_check()
self.parse_samples()
def type_decider(self) -> str:
@@ -69,7 +69,7 @@ class SheetParser(object):
if self.xl.book.properties.category != None:
logger.debug("Using file properties to find type...")
categories = [item.strip().title() for item in self.xl.book.properties.category.split(";")]
return categories[0].replace(" ", "_")
return dict(value=categories[0], parsed=False)
else:
# This code is going to be depreciated once there is full adoption of the client sheets
# with updated metadata... but how will it work for Artic?
@@ -78,120 +78,107 @@ class SheetParser(object):
for type in self.ctx.submission_types:
# This gets the *first* submission type that matches the sheet names in the workbook
if self.xl.sheet_names == self.ctx.submission_types[type]['excel_map']:
return type.title()
return dict(value=type.title(), parsed=True)
return "Unknown"
except Exception as e:
logger.warning(f"We were unable to parse the submission type due to: {e}")
# return "Unknown"
dlg = SubmissionTypeSelector(ctx=self.ctx, title="Select Submission Type", message="We were unable to find the submission type from the excel metadata. Please select from below.")
if dlg.exec():
return dlg.getValues()
return dict(value=dlg.getValues(), parsed=False)
else:
logger.warning(f"Last attempt at getting submission was rejected.")
raise ValueError("Submission Type needed.")
def parse_unknown(self) -> None:
def parse_info(self):
"""
Dummy function to handle unknown excel structures
"""
logger.error(f"Unknown excel workbook structure. Cannot parse.")
self.sub = None
def parse_generic(self, sheet_name:str) -> pd.DataFrame:
"""
Pulls information common to all wasterwater/bacterial culture types and passes on dataframe
_summary_
"""
info = InfoParser(ctx=self.ctx, xl=self.xl, submission_type=self.sub['submission_type']).parse_info()
for k,v in info.items():
if k != "sample":
self.sub[k] = v
logger.debug(f"Parser.sub after info scrape: {pprint.pformat(self.sub)}")
Args:
sheet_name (str): name of excel worksheet to pull from
def parse_reagents(self):
    """
    Delegate reagent extraction to ReagentParser and store the result under
    self.sub['reagents'].
    """
    # Relies on self.sub already containing 'submission_type' and
    # 'extraction_kit' — presumably filled in by parse_info(); verify order of
    # calls in __init__.
    self.sub['reagents'] = ReagentParser(ctx=self.ctx, xl=self.xl, submission_type=self.sub['submission_type'], extraction_kit=self.sub['extraction_kit']).parse_reagents()
Returns:
pd.DataFrame: relevant dataframe from excel sheet
"""
# self.xl is a pd.ExcelFile so we need to parse it into a df
submission_info = self.xl.parse(sheet_name=sheet_name, dtype=object)
self.sub['submitter_plate_num'] = submission_info.iloc[0][1]
if check_not_nan(submission_info.iloc[10][1]):
self.sub['rsl_plate_num'] = RSLNamer(ctx=self.ctx, instr=submission_info.iloc[10][1]).parsed_name
else:
# self.sub['rsl_plate_num'] = RSLNamer(self.filepath).parsed_name
self.sub['rsl_plate_num'] = None
self.sub['submitted_date'] = submission_info.iloc[1][1]
self.sub['submitting_lab'] = submission_info.iloc[0][3]
self.sub['sample_count'] = submission_info.iloc[2][3]
self.sub['extraction_kit'] = submission_info.iloc[3][3]
if check_not_nan(submission_info.iloc[1][3]):
self.sub['submission_type'] = dict(value=submission_info.iloc[1][3], parsed=True)
else:
self.sub['submission_type'] = dict(value=self.sub['submission_type'], parsed=False)
return submission_info
def parse_samples(self):
    """
    Delegate per-sample parsing to SampleParser; stores the parse result and
    the sample list on self.sample_result and self.sub['samples'].
    """
    # submission_type is a dict (value/parsed) here, so only its 'value' is
    # passed on to the sample parser.
    self.sample_result, self.sub['samples'] = SampleParser(ctx=self.ctx, xl=self.xl, submission_type=self.sub['submission_type']['value']).parse_samples()
def parse_bacterial_culture(self) -> None:
"""
pulls info specific to bacterial culture sample type
"""
def parse_reagents(df:pd.DataFrame) -> None:
"""
Pulls reagents from the bacterial sub-dataframe
# def parse_reagents(df:pd.DataFrame) -> None:
# """
# Pulls reagents from the bacterial sub-dataframe
Args:
df (pd.DataFrame): input sub dataframe
"""
for ii, row in df.iterrows():
# skip positive control
logger.debug(f"Running reagent parse for {row[1]} with type {type(row[1])} and value: {row[2]} with type {type(row[2])}")
# if the lot number isn't a float and the reagent type isn't blank
# if not isinstance(row[2], float) and check_not_nan(row[1]):
if check_not_nan(row[1]):
# must be prefixed with 'lot_' to be recognized by gui
# This is no longer true since reagents are loaded into their own key in dictionary
try:
reagent_type = row[1].replace(' ', '_').lower().strip()
except AttributeError:
pass
# If there is a double slash in the type field, such as ethanol/iso
# Use the cell to the left for reagent type.
if reagent_type == "//":
if check_not_nan(row[2]):
reagent_type = row[0].replace(' ', '_').lower().strip()
else:
continue
try:
output_var = convert_nans_to_nones(str(row[2]).upper())
except AttributeError:
logger.debug(f"Couldn't upperize {row[2]}, must be a number")
output_var = convert_nans_to_nones(str(row[2]))
logger.debug(f"Output variable is {output_var}")
logger.debug(f"Expiry date for imported reagent: {row[3]}")
if check_not_nan(row[3]):
try:
expiry = row[3].date()
except AttributeError as e:
try:
expiry = datetime.strptime(row[3], "%Y-%m-%d")
except TypeError as e:
expiry = datetime.fromordinal(datetime(1900, 1, 1).toordinal() + row[3] - 2)
else:
logger.debug(f"Date: {row[3]}")
# expiry = date.today()
expiry = date(year=1970, month=1, day=1)
# self.sub[f"lot_{reagent_type}"] = {'lot':output_var, 'exp':expiry}
# self.sub['reagents'].append(dict(type=reagent_type, lot=output_var, exp=expiry))
self.sub['reagents'].append(PydReagent(type=reagent_type, lot=output_var, exp=expiry))
submission_info = self.parse_generic("Sample List")
# iloc is [row][column] and the first row is set as header row so -2
self.sub['technician'] = str(submission_info.iloc[11][1])
# reagents
# must be prefixed with 'lot_' to be recognized by gui
# This is no longer true wince the creation of self.sub['reagents']
self.sub['reagents'] = []
reagent_range = submission_info.iloc[1:14, 4:8]
logger.debug(reagent_range)
# Args:
# df (pd.DataFrame): input sub dataframe
# """
# for ii, row in df.iterrows():
# # skip positive control
# logger.debug(f"Running reagent parse for {row[1]} with type {type(row[1])} and value: {row[2]} with type {type(row[2])}")
# # if the lot number isn't a float and the reagent type isn't blank
# # if not isinstance(row[2], float) and check_not_nan(row[1]):
# if check_not_nan(row[1]):
# # must be prefixed with 'lot_' to be recognized by gui
# # This is no longer true since reagents are loaded into their own key in dictionary
# try:
# reagent_type = row[1].replace(' ', '_').lower().strip()
# except AttributeError:
# pass
# # If there is a double slash in the type field, such as ethanol/iso
# # Use the cell to the left for reagent type.
# if reagent_type == "//":
# if check_not_nan(row[2]):
# reagent_type = row[0].replace(' ', '_').lower().strip()
# else:
# continue
# try:
# output_var = convert_nans_to_nones(str(row[2]).upper())
# except AttributeError:
# logger.debug(f"Couldn't upperize {row[2]}, must be a number")
# output_var = convert_nans_to_nones(str(row[2]))
# logger.debug(f"Output variable is {output_var}")
# logger.debug(f"Expiry date for imported reagent: {row[3]}")
# if check_not_nan(row[3]):
# try:
# expiry = row[3].date()
# except AttributeError as e:
# try:
# expiry = datetime.strptime(row[3], "%Y-%m-%d")
# except TypeError as e:
# expiry = datetime.fromordinal(datetime(1900, 1, 1).toordinal() + row[3] - 2)
# else:
# logger.debug(f"Date: {row[3]}")
# # expiry = date.today()
# expiry = date(year=1970, month=1, day=1)
# # self.sub[f"lot_{reagent_type}"] = {'lot':output_var, 'exp':expiry}
# # self.sub['reagents'].append(dict(type=reagent_type, lot=output_var, exp=expiry))
# self.sub['reagents'].append(PydReagent(type=reagent_type, lot=output_var, exp=expiry))
# submission_info = self.xl.parse(sheet_name="Sample List", dtype=object)
# self.sub['extraction_kit'] = submission_info.iloc[3][3]
# submission_info = self.parse_generic("Sample List")
# # iloc is [row][column] and the first row is set as header row so -2
# self.sub['technician'] = str(submission_info.iloc[11][1])
# # reagents
# # must be prefixed with 'lot_' to be recognized by gui
# # This is no longer true wince the creation of self.sub['reagents']
# self.sub['reagents'] = []
# reagent_range = submission_info.iloc[1:14, 4:8]
# logger.debug(reagent_range)
# parse_reagents(reagent_range)
# get individual sample info
sample_parser = SampleParser(self.ctx, submission_info.iloc[16:112])
logger.debug(f"Sample type: {self.sub['submission_type']}")
sample_parse = getattr(sample_parser, f"parse_{self.sub['submission_type']['value'].replace(' ', '_').lower()}_samples")
if isinstance(self.sub['submission_type'], dict):
getter = self.sub['submission_type']['value']
else:
getter = self.sub['submission_type']
sample_parse = getattr(sample_parser, f"parse_{getter.replace(' ', '_').lower()}_samples")
logger.debug(f"Parser result: {self.sub}")
self.sample_result, self.sub['samples'] = sample_parse()
@@ -206,40 +193,40 @@ class SheetParser(object):
elu_map.columns = elu_map.iloc[0]
elu_map = elu_map.tail(-1)
return elu_map
def parse_reagents(df:pd.DataFrame) -> None:
"""
Pulls reagents from the bacterial sub-dataframe
# def parse_reagents(df:pd.DataFrame) -> None:
# """
# Pulls reagents from the bacterial sub-dataframe
Args:
df (pd.DataFrame): input sub dataframe
"""
# iterate through sub-df rows
for ii, row in df.iterrows():
# logger.debug(f"Parsing this row for reagents: {row}")
if check_not_nan(row[5]):
# must be prefixed with 'lot_' to be recognized by gui
# regex below will remove 80% from 80% ethanol in the Wastewater kit.
output_key = re.sub(r"^\d{1,3}%\s?", "", row[0].lower().strip().replace(' ', '_'))
output_key = output_key.strip("_")
# output_var is the lot number
try:
output_var = convert_nans_to_nones(str(row[5].upper()))
except AttributeError:
logger.debug(f"Couldn't upperize {row[5]}, must be a number")
output_var = convert_nans_to_nones(str(row[5]))
if check_not_nan(row[7]):
try:
expiry = row[7].date()
except AttributeError:
expiry = date.today()
else:
expiry = date.today()
logger.debug(f"Expiry date for {output_key}: {expiry} of type {type(expiry)}")
# self.sub[f"lot_{output_key}"] = {'lot':output_var, 'exp':expiry}
# self.sub['reagents'].append(dict(type=output_key, lot=output_var, exp=expiry))
reagent = PydReagent(type=output_key, lot=output_var, exp=expiry)
logger.debug(f"Here is the created reagent: {reagent}")
self.sub['reagents'].append(reagent)
# Args:
# df (pd.DataFrame): input sub dataframe
# """
# # iterate through sub-df rows
# for ii, row in df.iterrows():
# # logger.debug(f"Parsing this row for reagents: {row}")
# if check_not_nan(row[5]):
# # must be prefixed with 'lot_' to be recognized by gui
# # regex below will remove 80% from 80% ethanol in the Wastewater kit.
# output_key = re.sub(r"^\d{1,3}%\s?", "", row[0].lower().strip().replace(' ', '_'))
# output_key = output_key.strip("_")
# # output_var is the lot number
# try:
# output_var = convert_nans_to_nones(str(row[5].upper()))
# except AttributeError:
# logger.debug(f"Couldn't upperize {row[5]}, must be a number")
# output_var = convert_nans_to_nones(str(row[5]))
# if check_not_nan(row[7]):
# try:
# expiry = row[7].date()
# except AttributeError:
# expiry = date.today()
# else:
# expiry = date.today()
# logger.debug(f"Expiry date for {output_key}: {expiry} of type {type(expiry)}")
# # self.sub[f"lot_{output_key}"] = {'lot':output_var, 'exp':expiry}
# # self.sub['reagents'].append(dict(type=output_key, lot=output_var, exp=expiry))
# reagent = PydReagent(type=output_key, lot=output_var, exp=expiry)
# logger.debug(f"Here is the created reagent: {reagent}")
# self.sub['reagents'].append(reagent)
# parse submission sheet
submission_info = self.parse_generic("WW Submissions (ENTER HERE)")
# parse enrichment sheet
@@ -274,41 +261,42 @@ class SheetParser(object):
"""
pulls info specific to wastewater_arctic submission type
"""
self.sub['submission_type'] = dict(value=self.sub['submission_type'], parsed=True)
def parse_reagents(df:pd.DataFrame):
logger.debug(df)
for ii, row in df.iterrows():
if check_not_nan(row[1]):
try:
output_key = re.sub(r"\(.+?\)", "", row[0].lower().strip().replace(' ', '_'))
except AttributeError:
continue
output_key = output_key.strip("_")
output_key = massage_common_reagents(output_key)
try:
output_var = convert_nans_to_nones(str(row[1].upper()))
except AttributeError:
logger.debug(f"Couldn't upperize {row[1]}, must be a number")
output_var = convert_nans_to_nones(str(row[1]))
logger.debug(f"Output variable is {output_var}")
logger.debug(f"Expiry date for imported reagent: {row[2]}")
if check_not_nan(row[2]):
try:
expiry = row[2].date()
except AttributeError as e:
try:
expiry = datetime.strptime(row[2], "%Y-%m-%d")
except TypeError as e:
expiry = datetime.fromordinal(datetime(1900, 1, 1).toordinal() + row[2] - 2)
except ValueError as e:
continue
else:
logger.debug(f"Date: {row[2]}")
expiry = date.today()
# self.sub['reagents'].append(dict(type=output_key, lot=output_var, exp=expiry))
self.sub['reagents'].append(PydReagent(type=output_key, lot=output_var, exp=expiry))
else:
continue
if isinstance(self.sub['submission_type'], str):
self.sub['submission_type'] = dict(value=self.sub['submission_type'], parsed=True)
# def parse_reagents(df:pd.DataFrame):
# logger.debug(df)
# for ii, row in df.iterrows():
# if check_not_nan(row[1]):
# try:
# output_key = re.sub(r"\(.+?\)", "", row[0].lower().strip().replace(' ', '_'))
# except AttributeError:
# continue
# output_key = output_key.strip("_")
# output_key = massage_common_reagents(output_key)
# try:
# output_var = convert_nans_to_nones(str(row[1].upper()))
# except AttributeError:
# logger.debug(f"Couldn't upperize {row[1]}, must be a number")
# output_var = convert_nans_to_nones(str(row[1]))
# logger.debug(f"Output variable is {output_var}")
# logger.debug(f"Expiry date for imported reagent: {row[2]}")
# if check_not_nan(row[2]):
# try:
# expiry = row[2].date()
# except AttributeError as e:
# try:
# expiry = datetime.strptime(row[2], "%Y-%m-%d")
# except TypeError as e:
# expiry = datetime.fromordinal(datetime(1900, 1, 1).toordinal() + row[2] - 2)
# except ValueError as e:
# continue
# else:
# logger.debug(f"Date: {row[2]}")
# expiry = date.today()
# # self.sub['reagents'].append(dict(type=output_key, lot=output_var, exp=expiry))
# self.sub['reagents'].append(PydReagent(type=output_key, lot=output_var, exp=expiry))
# else:
# continue
def massage_samples(df:pd.DataFrame, lookup_table:pd.DataFrame) -> pd.DataFrame:
"""
Takes sample info from Artic sheet format and converts to regular formate
@@ -376,30 +364,30 @@ class SheetParser(object):
sample_parse = getattr(sample_parser, f"parse_{self.sub['submission_type']['value'].lower()}_samples")
self.sample_result, self.sub['samples'] = sample_parse()
def parse_reagents(self):
ext_kit = lookup_kittype_by_name(ctx=self.ctx, name=self.sub['extraction_kit'])
if ext_kit != None:
logger.debug(f"Querying extraction kit: {self.sub['submission_type']}")
reagent_map = ext_kit.construct_xl_map_for_use(use=self.sub['submission_type']['value'])
logger.debug(f"Reagent map: {pprint.pformat(reagent_map)}")
else:
raise AttributeError("No extraction kit found, unable to parse reagents")
for sheet in self.xl.sheet_names:
df = self.xl.parse(sheet)
relevant = {k:v for k,v in reagent_map.items() if sheet in reagent_map[k]['sheet']}
logger.debug(f"relevant map for {sheet}: {pprint.pformat(relevant)}")
if relevant == {}:
continue
for item in reagent_map:
try:
# role = item
name = df.iat[relevant[item]['name']['row']-2, relevant[item]['name']['column']-1]
lot = df.iat[relevant[item]['lot']['row']-2, relevant[item]['lot']['column']-1]
expiry = df.iat[relevant[item]['expiry']['row']-2, relevant[item]['expiry']['column']-1]
except (KeyError, IndexError):
continue
# self.sub['reagents'].append(dict(name=name, lot=lot, expiry=expiry, role=role))
self.sub['reagents'].append(PydReagent(type=item, lot=lot, exp=expiry, name=name))
# def parse_reagents(self):
# ext_kit = lookup_kittype_by_name(ctx=self.ctx, name=self.sub['extraction_kit'])
# if ext_kit != None:
# logger.debug(f"Querying extraction kit: {self.sub['submission_type']}")
# reagent_map = ext_kit.construct_xl_map_for_use(use=self.sub['submission_type']['value'])
# logger.debug(f"Reagent map: {pprint.pformat(reagent_map)}")
# else:
# raise AttributeError("No extraction kit found, unable to parse reagents")
# for sheet in self.xl.sheet_names:
# df = self.xl.parse(sheet)
# relevant = {k:v for k,v in reagent_map.items() if sheet in reagent_map[k]['sheet']}
# logger.debug(f"relevant map for {sheet}: {pprint.pformat(relevant)}")
# if relevant == {}:
# continue
# for item in relevant:
# try:
# # role = item
# name = df.iat[relevant[item]['name']['row']-2, relevant[item]['name']['column']-1]
# lot = df.iat[relevant[item]['lot']['row']-2, relevant[item]['lot']['column']-1]
# expiry = df.iat[relevant[item]['expiry']['row']-2, relevant[item]['expiry']['column']-1]
# except (KeyError, IndexError):
# continue
# # self.sub['reagents'].append(dict(name=name, lot=lot, expiry=expiry, role=role))
# self.sub['reagents'].append(PydReagent(type=item, lot=lot, exp=expiry, name=name))
def import_kit_validation_check(self):
@@ -420,7 +408,8 @@ class SheetParser(object):
else:
raise ValueError("Extraction kit needed.")
else:
self.sub['extraction_kit'] = dict(value=self.sub['extraction_kit'], parsed=False)
if isinstance(self.sub['extraction_kit'], str):
self.sub['extraction_kit'] = dict(value=self.sub['extraction_kit'], parsed=False)
# logger.debug(f"Here is the validated parser dictionary:\n\n{pprint.pformat(self.sub)}\n\n")
# return parser_sub
@@ -430,7 +419,8 @@ class SheetParser(object):
Enforce that only allowed reagents get into the Pydantic Model
"""
allowed_reagents = [item.name for item in get_reagents_in_extkit(ctx=self.ctx, kit_name=self.sub['extraction_kit']['value'])]
self.sub['reagents'] = [reagent for reagent in self.sub['reagents'] if reagent.type in allowed_reagents]
logger.debug(f"List of reagents for comparison with allowed_reagents: {pprint.pformat(self.sub['reagents'])}")
self.sub['reagents'] = [reagent for reagent in self.sub['reagents'] if reagent['value'].type in allowed_reagents]
def to_pydantic(self) -> PydSubmission:
"""
@@ -444,6 +434,96 @@ class SheetParser(object):
delattr(psm, "filepath")
return psm
class InfoParser(object):
    """Scrapes submission-level info fields from an Excel workbook.

    Uses the database-stored info map for the given submission type to locate
    the sheet/row/column of each field, then reads those cells from *xl*.
    """

    def __init__(self, ctx:Settings, xl:pd.ExcelFile, submission_type:dict):
        """
        Args:
            ctx (Settings): settings object carrying the database session.
            xl (pd.ExcelFile): submission workbook to scrape.
            submission_type (dict): dict whose 'value' key holds the type name.
        """
        self.ctx = ctx
        self.map = self.fetch_submission_info_map(submission_type=submission_type)
        self.xl = xl
        logger.debug(f"Info map for InfoParser: {pprint.pformat(self.map)}")

    def fetch_submission_info_map(self, submission_type:dict) -> dict:
        """Look up this submission type's info map, minus its 'samples' entry.

        Args:
            submission_type (dict): dict whose 'value' key holds the type name.

        Returns:
            dict: field name -> location info ('sheets', 'row', 'column').
        """
        logger.debug(f"Looking up submission type: {submission_type['value']}")
        submission_type = lookup_submissiontype_by_name(ctx=self.ctx, type_name=submission_type['value'])
        info_map = submission_type.info_map
        # Sample locations are handled by SampleParser, not here.
        try:
            del info_map['samples']
        except KeyError:
            pass
        return info_map

    def parse_info(self) -> dict:
        """Read every mapped info field out of the workbook.

        Returns:
            dict: field name -> dict(value=..., parsed=bool); 'parsed' is False
            when the cell was empty/NaN and the value likely needs user input.
        """
        dicto = {}
        for sheet in self.xl.sheet_names:
            df = self.xl.parse(sheet, header=None)
            # Restrict the map to fields located on this sheet.
            relevant = {}
            for k, v in self.map.items():
                if k == "samples":
                    # Defensive: already removed in fetch_submission_info_map.
                    continue
                if sheet in self.map[k]['sheets']:
                    relevant[k] = v
            logger.debug(f"relevant map for {sheet}: {pprint.pformat(relevant)}")
            if relevant == {}:
                continue
            for item in relevant:
                # BUGFIX: the cell read is what can raise KeyError/IndexError,
                # so it must sit inside the try (the original guarded only the
                # dict assignment, which cannot fail).
                try:
                    value = df.iat[relevant[item]['row']-1, relevant[item]['column']-1]
                except (KeyError, IndexError):
                    continue
                logger.debug(f"Setting {item} on {sheet} to {value}")
                if check_not_nan(value):
                    dicto[item] = dict(value=value, parsed=True)
                else:
                    dicto[item] = dict(value=convert_nans_to_nones(value), parsed=False)
        # Guarantee the key exists so downstream validators can generate an id.
        if "submitter_plate_num" not in dicto.keys():
            dicto['submitter_plate_num'] = dict(value=None, parsed=False)
        return dicto
class ReagentParser(object):
    """Scrapes reagent name/lot/expiry cells from a submission workbook.

    Cell locations come from the extraction kit's Excel map for the given
    submission type.
    """

    def __init__(self, ctx:Settings, xl:pd.ExcelFile, submission_type:str, extraction_kit:str):
        self.ctx = ctx
        self.map = self.fetch_kit_info_map(extraction_kit=extraction_kit, submission_type=submission_type)
        self.xl = xl

    def fetch_kit_info_map(self, extraction_kit:dict, submission_type:str):
        """Return the kit's reagent location map (without the 'info' entry)."""
        kit = lookup_kittype_by_name(ctx=self.ctx, name=extraction_kit['value'])
        if isinstance(submission_type, dict):
            submission_type = submission_type['value']
        reagent_map = kit.construct_xl_map_for_use(submission_type.title())
        del reagent_map['info']
        return reagent_map

    def parse_reagents(self) -> list:
        """Scrape every mapped reagent from the workbook.

        Returns:
            list: dicts of the form dict(value=PydReagent, parsed=bool);
            'parsed' is False when no lot number could be read.
        """
        listo = []
        for sheet in self.xl.sheet_names:
            df = self.xl.parse(sheet, header=None, dtype=object)
            # Only keep map entries that live on the current sheet.
            relevant = {k.strip():v for k,v in self.map.items() if sheet in self.map[k]['sheet']}
            logger.debug(f"relevant map for {sheet}: {pprint.pformat(relevant)}")
            if relevant == {}:
                continue
            for item in relevant:
                logger.debug(f"Attempting to scrape: {item}")
                try:
                    cell_name = df.iat[relevant[item]['name']['row']-1, relevant[item]['name']['column']-1]
                    cell_lot = df.iat[relevant[item]['lot']['row']-1, relevant[item]['lot']['column']-1]
                    cell_expiry = df.iat[relevant[item]['expiry']['row']-1, relevant[item]['expiry']['column']-1]
                except (KeyError, IndexError):
                    # Unmapped/missing cells yield an empty placeholder reagent.
                    listo.append(dict(value=PydReagent(type=item.strip(), lot=None, exp=None, name=None), parsed=False))
                    continue
                # A readable lot number means the reagent is fully parsed.
                parsed = bool(check_not_nan(cell_lot))
                logger.debug(f"Got lot for {item}-{cell_name}: {cell_lot} as {type(cell_lot)}")
                cell_lot = str(cell_lot)
                listo.append(dict(value=PydReagent(type=item.strip(), lot=cell_lot, exp=cell_expiry, name=cell_name), parsed=parsed))
        return listo
class SampleParser(object):
@@ -451,7 +531,7 @@ class SampleParser(object):
object to pull data for samples in excel sheet and construct individual sample objects
"""
def __init__(self, ctx:Settings, df:pd.DataFrame, elution_map:pd.DataFrame|None=None) -> None:
def __init__(self, ctx:Settings, xl:pd.ExcelFile, submission_type:str) -> None:
"""
convert sample sub-dataframe to dictionary of records
@@ -460,12 +540,122 @@ class SampleParser(object):
df (pd.DataFrame): input sample dataframe
elution_map (pd.DataFrame | None, optional): optional map of elution plate. Defaults to None.
"""
self.samples = []
self.ctx = ctx
self.samples = df.to_dict("records")
self.elution_map = elution_map
self.xl = xl
self.submission_type = submission_type
sample_info_map = self.fetch_sample_info_map(submission_type=submission_type)
self.plate_map = self.construct_plate_map(plate_map_location=sample_info_map['plate_map'])
self.lookup_table = self.construct_lookup_table(lookup_table_location=sample_info_map['lookup_table'])
self.excel_to_db_map = sample_info_map['xl_db_translation']
self.create_basic_dictionaries_from_plate_map()
self.parse_lookup_table()
def fetch_sample_info_map(self, submission_type:dict) -> dict:
    """Return the 'samples' section of the info map for *submission_type*."""
    logger.debug(f"Looking up submission type: {submission_type}")
    type_obj = lookup_submissiontype_by_name(ctx=self.ctx, type_name=submission_type)
    return type_obj.info_map['samples']
def construct_plate_map(self, plate_map_location:dict) -> pd.DataFrame:
    """Cut the plate-map region out of its sheet and index it by row label.

    Args:
        plate_map_location (dict): sheet name plus 1-based start/end rows and columns.

    Returns:
        pd.DataFrame: plate map with header row promoted to columns and the
        first column promoted to the index.
    """
    raw = self.xl.parse(plate_map_location['sheet'], header=None, dtype=object)
    row_span = slice(plate_map_location['start_row']-1, plate_map_location['end_row'])
    col_span = slice(plate_map_location['start_column']-1, plate_map_location['end_column'])
    raw = raw.iloc[row_span, col_span]
    # logger.debug(f"Input dataframe for plate map: {raw}")
    # First remaining row carries the headers; first column carries row labels.
    plate = pd.DataFrame(raw.values[1:], columns=raw.iloc[0])
    plate = plate.set_index(plate.columns[0])
    return plate
def construct_lookup_table(self, lookup_table_location) -> pd.DataFrame:
    """Cut the sample lookup-table region out of its sheet.

    Args:
        lookup_table_location (dict): sheet name plus 1-based start/end rows.

    Returns:
        pd.DataFrame: lookup table with the header row promoted to columns
        and a fresh integer index.
    """
    raw = self.xl.parse(lookup_table_location['sheet'], header=None, dtype=object)
    raw = raw.iloc[lookup_table_location['start_row']-1:lookup_table_location['end_row']]
    table = pd.DataFrame(raw.values[1:], columns=raw.iloc[0])
    table = table.reset_index(drop=True)
    return table
def create_basic_dictionaries_from_plate_map(self):
    """Seed self.samples with submitter_id/row/column dicts from the plate map."""
    trimmed = self.plate_map.dropna(axis=1, how='all')
    headers = trimmed.columns.tolist()
    for _, plate_row in trimmed.iterrows():
        for col in headers:
            if not check_not_nan(plate_row[col]):
                continue
            sample_id = plate_row[col]
            logger.debug(f"Adding sample {plate_row[col]}")
            # Translate the header label into a 1-based column number; on
            # failure the original label is carried through unchanged.
            try:
                col = self.plate_map.columns.get_loc(col) + 1
            except Exception as e:
                logger.error(f"Unable to get column index of {col} due to {e}")
            # row_keys presumably maps a row label (e.g. "A") to a number — defined elsewhere in the module.
            self.samples.append(dict(submitter_id=sample_id, row=row_keys[plate_row._name], column=col))
def parse_lookup_table(self):
    """Merge per-sample details from the lookup table into self.samples."""

    def _maybe_date(text) -> str|date:
        # Strings that look like ISO-ish dates get parsed; parse failures
        # become None; everything else passes through untouched.
        date_like = re.compile(r"\d{4}-?\d{2}-?\d{2}")
        if not date_like.search(text):
            return text
        try:
            return parse(text)
        except ParserError:
            return None

    for sample in self.samples:
        # Row(s) of the lookup table mentioning this sample's submitter id.
        hit = self.lookup_table[self.lookup_table.isin([sample['submitter_id']]).any(axis=1)]
        for key, val in hit.squeeze().to_dict().items():
            if not (check_not_nan(key) and isinstance(key, str)):
                continue
            if key.lower() in sample:
                continue
            # Normalize header text into a snake_case field name.
            key = key.replace(" ", "_").replace("#", "num").lower()
            if isinstance(val, pd.Timestamp):
                sample[key] = val.date()
            elif isinstance(val, str):
                sample[key] = _maybe_date(val)
            else:
                sample[key] = val
        logger.debug(f"Output sample dict: {sample}")
def parse_samples(self) -> List[dict]:
    """Translate raw sample dicts to DB field names and build sample objects.

    Returns:
        tuple: (None, list of dicts produced by generate_sample_object)
    """
    result = None
    new_samples = []
    for sample in self.samples:
        translated = {}
        for key, val in sample.items():
            # Nested dicts cannot be stored; floats may hide NaN sentinels.
            if isinstance(val, dict):
                val = None
            elif isinstance(val, float):
                val = convert_nans_to_nones(val)
            try:
                translated[self.excel_to_db_map[key]] = val
            except KeyError:
                # No translation known — keep the original key.
                translated[key] = convert_nans_to_nones(val)
        translated['sample_type'] = f"{self.submission_type} Sample"
        # logger.debug(f"New sample dictionary going into object creation:\n{translated}")
        new_samples.append(self.generate_sample_object(translated))
    return result, new_samples
def generate_sample_object(self, input_dict) -> models.BasicSample:
    """Build (or update) the database sample instance for one translated dict.

    Returns:
        dict: sample instance plus its plate row and column.
    """
    # e.g. "Wastewater Sample" -> "WastewaterSample" model class name.
    model_name = input_dict['sample_type'].replace(" ", "")
    model = getattr(models, model_name)
    # Reuse an existing sample when the submitter id is already in the DB.
    instance = lookup_sample_by_submitter_id(ctx=self.ctx, submitter_id=input_dict['submitter_id'])
    if instance is None:
        instance = model()
    for field, val in input_dict.items():
        try:
            setattr(instance, field, val)
        except Exception as e:
            logger.error(f"Failed to set {field} due to {type(e).__name__}: {e}")
    return dict(sample=instance, row=input_dict['row'], column=input_dict['column'])
def parse_bacterial_culture_samples(self) -> Tuple[str|None, list[dict]]:
# def parse_bacterial_culture_samples(self) -> Tuple[str|None, list[dict]]:
"""
construct bacterial culture specific sample objects
@@ -493,8 +683,7 @@ class SampleParser(object):
new_list.append(dict(sample=instance, row=row, column=column))
return None, new_list
def parse_wastewater_samples(self) -> Tuple[str|None, list[dict]]:
# def parse_wastewater_samples(self) -> Tuple[str|None, list[dict]]:
"""
construct wastewater specific sample objects
@@ -590,7 +779,7 @@ class SampleParser(object):
new_list.append(dict(sample=instance, row=row, column=column))
return return_val, new_list
def parse_wastewater_artic_samples(self) -> Tuple[str|None, list[WastewaterSample]]:
# def parse_wastewater_artic_samples(self) -> Tuple[str|None, list[WastewaterSample]]:
"""
The artic samples are the wastewater samples that are to be sequenced
So we will need to lookup existing ww samples and append Artic well # and plate relation
@@ -751,5 +940,4 @@ class PCRParser(object):
self.samples.append(sample_obj)

View File

@@ -48,34 +48,65 @@ class PydReagent(BaseModel):
if value != None:
return convert_nans_to_nones(str(value))
return value
@field_validator("name", mode="before")
@classmethod
def enforce_name(cls, value, values):
if value != None:
return convert_nans_to_nones(str(value))
else:
return values.data['type']
class PydSubmission(BaseModel, extra=Extra.allow):
ctx: Settings
filepath: Path
submission_type: str|dict|None
submitter_plate_num: str|None
rsl_plate_num: str|dict|None
submitted_date: date|dict
submitting_lab: str|None
sample_count: int
extraction_kit: str|dict|None
technician: str|dict|None
reagents: List[PydReagent] = []
submission_type: dict|None
submitter_plate_num: dict|None
rsl_plate_num: dict|None
submitted_date: dict|None
submitting_lab: dict|None
sample_count: dict|None
extraction_kit: dict|None
technician: dict|None
reagents: List[dict] = []
samples: List[Any]
# missing_fields: List[str] = []
@field_validator("submitter_plate_num")
@classmethod
def rescue_submitter_id(cls, value):
if value == None:
return dict(value=None, parsed=False)
return value
@field_validator("submitter_plate_num")
@classmethod
def enforce_with_uuid(cls, value):
logger.debug(f"submitter plate id: {value}")
if value['value'] == None:
return dict(value=uuid.uuid4().hex.upper(), parsed=False)
else:
return value
@field_validator("submitted_date", mode="before")
@classmethod
def rescue_date(cls, value):
if value == None:
return dict(value=date.today(), parsed=False)
return value
@field_validator("submitted_date")
@classmethod
def strip_datetime_string(cls, value):
if not check_not_nan(value):
value = date.today()
if isinstance(value, datetime):
return dict(value=value, parsed=True)
if isinstance(value, date):
if isinstance(value['value'], datetime):
return value
string = re.sub(r"(_|-)\d$", "", value)
if isinstance(value['value'], date):
return value
if isinstance(value['value'], int):
return dict(value=datetime.fromordinal(datetime(1900, 1, 1).toordinal() + value['value'] - 2).date(), parsed=False)
string = re.sub(r"(_|-)\d$", "", value['value'])
try:
output = dict(value=parse(string).date(), parsed=False)
except ParserError as e:
@@ -85,31 +116,32 @@ class PydSubmission(BaseModel, extra=Extra.allow):
except Exception as e:
logger.error(f"Problem with parse fallback: {e}")
return output
@field_validator("submitter_plate_num")
@classmethod
def enforce_with_uuid(cls, value):
if value == None or value == "" or value == "None":
return uuid.uuid4().hex.upper()
else:
return value
@field_validator("submitting_lab", mode="before")
@classmethod
def transform_nan(cls, value):
return convert_nans_to_nones(value)
def rescue_submitting_lab(cls, value):
if value == None:
return dict(value=None, parsed=False)
return value
@field_validator("rsl_plate_num", mode='before')
@classmethod
def rescue_rsl_number(cls, value):
if value == None:
return dict(value=None, parsed=False)
return value
@field_validator("rsl_plate_num")
@classmethod
def rsl_from_file(cls, value, values):
logger.debug(f"RSL-plate initial value: {value}")
if isinstance(values.data['submission_type'], dict):
sub_type = values.data['submission_type']['value']
elif isinstance(values.data['submission_type'], str):
sub_type = values.data['submission_type']
if check_not_nan(value):
if lookup_submission_by_rsl_num(ctx=values.data['ctx'], rsl_num=value) == None:
return dict(value=value, parsed=True)
logger.debug(f"RSL-plate initial value: {value['value']}")
# if isinstance(values.data['submission_type'], dict):
# sub_type = values.data['submission_type']['value']
# elif isinstance(values.data['submission_type'], str):
sub_type = values.data['submission_type']['value']
if check_not_nan(value['value']):
if lookup_submission_by_rsl_num(ctx=values.data['ctx'], rsl_num=value['value']) == None:
return dict(value=value['value'], parsed=True)
else:
logger.warning(f"Submission number {value} already exists in DB, attempting salvage with filepath")
output = RSLNamer(ctx=values.data['ctx'], instr=values.data['filepath'].__str__(), sub_type=sub_type).parsed_name
@@ -120,58 +152,60 @@ class PydSubmission(BaseModel, extra=Extra.allow):
@field_validator("technician", mode="before")
@classmethod
def rescue_tech(cls, value):
if value == None:
return dict(value=None, parsed=False)
return value
@field_validator("technician")
@classmethod
def enforce_tech(cls, value):
if check_not_nan(value):
if isinstance(value, dict):
value['value'] = re.sub(r"\: \d", "", value['value'])
return value
else:
return dict(value=re.sub(r"\: \d", "", value), parsed=True)
if check_not_nan(value['value']):
value['value'] = re.sub(r"\: \d", "", value['value'])
return value
else:
return dict(value="Unnamed", parsed=False)
return dict(value=convert_nans_to_nones(value['value']), parsed=False)
return value
@field_validator("reagents")
@classmethod
def remove_atcc(cls, value):
return_val = []
for reagent in value:
logger.debug(f"Pydantic reagent: {reagent}")
if reagent.type == None:
continue
else:
return_val.append(reagent)
return return_val
# @field_validator("reagents")
# @classmethod
# def remove_atcc(cls, value):
# return_val = []
# for reagent in value:
# logger.debug(f"Pydantic reagent: {reagent}")
# if reagent['value'].type == None:
# continue
# else:
# return_val.append(reagent)
# return return_val
@field_validator("sample_count", mode='before')
@classmethod
def enforce_sample_count(cls, value):
if check_not_nan(value):
return int(value)
else:
return convert_nans_to_nones(value)
def rescue_sample_count(cls, value):
if value == None:
return dict(value=None, parsed=False)
return value
@field_validator("extraction_kit", mode='before')
@classmethod
def get_kit_if_none(cls, value):
def rescue_kit(cls, value):
# from frontend.custom_widgets.pop_ups import KitSelector
if check_not_nan(value):
if isinstance(value, str):
return dict(value=value, parsed=True)
elif isinstance(value, dict):
return value
else:
raise ValueError(f"No extraction kit found.")
# if check_not_nan(value):
# if isinstance(value, str):
# return dict(value=value, parsed=True)
# elif isinstance(value, dict):
# return value
# else:
# raise ValueError(f"No extraction kit found.")
if value == None:
return dict(value=None, parsed=False)
return value
@field_validator("submission_type", mode='before')
@classmethod
def make_submission_type(cls, value, values):
if check_not_nan(value):
if isinstance(value, dict):
value['value'] = value['value'].title()
return value
elif isinstance(value, str):
return dict(value=value.title(), parsed=False)
if check_not_nan(value['value']):
value = value['value'].title()
return dict(value=value, parsed=True)
else:
return dict(value=RSLNamer(ctx=values.data['ctx'], instr=values.data['filepath'].__str__()).submission_type.title(), parsed=False)