Working new version.

This commit is contained in:
Landon Wark
2023-09-11 13:45:10 -05:00
parent e0b80f6c7a
commit 5a978c9bff
15 changed files with 417 additions and 743 deletions

View File

@@ -4,9 +4,6 @@ Convenience functions for interacting with the database.
import pprint
from . import models
# from .models.kits import KitType
# from .models.submissions import BasicSample, reagents_submissions, BasicSubmission, SubmissionSampleAssociation
# from .models import submissions
import pandas as pd
import sqlalchemy.exc
import sqlite3
@@ -34,7 +31,7 @@ def set_sqlite_pragma(dbapi_connection, connection_record):
cursor.execute("PRAGMA foreign_keys=ON")
cursor.close()
def store_submission(ctx:Settings, base_submission:models.BasicSubmission, samples:List[dict]=[]) -> None|dict:
def store_submission(ctx:Settings, base_submission:models.BasicSubmission) -> None|dict:
"""
Upserts submissions into database
@@ -46,55 +43,19 @@ def store_submission(ctx:Settings, base_submission:models.BasicSubmission, sampl
None|dict : object that indicates issue raised for reporting in gui
"""
logger.debug(f"Hello from store_submission")
# Add all samples to sample table
# Final check for proper RSL name
typer = RSLNamer(ctx=ctx, instr=base_submission.rsl_plate_num)
base_submission.rsl_plate_num = typer.parsed_name
# for sample in samples:
# instance = sample['sample']
# logger.debug(f"Typer: {typer.submission_type}")
# logger.debug(f"sample going in: {type(sample['sample'])}\n{sample['sample'].__dict__}")
# # Suuuuuper hacky way to be sure that the artic doesn't overwrite the ww plate in a ww sample
# # need something more elegant
# # if "_artic" not in typer.submission_type:
# # sample.rsl_plate = base_submission
# # else:
# # logger.debug(f"{sample.ww_sample_full_id} is an ARTIC sample.")
# # # base_submission.samples.remove(sample)
# # # sample.rsl_plate = sample.rsl_plate
# # # sample.artic_rsl_plate = base_submission
# # logger.debug(f"Attempting to add sample: {sample.to_string()}")
# # try:
# # ctx['database_session'].add(sample)
# # ctx.database_session.add(instance)
# # ctx.database_session.commit()
# # logger.debug(f"Submitter id: {sample['sample'].submitter_id} and table id: {sample['sample'].id}")
# logger.debug(f"Submitter id: {instance.submitter_id} and table id: {instance.id}")
# assoc = SubmissionSampleAssociation(submission=base_submission, sample=instance, row=sample['row'], column=sample['column'])
# # except (sqlite3.IntegrityError, sqlalchemy.exc.IntegrityError) as e:
# # logger.debug(f"Hit an integrity error : {e}")
# # continue
# try:
# base_submission.submission_sample_associations.append(assoc)
# except IntegrityError as e:
# logger.critical(e)
# continue
# logger.debug(f"Here is the sample to be stored in the DB: {sample.__dict__}")
# Add submission to submission table
# ctx['database_session'].add(base_submission)
ctx.database_session.add(base_submission)
logger.debug(f"Attempting to add submission: {base_submission.rsl_plate_num}")
try:
# ctx['database_session'].commit()
ctx.database_session.commit()
except (sqlite3.IntegrityError, sqlalchemy.exc.IntegrityError) as e:
logger.debug(f"Hit an integrity error : {e}")
# ctx['database_session'].rollback()
ctx.database_session.rollback()
return {"message":"This plate number already exists, so we can't add it.", "status":"Critical"}
except (sqlite3.OperationalError, sqlalchemy.exc.IntegrityError) as e:
logger.debug(f"Hit an operational error: {e}")
# ctx['database_session'].rollback()
ctx.database_session.rollback()
return {"message":"The database is locked for editing.", "status":"Critical"}
return None
@@ -111,10 +72,8 @@ def store_reagent(ctx:Settings, reagent:models.Reagent) -> None|dict:
None|dict: object indicating issue to be reported in the gui
"""
logger.debug(f"Reagent dictionary: {reagent.__dict__}")
# ctx['database_session'].add(reagent)
ctx.database_session.add(reagent)
try:
# ctx['database_session'].commit()
ctx.database_session.commit()
except (sqlite3.OperationalError, sqlalchemy.exc.OperationalError):
return {"message":"The database is locked for editing."}
@@ -131,7 +90,6 @@ def construct_submission_info(ctx:Settings, info_dict:dict) -> models.BasicSubmi
Returns:
models.BasicSubmission: Constructed submission object
"""
# from tools import check_regex_match, RSLNamer
# convert submission type into model name
query = info_dict['submission_type'].replace(" ", "")
# Ensure an rsl plate number exists for the plate
@@ -143,8 +101,6 @@ def construct_submission_info(ctx:Settings, info_dict:dict) -> models.BasicSubmi
# enforce conventions on the rsl plate number from the form
info_dict['rsl_plate_num'] = RSLNamer(ctx=ctx, instr=info_dict["rsl_plate_num"]).parsed_name
# check database for existing object
# instance = ctx['database_session'].query(models.BasicSubmission).filter(models.BasicSubmission.rsl_plate_num==info_dict['rsl_plate_num']).first()
# instance = ctx.database_session.query(models.BasicSubmission).filter(models.BasicSubmission.rsl_plate_num==info_dict['rsl_plate_num']).first()
instance = lookup_submission_by_rsl_num(ctx=ctx, rsl_num=info_dict['rsl_plate_num'])
# get model based on submission type converted above
logger.debug(f"Looking at models for submission type: {query}")
@@ -166,7 +122,6 @@ def construct_submission_info(ctx:Settings, info_dict:dict) -> models.BasicSubmi
# set fields based on keys in dictionary
match item:
case "extraction_kit":
# q_str = info_dict[item]
logger.debug(f"Looking up kit {value}")
try:
field_value = lookup_kittype_by_name(ctx=ctx, name=value)
@@ -185,13 +140,7 @@ def construct_submission_info(ctx:Settings, info_dict:dict) -> models.BasicSubmi
field_value = lookup_org_by_name(ctx=ctx, name=value)
logger.debug(f"Got {field_value} for organization {value}")
case "submitter_plate_num":
# Because of unique constraint, there will be problems with
# multiple submissions named 'None', so...
# Should be depreciated with use of pydantic validator
logger.debug(f"Submitter plate id: {value}")
# if info_dict[item] == None or info_dict[item] == "None" or info_dict[item] == "":
# logger.debug(f"Got None as a submitter plate number, inserting random string to preserve database unique constraint.")
# info_dict[item] = uuid.uuid4().hex.upper()
field_value = value
case "samples":
for sample in value:
@@ -200,6 +149,7 @@ def construct_submission_info(ctx:Settings, info_dict:dict) -> models.BasicSubmi
sample_instance = sample['sample']
else:
logger.warning(f"Sample {sample} already exists, creating association.")
logger.debug(f"Adding {sample_instance.__dict__}")
if sample_instance in instance.samples:
logger.error(f"Looks like there's a duplicate sample on this plate: {sample_instance.submitter_id}!")
continue
@@ -207,7 +157,7 @@ def construct_submission_info(ctx:Settings, info_dict:dict) -> models.BasicSubmi
with ctx.database_session.no_autoflush:
try:
sample_query = sample_instance.sample_type.replace('Sample', '').strip()
logger.debug(f"Here is the sample instance type: {sample_query}")
logger.debug(f"Here is the sample instance type: {sample_instance}")
try:
assoc = getattr(models, f"{sample_query}Association")
except AttributeError as e:
@@ -227,7 +177,6 @@ def construct_submission_info(ctx:Settings, info_dict:dict) -> models.BasicSubmi
continue
continue
case "submission_type":
# item = "submission_type"
field_value = lookup_submissiontype_by_name(ctx=ctx, type_name=value)
case _:
field_value = value
@@ -242,14 +191,13 @@ def construct_submission_info(ctx:Settings, info_dict:dict) -> models.BasicSubmi
# calculate cost of the run: immutable cost + mutable times number of columns
# This is now attached to submission upon creation to preserve at-run costs incase of cost increase in the future.
try:
# ceil(instance.sample_count / 8) will get number of columns
# the cost of a full run multiplied by (that number / 12) is x twelfths the cost of a full run
logger.debug(f"Calculating costs for procedure...")
instance.calculate_base_cost()
except (TypeError, AttributeError) as e:
logger.debug(f"Looks like that kit doesn't have cost breakdown yet due to: {e}, using full plate cost.")
instance.run_cost = instance.extraction_kit.cost_per_run
logger.debug(f"Calculated base run cost of: {instance.run_cost}")
# Apply any discounts that are applicable for client and kit.
try:
logger.debug("Checking and applying discounts...")
discounts = [item.amount for item in lookup_discounts_by_org_and_kit(ctx=ctx, kit_id=instance.extraction_kit.id, lab_id=instance.submitting_lab.id)]
@@ -299,12 +247,6 @@ def construct_reagent(ctx:Settings, info_dict:dict) -> models.Reagent:
reagent.name = info_dict[item]
# add end-of-life extension from reagent type to expiry date
# NOTE: this will now be done only in the reporting phase to account for potential changes in end-of-life extensions
# try:
# reagent.expiry = reagent.expiry + reagent.type.eol_ext
# except TypeError as e:
# logger.debug(f"We got a type error: {e}.")
# except AttributeError:
# pass
return reagent
def get_all_reagenttype_names(ctx:Settings) -> list[str]:
@@ -317,7 +259,6 @@ def get_all_reagenttype_names(ctx:Settings) -> list[str]:
Returns:
list[str]: reagent type names
"""
# lookedup = [item.__str__() for item in ctx['database_session'].query(models.ReagentType).all()]
lookedup = [item.__str__() for item in ctx.database_session.query(models.ReagentType).all()]
return lookedup
@@ -333,12 +274,11 @@ def lookup_reagenttype_by_name(ctx:Settings, rt_name:str) -> models.ReagentType:
models.ReagentType: looked up reagent type
"""
logger.debug(f"Looking up ReagentType by name: {rt_name.title()}")
# lookedup = ctx['database_session'].query(models.ReagentType).filter(models.ReagentType.name==rt_name).first()
lookedup = ctx.database_session.query(models.ReagentType).filter(models.ReagentType.name==rt_name).first()
logger.debug(f"Found ReagentType: {lookedup}")
return lookedup
def lookup_kittype_by_use(ctx:Settings, used_by:str|None=None) -> list[models.KitType]:
def lookup_kittype_by_use(ctx:Settings, used_for:str|None=None) -> list[models.KitType]:
"""
Lookup kits by a sample type its used for
@@ -349,10 +289,9 @@ def lookup_kittype_by_use(ctx:Settings, used_by:str|None=None) -> list[models.Ki
Returns:
list[models.KitType]: list of kittypes that have that sample type in their uses
"""
if used_by != None:
# return ctx['database_session'].query(models.KitType).filter(models.KitType.used_for.contains(used_by)).all()
# return ctx.database_session.query(models.KitType).filter(models.KitType.used_for.contains(used_by)).all()
return ctx.database_session.query(models.KitType).filter(models.KitType.used_for.any(name=used_by)).all()
if used_for != None:
# Get kittypes whose 'used_for' name is used_for.
return ctx.database_session.query(models.KitType).filter(models.KitType.used_for.any(name=used_for)).all()
else:
# return ctx['database_session'].query(models.KitType).all()
return ctx.database_session.query(models.KitType).all()
@@ -371,11 +310,20 @@ def lookup_kittype_by_name(ctx:Settings, name:str|dict) -> models.KitType:
if isinstance(name, dict):
name = name['value']
logger.debug(f"Querying kittype: {name}")
# return ctx['database_session'].query(models.KitType).filter(models.KitType.name==name).first()
with ctx.database_session.no_autoflush:
return ctx.database_session.query(models.KitType).filter(models.KitType.name==name).first()
def lookup_kittype_by_id(ctx:Settings, id:int) -> models.KitType:
    """
    Retrieve a single kit using its integer primary key.

    Args:
        ctx (Settings): Settings passed down from gui
        id (int): id number of the kit.

    Returns:
        models.KitType: Kit.
    """
    # Build the base query first, then apply the primary-key filter.
    kit_query = ctx.database_session.query(models.KitType)
    return kit_query.filter(models.KitType.id == id).first()
def lookup_regent_by_type_name(ctx:Settings, type_name:str) -> list[models.Reagent]:
@@ -389,7 +337,6 @@ def lookup_regent_by_type_name(ctx:Settings, type_name:str) -> list[models.Reage
Returns:
list[models.Reagent]: list of retrieved reagents
"""
# return ctx['database_session'].query(models.Reagent).join(models.Reagent.type, aliased=True).filter(models.ReagentType.name==type_name).all()
return ctx.database_session.query(models.Reagent).join(models.Reagent.type, aliased=True).filter(models.ReagentType.name==type_name).all()
def lookup_regent_by_type_name_and_kit_name(ctx:Settings, type_name:str, kit_name:str) -> list[models.Reagent]:
@@ -406,8 +353,6 @@ def lookup_regent_by_type_name_and_kit_name(ctx:Settings, type_name:str, kit_nam
"""
# What I want to do is get the reagent type by name
# Hang on, this is going to be a long one.
# by_type = ctx['database_session'].query(models.Reagent).join(models.Reagent.type, aliased=True).filter(models.ReagentType.name.endswith(type_name)).all()
# rt_types = ctx['database_session'].query(models.ReagentType).filter(models.ReagentType.name.endswith(type_name))
rt_types = ctx.database_session.query(models.ReagentType).filter(models.ReagentType.name.endswith(type_name))
# add filter for kit name...
try:
@@ -440,7 +385,7 @@ def lookup_all_submissions_by_type(ctx:Settings, sub_type:str|None=None, chronol
subs = ctx.database_session.query(models.BasicSubmission)
else:
# subs = ctx['database_session'].query(models.BasicSubmission).filter(models.BasicSubmission.submission_type==sub_type.lower().replace(" ", "_")).all()
subs = ctx.database_session.query(models.BasicSubmission).filter(models.BasicSubmission.submission_type==sub_type.lower().replace(" ", "_"))
subs = ctx.database_session.query(models.BasicSubmission).filter(models.BasicSubmission.submission_type_name==sub_type)
if chronologic:
subs.order_by(models.BasicSubmission.submitted_date)
return subs.all()
@@ -1172,6 +1117,25 @@ def lookup_subsamp_association_by_plate_sample(ctx:Settings, rsl_plate_num:str,
.filter(models.BasicSample.submitter_id==rsl_sample_num)\
.first()
def lookup_sub_wwsamp_association_by_plate_sample(ctx:Settings, rsl_plate_num:str, rsl_sample_num:str) -> models.WastewaterAssociation:
    """
    Retrieve the association linking a wastewater sample to a wastewater plate.

    Args:
        ctx (Settings): Settings passed down from gui.
        rsl_plate_num (str): RSL plate number of the submission.
        rsl_sample_num (str): Submitter id of the sample.

    Returns:
        models.WastewaterAssociation: First matching association, or None if no match.
    """
    # NOTE(review): joins are on the Wastewater subclasses while the filters use the
    # BasicSubmission/BasicSample base classes -- assumes polymorphic mapping makes
    # these resolve to the same rows; confirm against the model definitions.
    return ctx.database_session.query(models.WastewaterAssociation)\
        .join(models.Wastewater)\
        .join(models.WastewaterSample)\
        .filter(models.BasicSubmission.rsl_plate_num==rsl_plate_num)\
        .filter(models.BasicSample.submitter_id==rsl_sample_num)\
        .first()
def lookup_all_reagent_names_by_role(ctx:Settings, role_name:str) -> List[str]:
"""
_summary_
@@ -1222,4 +1186,18 @@ def add_reagenttype_to_kit(ctx:Settings, rt_name:str, kit_name:str, eol:int=0):
kit.kit_reagenttype_associations.append(assoc)
ctx.database_session.add(kit)
ctx.database_session.commit()
def lookup_subsamp_association_by_models(ctx:Settings, submission:models.BasicSubmission, sample:models.BasicSample) -> models.SubmissionSampleAssociation:
    """
    Retrieve the association object joining a given submission and sample.

    Args:
        ctx (Settings): Settings passed down from gui.
        submission (models.BasicSubmission): Submission of interest.
        sample (models.BasicSample): Sample of interest.

    Returns:
        models.SubmissionSampleAssociation: First matching association, or None if no match.
    """
    return ctx.database_session.query(models.SubmissionSampleAssociation) \
        .filter(models.SubmissionSampleAssociation.submission==submission) \
        .filter(models.SubmissionSampleAssociation.sample==sample).first()
def update_subsampassoc_with_pcr(ctx:Settings, submission:models.BasicSubmission, sample:models.BasicSample, input_dict:dict):
    """
    Write PCR result fields onto the association between a submission and a sample,
    then commit the change.

    Args:
        ctx (Settings): Settings passed down from gui.
        submission (models.BasicSubmission): Submission the sample belongs to.
        sample (models.BasicSample): Sample whose association is being updated.
        input_dict (dict): Attribute name -> value pairs to set on the association.
    """
    assoc = lookup_subsamp_association_by_models(ctx, submission=submission, sample=sample)
    # Best-effort update: attributes that cannot be assigned are logged and skipped.
    for k, v in input_dict.items():
        try:
            setattr(assoc, k, v)
        except AttributeError:
            logger.error(f"Can't set {k} to {v}")
    ctx.database_session.add(assoc)
    ctx.database_session.commit()

View File

@@ -9,5 +9,4 @@ metadata = Base.metadata
from .controls import Control, ControlType
from .kits import KitType, ReagentType, Reagent, Discount, KitTypeReagentTypeAssociation, SubmissionType, SubmissionTypeKitTypeAssociation
from .organizations import Organization, Contact
# from .samples import WWSample, BCSample, BasicSample
from .submissions import BasicSubmission, BacterialCulture, Wastewater, WastewaterArtic, WastewaterSample, BacterialCultureSample, BasicSample, SubmissionSampleAssociation, WastewaterAssociation

View File

@@ -12,16 +12,6 @@ import logging
logger = logging.getLogger(f'submissions.{__name__}')
# # Table containing reagenttype-kittype relationships
# reagenttypes_kittypes = Table("_reagentstypes_kittypes", Base.metadata,
# Column("reagent_types_id", INTEGER, ForeignKey("_reagent_types.id")),
# Column("kits_id", INTEGER, ForeignKey("_kits.id")),
# # The entry will look like ["Bacteria Culture":{"row":1, "column":4}]
# Column("uses", JSON),
# # is the reagent required for that kit?
# Column("required", INTEGER)
# )
reagenttypes_reagents = Table("_reagenttypes_reagents", Base.metadata, Column("reagent_id", INTEGER, ForeignKey("_reagents.id")), Column("reagenttype_id", INTEGER, ForeignKey("_reagent_types.id")))
@@ -34,13 +24,7 @@ class KitType(Base):
id = Column(INTEGER, primary_key=True) #: primary key
name = Column(String(64), unique=True) #: name of kit
submissions = relationship("BasicSubmission", back_populates="extraction_kit") #: submissions this kit was used for
# used_for = Column(JSON) #: list of names of sample types this kit can process
# used_for = relationship("SubmissionType", back_populates="extraction_kits", uselist=True, secondary=submissiontype_kittypes)
# cost_per_run = Column(FLOAT(2)) #: dollar amount for each full run of this kit NOTE: depreciated, use the constant and mutable costs instead
# reagent_types = relationship("ReagentType", back_populates="kits", uselist=True, secondary=reagenttypes_kittypes) #: reagent types this kit contains
# reagent_types_id = Column(INTEGER, ForeignKey("_reagent_types.id", ondelete='SET NULL', use_alter=True, name="fk_KT_reagentstype_id")) #: joined reagent type id
# kit_reagenttype_association =
kit_reagenttype_associations = relationship(
"KitTypeReagentTypeAssociation",
back_populates="kit_type",
@@ -51,7 +35,6 @@ class KitType(Base):
# to "keyword" attribute
reagent_types = association_proxy("kit_reagenttype_associations", "reagent_type")
kit_submissiontype_associations = relationship(
"SubmissionTypeKitTypeAssociation",
back_populates="kit_type",
@@ -60,7 +43,6 @@ class KitType(Base):
used_for = association_proxy("kit_submissiontype_associations", "submission_type")
def __repr__(self) -> str:
return f"<KitType({self.name})>"
@@ -74,6 +56,15 @@ class KitType(Base):
return self.name
def get_reagents(self, required:bool=False) -> list:
"""
Return ReagentTypes linked to kit through KitTypeReagentTypeAssociation.
Args:
required (bool, optional): If true only return required types. Defaults to False.
Returns:
list: List of ReagentTypes
"""
if required:
return [item.reagent_type for item in self.kit_reagenttype_associations if item.required == 1]
else:
@@ -81,14 +72,24 @@ class KitType(Base):
def construct_xl_map_for_use(self, use:str) -> dict:
# map = self.used_for[use]
"""
Creates map of locations in excel workbook for a SubmissionType
Args:
use (str): Submissiontype.name
Returns:
dict: Dictionary containing information locations.
"""
map = {}
# Get all KitTypeReagentTypeAssociation for SubmissionType
assocs = [item for item in self.kit_reagenttype_associations if use in item.uses]
for assoc in assocs:
try:
map[assoc.reagent_type.name] = assoc.uses[use]
except TypeError:
continue
# Get SubmissionType info map
try:
st_assoc = [item for item in self.used_for if use == item.name][0]
map['info'] = st_assoc.info_map
@@ -106,7 +107,6 @@ class KitTypeReagentTypeAssociation(Base):
kits_id = Column(INTEGER, ForeignKey("_kits.id"), primary_key=True)
uses = Column(JSON)
required = Column(INTEGER)
# reagent_type_name = Column(INTEGER, ForeignKey("_reagent_types.name"))
kit_type = relationship(KitType, back_populates="kit_reagenttype_associations")
@@ -139,11 +139,8 @@ class ReagentType(Base):
id = Column(INTEGER, primary_key=True) #: primary key
name = Column(String(64)) #: name of reagent type
# kit_id = Column(INTEGER, ForeignKey("_kits.id", ondelete="SET NULL", use_alter=True, name="fk_RT_kits_id")) #: id of joined kit type
# kits = relationship("KitType", back_populates="reagent_types", uselist=True, foreign_keys=[kit_id]) #: kits this reagent is used in
instances = relationship("Reagent", back_populates="type", secondary=reagenttypes_reagents) #: concrete instances of this reagent type
eol_ext = Column(Interval()) #: extension of life interval
# required = Column(INTEGER, server_default="1") #: sqlite boolean to determine if reagent type is essential for the kit
last_used = Column(String(32)) #: last used lot number of this type of reagent
@validates('required')
@@ -202,8 +199,10 @@ class Reagent(Base):
dict: gui friendly dictionary
"""
if extraction_kit != None:
# Get the intersection of this reagent's ReagentType and all ReagentTypes in KitType
try:
reagent_role = list(set(self.type).intersection(extraction_kit.reagent_types))[0]
# Most will be able to fall back to first ReagentType in itself because most will only have 1.
except:
reagent_role = self.type[0]
else:
@@ -212,9 +211,9 @@ class Reagent(Base):
rtype = reagent_role.name.replace("_", " ").title()
except AttributeError:
rtype = "Unknown"
# Calculate expiry with EOL from ReagentType
try:
place_holder = self.expiry + reagent_role.eol_ext
# logger.debug(f"EOL_ext for {self.lot} -- {self.expiry} + {self.type.eol_ext} = {place_holder}")
except TypeError as e:
place_holder = date.today()
logger.debug(f"We got a type error setting {self.lot} expiry: {e}. setting to today for testing")
@@ -227,9 +226,28 @@ class Reagent(Base):
"expiry": place_holder.strftime("%Y-%m-%d")
}
def to_reagent_dict(self, extraction_kit:KitType=None) -> dict:
    """
    Returns basic reagent dictionary.

    Args:
        extraction_kit (KitType, optional): Kit used to resolve which of this
            reagent's types is the relevant role. Defaults to None.

    Returns:
        dict: Basic reagent dictionary of 'type', 'lot', 'expiry'
    """
    if extraction_kit is not None:
        # Get the intersection of this reagent's ReagentType and all ReagentTypes in KitType
        try:
            reagent_role = list(set(self.type).intersection(extraction_kit.reagent_types))[0]
        # Most will be able to fall back to first ReagentType in itself because most will only have 1.
        except (IndexError, TypeError):
            reagent_role = self.type[0]
    else:
        reagent_role = self.type[0]
    try:
        rtype = reagent_role.name
    except AttributeError:
        rtype = "Unknown"
    return {
        "type": rtype,
        "lot": self.lot,
        "expiry": self.expiry.strftime("%Y-%m-%d")
    }
@@ -249,12 +267,14 @@ class Discount(Base):
amount = Column(FLOAT(2))
class SubmissionType(Base):
"""
Abstract of types of submissions.
"""
__tablename__ = "_submission_types"
id = Column(INTEGER, primary_key=True) #: primary key
name = Column(String(128), unique=True) #: name of submission type
info_map = Column(JSON)
info_map = Column(JSON) #: Where basic information is found in the excel workbook corresponding to this type.
instances = relationship("BasicSubmission", backref="submission_type")
submissiontype_kit_associations = relationship(
@@ -269,14 +289,15 @@ class SubmissionType(Base):
return f"<SubmissionType({self.name})>"
class SubmissionTypeKitTypeAssociation(Base):
"""
Abstract of relationship between kits and their submission type.
"""
__tablename__ = "_submissiontypes_kittypes"
submission_types_id = Column(INTEGER, ForeignKey("_submission_types.id"), primary_key=True)
kits_id = Column(INTEGER, ForeignKey("_kits.id"), primary_key=True)
mutable_cost_column = Column(FLOAT(2)) #: dollar amount per 96 well plate that can change with number of columns (reagents, tips, etc)
mutable_cost_sample = Column(FLOAT(2)) #: dollar amount that can change with number of samples (reagents, tips, etc)
constant_cost = Column(FLOAT(2)) #: dollar amount per plate that will remain constant (plates, man hours, etc)
# reagent_type_name = Column(INTEGER, ForeignKey("_reagent_types.name"))
kit_type = relationship(KitType, back_populates="kit_submissiontype_associations")

View File

@@ -21,7 +21,6 @@ class Organization(Base):
submissions = relationship("BasicSubmission", back_populates="submitting_lab") #: submissions this organization has submitted
cost_centre = Column(String()) #: cost centre used by org for payment
contacts = relationship("Contact", back_populates="organization", secondary=orgs_contacts) #: contacts involved with this org
# contact_ids = Column(INTEGER, ForeignKey("_contacts.id", ondelete="SET NULL", name="fk_org_contact_id")) #: contact ids of this organization
def __str__(self) -> str:
"""
@@ -47,5 +46,4 @@ class Contact(Base):
email = Column(String(64)) #: contact email
phone = Column(String(32)) #: contact phone number
organization = relationship("Organization", back_populates="contacts", uselist=True, secondary=orgs_contacts) #: relationship to joined organization
# organization_id = Column(INTEGER, ForeignKey("_organizations.id", ondelete="SET NULL", name="fk_contact_org_id")) #: joined organization ids

View File

@@ -3,7 +3,7 @@ Models for the main submission types.
'''
import math
from . import Base
from sqlalchemy import Column, String, TIMESTAMP, INTEGER, ForeignKey, Table, JSON, FLOAT
from sqlalchemy import Column, String, TIMESTAMP, INTEGER, ForeignKey, Table, JSON, FLOAT, case
from sqlalchemy.orm import relationship, validates
import logging
import json
@@ -11,10 +11,10 @@ from json.decoder import JSONDecodeError
from math import ceil
from sqlalchemy.ext.associationproxy import association_proxy
import uuid
from . import Base
from pandas import Timestamp
from dateutil.parser import parse
import pprint
from tools import check_not_nan
logger = logging.getLogger(f"submissions.{__name__}")
@@ -23,7 +23,7 @@ reagents_submissions = Table("_reagents_submissions", Base.metadata, Column("rea
class BasicSubmission(Base):
"""
Base of basic submission which polymorphs into BacterialCulture and Wastewater
Concrete of basic submission which polymorphs into BacterialCulture and Wastewater
"""
__tablename__ = "_submissions"
@@ -36,7 +36,6 @@ class BasicSubmission(Base):
sample_count = Column(INTEGER) #: Number of samples in the submission
extraction_kit = relationship("KitType", back_populates="submissions") #: The extraction kit used
extraction_kit_id = Column(INTEGER, ForeignKey("_kits.id", ondelete="SET NULL", name="fk_BS_extkit_id"))
# submission_type = Column(String(32)) #: submission type (should be string in D3 of excel sheet)
submission_type_name = Column(String, ForeignKey("_submission_types.name", ondelete="SET NULL", name="fk_BS_subtype_name"))
technician = Column(String(64)) #: initials of processing tech(s)
# Move this into custom types?
@@ -83,7 +82,6 @@ class BasicSubmission(Base):
dict: dictionary used in submissions summary
"""
# get lab from nested organization object
try:
sub_lab = self.submitting_lab.name
except AttributeError:
@@ -105,24 +103,16 @@ class BasicSubmission(Base):
except JSONDecodeError as e:
ext_info = None
logger.debug(f"Json error in {self.rsl_plate_num}: {e}")
# Updated 2023-09 to use the extraction kit to pull reagents.
try:
reagents = [item.to_sub_dict(extraction_kit=self.extraction_kit) for item in self.reagents]
except Exception as e:
logger.error(f"We got an error retrieving reagents: {e}")
reagents = None
# try:
# samples = [item.sample.to_sub_dict(item.__dict__()) for item in self.submission_sample_associations]
# except Exception as e:
# logger.error(f"Problem making list of samples: {e}")
# samples = None
samples = []
# Updated 2023-09 to get sample association with plate number
for item in self.submission_sample_associations:
sample = item.sample.to_sub_dict(submission_rsl=self.rsl_plate_num)
# try:
# sample['well'] = f"{row_map[item.row]}{item.column}"
# except KeyError as e:
# logger.error(f"Unable to find row {item.row} in row_map.")
# sample['well'] = None
samples.append(sample)
try:
comments = self.comment
@@ -171,7 +161,7 @@ class BasicSubmission(Base):
output = {
"id": self.id,
"Plate Number": self.rsl_plate_num,
"Submission Type": self.submission_type.replace("_", " ").title(),
"Submission Type": self.submission_type_name.replace("_", " ").title(),
"Submitter Plate Number": self.submitter_plate_num,
"Submitted Date": self.submitted_date.strftime("%Y-%m-%d"),
"Submitting Lab": sub_lab,
@@ -182,16 +172,18 @@ class BasicSubmission(Base):
return output
def calculate_base_cost(self):
"""
Calculates cost of the plate
"""
# Calculate number of columns based on largest column number
try:
# cols_count_96 = ceil(int(self.sample_count) / 8)
cols_count_96 = self.calculate_column_count()
except Exception as e:
logger.error(f"Column count error: {e}")
# cols_count_24 = ceil(int(self.sample_count) / 3)
logger.debug(f"Pre-association check. {pprint.pformat(self.__dict__)}")
# Get kit associated with this submission
assoc = [item for item in self.extraction_kit.kit_submissiontype_associations if item.submission_type == self.submission_type][0]
logger.debug(f"Came up with association: {assoc}")
# if all(item == 0.0 for item in [self.extraction_kit.constant_cost, self.extraction_kit.mutable_cost_column, self.extraction_kit.mutable_cost_sample]):
# If every individual cost is 0 this is probably an old plate.
if all(item == 0.0 for item in [assoc.constant_cost, assoc.mutable_cost_column, assoc.mutable_cost_sample]):
try:
self.run_cost = self.extraction_kit.cost_per_run
@@ -203,14 +195,28 @@ class BasicSubmission(Base):
except Exception as e:
logger.error(f"Calculation error: {e}")
def calculate_column_count(self) -> int:
    """
    Calculate the number of columns in this submission.

    Returns:
        int: largest column number occupied by any sample association
    """
    logger.debug(f"Here's the samples: {self.samples}")
    # Column positions live on the submission/sample association, not the sample itself.
    columns = [assoc.column for assoc in self.submission_sample_associations]
    logger.debug(f"Here are the columns for {self.rsl_plate_num}: {columns}")
    # NOTE: raises ValueError on a submission with no associations; callers wrap this in try/except.
    return max(columns)
def hitpick_plate(self, plate_number:int|None=None) -> list:
"""
Returns positve sample locations for plate
Args:
plate_number (int | None, optional): Plate id. Defaults to None.
Returns:
list: list of htipick dictionaries for each sample
"""
output_list = []
for assoc in self.submission_sample_associations:
samp = assoc.sample.to_hitpick(submission_rsl=self.rsl_plate_num)
@@ -232,7 +238,6 @@ class BacterialCulture(BasicSubmission):
derivative submission type from BasicSubmission
"""
controls = relationship("Control", back_populates="submission", uselist=True) #: A control sample added to submission
# samples = relationship("BCSample", back_populates="rsl_plate", uselist=True)
__mapper_args__ = {"polymorphic_identity": "Bacterial Culture", "polymorphic_load": "inline"}
def to_dict(self) -> dict:
@@ -250,11 +255,9 @@ class Wastewater(BasicSubmission):
"""
derivative submission type from BasicSubmission
"""
# samples = relationship("WWSample", back_populates="rsl_plate", uselist=True)
pcr_info = Column(JSON)
ext_technician = Column(String(64))
pcr_technician = Column(String(64))
# ww_sample_id = Column(String, ForeignKey("_ww_samples.id", ondelete="SET NULL", name="fk_WW_sample_id"))
__mapper_args__ = {"polymorphic_identity": "Wastewater", "polymorphic_load": "inline"}
def to_dict(self) -> dict:
@@ -276,10 +279,7 @@ class WastewaterArtic(BasicSubmission):
"""
derivative submission type for artic wastewater
"""
# samples = relationship("WWSample", back_populates="artic_rsl_plate", uselist=True)
# Can it use the pcr_info from the wastewater? Cause I can't define pcr_info here due to conflicts with that
# Not necessary because we don't get any results for this procedure.
__mapper_args__ = {"polymorphic_identity": "wastewater_artic", "polymorphic_load": "inline"}
__mapper_args__ = {"polymorphic_identity": "Wastewater Artic", "polymorphic_load": "inline"}
def calculate_base_cost(self):
"""
@@ -290,12 +290,13 @@ class WastewaterArtic(BasicSubmission):
cols_count_96 = ceil(int(self.sample_count) / 8)
except Exception as e:
logger.error(f"Column count error: {e}")
assoc = [item for item in self.extraction_kit.kit_submissiontype_associations if item.submission_type == self.submission_type][0]
# Since we have multiple output plates per submission form, the constant cost will have to reflect this.
output_plate_count = math.ceil(int(self.sample_count) / 16)
logger.debug(f"Looks like we have {output_plate_count} output plates.")
const_cost = self.extraction_kit.constant_cost * output_plate_count
const_cost = assoc.constant_cost * output_plate_count
try:
self.run_cost = const_cost + (self.extraction_kit.mutable_cost_column * cols_count_96) + (self.extraction_kit.mutable_cost_sample * int(self.sample_count))
self.run_cost = const_cost + (assoc.mutable_cost_column * cols_count_96) + (assoc.mutable_cost_sample * int(self.sample_count))
except Exception as e:
logger.error(f"Calculation error: {e}")
@@ -318,7 +319,15 @@ class BasicSample(Base):
__mapper_args__ = {
"polymorphic_identity": "basic_sample",
"polymorphic_on": sample_type,
# "polymorphic_on": sample_type,
"polymorphic_on": case(
[
(sample_type == "Wastewater Sample", "Wastewater Sample"),
(sample_type == "Wastewater Artic Sample", "Wastewater Sample"),
(sample_type == "Bacterial Culture Sample", "Bacterial Culture Sample"),
],
else_="basic_sample"
),
"with_polymorphic": "*",
}
@@ -335,7 +344,23 @@ class BasicSample(Base):
def __repr__(self) -> str:
return f"<{self.sample_type.replace('_', ' ').title(). replace(' ', '')}({self.submitter_id})>"
def set_attribute(self, name, value):
# logger.debug(f"Setting {name} to {value}")
try:
setattr(self, name, value)
except AttributeError:
logger.error(f"Attribute {name} not found")
def to_sub_dict(self, submission_rsl:str) -> dict:
"""
Returns a dictionary of locations.
Args:
submission_rsl (str): Submission RSL number.
Returns:
dict: 'well' and sample submitter_id as 'name'
"""
row_map = {1:"A", 2:"B", 3:"C", 4:"D", 5:"E", 6:"F", 7:"G", 8:"H"}
self.assoc = [item for item in self.sample_submission_associations if item.submission.rsl_plate_num==submission_rsl][0]
sample = {}
@@ -347,73 +372,30 @@ class BasicSample(Base):
sample['name'] = self.submitter_id
return sample
def to_hitpick(self, submission_rsl:str) -> dict|None:
def to_hitpick(self, submission_rsl:str|None=None) -> dict|None:
"""
Outputs a dictionary of locations
Returns:
dict: dictionary of sample id, row and column in elution plate
"""
self.assoc = [item for item in self.sample_submission_associations if item.submission.rsl_plate_num==submission_rsl][0]
# dictionary to translate row letters into numbers
# row_dict = dict(A=1, B=2, C=3, D=4, E=5, F=6, G=7, H=8)
# if either n1 or n2 is positive, include this sample
# well_row = row_dict[self.well_number[0]]
# The remaining charagers are the columns
# well_col = self.well_number[1:]
return dict(name=self.submitter_id,
# row=well_row,
# col=well_col,
positive=False)
# self.assoc = [item for item in self.sample_submission_associations if item.submission.rsl_plate_num==submission_rsl][0]
# Since there is no PCR, negliable result is necessary.
return dict(name=self.submitter_id, positive=False)
class WastewaterSample(BasicSample):
"""
Base wastewater sample
Derivative wastewater sample
"""
# __tablename__ = "_ww_samples"
# id = Column(INTEGER, primary_key=True) #: primary key
ww_processing_num = Column(String(64)) #: wastewater processing number
ww_sample_full_id = Column(String(64))
ww_full_sample_id = Column(String(64))
rsl_number = Column(String(64)) #: rsl plate identification number
# rsl_plate = relationship("Wastewater", back_populates="samples") #: relationship to parent plate
# rsl_plate_id = Column(INTEGER, ForeignKey("_submissions.id", ondelete="SET NULL", name="fk_WWS_submission_id"))
collection_date = Column(TIMESTAMP) #: Date sample collected
received_date = Column(TIMESTAMP) #: Date sample received
# well_number = Column(String(8)) #: location on 96 well plate
# The following are fields from the sample tracking excel sheet Ruth put together.
# I have no idea when they will be implemented or how.
# testing_type = Column(String(64))
# site_status = Column(String(64))
notes = Column(String(2000))
# ct_n1 = Column(FLOAT(2)) #: AKA ct for N1
# ct_n2 = Column(FLOAT(2)) #: AKA ct for N2
# n1_status = Column(String(32))
# n2_status = Column(String(32))
# seq_submitted = Column(BOOLEAN())
# ww_seq_run_id = Column(String(64))
# sample_type = Column(String(16))
# pcr_results = Column(JSON)
sample_location = Column(String(8)) #: location on 24 well plate
# artic_rsl_plate = relationship("WastewaterArtic", back_populates="samples")
# artic_well_number = Column(String(8))
__mapper_args__ = {"polymorphic_identity": "Wastewater Sample", "polymorphic_load": "inline"}
# def to_string(self) -> str:
# """
# string representing sample object
# Returns:
# str: string representing location and sample id
# """
# return f"{self.well_number}: {self.ww_sample_full_id}"
# @validates("received-date")
# def convert_rdate_time(self, key, value):
# if isinstance(value, Timestamp):
# return value.date()
# return value
@validates("collected-date")
def convert_cdate_time(self, key, value):
@@ -423,31 +405,68 @@ class WastewaterSample(BasicSample):
if isinstance(value, str):
return parse(value)
return value
@validates("rsl_number")
def use_submitter_id(self, key, value):
logger.debug(f"Validating {key}: {value}")
return value or self.submitter_id
# @collection_date.setter
# def collection_date(self, value):
# match value:
# case Timestamp():
# self.collection_date = value.date()
# case str():
# self.collection_date = parse(value)
# case _:
# self.collection_date = value
# def __init__(self, **kwargs):
# # Had a problem getting collection date from excel as text only.
# if 'collection_date' in kwargs.keys():
# logger.debug(f"Got collection_date: {kwargs['collection_date']}. Attempting parse.")
# if isinstance(kwargs['collection_date'], str):
# logger.debug(f"collection_date is a string...")
# kwargs['collection_date'] = parse(kwargs['collection_date'])
# logger.debug(f"output is {kwargs['collection_date']}")
# # Due to the plate map being populated with RSL numbers, we have to do some shuffling.
# try:
# kwargs['rsl_number'] = kwargs['submitter_id']
# except KeyError as e:
# logger.error(f"Error using {kwargs} for submitter_id")
# try:
# check = check_not_nan(kwargs['ww_full_sample_id'])
# except KeyError:
# logger.error(f"Error using {kwargs} for ww_full_sample_id")
# check = False
# if check:
# kwargs['submitter_id'] = kwargs["ww_full_sample_id"]
# super().__init__(**kwargs)
def __init__(self, **kwargs):
if 'collection_date' in kwargs.keys():
logger.debug(f"Got collection_date: {kwargs['collection_date']}. Attempting parse.")
if isinstance(kwargs['collection_date'], str):
logger.debug(f"collection_date is a string...")
kwargs['collection_date'] = parse(kwargs['collection_date'])
logger.debug(f"output is {kwargs['collection_date']}")
super().__init__(**kwargs)
def set_attribute(self, name:str, value):
"""
Set an attribute of this object. Extends parent.
Args:
name (str): _description_
value (_type_): _description_
"""
# Due to the plate map being populated with RSL numbers, we have to do some shuffling.
# logger.debug(f"Input - {name}:{value}")
match name:
case "submitter_id":
if self.submitter_id != None:
return
else:
super().set_attribute("rsl_number", value)
case "ww_full_sample_id":
if value != None:
super().set_attribute(name, value)
name = "submitter_id"
case 'collection_date':
if isinstance(value, str):
logger.debug(f"collection_date {value} is a string. Attempting parse...")
value = parse(value)
case "rsl_number":
if value == None:
value = self.submitter_id
# logger.debug(f"Output - {name}:{value}")
super().set_attribute(name, value)
def to_sub_dict(self, submission_rsl:str) -> dict:
"""
Gui friendly dictionary. Inherited from BasicSample
Gui friendly dictionary. Extends parent method.
This version will include PCR status.
Args:
@@ -458,15 +477,13 @@ class WastewaterSample(BasicSample):
"""
# Get the relevant submission association for this sample
sample = super().to_sub_dict(submission_rsl=submission_rsl)
# check if PCR data exists.
try:
check = self.assoc.ct_n1 != None and self.assoc.ct_n2 != None
except AttributeError as e:
check = False
if check:
# logger.debug(f"Using well info in name.")
sample['name'] = f"{self.submitter_id}\n\t- ct N1: {'{:.2f}'.format(self.assoc.ct_n1)} ({self.assoc.n1_status})\n\t- ct N2: {'{:.2f}'.format(self.assoc.ct_n2)} ({self.assoc.n2_status})"
# else:
# logger.error(f"Couldn't get the pcr info")
return sample
def to_hitpick(self, submission_rsl:str) -> dict|None:
@@ -477,67 +494,30 @@ class WastewaterSample(BasicSample):
dict: dictionary of sample id, row and column in elution plate
"""
sample = super().to_hitpick(submission_rsl=submission_rsl)
# dictionary to translate row letters into numbers
# row_dict = dict(A=1, B=2, C=3, D=4, E=5, F=6, G=7, H=8)
# if either n1 or n2 is positive, include this sample
try:
sample['positive'] = any(["positive" in item for item in [self.assoc.n1_status, self.assoc.n2_status]])
except (TypeError, AttributeError) as e:
logger.error(f"Couldn't check positives for {self.rsl_number}. Looks like there isn't PCR data.")
# return None
# positive = False
# well_row = row_dict[self.well_number[0]]
# well_col = self.well_number[1:]
# if positive:
# try:
# # The first character of the elution well is the row
# well_row = row_dict[self.elution_well[0]]
# # The remaining charagers are the columns
# well_col = self.elution_well[1:]
# except TypeError as e:
# logger.error(f"This sample doesn't have elution plate info.")
# return None
return sample
class BacterialCultureSample(BasicSample):
"""
base of bacterial culture sample
"""
# __tablename__ = "_bc_samples"
# id = Column(INTEGER, primary_key=True) #: primary key
# well_number = Column(String(8)) #: location on parent plate
# sample_id = Column(String(64), nullable=False, unique=True) #: identification from submitter
organism = Column(String(64)) #: bacterial specimen
concentration = Column(String(16)) #:
# sample_type = Column(String(16))
# rsl_plate_id = Column(INTEGER, ForeignKey("_submissions.id", ondelete="SET NULL", name="fk_BCS_sample_id")) #: id of parent plate
# rsl_plate = relationship("BacterialCulture", back_populates="samples") #: relationship to parent plate
concentration = Column(String(16)) #: sample concentration
__mapper_args__ = {"polymorphic_identity": "Bacterial Culture Sample", "polymorphic_load": "inline"}
# def to_string(self) -> str:
# """
# string representing object
# Returns:
# str: string representing well location, sample id and organism
# """
# return f"{self.well_number}: {self.sample_id} - {self.organism}"
def to_sub_dict(self, submission_rsl:str) -> dict:
"""
gui friendly dictionary
gui friendly dictionary, extends parent method.
Returns:
dict: well location and name (sample id, organism) NOTE: keys must sync with WWSample to_sub_dict above
"""
sample = super().to_sub_dict(submission_rsl=submission_rsl)
sample['name'] = f"{self.submitter_id} - ({self.organism})"
# return {
# # "well": self.well_number,
# "name": f"{self.submitter_id} - ({self.organism})",
# }
return sample
class SubmissionSampleAssociation(Base):
@@ -548,18 +528,19 @@ class SubmissionSampleAssociation(Base):
__tablename__ = "_submission_sample"
sample_id = Column(INTEGER, ForeignKey("_samples.id"), nullable=False)
submission_id = Column(INTEGER, ForeignKey("_submissions.id"), primary_key=True)
row = Column(INTEGER, primary_key=True)
column = Column(INTEGER, primary_key=True)
row = Column(INTEGER, primary_key=True) #: row on the 96 well plate
column = Column(INTEGER, primary_key=True) #: column on the 96 well plate
# reference to the Submission object
submission = relationship(BasicSubmission, back_populates="submission_sample_associations")
# reference to the "ReagentType" object
# sample = relationship("BasicSample")
# reference to the Sample object
sample = relationship(BasicSample, back_populates="sample_submission_associations")
base_sub_type = Column(String)
# """Refers to the type of parent."""
# Refers to the type of parent.
# Hooooooo boy, polymorphic association type, now we're getting into the weeds!
__mapper_args__ = {
"polymorphic_identity": "basic_association",
"polymorphic_on": base_sub_type,
@@ -576,11 +557,14 @@ class SubmissionSampleAssociation(Base):
return f"<SubmissionSampleAssociation({self.submission.rsl_plate_num} & {self.sample.submitter_id})"
class WastewaterAssociation(SubmissionSampleAssociation):
"""
Derivative custom Wastewater/Submission Association... fancy.
"""
ct_n1 = Column(FLOAT(2)) #: AKA ct for N1
ct_n2 = Column(FLOAT(2)) #: AKA ct for N2
n1_status = Column(String(32))
n2_status = Column(String(32))
pcr_results = Column(JSON)
n1_status = Column(String(32)) #: positive or negative for N1
n2_status = Column(String(32)) #: positive or negative for N2
pcr_results = Column(JSON) #: imported PCR status from QuantStudio
__mapper_args__ = {"polymorphic_identity": "wastewater", "polymorphic_load": "inline"}
__mapper_args__ = {"polymorphic_identity": "wastewater", "polymorphic_load": "inline"}

View File

@@ -6,7 +6,7 @@ import pprint
from typing import List
import pandas as pd
from pathlib import Path
from backend.db import lookup_ww_sample_by_ww_sample_num, lookup_sample_by_submitter_id, get_reagents_in_extkit, lookup_kittype_by_name, lookup_submissiontype_by_name, models
from backend.db import lookup_sample_by_submitter_id, get_reagents_in_extkit, lookup_kittype_by_name, lookup_submissiontype_by_name, models
from backend.pydant import PydSubmission, PydReagent
import logging
from collections import OrderedDict
@@ -14,8 +14,6 @@ import re
import numpy as np
from datetime import date
from dateutil.parser import parse, ParserError
import uuid
# from submissions.backend.db.functions import
from tools import check_not_nan, RSLNamer, convert_nans_to_nones, Settings
from frontend.custom_widgets.pop_ups import SubmissionTypeSelector, KitSelector
@@ -69,7 +67,7 @@ class SheetParser(object):
# Check metadata for category, return first category
if self.xl.book.properties.category != None:
logger.debug("Using file properties to find type...")
categories = [item.strip().title() for item in self.xl.book.properties.category.split(";")]
categories = [item.strip().replace("_", " ").title() for item in self.xl.book.properties.category.split(";")]
return dict(value=categories[0], parsed=False)
else:
# This code is going to be depreciated once there is full adoption of the client sheets
@@ -95,7 +93,13 @@ class SheetParser(object):
"""
_summary_
"""
info = InfoParser(ctx=self.ctx, xl=self.xl, submission_type=self.sub['submission_type']).parse_info()
info = InfoParser(ctx=self.ctx, xl=self.xl, submission_type=self.sub['submission_type']['value']).parse_info()
parser_query = f"parse_{self.sub['submission_type']['value'].replace(' ', '_').lower()}"
try:
custom_parser = getattr(self, parser_query)
info = custom_parser(info)
except AttributeError:
logger.error(f"Couldn't find submission parser: {parser_query}")
for k,v in info.items():
if k != "sample":
self.sub[k] = v
@@ -107,288 +111,41 @@ class SheetParser(object):
def parse_samples(self):
self.sample_result, self.sub['samples'] = SampleParser(ctx=self.ctx, xl=self.xl, submission_type=self.sub['submission_type']['value']).parse_samples()
def parse_bacterial_culture(self) -> None:
"""
pulls info specific to bacterial culture sample type
def parse_bacterial_culture(self, input_dict) -> dict:
"""
Update submission dictionary with type specific information
# def parse_reagents(df:pd.DataFrame) -> None:
# """
# Pulls reagents from the bacterial sub-dataframe
Args:
input_dict (dict): Input sample dictionary
# Args:
# df (pd.DataFrame): input sub dataframe
# """
# for ii, row in df.iterrows():
# # skip positive control
# logger.debug(f"Running reagent parse for {row[1]} with type {type(row[1])} and value: {row[2]} with type {type(row[2])}")
# # if the lot number isn't a float and the reagent type isn't blank
# # if not isinstance(row[2], float) and check_not_nan(row[1]):
# if check_not_nan(row[1]):
# # must be prefixed with 'lot_' to be recognized by gui
# # This is no longer true since reagents are loaded into their own key in dictionary
# try:
# reagent_type = row[1].replace(' ', '_').lower().strip()
# except AttributeError:
# pass
# # If there is a double slash in the type field, such as ethanol/iso
# # Use the cell to the left for reagent type.
# if reagent_type == "//":
# if check_not_nan(row[2]):
# reagent_type = row[0].replace(' ', '_').lower().strip()
# else:
# continue
# try:
# output_var = convert_nans_to_nones(str(row[2]).upper())
# except AttributeError:
# logger.debug(f"Couldn't upperize {row[2]}, must be a number")
# output_var = convert_nans_to_nones(str(row[2]))
# logger.debug(f"Output variable is {output_var}")
# logger.debug(f"Expiry date for imported reagent: {row[3]}")
# if check_not_nan(row[3]):
# try:
# expiry = row[3].date()
# except AttributeError as e:
# try:
# expiry = datetime.strptime(row[3], "%Y-%m-%d")
# except TypeError as e:
# expiry = datetime.fromordinal(datetime(1900, 1, 1).toordinal() + row[3] - 2)
# else:
# logger.debug(f"Date: {row[3]}")
# # expiry = date.today()
# expiry = date(year=1970, month=1, day=1)
# # self.sub[f"lot_{reagent_type}"] = {'lot':output_var, 'exp':expiry}
# # self.sub['reagents'].append(dict(type=reagent_type, lot=output_var, exp=expiry))
# self.sub['reagents'].append(PydReagent(type=reagent_type, lot=output_var, exp=expiry))
# submission_info = self.xl.parse(sheet_name="Sample List", dtype=object)
# self.sub['extraction_kit'] = submission_info.iloc[3][3]
# submission_info = self.parse_generic("Sample List")
# # iloc is [row][column] and the first row is set as header row so -2
# self.sub['technician'] = str(submission_info.iloc[11][1])
# # reagents
# # must be prefixed with 'lot_' to be recognized by gui
# # This is no longer true wince the creation of self.sub['reagents']
# self.sub['reagents'] = []
# reagent_range = submission_info.iloc[1:14, 4:8]
# logger.debug(reagent_range)
# parse_reagents(reagent_range)
# get individual sample info
sample_parser = SampleParser(self.ctx, submission_info.iloc[16:112])
logger.debug(f"Sample type: {self.sub['submission_type']}")
if isinstance(self.sub['submission_type'], dict):
getter = self.sub['submission_type']['value']
else:
getter = self.sub['submission_type']
sample_parse = getattr(sample_parser, f"parse_{getter.replace(' ', '_').lower()}_samples")
logger.debug(f"Parser result: {self.sub}")
self.sample_result, self.sub['samples'] = sample_parse()
def parse_wastewater(self) -> None:
"""
pulls info specific to wastewater sample type
Returns:
dict: Updated sample dictionary
"""
def retrieve_elution_map():
full = self.xl.parse("Extraction Worksheet")
elu_map = full.iloc[9:18, 5:]
elu_map.set_index(elu_map.columns[0], inplace=True)
elu_map.columns = elu_map.iloc[0]
elu_map = elu_map.tail(-1)
return elu_map
# def parse_reagents(df:pd.DataFrame) -> None:
# """
# Pulls reagents from the bacterial sub-dataframe
# Args:
# df (pd.DataFrame): input sub dataframe
# """
# # iterate through sub-df rows
# for ii, row in df.iterrows():
# # logger.debug(f"Parsing this row for reagents: {row}")
# if check_not_nan(row[5]):
# # must be prefixed with 'lot_' to be recognized by gui
# # regex below will remove 80% from 80% ethanol in the Wastewater kit.
# output_key = re.sub(r"^\d{1,3}%\s?", "", row[0].lower().strip().replace(' ', '_'))
# output_key = output_key.strip("_")
# # output_var is the lot number
# try:
# output_var = convert_nans_to_nones(str(row[5].upper()))
# except AttributeError:
# logger.debug(f"Couldn't upperize {row[5]}, must be a number")
# output_var = convert_nans_to_nones(str(row[5]))
# if check_not_nan(row[7]):
# try:
# expiry = row[7].date()
# except AttributeError:
# expiry = date.today()
# else:
# expiry = date.today()
# logger.debug(f"Expiry date for {output_key}: {expiry} of type {type(expiry)}")
# # self.sub[f"lot_{output_key}"] = {'lot':output_var, 'exp':expiry}
# # self.sub['reagents'].append(dict(type=output_key, lot=output_var, exp=expiry))
# reagent = PydReagent(type=output_key, lot=output_var, exp=expiry)
# logger.debug(f"Here is the created reagent: {reagent}")
# self.sub['reagents'].append(reagent)
# parse submission sheet
submission_info = self.parse_generic("WW Submissions (ENTER HERE)")
# parse enrichment sheet
enrichment_info = self.xl.parse("Enrichment Worksheet", dtype=object)
# set enrichment reagent range
enr_reagent_range = enrichment_info.iloc[0:4, 9:20]
# parse extraction sheet
extraction_info = self.xl.parse("Extraction Worksheet", dtype=object)
# set extraction reagent range
ext_reagent_range = extraction_info.iloc[0:5, 9:20]
# parse qpcr sheet
qprc_info = self.xl.parse("qPCR Worksheet", dtype=object)
# set qpcr reagent range
pcr_reagent_range = qprc_info.iloc[0:5, 9:20]
# compile technician info from all sheets
if all(map(check_not_nan, [enrichment_info.columns[2], extraction_info.columns[2], qprc_info.columns[2]])):
parsed = True
else:
parsed = False
self.sub['technician'] = dict(value=f"Enr: {enrichment_info.columns[2]}, Ext: {extraction_info.columns[2]}, PCR: {qprc_info.columns[2]}", parsed=parsed)
self.sub['reagents'] = []
# parse_reagents(enr_reagent_range)
# parse_reagents(ext_reagent_range)
# parse_reagents(pcr_reagent_range)
# parse samples
sample_parser = SampleParser(self.ctx, submission_info.iloc[16:], elution_map=retrieve_elution_map())
sample_parse = getattr(sample_parser, f"parse_{self.sub['submission_type']['value'].lower()}_samples")
self.sample_result, self.sub['samples'] = sample_parse()
self.sub['csv'] = self.xl.parse("Copy to import file", dtype=object)
def parse_wastewater_artic(self) -> None:
return input_dict
def parse_wastewater(self, input_dict) -> dict:
"""
pulls info specific to wastewater_arctic submission type
Update submission dictionary with type specific information
Args:
input_dict (dict): Input sample dictionary
Returns:
dict: Updated sample dictionary
"""
return input_dict
def parse_wastewater_artic(self, input_dict:dict) -> dict:
"""
if isinstance(self.sub['submission_type'], str):
self.sub['submission_type'] = dict(value=self.sub['submission_type'], parsed=True)
# def parse_reagents(df:pd.DataFrame):
# logger.debug(df)
# for ii, row in df.iterrows():
# if check_not_nan(row[1]):
# try:
# output_key = re.sub(r"\(.+?\)", "", row[0].lower().strip().replace(' ', '_'))
# except AttributeError:
# continue
# output_key = output_key.strip("_")
# output_key = massage_common_reagents(output_key)
# try:
# output_var = convert_nans_to_nones(str(row[1].upper()))
# except AttributeError:
# logger.debug(f"Couldn't upperize {row[1]}, must be a number")
# output_var = convert_nans_to_nones(str(row[1]))
# logger.debug(f"Output variable is {output_var}")
# logger.debug(f"Expiry date for imported reagent: {row[2]}")
# if check_not_nan(row[2]):
# try:
# expiry = row[2].date()
# except AttributeError as e:
# try:
# expiry = datetime.strptime(row[2], "%Y-%m-%d")
# except TypeError as e:
# expiry = datetime.fromordinal(datetime(1900, 1, 1).toordinal() + row[2] - 2)
# except ValueError as e:
# continue
# else:
# logger.debug(f"Date: {row[2]}")
# expiry = date.today()
# # self.sub['reagents'].append(dict(type=output_key, lot=output_var, exp=expiry))
# self.sub['reagents'].append(PydReagent(type=output_key, lot=output_var, exp=expiry))
# else:
# continue
def massage_samples(df:pd.DataFrame, lookup_table:pd.DataFrame) -> pd.DataFrame:
"""
Takes sample info from Artic sheet format and converts to regular formate
Update submission dictionary with type specific information
Args:
df (pd.DataFrame): Elution plate map
lookup_table (pd.DataFrame): Sample submission form map.
Args:
input_dict (dict): Input sample dictionary
Returns:
pd.DataFrame: _description_
"""
lookup_table.set_index(lookup_table.columns[0], inplace=True)
lookup_table.columns = lookup_table.iloc[0]
logger.debug(f"Massaging samples from {lookup_table}")
df.set_index(df.columns[0], inplace=True)
df.columns = df.iloc[0]
logger.debug(f"df to massage\n: {df}")
return_list = []
for _, ii in df.iloc[1:,1:].iterrows():
for c in df.columns.to_list():
if not check_not_nan(c):
continue
logger.debug(f"Checking {ii.name}{c}")
if check_not_nan(df.loc[ii.name, int(c)]) and df.loc[ii.name, int(c)] != "EMPTY":
sample_name = df.loc[ii.name, int(c)]
row = lookup_table.loc[lookup_table['Sample Name (WW)'] == sample_name]
logger.debug(f"Looking up {row['Sample Name (LIMS)'][-1]}")
try:
return_list.append(dict(submitter_id=re.sub(r"\s?\(.*\)", "", df.loc[ii.name, int(c)]), \
# well=f"{ii.name}{c}",
row = row_keys[ii.name],
column = c,
artic_plate=self.sub['rsl_plate_num'],
sample_name=row['Sample Name (LIMS)'][-1]
))
except TypeError as e:
logger.error(f"Got an int for {c}, skipping.")
continue
logger.debug(f"massaged sample list for {self.sub['rsl_plate_num']}: {pprint.pprint(return_list)}")
return return_list
submission_info = self.xl.parse("First Strand", dtype=object)
biomek_info = self.xl.parse("ArticV4 Biomek", dtype=object)
sub_reagent_range = submission_info.iloc[56:, 1:4].dropna(how='all')
biomek_reagent_range = biomek_info.iloc[60:, 0:3].dropna(how='all')
# submission_info = self.xl.parse("cDNA", dtype=object)
# biomek_info = self.xl.parse("ArticV4_1 Biomek", dtype=object)
# # Reminder that the iloc uses row, column ordering
# # sub_reagent_range = submission_info.iloc[56:, 1:4].dropna(how='all')
# sub_reagent_range = submission_info.iloc[7:15, 5:9].dropna(how='all')
# biomek_reagent_range = biomek_info.iloc[62:, 0:3].dropna(how='all')
self.sub['submitter_plate_num'] = ""
self.sub['rsl_plate_num'] = RSLNamer(ctx=self.ctx, instr=self.filepath.__str__()).parsed_name
self.sub['submitted_date'] = biomek_info.iloc[1][1]
self.sub['submitting_lab'] = "Enterics Wastewater Genomics"
self.sub['sample_count'] = submission_info.iloc[4][6]
# self.sub['sample_count'] = submission_info.iloc[34][6]
self.sub['extraction_kit'] = "ArticV4.1"
self.sub['technician'] = f"MM: {biomek_info.iloc[2][1]}, Bio: {biomek_info.iloc[3][1]}"
self.sub['reagents'] = []
# parse_reagents(sub_reagent_range)
# parse_reagents(biomek_reagent_range)
samples = massage_samples(biomek_info.iloc[22:31, 0:], submission_info.iloc[4:37, 1:5])
# samples = massage_samples(biomek_info.iloc[25:33, 0:])
sample_parser = SampleParser(self.ctx, pd.DataFrame.from_records(samples))
sample_parse = getattr(sample_parser, f"parse_{self.sub['submission_type']['value'].lower()}_samples")
self.sample_result, self.sub['samples'] = sample_parse()
# def parse_reagents(self):
# ext_kit = lookup_kittype_by_name(ctx=self.ctx, name=self.sub['extraction_kit'])
# if ext_kit != None:
# logger.debug(f"Querying extraction kit: {self.sub['submission_type']}")
# reagent_map = ext_kit.construct_xl_map_for_use(use=self.sub['submission_type']['value'])
# logger.debug(f"Reagent map: {pprint.pformat(reagent_map)}")
# else:
# raise AttributeError("No extraction kit found, unable to parse reagents")
# for sheet in self.xl.sheet_names:
# df = self.xl.parse(sheet)
# relevant = {k:v for k,v in reagent_map.items() if sheet in reagent_map[k]['sheet']}
# logger.debug(f"relevant map for {sheet}: {pprint.pformat(relevant)}")
# if relevant == {}:
# continue
# for item in relevant:
# try:
# # role = item
# name = df.iat[relevant[item]['name']['row']-2, relevant[item]['name']['column']-1]
# lot = df.iat[relevant[item]['lot']['row']-2, relevant[item]['lot']['column']-1]
# expiry = df.iat[relevant[item]['expiry']['row']-2, relevant[item]['expiry']['column']-1]
# except (KeyError, IndexError):
# continue
# # self.sub['reagents'].append(dict(name=name, lot=lot, expiry=expiry, role=role))
# self.sub['reagents'].append(PydReagent(type=item, lot=lot, exp=expiry, name=name))
Returns:
dict: Updated sample dictionary
"""
return input_dict
def import_kit_validation_check(self):
@@ -411,9 +168,6 @@ class SheetParser(object):
else:
if isinstance(self.sub['extraction_kit'], str):
self.sub['extraction_kit'] = dict(value=self.sub['extraction_kit'], parsed=False)
# logger.debug(f"Here is the validated parser dictionary:\n\n{pprint.pformat(self.sub)}\n\n")
# return parser_sub
def import_reagent_validation_check(self):
"""
@@ -439,20 +193,16 @@ class InfoParser(object):
def __init__(self, ctx:Settings, xl:pd.ExcelFile, submission_type:str):
self.ctx = ctx
# self.submission_type = submission_type
# self.extraction_kit = extraction_kit
self.map = self.fetch_submission_info_map(submission_type=submission_type)
self.xl = xl
logger.debug(f"Info map for InfoParser: {pprint.pformat(self.map)}")
def fetch_submission_info_map(self, submission_type:dict) -> dict:
if isinstance(submission_type, str):
submission_type = dict(value=submission_type, parsed=False)
logger.debug(f"Looking up submission type: {submission_type['value']}")
submission_type = lookup_submissiontype_by_name(ctx=self.ctx, type_name=submission_type['value'])
info_map = submission_type.info_map
# try:
# del info_map['samples']
# except KeyError:
# pass
return info_map
def parse_info(self) -> dict:
@@ -461,11 +211,13 @@ class InfoParser(object):
df = self.xl.parse(sheet, header=None)
relevant = {}
for k, v in self.map.items():
if isinstance(v, str):
dicto[k] = dict(value=v, parsed=True)
continue
if k == "samples":
continue
if sheet in self.map[k]['sheets']:
relevant[k] = v
# relevant = {k:v for k,v in self.map.items() if sheet in self.map[k]['sheets']}
logger.debug(f"relevant map for {sheet}: {pprint.pformat(relevant)}")
if relevant == {}:
continue
@@ -485,8 +237,6 @@ class InfoParser(object):
continue
else:
dicto[item] = dict(value=convert_nans_to_nones(value), parsed=False)
# if "submitter_plate_num" not in dicto.keys():
# dicto['submitter_plate_num'] = dict(value=None, parsed=False)
return dicto
class ReagentParser(object):
@@ -515,7 +265,6 @@ class ReagentParser(object):
for item in relevant:
logger.debug(f"Attempting to scrape: {item}")
try:
# role = item
name = df.iat[relevant[item]['name']['row']-1, relevant[item]['name']['column']-1]
lot = df.iat[relevant[item]['lot']['row']-1, relevant[item]['lot']['column']-1]
expiry = df.iat[relevant[item]['expiry']['row']-1, relevant[item]['expiry']['column']-1]
@@ -526,7 +275,6 @@ class ReagentParser(object):
parsed = True
else:
parsed = False
# self.sub['reagents'].append(dict(name=name, lot=lot, expiry=expiry, role=role))
logger.debug(f"Got lot for {item}-{name}: {lot} as {type(lot)}")
lot = str(lot)
listo.append(dict(value=PydReagent(type=item.strip(), lot=lot, exp=expiry, name=name), parsed=parsed))
@@ -556,8 +304,9 @@ class SampleParser(object):
self.lookup_table = self.construct_lookup_table(lookup_table_location=sample_info_map['lookup_table'])
self.excel_to_db_map = sample_info_map['xl_db_translation']
self.create_basic_dictionaries_from_plate_map()
self.parse_lookup_table()
if isinstance(self.lookup_table, pd.DataFrame):
self.parse_lookup_table()
def fetch_sample_info_map(self, submission_type:dict) -> dict:
logger.debug(f"Looking up submission type: {submission_type}")
submission_type = lookup_submissiontype_by_name(ctx=self.ctx, type_name=submission_type)
@@ -575,7 +324,10 @@ class SampleParser(object):
return df
def construct_lookup_table(self, lookup_table_location) -> pd.DataFrame:
df = self.xl.parse(lookup_table_location['sheet'], header=None, dtype=object)
try:
df = self.xl.parse(lookup_table_location['sheet'], header=None, dtype=object)
except KeyError:
return None
df = df.iloc[lookup_table_location['start_row']-1:lookup_table_location['end_row']]
df = pd.DataFrame(df.values[1:], columns=df.iloc[0])
df = df.reset_index(drop=True)
@@ -583,12 +335,16 @@ class SampleParser(object):
return df
def create_basic_dictionaries_from_plate_map(self):
invalids = [0, "0"]
new_df = self.plate_map.dropna(axis=1, how='all')
columns = new_df.columns.tolist()
for _, iii in new_df.iterrows():
for c in columns:
# logger.debug(f"Checking sample {iii[c]}")
if check_not_nan(iii[c]):
if iii[c] in invalids:
logger.debug(f"Invalid sample name: {iii[c]}, skipping.")
continue
id = iii[c]
logger.debug(f"Adding sample {iii[c]}")
try:
@@ -600,8 +356,9 @@ class SampleParser(object):
def parse_lookup_table(self):
def determine_if_date(input_str) -> str|date:
# logger.debug(f"Looks like we have a str: {input_str}")
regex = re.compile(r"\d{4}-?\d{2}-?\d{2}")
regex = re.compile(r"^\d{4}-?\d{2}-?\d{2}")
if bool(regex.search(input_str)):
logger.warning(f"{input_str} is a date!")
try:
return parse(input_str)
except ParserError:
@@ -610,6 +367,7 @@ class SampleParser(object):
return input_str
for sample in self.samples:
addition = self.lookup_table[self.lookup_table.isin([sample['submitter_id']]).any(axis=1)].squeeze().to_dict()
logger.debug(f"Lookuptable info: {addition}")
for k,v in addition.items():
# logger.debug(f"Checking {k} in lookup table.")
if check_not_nan(k) and isinstance(k, str):
@@ -645,193 +403,89 @@ class SampleParser(object):
case _:
v = v
try:
translated_dict[self.excel_to_db_map[k]] = v
translated_dict[self.excel_to_db_map[k]] = convert_nans_to_nones(v)
except KeyError:
translated_dict[k] = convert_nans_to_nones(v)
# translated_dict['sample_type'] = f"{self.submission_type.replace(' ', '_').lower()}_sample"
translated_dict['sample_type'] = f"{self.submission_type} Sample"
parser_query = f"parse_{translated_dict['sample_type'].replace(' ', '_').lower()}"
# logger.debug(f"New sample dictionary going into object creation:\n{translated_dict}")
try:
custom_parser = getattr(self, parser_query)
translated_dict = custom_parser(translated_dict)
except AttributeError:
logger.error(f"Couldn't get custom parser: {parser_query}")
new_samples.append(self.generate_sample_object(translated_dict))
return result, new_samples
def generate_sample_object(self, input_dict) -> dict:
    """
    Construct (or fetch for update) a sample ORM object from a parsed sample dict.

    Args:
        input_dict (dict): Parsed sample info; must contain 'sample_type',
            'submitter_id', 'row' and 'column'.

    Returns:
        dict: {'sample': model instance, 'row': int, 'column': int} giving the
            sample and its plate placement.
    """
    # Model class name is the sample type with spaces removed,
    # e.g. "Wastewater Sample" -> "WastewaterSample".
    query = input_dict['sample_type'].replace(" ", "")
    try:
        database_obj = getattr(models, query)
    except AttributeError:
        # Unknown sample type: fall back to the generic sample model.
        logger.error(f"Could not find the model {query}. Using generic.")
        database_obj = models.BasicSample
    logger.debug(f"Searching database for {input_dict['submitter_id']}...")
    instance = lookup_sample_by_submitter_id(ctx=self.ctx, submitter_id=input_dict['submitter_id'])
    if instance == None:
        logger.debug(f"Couldn't find sample {input_dict['submitter_id']}. Creating new sample.")
        instance = database_obj()
        for k, v in input_dict.items():
            try:
                # set_attribute lets the model validate/translate the field.
                instance.set_attribute(k, v)
            except Exception as e:
                logger.error(f"Failed to set {k} due to {type(e).__name__}: {e}")
    else:
        # Existing sample: leave its fields alone; the caller runs the update.
        logger.debug(f"Sample {instance.submitter_id} already exists, will run update.")
    return dict(sample=instance, row=input_dict['row'], column=input_dict['column'])
# def parse_bacterial_culture_samples(self) -> Tuple[str|None, list[dict]]:
def parse_bacterial_culture_sample(self, input_dict:dict) -> dict:
    """
    Update sample dictionary with bacterial culture specific information

    Args:
        input_dict (dict): Input sample dictionary

    Returns:
        dict: Updated sample dictionary
    """
    # No bacterial-culture-specific transformation is needed yet; this hook
    # exists so the per-type parser dispatch (parse_{sample_type}) resolves.
    logger.debug("Called bacterial culture sample parser")
    return input_dict
def parse_wastewater_sample(self, input_dict:dict) -> dict:
    """
    Update sample dictionary with wastewater specific information

    Args:
        input_dict (dict): Input sample dictionary

    Returns:
        dict: Updated sample dictionary
    """
    logger.debug(f"Called wastewater sample parser")
    # No wastewater-specific tweaks yet; return the dict unchanged so the
    # dispatch site (`translated_dict = custom_parser(translated_dict)`)
    # does not overwrite the sample dict with None.
    return input_dict
def parse_wastewater_artic_sample(self, input_dict:dict) -> dict:
    """
    Update sample dictionary with artic specific information

    Args:
        input_dict (dict): Input sample dictionary

    Returns:
        dict: Updated sample dictionary
    """
    logger.debug("Called wastewater artic sample parser")
    # Artic submissions re-sequence existing wastewater samples, so point the
    # downstream lookup at the Wastewater Sample records rather than a new type.
    input_dict['sample_type'] = "Wastewater Sample"
    # Because generate_sample_object needs the submitter_id and the artic has the "({origin well})"
    # at the end, this has to be done here. No moving to sqlalchemy object :(
    input_dict['submitter_id'] = re.sub(r"\s\(.+\)$", "", str(input_dict['submitter_id'])).strip()
    return input_dict
class PCRParser(object):
"""
Object to pull data from Design and Analysis PCR export file.
TODO: Generify this object.
"""
def __init__(self, ctx:dict, filepath:Path|None = None) -> None:
"""

View File

@@ -1,5 +1,5 @@
import uuid
from pydantic import BaseModel, field_validator, Extra
from pydantic import BaseModel, field_validator, Extra, Field
from datetime import date, datetime
from dateutil.parser import parse
from dateutil.parser._parser import ParserError
@@ -32,10 +32,17 @@ class PydReagent(BaseModel):
@field_validator("lot", mode='before')
@classmethod
def rescue_lot_string(cls, value):
    """
    Coerce a raw lot value to a string before validation, mapping
    NaN-like values to None; None passes through untouched.
    """
    if value != None:
        return convert_nans_to_nones(str(value))
    return value
@field_validator("lot")
@classmethod
def enforce_lot_string(cls, value):
    """
    Normalise a validated lot string to upper case; leave None as-is.
    """
    if value == None:
        return value
    return value.upper()
@field_validator("exp", mode="before")
@classmethod
@@ -66,8 +73,9 @@ class PydSubmission(BaseModel, extra=Extra.allow):
ctx: Settings
filepath: Path
submission_type: dict|None
submitter_plate_num: dict|None
rsl_plate_num: dict|None
# For defaults
submitter_plate_num: dict|None = Field(default=dict(value=None, parsed=False), validate_default=True)
rsl_plate_num: dict|None = Field(default=dict(value=None, parsed=False), validate_default=True)
submitted_date: dict|None
submitting_lab: dict|None
sample_count: dict|None
@@ -77,12 +85,12 @@ class PydSubmission(BaseModel, extra=Extra.allow):
samples: List[Any]
# missing_fields: List[str] = []
@field_validator("submitter_plate_num")
@classmethod
def rescue_submitter_id(cls, value):
if value == None:
return dict(value=None, parsed=False)
return value
# @field_validator("submitter_plate_num", mode="before")
# @classmethod
# def rescue_submitter_id(cls, value):
# if value == None:
# return dict(value=None, parsed=False)
# return value
@field_validator("submitter_plate_num")
@classmethod