diff --git a/CHANGELOG.md b/CHANGELOG.md index c4ca625..c8994f7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## 202309.02 + +- Massive restructure of app and database to allow better relationships between kits/reagenttypes & submissions/samples. + ## 202308.03 - Large restructure of database to allow better relationships between kits/reagenttypes & submissions/samples. diff --git a/TODO.md b/TODO.md index 647b4c1..7752aa4 100644 --- a/TODO.md +++ b/TODO.md @@ -1,8 +1,8 @@ - [ ] Clean up & document code... again. - Including paring down the logging.debugs - [ ] Fix Tests... again. -- [ ] Rebuild database -- [ ] Provide more generic names for reagenttypes in kits and move specific names to reagents. +- [x] Rebuild database +- [x] Provide more generic names for reagenttypes in kits and move specific names to reagents. - ex. Instead of "omega_e-z_96_disruptor_plate_c_plus" in reagent types, have "omega_plate" and have "omega_e-z_96_disruptor_plate_c_plus" in reagent name. - Maybe rename to "ReagentRoles"? - If I'm doing this, since the forms have a different layout for each submission type I should rewrite the parser to use the locations given in database... Which I should do anyway diff --git a/src/submissions/__init__.py b/src/submissions/__init__.py index f4f8f2b..136be07 100644 --- a/src/submissions/__init__.py +++ b/src/submissions/__init__.py @@ -4,7 +4,7 @@ from pathlib import Path # Version of the realpython-reader package __project__ = "submissions" -__version__ = "202308.1b" +__version__ = "202309.2b" __author__ = {"name":"Landon Wark", "email":"Landon.Wark@phac-aspc.gc.ca"} __copyright__ = "2022-2023, Government of Canada" @@ -32,4 +32,8 @@ class bcolors: # Landon, this is your slightly less past self here. For the most part, Past Landon has not screwed us. I've been able to add in the # Wastewater Artic with minimal difficulties, except that the parser of the non-standard, user-generated excel sheets required slightly -# more work. 
\ No newline at end of file +# more work. + +# Landon, this is your even more slightly less past self here. I've overhauled a lot of stuff to make things more flexible, so you should +# hopefully be even less screwed than before... at least with regards to parsers. The addition of kits and such is another story. Putting that +# On the todo list. \ No newline at end of file diff --git a/src/submissions/backend/db/functions.py b/src/submissions/backend/db/functions.py index a80c211..c434c02 100644 --- a/src/submissions/backend/db/functions.py +++ b/src/submissions/backend/db/functions.py @@ -4,9 +4,6 @@ Convenience functions for interacting with the database. import pprint from . import models -# from .models.kits import KitType -# from .models.submissions import BasicSample, reagents_submissions, BasicSubmission, SubmissionSampleAssociation -# from .models import submissions import pandas as pd import sqlalchemy.exc import sqlite3 @@ -34,7 +31,7 @@ def set_sqlite_pragma(dbapi_connection, connection_record): cursor.execute("PRAGMA foreign_keys=ON") cursor.close() -def store_submission(ctx:Settings, base_submission:models.BasicSubmission, samples:List[dict]=[]) -> None|dict: +def store_submission(ctx:Settings, base_submission:models.BasicSubmission) -> None|dict: """ Upserts submissions into database @@ -46,55 +43,19 @@ def store_submission(ctx:Settings, base_submission:models.BasicSubmission, sampl None|dict : object that indicates issue raised for reporting in gui """ logger.debug(f"Hello from store_submission") - # Add all samples to sample table + # Final check for proper RSL name typer = RSLNamer(ctx=ctx, instr=base_submission.rsl_plate_num) base_submission.rsl_plate_num = typer.parsed_name - # for sample in samples: - # instance = sample['sample'] - # logger.debug(f"Typer: {typer.submission_type}") - # logger.debug(f"sample going in: {type(sample['sample'])}\n{sample['sample'].__dict__}") - # # Suuuuuper hacky way to be sure that the artic doesn't overwrite the 
ww plate in a ww sample - # # need something more elegant - # # if "_artic" not in typer.submission_type: - # # sample.rsl_plate = base_submission - # # else: - # # logger.debug(f"{sample.ww_sample_full_id} is an ARTIC sample.") - # # # base_submission.samples.remove(sample) - # # # sample.rsl_plate = sample.rsl_plate - # # # sample.artic_rsl_plate = base_submission - # # logger.debug(f"Attempting to add sample: {sample.to_string()}") - # # try: - # # ctx['database_session'].add(sample) - # # ctx.database_session.add(instance) - # # ctx.database_session.commit() - # # logger.debug(f"Submitter id: {sample['sample'].submitter_id} and table id: {sample['sample'].id}") - # logger.debug(f"Submitter id: {instance.submitter_id} and table id: {instance.id}") - # assoc = SubmissionSampleAssociation(submission=base_submission, sample=instance, row=sample['row'], column=sample['column']) - - # # except (sqlite3.IntegrityError, sqlalchemy.exc.IntegrityError) as e: - # # logger.debug(f"Hit an integrity error : {e}") - # # continue - # try: - # base_submission.submission_sample_associations.append(assoc) - # except IntegrityError as e: - # logger.critical(e) - # continue - # logger.debug(f"Here is the sample to be stored in the DB: {sample.__dict__}") - # Add submission to submission table - # ctx['database_session'].add(base_submission) ctx.database_session.add(base_submission) logger.debug(f"Attempting to add submission: {base_submission.rsl_plate_num}") try: - # ctx['database_session'].commit() ctx.database_session.commit() except (sqlite3.IntegrityError, sqlalchemy.exc.IntegrityError) as e: logger.debug(f"Hit an integrity error : {e}") - # ctx['database_session'].rollback() ctx.database_session.rollback() return {"message":"This plate number already exists, so we can't add it.", "status":"Critical"} except (sqlite3.OperationalError, sqlalchemy.exc.IntegrityError) as e: logger.debug(f"Hit an operational error: {e}") - # ctx['database_session'].rollback() 
ctx.database_session.rollback() return {"message":"The database is locked for editing.", "status":"Critical"} return None @@ -111,10 +72,8 @@ def store_reagent(ctx:Settings, reagent:models.Reagent) -> None|dict: None|dict: object indicating issue to be reported in the gui """ logger.debug(f"Reagent dictionary: {reagent.__dict__}") - # ctx['database_session'].add(reagent) ctx.database_session.add(reagent) try: - # ctx['database_session'].commit() ctx.database_session.commit() except (sqlite3.OperationalError, sqlalchemy.exc.OperationalError): return {"message":"The database is locked for editing."} @@ -131,7 +90,6 @@ def construct_submission_info(ctx:Settings, info_dict:dict) -> models.BasicSubmi Returns: models.BasicSubmission: Constructed submission object """ - # from tools import check_regex_match, RSLNamer # convert submission type into model name query = info_dict['submission_type'].replace(" ", "") # Ensure an rsl plate number exists for the plate @@ -143,8 +101,6 @@ def construct_submission_info(ctx:Settings, info_dict:dict) -> models.BasicSubmi # enforce conventions on the rsl plate number from the form info_dict['rsl_plate_num'] = RSLNamer(ctx=ctx, instr=info_dict["rsl_plate_num"]).parsed_name # check database for existing object - # instance = ctx['database_session'].query(models.BasicSubmission).filter(models.BasicSubmission.rsl_plate_num==info_dict['rsl_plate_num']).first() - # instance = ctx.database_session.query(models.BasicSubmission).filter(models.BasicSubmission.rsl_plate_num==info_dict['rsl_plate_num']).first() instance = lookup_submission_by_rsl_num(ctx=ctx, rsl_num=info_dict['rsl_plate_num']) # get model based on submission type converted above logger.debug(f"Looking at models for submission type: {query}") @@ -166,7 +122,6 @@ def construct_submission_info(ctx:Settings, info_dict:dict) -> models.BasicSubmi # set fields based on keys in dictionary match item: case "extraction_kit": - # q_str = info_dict[item] logger.debug(f"Looking up kit 
{value}") try: field_value = lookup_kittype_by_name(ctx=ctx, name=value) @@ -185,13 +140,7 @@ def construct_submission_info(ctx:Settings, info_dict:dict) -> models.BasicSubmi field_value = lookup_org_by_name(ctx=ctx, name=value) logger.debug(f"Got {field_value} for organization {value}") case "submitter_plate_num": - # Because of unique constraint, there will be problems with - # multiple submissions named 'None', so... - # Should be depreciated with use of pydantic validator logger.debug(f"Submitter plate id: {value}") - # if info_dict[item] == None or info_dict[item] == "None" or info_dict[item] == "": - # logger.debug(f"Got None as a submitter plate number, inserting random string to preserve database unique constraint.") - # info_dict[item] = uuid.uuid4().hex.upper() field_value = value case "samples": for sample in value: @@ -200,6 +149,7 @@ def construct_submission_info(ctx:Settings, info_dict:dict) -> models.BasicSubmi sample_instance = sample['sample'] else: logger.warning(f"Sample {sample} already exists, creating association.") + logger.debug(f"Adding {sample_instance.__dict__}") if sample_instance in instance.samples: logger.error(f"Looks like there's a duplicate sample on this plate: {sample_instance.submitter_id}!") continue @@ -207,7 +157,7 @@ def construct_submission_info(ctx:Settings, info_dict:dict) -> models.BasicSubmi with ctx.database_session.no_autoflush: try: sample_query = sample_instance.sample_type.replace('Sample', '').strip() - logger.debug(f"Here is the sample instance type: {sample_query}") + logger.debug(f"Here is the sample instance type: {sample_instance}") try: assoc = getattr(models, f"{sample_query}Association") except AttributeError as e: @@ -227,7 +177,6 @@ def construct_submission_info(ctx:Settings, info_dict:dict) -> models.BasicSubmi continue continue case "submission_type": - # item = "submission_type" field_value = lookup_submissiontype_by_name(ctx=ctx, type_name=value) case _: field_value = value @@ -242,14 +191,13 @@ def 
construct_submission_info(ctx:Settings, info_dict:dict) -> models.BasicSubmi # calculate cost of the run: immutable cost + mutable times number of columns # This is now attached to submission upon creation to preserve at-run costs incase of cost increase in the future. try: - # ceil(instance.sample_count / 8) will get number of columns - # the cost of a full run multiplied by (that number / 12) is x twelfths the cost of a full run logger.debug(f"Calculating costs for procedure...") instance.calculate_base_cost() except (TypeError, AttributeError) as e: logger.debug(f"Looks like that kit doesn't have cost breakdown yet due to: {e}, using full plate cost.") instance.run_cost = instance.extraction_kit.cost_per_run logger.debug(f"Calculated base run cost of: {instance.run_cost}") + # Apply any discounts that are applicable for client and kit. try: logger.debug("Checking and applying discounts...") discounts = [item.amount for item in lookup_discounts_by_org_and_kit(ctx=ctx, kit_id=instance.extraction_kit.id, lab_id=instance.submitting_lab.id)] @@ -299,12 +247,6 @@ def construct_reagent(ctx:Settings, info_dict:dict) -> models.Reagent: reagent.name = info_dict[item] # add end-of-life extension from reagent type to expiry date # NOTE: this will now be done only in the reporting phase to account for potential changes in end-of-life extensions - # try: - # reagent.expiry = reagent.expiry + reagent.type.eol_ext - # except TypeError as e: - # logger.debug(f"We got a type error: {e}.") - # except AttributeError: - # pass return reagent def get_all_reagenttype_names(ctx:Settings) -> list[str]: @@ -317,7 +259,6 @@ def get_all_reagenttype_names(ctx:Settings) -> list[str]: Returns: list[str]: reagent type names """ - # lookedup = [item.__str__() for item in ctx['database_session'].query(models.ReagentType).all()] lookedup = [item.__str__() for item in ctx.database_session.query(models.ReagentType).all()] return lookedup @@ -333,12 +274,11 @@ def 
lookup_reagenttype_by_name(ctx:Settings, rt_name:str) -> models.ReagentType: models.ReagentType: looked up reagent type """ logger.debug(f"Looking up ReagentType by name: {rt_name.title()}") - # lookedup = ctx['database_session'].query(models.ReagentType).filter(models.ReagentType.name==rt_name).first() lookedup = ctx.database_session.query(models.ReagentType).filter(models.ReagentType.name==rt_name).first() logger.debug(f"Found ReagentType: {lookedup}") return lookedup -def lookup_kittype_by_use(ctx:Settings, used_by:str|None=None) -> list[models.KitType]: +def lookup_kittype_by_use(ctx:Settings, used_for:str|None=None) -> list[models.KitType]: """ Lookup kits by a sample type its used for @@ -349,10 +289,9 @@ def lookup_kittype_by_use(ctx:Settings, used_by:str|None=None) -> list[models.Ki Returns: list[models.KitType]: list of kittypes that have that sample type in their uses """ - if used_by != None: - # return ctx['database_session'].query(models.KitType).filter(models.KitType.used_for.contains(used_by)).all() - # return ctx.database_session.query(models.KitType).filter(models.KitType.used_for.contains(used_by)).all() - return ctx.database_session.query(models.KitType).filter(models.KitType.used_for.any(name=used_by)).all() + if used_for != None: + # Get kittypes whose 'used_for' name is used_for. 
+ return ctx.database_session.query(models.KitType).filter(models.KitType.used_for.any(name=used_for)).all() else: # return ctx['database_session'].query(models.KitType).all() return ctx.database_session.query(models.KitType).all() @@ -371,11 +310,20 @@ def lookup_kittype_by_name(ctx:Settings, name:str|dict) -> models.KitType: if isinstance(name, dict): name = name['value'] logger.debug(f"Querying kittype: {name}") - # return ctx['database_session'].query(models.KitType).filter(models.KitType.name==name).first() with ctx.database_session.no_autoflush: return ctx.database_session.query(models.KitType).filter(models.KitType.name==name).first() def lookup_kittype_by_id(ctx:Settings, id:int) -> models.KitType: + """ + Find a kit by its id integer + + Args: + ctx (Settings): Settings passed down from gui + id (int): id number of the kit. + + Returns: + models.KitType: Kit. + """ return ctx.database_session.query(models.KitType).filter(models.KitType.id==id).first() def lookup_regent_by_type_name(ctx:Settings, type_name:str) -> list[models.Reagent]: @@ -389,7 +337,6 @@ def lookup_regent_by_type_name(ctx:Settings, type_name:str) -> list[models.Reage Returns: list[models.Reagent]: list of retrieved reagents """ - # return ctx['database_session'].query(models.Reagent).join(models.Reagent.type, aliased=True).filter(models.ReagentType.name==type_name).all() return ctx.database_session.query(models.Reagent).join(models.Reagent.type, aliased=True).filter(models.ReagentType.name==type_name).all() def lookup_regent_by_type_name_and_kit_name(ctx:Settings, type_name:str, kit_name:str) -> list[models.Reagent]: @@ -406,8 +353,6 @@ def lookup_regent_by_type_name_and_kit_name(ctx:Settings, type_name:str, kit_nam """ # What I want to do is get the reagent type by name # Hang on, this is going to be a long one. 
- # by_type = ctx['database_session'].query(models.Reagent).join(models.Reagent.type, aliased=True).filter(models.ReagentType.name.endswith(type_name)).all() - # rt_types = ctx['database_session'].query(models.ReagentType).filter(models.ReagentType.name.endswith(type_name)) rt_types = ctx.database_session.query(models.ReagentType).filter(models.ReagentType.name.endswith(type_name)) # add filter for kit name... try: @@ -440,7 +385,7 @@ def lookup_all_submissions_by_type(ctx:Settings, sub_type:str|None=None, chronol subs = ctx.database_session.query(models.BasicSubmission) else: # subs = ctx['database_session'].query(models.BasicSubmission).filter(models.BasicSubmission.submission_type==sub_type.lower().replace(" ", "_")).all() - subs = ctx.database_session.query(models.BasicSubmission).filter(models.BasicSubmission.submission_type==sub_type.lower().replace(" ", "_")) + subs = ctx.database_session.query(models.BasicSubmission).filter(models.BasicSubmission.submission_type_name==sub_type) if chronologic: subs.order_by(models.BasicSubmission.submitted_date) return subs.all() @@ -1172,6 +1117,25 @@ def lookup_subsamp_association_by_plate_sample(ctx:Settings, rsl_plate_num:str, .filter(models.BasicSample.submitter_id==rsl_sample_num)\ .first() +def lookup_sub_wwsamp_association_by_plate_sample(ctx:Settings, rsl_plate_num:str, rsl_sample_num:str) -> models.WastewaterAssociation: + """ + _summary_ + + Args: + ctx (Settings): _description_ + rsl_plate_num (str): _description_ + sample_submitter_id (_type_): _description_ + + Returns: + models.SubmissionSampleAssociation: _description_ + """ + return ctx.database_session.query(models.WastewaterAssociation)\ + .join(models.Wastewater)\ + .join(models.WastewaterSample)\ + .filter(models.BasicSubmission.rsl_plate_num==rsl_plate_num)\ + .filter(models.BasicSample.submitter_id==rsl_sample_num)\ + .first() + def lookup_all_reagent_names_by_role(ctx:Settings, role_name:str) -> List[str]: """ _summary_ @@ -1222,4 +1186,18 @@ def 
add_reagenttype_to_kit(ctx:Settings, rt_name:str, kit_name:str, eol:int=0): kit.kit_reagenttype_associations.append(assoc) ctx.database_session.add(kit) ctx.database_session.commit() - \ No newline at end of file + +def lookup_subsamp_association_by_models(ctx:Settings, submission:models.BasicSubmission, sample:models.BasicSample) -> models.SubmissionSampleAssociation: + return ctx.database_session.query(models.SubmissionSampleAssociation) \ + .filter(models.SubmissionSampleAssociation.submission==submission) \ + .filter(models.SubmissionSampleAssociation.sample==sample).first() + +def update_subsampassoc_with_pcr(ctx:Settings, submission:models.BasicSubmission, sample:models.BasicSample, input_dict:dict): + assoc = lookup_subsamp_association_by_models(ctx, submission=submission, sample=sample) + for k,v in input_dict.items(): + try: + setattr(assoc, k, v) + except AttributeError: + logger.error(f"Can't set {k} to {v}") + ctx.database_session.add(assoc) + ctx.database_session.commit() \ No newline at end of file diff --git a/src/submissions/backend/db/models/__init__.py b/src/submissions/backend/db/models/__init__.py index cf0ed2d..dd4a087 100644 --- a/src/submissions/backend/db/models/__init__.py +++ b/src/submissions/backend/db/models/__init__.py @@ -9,5 +9,4 @@ metadata = Base.metadata from .controls import Control, ControlType from .kits import KitType, ReagentType, Reagent, Discount, KitTypeReagentTypeAssociation, SubmissionType, SubmissionTypeKitTypeAssociation from .organizations import Organization, Contact -# from .samples import WWSample, BCSample, BasicSample from .submissions import BasicSubmission, BacterialCulture, Wastewater, WastewaterArtic, WastewaterSample, BacterialCultureSample, BasicSample, SubmissionSampleAssociation, WastewaterAssociation diff --git a/src/submissions/backend/db/models/kits.py b/src/submissions/backend/db/models/kits.py index 787c3c3..f293aaa 100644 --- a/src/submissions/backend/db/models/kits.py +++ 
b/src/submissions/backend/db/models/kits.py @@ -12,16 +12,6 @@ import logging logger = logging.getLogger(f'submissions.{__name__}') -# # Table containing reagenttype-kittype relationships -# reagenttypes_kittypes = Table("_reagentstypes_kittypes", Base.metadata, -# Column("reagent_types_id", INTEGER, ForeignKey("_reagent_types.id")), -# Column("kits_id", INTEGER, ForeignKey("_kits.id")), -# # The entry will look like ["Bacteria Culture":{"row":1, "column":4}] -# Column("uses", JSON), -# # is the reagent required for that kit? -# Column("required", INTEGER) -# ) - reagenttypes_reagents = Table("_reagenttypes_reagents", Base.metadata, Column("reagent_id", INTEGER, ForeignKey("_reagents.id")), Column("reagenttype_id", INTEGER, ForeignKey("_reagent_types.id"))) @@ -34,13 +24,7 @@ class KitType(Base): id = Column(INTEGER, primary_key=True) #: primary key name = Column(String(64), unique=True) #: name of kit submissions = relationship("BasicSubmission", back_populates="extraction_kit") #: submissions this kit was used for - # used_for = Column(JSON) #: list of names of sample types this kit can process - # used_for = relationship("SubmissionType", back_populates="extraction_kits", uselist=True, secondary=submissiontype_kittypes) - # cost_per_run = Column(FLOAT(2)) #: dollar amount for each full run of this kit NOTE: depreciated, use the constant and mutable costs instead - # reagent_types = relationship("ReagentType", back_populates="kits", uselist=True, secondary=reagenttypes_kittypes) #: reagent types this kit contains - # reagent_types_id = Column(INTEGER, ForeignKey("_reagent_types.id", ondelete='SET NULL', use_alter=True, name="fk_KT_reagentstype_id")) #: joined reagent type id - # kit_reagenttype_association = - + kit_reagenttype_associations = relationship( "KitTypeReagentTypeAssociation", back_populates="kit_type", @@ -51,7 +35,6 @@ class KitType(Base): # to "keyword" attribute reagent_types = association_proxy("kit_reagenttype_associations", "reagent_type") - 
kit_submissiontype_associations = relationship( "SubmissionTypeKitTypeAssociation", back_populates="kit_type", @@ -60,7 +43,6 @@ class KitType(Base): used_for = association_proxy("kit_submissiontype_associations", "submission_type") - def __repr__(self) -> str: return f"" @@ -74,6 +56,15 @@ class KitType(Base): return self.name def get_reagents(self, required:bool=False) -> list: + """ + Return ReagentTypes linked to kit through KitTypeReagentTypeAssociation. + + Args: + required (bool, optional): If true only return required types. Defaults to False. + + Returns: + list: List of ReagentTypes + """ if required: return [item.reagent_type for item in self.kit_reagenttype_associations if item.required == 1] else: @@ -81,14 +72,24 @@ class KitType(Base): def construct_xl_map_for_use(self, use:str) -> dict: - # map = self.used_for[use] + """ + Creates map of locations in excel workbook for a SubmissionType + + Args: + use (str): Submissiontype.name + + Returns: + dict: Dictionary containing information locations. 
+ """ map = {} + # Get all KitTypeReagentTypeAssociation for SubmissionType assocs = [item for item in self.kit_reagenttype_associations if use in item.uses] for assoc in assocs: try: map[assoc.reagent_type.name] = assoc.uses[use] except TypeError: continue + # Get SubmissionType info map try: st_assoc = [item for item in self.used_for if use == item.name][0] map['info'] = st_assoc.info_map @@ -106,7 +107,6 @@ class KitTypeReagentTypeAssociation(Base): kits_id = Column(INTEGER, ForeignKey("_kits.id"), primary_key=True) uses = Column(JSON) required = Column(INTEGER) - # reagent_type_name = Column(INTEGER, ForeignKey("_reagent_types.name")) kit_type = relationship(KitType, back_populates="kit_reagenttype_associations") @@ -139,11 +139,8 @@ class ReagentType(Base): id = Column(INTEGER, primary_key=True) #: primary key name = Column(String(64)) #: name of reagent type - # kit_id = Column(INTEGER, ForeignKey("_kits.id", ondelete="SET NULL", use_alter=True, name="fk_RT_kits_id")) #: id of joined kit type - # kits = relationship("KitType", back_populates="reagent_types", uselist=True, foreign_keys=[kit_id]) #: kits this reagent is used in instances = relationship("Reagent", back_populates="type", secondary=reagenttypes_reagents) #: concrete instances of this reagent type eol_ext = Column(Interval()) #: extension of life interval - # required = Column(INTEGER, server_default="1") #: sqlite boolean to determine if reagent type is essential for the kit last_used = Column(String(32)) #: last used lot number of this type of reagent @validates('required') @@ -202,8 +199,10 @@ class Reagent(Base): dict: gui friendly dictionary """ if extraction_kit != None: + # Get the intersection of this reagent's ReagentType and all ReagentTypes in KitType try: reagent_role = list(set(self.type).intersection(extraction_kit.reagent_types))[0] + # Most will be able to fall back to first ReagentType in itself because most will only have 1. 
except: reagent_role = self.type[0] else: @@ -212,9 +211,9 @@ class Reagent(Base): rtype = reagent_role.name.replace("_", " ").title() except AttributeError: rtype = "Unknown" + # Calculate expiry with EOL from ReagentType try: place_holder = self.expiry + reagent_role.eol_ext - # logger.debug(f"EOL_ext for {self.lot} -- {self.expiry} + {self.type.eol_ext} = {place_holder}") except TypeError as e: place_holder = date.today() logger.debug(f"We got a type error setting {self.lot} expiry: {e}. setting to today for testing") @@ -227,9 +226,28 @@ class Reagent(Base): "expiry": place_holder.strftime("%Y-%m-%d") } - def to_reagent_dict(self) -> dict: + def to_reagent_dict(self, extraction_kit:KitType=None) -> dict: + """ + Returns basic reagent dictionary. + + Returns: + dict: Basic reagent dictionary of 'type', 'lot', 'expiry' + """ + if extraction_kit != None: + # Get the intersection of this reagent's ReagentType and all ReagentTypes in KitType + try: + reagent_role = list(set(self.type).intersection(extraction_kit.reagent_types))[0] + # Most will be able to fall back to first ReagentType in itself because most will only have 1. + except: + reagent_role = self.type[0] + else: + reagent_role = self.type[0] + try: + rtype = reagent_role.name + except AttributeError: + rtype = "Unknown" return { - "type": type, + "type": rtype, "lot": self.lot, "expiry": self.expiry.strftime("%Y-%m-%d") } @@ -249,12 +267,14 @@ class Discount(Base): amount = Column(FLOAT(2)) class SubmissionType(Base): - + """ + Abstract of types of submissions. + """ __tablename__ = "_submission_types" id = Column(INTEGER, primary_key=True) #: primary key name = Column(String(128), unique=True) #: name of submission type - info_map = Column(JSON) + info_map = Column(JSON) #: Where basic information is found in the excel workbook corresponding to this type. 
instances = relationship("BasicSubmission", backref="submission_type") submissiontype_kit_associations = relationship( @@ -269,14 +289,15 @@ class SubmissionType(Base): return f"" class SubmissionTypeKitTypeAssociation(Base): - + """ + Abstract of relationship between kits and their submission type. + """ __tablename__ = "_submissiontypes_kittypes" submission_types_id = Column(INTEGER, ForeignKey("_submission_types.id"), primary_key=True) kits_id = Column(INTEGER, ForeignKey("_kits.id"), primary_key=True) mutable_cost_column = Column(FLOAT(2)) #: dollar amount per 96 well plate that can change with number of columns (reagents, tips, etc) mutable_cost_sample = Column(FLOAT(2)) #: dollar amount that can change with number of samples (reagents, tips, etc) constant_cost = Column(FLOAT(2)) #: dollar amount per plate that will remain constant (plates, man hours, etc) - # reagent_type_name = Column(INTEGER, ForeignKey("_reagent_types.name")) kit_type = relationship(KitType, back_populates="kit_submissiontype_associations") diff --git a/src/submissions/backend/db/models/organizations.py b/src/submissions/backend/db/models/organizations.py index 480e93d..d3dde7a 100644 --- a/src/submissions/backend/db/models/organizations.py +++ b/src/submissions/backend/db/models/organizations.py @@ -21,7 +21,6 @@ class Organization(Base): submissions = relationship("BasicSubmission", back_populates="submitting_lab") #: submissions this organization has submitted cost_centre = Column(String()) #: cost centre used by org for payment contacts = relationship("Contact", back_populates="organization", secondary=orgs_contacts) #: contacts involved with this org - # contact_ids = Column(INTEGER, ForeignKey("_contacts.id", ondelete="SET NULL", name="fk_org_contact_id")) #: contact ids of this organization def __str__(self) -> str: """ @@ -47,5 +46,4 @@ class Contact(Base): email = Column(String(64)) #: contact email phone = Column(String(32)) #: contact phone number organization = 
relationship("Organization", back_populates="contacts", uselist=True, secondary=orgs_contacts) #: relationship to joined organization - # organization_id = Column(INTEGER, ForeignKey("_organizations.id", ondelete="SET NULL", name="fk_contact_org_id")) #: joined organization ids diff --git a/src/submissions/backend/db/models/submissions.py b/src/submissions/backend/db/models/submissions.py index e33f7ce..a83845f 100644 --- a/src/submissions/backend/db/models/submissions.py +++ b/src/submissions/backend/db/models/submissions.py @@ -3,7 +3,7 @@ Models for the main submission types. ''' import math from . import Base -from sqlalchemy import Column, String, TIMESTAMP, INTEGER, ForeignKey, Table, JSON, FLOAT +from sqlalchemy import Column, String, TIMESTAMP, INTEGER, ForeignKey, Table, JSON, FLOAT, case from sqlalchemy.orm import relationship, validates import logging import json @@ -11,10 +11,10 @@ from json.decoder import JSONDecodeError from math import ceil from sqlalchemy.ext.associationproxy import association_proxy import uuid -from . 
import Base from pandas import Timestamp from dateutil.parser import parse import pprint +from tools import check_not_nan logger = logging.getLogger(f"submissions.{__name__}") @@ -23,7 +23,7 @@ reagents_submissions = Table("_reagents_submissions", Base.metadata, Column("rea class BasicSubmission(Base): """ - Base of basic submission which polymorphs into BacterialCulture and Wastewater + Concrete of basic submission which polymorphs into BacterialCulture and Wastewater """ __tablename__ = "_submissions" @@ -36,7 +36,6 @@ class BasicSubmission(Base): sample_count = Column(INTEGER) #: Number of samples in the submission extraction_kit = relationship("KitType", back_populates="submissions") #: The extraction kit used extraction_kit_id = Column(INTEGER, ForeignKey("_kits.id", ondelete="SET NULL", name="fk_BS_extkit_id")) - # submission_type = Column(String(32)) #: submission type (should be string in D3 of excel sheet) submission_type_name = Column(String, ForeignKey("_submission_types.name", ondelete="SET NULL", name="fk_BS_subtype_name")) technician = Column(String(64)) #: initials of processing tech(s) # Move this into custom types? @@ -83,7 +82,6 @@ class BasicSubmission(Base): dict: dictionary used in submissions summary """ # get lab from nested organization object - try: sub_lab = self.submitting_lab.name except AttributeError: @@ -105,24 +103,16 @@ class BasicSubmission(Base): except JSONDecodeError as e: ext_info = None logger.debug(f"Json error in {self.rsl_plate_num}: {e}") + # Updated 2023-09 to use the extraction kit to pull reagents. 
try: reagents = [item.to_sub_dict(extraction_kit=self.extraction_kit) for item in self.reagents] except Exception as e: logger.error(f"We got an error retrieving reagents: {e}") reagents = None - # try: - # samples = [item.sample.to_sub_dict(item.__dict__()) for item in self.submission_sample_associations] - # except Exception as e: - # logger.error(f"Problem making list of samples: {e}") - # samples = None samples = [] + # Updated 2023-09 to get sample association with plate number for item in self.submission_sample_associations: sample = item.sample.to_sub_dict(submission_rsl=self.rsl_plate_num) - # try: - # sample['well'] = f"{row_map[item.row]}{item.column}" - # except KeyError as e: - # logger.error(f"Unable to find row {item.row} in row_map.") - # sample['well'] = None samples.append(sample) try: comments = self.comment @@ -171,7 +161,7 @@ class BasicSubmission(Base): output = { "id": self.id, "Plate Number": self.rsl_plate_num, - "Submission Type": self.submission_type.replace("_", " ").title(), + "Submission Type": self.submission_type_name.replace("_", " ").title(), "Submitter Plate Number": self.submitter_plate_num, "Submitted Date": self.submitted_date.strftime("%Y-%m-%d"), "Submitting Lab": sub_lab, @@ -182,16 +172,18 @@ class BasicSubmission(Base): return output def calculate_base_cost(self): + """ + Calculates cost of the plate + """ + # Calculate number of columns based on largest column number try: - # cols_count_96 = ceil(int(self.sample_count) / 8) cols_count_96 = self.calculate_column_count() except Exception as e: logger.error(f"Column count error: {e}") - # cols_count_24 = ceil(int(self.sample_count) / 3) - logger.debug(f"Pre-association check. 
{pprint.pformat(self.__dict__)}") + # Get kit associated with this submission assoc = [item for item in self.extraction_kit.kit_submissiontype_associations if item.submission_type == self.submission_type][0] logger.debug(f"Came up with association: {assoc}") - # if all(item == 0.0 for item in [self.extraction_kit.constant_cost, self.extraction_kit.mutable_cost_column, self.extraction_kit.mutable_cost_sample]): + # If every individual cost is 0 this is probably an old plate. if all(item == 0.0 for item in [assoc.constant_cost, assoc.mutable_cost_column, assoc.mutable_cost_sample]): try: self.run_cost = self.extraction_kit.cost_per_run @@ -203,14 +195,28 @@ class BasicSubmission(Base): except Exception as e: logger.error(f"Calculation error: {e}") - def calculate_column_count(self): + def calculate_column_count(self) -> int: + """ + Calculate the number of columns in this submission + + Returns: + int: largest column number + """ logger.debug(f"Here's the samples: {self.samples}") - # columns = [int(sample.well_number[-2:]) for sample in self.samples] columns = [assoc.column for assoc in self.submission_sample_associations] logger.debug(f"Here are the columns for {self.rsl_plate_num}: {columns}") return max(columns) def hitpick_plate(self, plate_number:int|None=None) -> list: + """ + Returns positive sample locations for plate + + Args: + plate_number (int | None, optional): Plate id. Defaults to None. 
+ + Returns: + list: list of hitpick dictionaries for each sample + """ output_list = [] for assoc in self.submission_sample_associations: samp = assoc.sample.to_hitpick(submission_rsl=self.rsl_plate_num) @@ -232,7 +238,6 @@ class BacterialCulture(BasicSubmission): derivative submission type from BasicSubmission """ controls = relationship("Control", back_populates="submission", uselist=True) #: A control sample added to submission - # samples = relationship("BCSample", back_populates="rsl_plate", uselist=True) __mapper_args__ = {"polymorphic_identity": "Bacterial Culture", "polymorphic_load": "inline"} def to_dict(self) -> dict: @@ -250,11 +255,9 @@ class Wastewater(BasicSubmission): """ derivative submission type from BasicSubmission """ - # samples = relationship("WWSample", back_populates="rsl_plate", uselist=True) pcr_info = Column(JSON) ext_technician = Column(String(64)) pcr_technician = Column(String(64)) - # ww_sample_id = Column(String, ForeignKey("_ww_samples.id", ondelete="SET NULL", name="fk_WW_sample_id")) __mapper_args__ = {"polymorphic_identity": "Wastewater", "polymorphic_load": "inline"} def to_dict(self) -> dict: @@ -276,10 +279,7 @@ class WastewaterArtic(BasicSubmission): """ derivative submission type for artic wastewater """ - # samples = relationship("WWSample", back_populates="artic_rsl_plate", uselist=True) - # Can it use the pcr_info from the wastewater? Cause I can't define pcr_info here due to conflicts with that - # Not necessary because we don't get any results for this procedure. 
- __mapper_args__ = {"polymorphic_identity": "wastewater_artic", "polymorphic_load": "inline"} + __mapper_args__ = {"polymorphic_identity": "Wastewater Artic", "polymorphic_load": "inline"} def calculate_base_cost(self): """ @@ -290,12 +290,13 @@ class WastewaterArtic(BasicSubmission): cols_count_96 = ceil(int(self.sample_count) / 8) except Exception as e: logger.error(f"Column count error: {e}") + assoc = [item for item in self.extraction_kit.kit_submissiontype_associations if item.submission_type == self.submission_type][0] # Since we have multiple output plates per submission form, the constant cost will have to reflect this. output_plate_count = math.ceil(int(self.sample_count) / 16) logger.debug(f"Looks like we have {output_plate_count} output plates.") - const_cost = self.extraction_kit.constant_cost * output_plate_count + const_cost = assoc.constant_cost * output_plate_count try: - self.run_cost = const_cost + (self.extraction_kit.mutable_cost_column * cols_count_96) + (self.extraction_kit.mutable_cost_sample * int(self.sample_count)) + self.run_cost = const_cost + (assoc.mutable_cost_column * cols_count_96) + (assoc.mutable_cost_sample * int(self.sample_count)) except Exception as e: logger.error(f"Calculation error: {e}") @@ -318,7 +319,15 @@ class BasicSample(Base): __mapper_args__ = { "polymorphic_identity": "basic_sample", - "polymorphic_on": sample_type, + # "polymorphic_on": sample_type, + "polymorphic_on": case( + [ + (sample_type == "Wastewater Sample", "Wastewater Sample"), + (sample_type == "Wastewater Artic Sample", "Wastewater Sample"), + (sample_type == "Bacterial Culture Sample", "Bacterial Culture Sample"), + ], + else_="basic_sample" + ), "with_polymorphic": "*", } @@ -335,7 +344,23 @@ class BasicSample(Base): def __repr__(self) -> str: return f"<{self.sample_type.replace('_', ' ').title(). 
replace(' ', '')}({self.submitter_id})>" + def set_attribute(self, name, value): + # logger.debug(f"Setting {name} to {value}") + try: + setattr(self, name, value) + except AttributeError: + logger.error(f"Attribute {name} not found") + def to_sub_dict(self, submission_rsl:str) -> dict: + """ + Returns a dictionary of locations. + + Args: + submission_rsl (str): Submission RSL number. + + Returns: + dict: 'well' and sample submitter_id as 'name' + """ row_map = {1:"A", 2:"B", 3:"C", 4:"D", 5:"E", 6:"F", 7:"G", 8:"H"} self.assoc = [item for item in self.sample_submission_associations if item.submission.rsl_plate_num==submission_rsl][0] sample = {} @@ -347,73 +372,30 @@ class BasicSample(Base): sample['name'] = self.submitter_id return sample - def to_hitpick(self, submission_rsl:str) -> dict|None: + def to_hitpick(self, submission_rsl:str|None=None) -> dict|None: """ Outputs a dictionary of locations Returns: dict: dictionary of sample id, row and column in elution plate """ - self.assoc = [item for item in self.sample_submission_associations if item.submission.rsl_plate_num==submission_rsl][0] - # dictionary to translate row letters into numbers - # row_dict = dict(A=1, B=2, C=3, D=4, E=5, F=6, G=7, H=8) - # if either n1 or n2 is positive, include this sample - # well_row = row_dict[self.well_number[0]] - # The remaining charagers are the columns - # well_col = self.well_number[1:] - return dict(name=self.submitter_id, - # row=well_row, - # col=well_col, - positive=False) + # self.assoc = [item for item in self.sample_submission_associations if item.submission.rsl_plate_num==submission_rsl][0] + # Since there is no PCR, negliable result is necessary. 
+ return dict(name=self.submitter_id, positive=False) class WastewaterSample(BasicSample): """ - Base wastewater sample + Derivative wastewater sample """ - # __tablename__ = "_ww_samples" - - # id = Column(INTEGER, primary_key=True) #: primary key ww_processing_num = Column(String(64)) #: wastewater processing number - ww_sample_full_id = Column(String(64)) + ww_full_sample_id = Column(String(64)) rsl_number = Column(String(64)) #: rsl plate identification number - # rsl_plate = relationship("Wastewater", back_populates="samples") #: relationship to parent plate - # rsl_plate_id = Column(INTEGER, ForeignKey("_submissions.id", ondelete="SET NULL", name="fk_WWS_submission_id")) collection_date = Column(TIMESTAMP) #: Date sample collected received_date = Column(TIMESTAMP) #: Date sample received - # well_number = Column(String(8)) #: location on 96 well plate - # The following are fields from the sample tracking excel sheet Ruth put together. - # I have no idea when they will be implemented or how. 
- # testing_type = Column(String(64)) - # site_status = Column(String(64)) notes = Column(String(2000)) - # ct_n1 = Column(FLOAT(2)) #: AKA ct for N1 - # ct_n2 = Column(FLOAT(2)) #: AKA ct for N2 - # n1_status = Column(String(32)) - # n2_status = Column(String(32)) - # seq_submitted = Column(BOOLEAN()) - # ww_seq_run_id = Column(String(64)) - # sample_type = Column(String(16)) - # pcr_results = Column(JSON) sample_location = Column(String(8)) #: location on 24 well plate - # artic_rsl_plate = relationship("WastewaterArtic", back_populates="samples") - # artic_well_number = Column(String(8)) - __mapper_args__ = {"polymorphic_identity": "Wastewater Sample", "polymorphic_load": "inline"} - # def to_string(self) -> str: - # """ - # string representing sample object - - # Returns: - # str: string representing location and sample id - # """ - # return f"{self.well_number}: {self.ww_sample_full_id}" - - # @validates("received-date") - # def convert_rdate_time(self, key, value): - # if isinstance(value, Timestamp): - # return value.date() - # return value @validates("collected-date") def convert_cdate_time(self, key, value): @@ -423,31 +405,68 @@ class WastewaterSample(BasicSample): if isinstance(value, str): return parse(value) return value + + @validates("rsl_number") + def use_submitter_id(self, key, value): + logger.debug(f"Validating {key}: {value}") + return value or self.submitter_id - # @collection_date.setter - # def collection_date(self, value): - # match value: - # case Timestamp(): - # self.collection_date = value.date() - # case str(): - # self.collection_date = parse(value) - # case _: - # self.collection_date = value - + # def __init__(self, **kwargs): + # # Had a problem getting collection date from excel as text only. + # if 'collection_date' in kwargs.keys(): + # logger.debug(f"Got collection_date: {kwargs['collection_date']}. 
Attempting parse.") + # if isinstance(kwargs['collection_date'], str): + # logger.debug(f"collection_date is a string...") + # kwargs['collection_date'] = parse(kwargs['collection_date']) + # logger.debug(f"output is {kwargs['collection_date']}") + # # Due to the plate map being populated with RSL numbers, we have to do some shuffling. + # try: + # kwargs['rsl_number'] = kwargs['submitter_id'] + # except KeyError as e: + # logger.error(f"Error using {kwargs} for submitter_id") + # try: + # check = check_not_nan(kwargs['ww_full_sample_id']) + # except KeyError: + # logger.error(f"Error using {kwargs} for ww_full_sample_id") + # check = False + # if check: + # kwargs['submitter_id'] = kwargs["ww_full_sample_id"] + # super().__init__(**kwargs) - def __init__(self, **kwargs): - if 'collection_date' in kwargs.keys(): - logger.debug(f"Got collection_date: {kwargs['collection_date']}. Attempting parse.") - if isinstance(kwargs['collection_date'], str): - logger.debug(f"collection_date is a string...") - kwargs['collection_date'] = parse(kwargs['collection_date']) - logger.debug(f"output is {kwargs['collection_date']}") - super().__init__(**kwargs) + def set_attribute(self, name:str, value): + """ + Set an attribute of this object. Extends parent. + + Args: + name (str): _description_ + value (_type_): _description_ + """ + # Due to the plate map being populated with RSL numbers, we have to do some shuffling. + # logger.debug(f"Input - {name}:{value}") + match name: + case "submitter_id": + if self.submitter_id != None: + return + else: + super().set_attribute("rsl_number", value) + case "ww_full_sample_id": + if value != None: + super().set_attribute(name, value) + name = "submitter_id" + case 'collection_date': + if isinstance(value, str): + logger.debug(f"collection_date {value} is a string. 
Attempting parse...") + value = parse(value) + case "rsl_number": + if value == None: + value = self.submitter_id + # logger.debug(f"Output - {name}:{value}") + super().set_attribute(name, value) def to_sub_dict(self, submission_rsl:str) -> dict: """ - Gui friendly dictionary. Inherited from BasicSample + Gui friendly dictionary. Extends parent method. This version will include PCR status. Args: @@ -458,15 +477,13 @@ class WastewaterSample(BasicSample): """ # Get the relevant submission association for this sample sample = super().to_sub_dict(submission_rsl=submission_rsl) + # check if PCR data exists. try: check = self.assoc.ct_n1 != None and self.assoc.ct_n2 != None except AttributeError as e: check = False if check: - # logger.debug(f"Using well info in name.") sample['name'] = f"{self.submitter_id}\n\t- ct N1: {'{:.2f}'.format(self.assoc.ct_n1)} ({self.assoc.n1_status})\n\t- ct N2: {'{:.2f}'.format(self.assoc.ct_n2)} ({self.assoc.n2_status})" - # else: - # logger.error(f"Couldn't get the pcr info") return sample def to_hitpick(self, submission_rsl:str) -> dict|None: @@ -477,67 +494,30 @@ class WastewaterSample(BasicSample): dict: dictionary of sample id, row and column in elution plate """ sample = super().to_hitpick(submission_rsl=submission_rsl) - # dictionary to translate row letters into numbers - # row_dict = dict(A=1, B=2, C=3, D=4, E=5, F=6, G=7, H=8) # if either n1 or n2 is positive, include this sample try: sample['positive'] = any(["positive" in item for item in [self.assoc.n1_status, self.assoc.n2_status]]) except (TypeError, AttributeError) as e: logger.error(f"Couldn't check positives for {self.rsl_number}. 
Looks like there isn't PCR data.") - # return None - # positive = False - # well_row = row_dict[self.well_number[0]] - # well_col = self.well_number[1:] - # if positive: - # try: - # # The first character of the elution well is the row - # well_row = row_dict[self.elution_well[0]] - # # The remaining charagers are the columns - # well_col = self.elution_well[1:] - # except TypeError as e: - # logger.error(f"This sample doesn't have elution plate info.") - # return None return sample class BacterialCultureSample(BasicSample): """ base of bacterial culture sample """ - # __tablename__ = "_bc_samples" - - # id = Column(INTEGER, primary_key=True) #: primary key - # well_number = Column(String(8)) #: location on parent plate - # sample_id = Column(String(64), nullable=False, unique=True) #: identification from submitter organism = Column(String(64)) #: bacterial specimen - concentration = Column(String(16)) #: - # sample_type = Column(String(16)) - # rsl_plate_id = Column(INTEGER, ForeignKey("_submissions.id", ondelete="SET NULL", name="fk_BCS_sample_id")) #: id of parent plate - # rsl_plate = relationship("BacterialCulture", back_populates="samples") #: relationship to parent plate - + concentration = Column(String(16)) #: sample concentration __mapper_args__ = {"polymorphic_identity": "Bacterial Culture Sample", "polymorphic_load": "inline"} - # def to_string(self) -> str: - # """ - # string representing object - - # Returns: - # str: string representing well location, sample id and organism - # """ - # return f"{self.well_number}: {self.sample_id} - {self.organism}" - def to_sub_dict(self, submission_rsl:str) -> dict: """ - gui friendly dictionary + gui friendly dictionary, extends parent method. 
Returns: dict: well location and name (sample id, organism) NOTE: keys must sync with WWSample to_sub_dict above """ sample = super().to_sub_dict(submission_rsl=submission_rsl) sample['name'] = f"{self.submitter_id} - ({self.organism})" - # return { - # # "well": self.well_number, - # "name": f"{self.submitter_id} - ({self.organism})", - # } return sample class SubmissionSampleAssociation(Base): @@ -548,18 +528,19 @@ class SubmissionSampleAssociation(Base): __tablename__ = "_submission_sample" sample_id = Column(INTEGER, ForeignKey("_samples.id"), nullable=False) submission_id = Column(INTEGER, ForeignKey("_submissions.id"), primary_key=True) - row = Column(INTEGER, primary_key=True) - column = Column(INTEGER, primary_key=True) + row = Column(INTEGER, primary_key=True) #: row on the 96 well plate + column = Column(INTEGER, primary_key=True) #: column on the 96 well plate + # reference to the Submission object submission = relationship(BasicSubmission, back_populates="submission_sample_associations") - # reference to the "ReagentType" object - # sample = relationship("BasicSample") + # reference to the Sample object sample = relationship(BasicSample, back_populates="sample_submission_associations") base_sub_type = Column(String) - # """Refers to the type of parent.""" - + + # Refers to the type of parent. + # Hooooooo boy, polymorphic association type, now we're getting into the weeds! 
__mapper_args__ = { "polymorphic_identity": "basic_association", "polymorphic_on": base_sub_type, @@ -576,11 +557,14 @@ class SubmissionSampleAssociation(Base): return f" None: - """ - pulls info specific to bacterial culture sample type + def parse_bacterial_culture(self, input_dict) -> dict: """ + Update submission dictionary with type specific information - # def parse_reagents(df:pd.DataFrame) -> None: - # """ - # Pulls reagents from the bacterial sub-dataframe + Args: + input_dict (dict): Input sample dictionary - # Args: - # df (pd.DataFrame): input sub dataframe - # """ - # for ii, row in df.iterrows(): - # # skip positive control - # logger.debug(f"Running reagent parse for {row[1]} with type {type(row[1])} and value: {row[2]} with type {type(row[2])}") - # # if the lot number isn't a float and the reagent type isn't blank - # # if not isinstance(row[2], float) and check_not_nan(row[1]): - # if check_not_nan(row[1]): - # # must be prefixed with 'lot_' to be recognized by gui - # # This is no longer true since reagents are loaded into their own key in dictionary - # try: - # reagent_type = row[1].replace(' ', '_').lower().strip() - # except AttributeError: - # pass - # # If there is a double slash in the type field, such as ethanol/iso - # # Use the cell to the left for reagent type. 
- # if reagent_type == "//": - # if check_not_nan(row[2]): - # reagent_type = row[0].replace(' ', '_').lower().strip() - # else: - # continue - # try: - # output_var = convert_nans_to_nones(str(row[2]).upper()) - # except AttributeError: - # logger.debug(f"Couldn't upperize {row[2]}, must be a number") - # output_var = convert_nans_to_nones(str(row[2])) - # logger.debug(f"Output variable is {output_var}") - # logger.debug(f"Expiry date for imported reagent: {row[3]}") - # if check_not_nan(row[3]): - # try: - # expiry = row[3].date() - # except AttributeError as e: - # try: - # expiry = datetime.strptime(row[3], "%Y-%m-%d") - # except TypeError as e: - # expiry = datetime.fromordinal(datetime(1900, 1, 1).toordinal() + row[3] - 2) - # else: - # logger.debug(f"Date: {row[3]}") - # # expiry = date.today() - # expiry = date(year=1970, month=1, day=1) - # # self.sub[f"lot_{reagent_type}"] = {'lot':output_var, 'exp':expiry} - # # self.sub['reagents'].append(dict(type=reagent_type, lot=output_var, exp=expiry)) - # self.sub['reagents'].append(PydReagent(type=reagent_type, lot=output_var, exp=expiry)) - # submission_info = self.xl.parse(sheet_name="Sample List", dtype=object) - # self.sub['extraction_kit'] = submission_info.iloc[3][3] - # submission_info = self.parse_generic("Sample List") - # # iloc is [row][column] and the first row is set as header row so -2 - # self.sub['technician'] = str(submission_info.iloc[11][1]) - # # reagents - # # must be prefixed with 'lot_' to be recognized by gui - # # This is no longer true wince the creation of self.sub['reagents'] - # self.sub['reagents'] = [] - # reagent_range = submission_info.iloc[1:14, 4:8] - # logger.debug(reagent_range) - # parse_reagents(reagent_range) - # get individual sample info - sample_parser = SampleParser(self.ctx, submission_info.iloc[16:112]) - logger.debug(f"Sample type: {self.sub['submission_type']}") - if isinstance(self.sub['submission_type'], dict): - getter = self.sub['submission_type']['value'] - 
else: - getter = self.sub['submission_type'] - sample_parse = getattr(sample_parser, f"parse_{getter.replace(' ', '_').lower()}_samples") - logger.debug(f"Parser result: {self.sub}") - self.sample_result, self.sub['samples'] = sample_parse() - - def parse_wastewater(self) -> None: - """ - pulls info specific to wastewater sample type + Returns: + dict: Updated sample dictionary """ - def retrieve_elution_map(): - full = self.xl.parse("Extraction Worksheet") - elu_map = full.iloc[9:18, 5:] - elu_map.set_index(elu_map.columns[0], inplace=True) - elu_map.columns = elu_map.iloc[0] - elu_map = elu_map.tail(-1) - return elu_map - # def parse_reagents(df:pd.DataFrame) -> None: - # """ - # Pulls reagents from the bacterial sub-dataframe - - # Args: - # df (pd.DataFrame): input sub dataframe - # """ - # # iterate through sub-df rows - # for ii, row in df.iterrows(): - # # logger.debug(f"Parsing this row for reagents: {row}") - # if check_not_nan(row[5]): - # # must be prefixed with 'lot_' to be recognized by gui - # # regex below will remove 80% from 80% ethanol in the Wastewater kit. 
- # output_key = re.sub(r"^\d{1,3}%\s?", "", row[0].lower().strip().replace(' ', '_')) - # output_key = output_key.strip("_") - # # output_var is the lot number - # try: - # output_var = convert_nans_to_nones(str(row[5].upper())) - # except AttributeError: - # logger.debug(f"Couldn't upperize {row[5]}, must be a number") - # output_var = convert_nans_to_nones(str(row[5])) - # if check_not_nan(row[7]): - # try: - # expiry = row[7].date() - # except AttributeError: - # expiry = date.today() - # else: - # expiry = date.today() - # logger.debug(f"Expiry date for {output_key}: {expiry} of type {type(expiry)}") - # # self.sub[f"lot_{output_key}"] = {'lot':output_var, 'exp':expiry} - # # self.sub['reagents'].append(dict(type=output_key, lot=output_var, exp=expiry)) - # reagent = PydReagent(type=output_key, lot=output_var, exp=expiry) - # logger.debug(f"Here is the created reagent: {reagent}") - # self.sub['reagents'].append(reagent) - # parse submission sheet - submission_info = self.parse_generic("WW Submissions (ENTER HERE)") - # parse enrichment sheet - enrichment_info = self.xl.parse("Enrichment Worksheet", dtype=object) - # set enrichment reagent range - enr_reagent_range = enrichment_info.iloc[0:4, 9:20] - # parse extraction sheet - extraction_info = self.xl.parse("Extraction Worksheet", dtype=object) - # set extraction reagent range - ext_reagent_range = extraction_info.iloc[0:5, 9:20] - # parse qpcr sheet - qprc_info = self.xl.parse("qPCR Worksheet", dtype=object) - # set qpcr reagent range - pcr_reagent_range = qprc_info.iloc[0:5, 9:20] - # compile technician info from all sheets - if all(map(check_not_nan, [enrichment_info.columns[2], extraction_info.columns[2], qprc_info.columns[2]])): - parsed = True - else: - parsed = False - self.sub['technician'] = dict(value=f"Enr: {enrichment_info.columns[2]}, Ext: {extraction_info.columns[2]}, PCR: {qprc_info.columns[2]}", parsed=parsed) - self.sub['reagents'] = [] - # parse_reagents(enr_reagent_range) - # 
parse_reagents(ext_reagent_range) - # parse_reagents(pcr_reagent_range) - # parse samples - sample_parser = SampleParser(self.ctx, submission_info.iloc[16:], elution_map=retrieve_elution_map()) - sample_parse = getattr(sample_parser, f"parse_{self.sub['submission_type']['value'].lower()}_samples") - self.sample_result, self.sub['samples'] = sample_parse() - self.sub['csv'] = self.xl.parse("Copy to import file", dtype=object) - - def parse_wastewater_artic(self) -> None: + return input_dict + + def parse_wastewater(self, input_dict) -> dict: """ - pulls info specific to wastewater_arctic submission type + Update submission dictionary with type specific information + + Args: + input_dict (dict): Input sample dictionary + + Returns: + dict: Updated sample dictionary + """ + return input_dict + + def parse_wastewater_artic(self, input_dict:dict) -> dict: """ - if isinstance(self.sub['submission_type'], str): - self.sub['submission_type'] = dict(value=self.sub['submission_type'], parsed=True) - # def parse_reagents(df:pd.DataFrame): - # logger.debug(df) - # for ii, row in df.iterrows(): - # if check_not_nan(row[1]): - # try: - # output_key = re.sub(r"\(.+?\)", "", row[0].lower().strip().replace(' ', '_')) - # except AttributeError: - # continue - # output_key = output_key.strip("_") - # output_key = massage_common_reagents(output_key) - # try: - # output_var = convert_nans_to_nones(str(row[1].upper())) - # except AttributeError: - # logger.debug(f"Couldn't upperize {row[1]}, must be a number") - # output_var = convert_nans_to_nones(str(row[1])) - # logger.debug(f"Output variable is {output_var}") - # logger.debug(f"Expiry date for imported reagent: {row[2]}") - # if check_not_nan(row[2]): - # try: - # expiry = row[2].date() - # except AttributeError as e: - # try: - # expiry = datetime.strptime(row[2], "%Y-%m-%d") - # except TypeError as e: - # expiry = datetime.fromordinal(datetime(1900, 1, 1).toordinal() + row[2] - 2) - # except ValueError as e: - # continue - # else: 
- # logger.debug(f"Date: {row[2]}") - # expiry = date.today() - # # self.sub['reagents'].append(dict(type=output_key, lot=output_var, exp=expiry)) - # self.sub['reagents'].append(PydReagent(type=output_key, lot=output_var, exp=expiry)) - # else: - # continue - def massage_samples(df:pd.DataFrame, lookup_table:pd.DataFrame) -> pd.DataFrame: - """ - Takes sample info from Artic sheet format and converts to regular formate + Update submission dictionary with type specific information - Args: - df (pd.DataFrame): Elution plate map - lookup_table (pd.DataFrame): Sample submission form map. + Args: + input_dict (dict): Input sample dictionary - Returns: - pd.DataFrame: _description_ - """ - lookup_table.set_index(lookup_table.columns[0], inplace=True) - lookup_table.columns = lookup_table.iloc[0] - logger.debug(f"Massaging samples from {lookup_table}") - df.set_index(df.columns[0], inplace=True) - df.columns = df.iloc[0] - logger.debug(f"df to massage\n: {df}") - return_list = [] - for _, ii in df.iloc[1:,1:].iterrows(): - for c in df.columns.to_list(): - if not check_not_nan(c): - continue - logger.debug(f"Checking {ii.name}{c}") - if check_not_nan(df.loc[ii.name, int(c)]) and df.loc[ii.name, int(c)] != "EMPTY": - sample_name = df.loc[ii.name, int(c)] - row = lookup_table.loc[lookup_table['Sample Name (WW)'] == sample_name] - logger.debug(f"Looking up {row['Sample Name (LIMS)'][-1]}") - try: - return_list.append(dict(submitter_id=re.sub(r"\s?\(.*\)", "", df.loc[ii.name, int(c)]), \ - # well=f"{ii.name}{c}", - row = row_keys[ii.name], - column = c, - artic_plate=self.sub['rsl_plate_num'], - sample_name=row['Sample Name (LIMS)'][-1] - )) - except TypeError as e: - logger.error(f"Got an int for {c}, skipping.") - continue - logger.debug(f"massaged sample list for {self.sub['rsl_plate_num']}: {pprint.pprint(return_list)}") - return return_list - submission_info = self.xl.parse("First Strand", dtype=object) - biomek_info = self.xl.parse("ArticV4 Biomek", dtype=object) - 
sub_reagent_range = submission_info.iloc[56:, 1:4].dropna(how='all') - biomek_reagent_range = biomek_info.iloc[60:, 0:3].dropna(how='all') - # submission_info = self.xl.parse("cDNA", dtype=object) - # biomek_info = self.xl.parse("ArticV4_1 Biomek", dtype=object) - # # Reminder that the iloc uses row, column ordering - # # sub_reagent_range = submission_info.iloc[56:, 1:4].dropna(how='all') - # sub_reagent_range = submission_info.iloc[7:15, 5:9].dropna(how='all') - # biomek_reagent_range = biomek_info.iloc[62:, 0:3].dropna(how='all') - self.sub['submitter_plate_num'] = "" - self.sub['rsl_plate_num'] = RSLNamer(ctx=self.ctx, instr=self.filepath.__str__()).parsed_name - self.sub['submitted_date'] = biomek_info.iloc[1][1] - self.sub['submitting_lab'] = "Enterics Wastewater Genomics" - self.sub['sample_count'] = submission_info.iloc[4][6] - # self.sub['sample_count'] = submission_info.iloc[34][6] - self.sub['extraction_kit'] = "ArticV4.1" - self.sub['technician'] = f"MM: {biomek_info.iloc[2][1]}, Bio: {biomek_info.iloc[3][1]}" - self.sub['reagents'] = [] - # parse_reagents(sub_reagent_range) - # parse_reagents(biomek_reagent_range) - samples = massage_samples(biomek_info.iloc[22:31, 0:], submission_info.iloc[4:37, 1:5]) - # samples = massage_samples(biomek_info.iloc[25:33, 0:]) - sample_parser = SampleParser(self.ctx, pd.DataFrame.from_records(samples)) - sample_parse = getattr(sample_parser, f"parse_{self.sub['submission_type']['value'].lower()}_samples") - self.sample_result, self.sub['samples'] = sample_parse() - - # def parse_reagents(self): - # ext_kit = lookup_kittype_by_name(ctx=self.ctx, name=self.sub['extraction_kit']) - # if ext_kit != None: - # logger.debug(f"Querying extraction kit: {self.sub['submission_type']}") - # reagent_map = ext_kit.construct_xl_map_for_use(use=self.sub['submission_type']['value']) - # logger.debug(f"Reagent map: {pprint.pformat(reagent_map)}") - # else: - # raise AttributeError("No extraction kit found, unable to parse reagents") - # 
for sheet in self.xl.sheet_names: - # df = self.xl.parse(sheet) - # relevant = {k:v for k,v in reagent_map.items() if sheet in reagent_map[k]['sheet']} - # logger.debug(f"relevant map for {sheet}: {pprint.pformat(relevant)}") - # if relevant == {}: - # continue - # for item in relevant: - # try: - # # role = item - # name = df.iat[relevant[item]['name']['row']-2, relevant[item]['name']['column']-1] - # lot = df.iat[relevant[item]['lot']['row']-2, relevant[item]['lot']['column']-1] - # expiry = df.iat[relevant[item]['expiry']['row']-2, relevant[item]['expiry']['column']-1] - # except (KeyError, IndexError): - # continue - # # self.sub['reagents'].append(dict(name=name, lot=lot, expiry=expiry, role=role)) - # self.sub['reagents'].append(PydReagent(type=item, lot=lot, exp=expiry, name=name)) + Returns: + dict: Updated sample dictionary + """ + return input_dict def import_kit_validation_check(self): @@ -411,9 +168,6 @@ class SheetParser(object): else: if isinstance(self.sub['extraction_kit'], str): self.sub['extraction_kit'] = dict(value=self.sub['extraction_kit'], parsed=False) - - # logger.debug(f"Here is the validated parser dictionary:\n\n{pprint.pformat(self.sub)}\n\n") - # return parser_sub def import_reagent_validation_check(self): """ @@ -439,20 +193,16 @@ class InfoParser(object): def __init__(self, ctx:Settings, xl:pd.ExcelFile, submission_type:str): self.ctx = ctx - # self.submission_type = submission_type - # self.extraction_kit = extraction_kit self.map = self.fetch_submission_info_map(submission_type=submission_type) self.xl = xl logger.debug(f"Info map for InfoParser: {pprint.pformat(self.map)}") def fetch_submission_info_map(self, submission_type:dict) -> dict: + if isinstance(submission_type, str): + submission_type = dict(value=submission_type, parsed=False) logger.debug(f"Looking up submission type: {submission_type['value']}") submission_type = lookup_submissiontype_by_name(ctx=self.ctx, type_name=submission_type['value']) info_map = 
submission_type.info_map - # try: - # del info_map['samples'] - # except KeyError: - # pass return info_map def parse_info(self) -> dict: @@ -461,11 +211,13 @@ class InfoParser(object): df = self.xl.parse(sheet, header=None) relevant = {} for k, v in self.map.items(): + if isinstance(v, str): + dicto[k] = dict(value=v, parsed=True) + continue if k == "samples": continue if sheet in self.map[k]['sheets']: relevant[k] = v - # relevant = {k:v for k,v in self.map.items() if sheet in self.map[k]['sheets']} logger.debug(f"relevant map for {sheet}: {pprint.pformat(relevant)}") if relevant == {}: continue @@ -485,8 +237,6 @@ class InfoParser(object): continue else: dicto[item] = dict(value=convert_nans_to_nones(value), parsed=False) - # if "submitter_plate_num" not in dicto.keys(): - # dicto['submitter_plate_num'] = dict(value=None, parsed=False) return dicto class ReagentParser(object): @@ -515,7 +265,6 @@ class ReagentParser(object): for item in relevant: logger.debug(f"Attempting to scrape: {item}") try: - # role = item name = df.iat[relevant[item]['name']['row']-1, relevant[item]['name']['column']-1] lot = df.iat[relevant[item]['lot']['row']-1, relevant[item]['lot']['column']-1] expiry = df.iat[relevant[item]['expiry']['row']-1, relevant[item]['expiry']['column']-1] @@ -526,7 +275,6 @@ class ReagentParser(object): parsed = True else: parsed = False - # self.sub['reagents'].append(dict(name=name, lot=lot, expiry=expiry, role=role)) logger.debug(f"Got lot for {item}-{name}: {lot} as {type(lot)}") lot = str(lot) listo.append(dict(value=PydReagent(type=item.strip(), lot=lot, exp=expiry, name=name), parsed=parsed)) @@ -556,8 +304,9 @@ class SampleParser(object): self.lookup_table = self.construct_lookup_table(lookup_table_location=sample_info_map['lookup_table']) self.excel_to_db_map = sample_info_map['xl_db_translation'] self.create_basic_dictionaries_from_plate_map() - self.parse_lookup_table() - + if isinstance(self.lookup_table, pd.DataFrame): + 
self.parse_lookup_table() + def fetch_sample_info_map(self, submission_type:dict) -> dict: logger.debug(f"Looking up submission type: {submission_type}") submission_type = lookup_submissiontype_by_name(ctx=self.ctx, type_name=submission_type) @@ -575,7 +324,10 @@ class SampleParser(object): return df def construct_lookup_table(self, lookup_table_location) -> pd.DataFrame: - df = self.xl.parse(lookup_table_location['sheet'], header=None, dtype=object) + try: + df = self.xl.parse(lookup_table_location['sheet'], header=None, dtype=object) + except KeyError: + return None df = df.iloc[lookup_table_location['start_row']-1:lookup_table_location['end_row']] df = pd.DataFrame(df.values[1:], columns=df.iloc[0]) df = df.reset_index(drop=True) @@ -583,12 +335,16 @@ class SampleParser(object): return df def create_basic_dictionaries_from_plate_map(self): + invalids = [0, "0"] new_df = self.plate_map.dropna(axis=1, how='all') columns = new_df.columns.tolist() for _, iii in new_df.iterrows(): for c in columns: # logger.debug(f"Checking sample {iii[c]}") if check_not_nan(iii[c]): + if iii[c] in invalids: + logger.debug(f"Invalid sample name: {iii[c]}, skipping.") + continue id = iii[c] logger.debug(f"Adding sample {iii[c]}") try: @@ -600,8 +356,9 @@ class SampleParser(object): def parse_lookup_table(self): def determine_if_date(input_str) -> str|date: # logger.debug(f"Looks like we have a str: {input_str}") - regex = re.compile(r"\d{4}-?\d{2}-?\d{2}") + regex = re.compile(r"^\d{4}-?\d{2}-?\d{2}") if bool(regex.search(input_str)): + logger.warning(f"{input_str} is a date!") try: return parse(input_str) except ParserError: @@ -610,6 +367,7 @@ class SampleParser(object): return input_str for sample in self.samples: addition = self.lookup_table[self.lookup_table.isin([sample['submitter_id']]).any(axis=1)].squeeze().to_dict() + logger.debug(f"Lookuptable info: {addition}") for k,v in addition.items(): # logger.debug(f"Checking {k} in lookup table.") if check_not_nan(k) and 
isinstance(k, str): @@ -645,193 +403,89 @@ class SampleParser(object): case _: v = v try: - translated_dict[self.excel_to_db_map[k]] = v + translated_dict[self.excel_to_db_map[k]] = convert_nans_to_nones(v) except KeyError: translated_dict[k] = convert_nans_to_nones(v) - # translated_dict['sample_type'] = f"{self.submission_type.replace(' ', '_').lower()}_sample" translated_dict['sample_type'] = f"{self.submission_type} Sample" + parser_query = f"parse_{translated_dict['sample_type'].replace(' ', '_').lower()}" # logger.debug(f"New sample dictionary going into object creation:\n{translated_dict}") + try: + custom_parser = getattr(self, parser_query) + translated_dict = custom_parser(translated_dict) + except AttributeError: + logger.error(f"Couldn't get custom parser: {parser_query}") new_samples.append(self.generate_sample_object(translated_dict)) return result, new_samples def generate_sample_object(self, input_dict) -> models.BasicSample: - # query = input_dict['sample_type'].replace('_sample', '').replace("_", " ").title().replace(" ", "") query = input_dict['sample_type'].replace(" ", "") - database_obj = getattr(models, query) + try: + database_obj = getattr(models, query) + except AttributeError as e: + logger.error(f"Could not find the model {query}. Using generic.") + database_obj = models.BasicSample + logger.debug(f"Searching database for {input_dict['submitter_id']}...") instance = lookup_sample_by_submitter_id(ctx=self.ctx, submitter_id=input_dict['submitter_id']) if instance == None: + logger.debug(f"Couldn't find sample {input_dict['submitter_id']}. 
Creating new sample.") instance = database_obj() for k,v in input_dict.items(): try: - setattr(instance, k, v) + # setattr(instance, k, v) + instance.set_attribute(k, v) except Exception as e: logger.error(f"Failed to set {k} due to {type(e).__name__}: {e}") else: - logger.debug(f"Sample already exists, will run update.") + logger.debug(f"Sample {instance.submitter_id} already exists, will run update.") return dict(sample=instance, row=input_dict['row'], column=input_dict['column']) - # def parse_bacterial_culture_samples(self) -> Tuple[str|None, list[dict]]: + def parse_bacterial_culture_sample(self, input_dict:dict) -> dict: """ - construct bacterial culture specific sample objects + Update sample dictionary with bacterial culture specific information + + Args: + input_dict (dict): Input sample dictionary Returns: - list[BCSample]: list of sample objects - """ - # logger.debug(f"Samples: {self.samples}") - - new_list = [] - for sample in self.samples: - logger.debug(f"Well info: {sample['This section to be filled in completely by submittor']}") - instance = lookup_sample_by_submitter_id(ctx=self.ctx, submitter_id=sample['Unnamed: 1']) - if instance == None: - instance = BacterialCultureSample() - well_number = sample['This section to be filled in completely by submittor'] - row = row_keys[well_number[0]] - column = int(well_number[1:]) - instance.submitter_id = sample['Unnamed: 1'] - instance.organism = sample['Unnamed: 2'] - instance.concentration = sample['Unnamed: 3'] - # logger.debug(f"Sample object: {new.sample_id} = {type(new.sample_id)}") - logger.debug(f"Got sample_id: {instance.submitter_id}") - # need to exclude empties and blanks - if check_not_nan(instance.submitter_id): - new_list.append(dict(sample=instance, row=row, column=column)) - return None, new_list - - # def parse_wastewater_samples(self) -> Tuple[str|None, list[dict]]: - """ - construct wastewater specific sample objects - - Returns: - list[WWSample]: list of sample objects + dict: Updated 
sample dictionary """ - def search_df_for_sample(sample_rsl:str): - # logger.debug(f"Attempting to find sample {sample_rsl} in \n {self.elution_map}") - well = self.elution_map.where(self.elution_map==sample_rsl) - # logger.debug(f"Well: {well}") - well = well.dropna(how='all').dropna(axis=1, how="all") - if well.size > 1: - well = well.iloc[0].to_frame().dropna().T - logger.debug(f"well {sample_rsl} post processing: {well.size}: {type(well)}")#, {well.index[0]}, {well.columns[0]}") - try: - self.elution_map.at[well.index[0], well.columns[0]] = np.nan - except IndexError as e: - logger.error(f"Couldn't find the well for {sample_rsl}") - return 0, 0 - try: - column = int(well.columns[0]) - except TypeError as e: - logger.error(f"Problem parsing out column number for {well}:\n {e}") - row = row_keys[well.index[0]] - return row, column - new_list = [] - return_val = None - for sample in self.samples: - logger.debug(f"Sample: {sample}") - instance = lookup_ww_sample_by_ww_sample_num(ctx=self.ctx, sample_number=sample['Unnamed: 3']) - if instance == None: - instance = WastewaterSample() - if check_not_nan(sample["Unnamed: 7"]): - if sample["Unnamed: 7"] != "Fixed" and sample['Unnamed: 7'] != "Flex": - instance.rsl_number = sample['Unnamed: 7'] # previously Unnamed: 9 - elif check_not_nan(sample['Unnamed: 9']): - instance.rsl_number = sample['Unnamed: 9'] # previously Unnamed: 9 - else: - logger.error(f"No RSL sample number found for this sample.") - continue - else: - logger.error(f"No RSL sample number found for this sample.") - continue - instance.ww_processing_num = sample['Unnamed: 2'] - # need to ensure we have a sample id for database integrity - # if we don't have a sample full id, make one up - if check_not_nan(sample['Unnamed: 3']): - logger.debug(f"Sample name: {sample['Unnamed: 3']}") - instance.submitter_id = sample['Unnamed: 3'] - else: - instance.submitter_id = uuid.uuid4().hex.upper() - # logger.debug(f"The Submitter sample id is: 
{instance.submitter_id}") - # need to ensure we get a collection date - if check_not_nan(sample['Unnamed: 5']): - instance.collection_date = sample['Unnamed: 5'] - else: - instance.collection_date = date.today() - # new.testing_type = sample['Unnamed: 6'] - # new.site_status = sample['Unnamed: 7'] - instance.notes = str(sample['Unnamed: 6']) # previously Unnamed: 8 - instance.well_24 = sample['Unnamed: 1'] - else: - # What to do if the sample already exists - assert isinstance(instance, WastewaterSample) - if instance.rsl_number == None: - if check_not_nan(sample["Unnamed: 7"]): - if sample["Unnamed: 7"] != "Fixed" and sample['Unnamed: 7'] != "Flex": - instance.rsl_number = sample['Unnamed: 7'] # previously Unnamed: 9 - elif check_not_nan(sample['Unnamed: 9']): - instance.rsl_number = sample['Unnamed: 9'] # previously Unnamed: 9 - else: - logger.error(f"No RSL sample number found for this sample.") - if instance.collection_date == None: - if check_not_nan(sample['Unnamed: 5']): - instance.collection_date = sample['Unnamed: 5'] - else: - instance.collection_date = date.today() - if instance.notes == None: - instance.notes = str(sample['Unnamed: 6']) # previously Unnamed: 8 - if instance.well_24 == None: - instance.well_24 = sample['Unnamed: 1'] - logger.debug(f"Already have that sample, going to add association to this plate.") - row, column = search_df_for_sample(instance.rsl_number) - # if elu_well != None: - # row = elu_well[0] - # col = elu_well[1:].zfill(2) - # # new.well_number = f"{row}{col}" - # else: - # # try: - # return_val += f"{new.rsl_number}\n" - # # except TypeError: - # # return_val = f"{new.rsl_number}\n" - new_list.append(dict(sample=instance, row=row, column=column)) - return return_val, new_list + logger.debug("Called bacterial culture sample parser") + return input_dict + + def parse_wastewater_sample(self, input_dict:dict) -> dict: + """ + Update sample dictionary with wastewater specific information + + Args: + input_dict (dict): Input sample 
dictionary + + Returns: + dict: Updated sample dictionary + """ + logger.debug(f"Called wastewater sample parser") - # def parse_wastewater_artic_samples(self) -> Tuple[str|None, list[WastewaterSample]]: + def parse_wastewater_artic_sample(self, input_dict:dict) -> dict: """ - The artic samples are the wastewater samples that are to be sequenced - So we will need to lookup existing ww samples and append Artic well # and plate relation + Update sample dictionary with artic specific information + + Args: + input_dict (dict): Input sample dictionary Returns: - list[WWSample]: list of wastewater samples to be updated + dict: Updated sample dictionary """ + logger.debug("Called wastewater artic sample parser") + input_dict['sample_type'] = "Wastewater Sample" + # Because generate_sample_object needs the submitter_id and the artic has the "({origin well})" + # at the end, this has to be done here. No moving to sqlalchemy object :( + input_dict['submitter_id'] = re.sub(r"\s\(.+\)$", "", str(input_dict['submitter_id'])).strip() + return input_dict - new_list = [] - missed_samples = [] - for sample in self.samples: - with self.ctx.database_session.no_autoflush: - instance = lookup_ww_sample_by_ww_sample_num(ctx=self.ctx, sample_number=sample['sample_name']) - logger.debug(f"Checking: {sample}") - if instance == None: - logger.error(f"Unable to find match for: {sample['sample_name']}. 
Making new instance using {sample['submitter_id']}.") - instance = WastewaterSample() - instance.ww_processing_num = sample['sample_name'] - instance.submitter_id = sample['submitter_id'] - missed_samples.append(sample['sample_name']) - # continue - logger.debug(f"Got instance: {instance.submitter_id}") - # if sample['row'] != None: - # row = int(row_keys[sample['well'][0]]) - # if sample['column'] != None: - # column = int(sample['well'][1:]) - # sample['well'] = f"{row}{col}" - # instance.artic_well_number = sample['well'] - if instance.submitter_id != "NTC1" and instance.submitter_id != "NTC2": - new_list.append(dict(sample=instance, row=sample['row'], column=sample['column'])) - missed_str = "\n\t".join(missed_samples) - return f"Could not find matches for the following samples:\n\t {missed_str}", new_list - class PCRParser(object): """ Object to pull data from Design and Analysis PCR export file. + TODO: Generify this object. """ def __init__(self, ctx:dict, filepath:Path|None = None) -> None: """ diff --git a/src/submissions/backend/pydant/__init__.py b/src/submissions/backend/pydant/__init__.py index e831174..127d83a 100644 --- a/src/submissions/backend/pydant/__init__.py +++ b/src/submissions/backend/pydant/__init__.py @@ -1,5 +1,5 @@ import uuid -from pydantic import BaseModel, field_validator, Extra +from pydantic import BaseModel, field_validator, Extra, Field from datetime import date, datetime from dateutil.parser import parse from dateutil.parser._parser import ParserError @@ -32,10 +32,17 @@ class PydReagent(BaseModel): @field_validator("lot", mode='before') @classmethod - def enforce_lot_string(cls, value): + def rescue_lot_string(cls, value): if value != None: return convert_nans_to_nones(str(value)) return value + + @field_validator("lot") + @classmethod + def enforce_lot_string(cls, value): + if value != None: + return value.upper() + return value @field_validator("exp", mode="before") @classmethod @@ -66,8 +73,9 @@ class PydSubmission(BaseModel, 
extra=Extra.allow): ctx: Settings filepath: Path submission_type: dict|None - submitter_plate_num: dict|None - rsl_plate_num: dict|None + # For defaults + submitter_plate_num: dict|None = Field(default=dict(value=None, parsed=False), validate_default=True) + rsl_plate_num: dict|None = Field(default=dict(value=None, parsed=False), validate_default=True) submitted_date: dict|None submitting_lab: dict|None sample_count: dict|None @@ -77,12 +85,12 @@ class PydSubmission(BaseModel, extra=Extra.allow): samples: List[Any] # missing_fields: List[str] = [] - @field_validator("submitter_plate_num") - @classmethod - def rescue_submitter_id(cls, value): - if value == None: - return dict(value=None, parsed=False) - return value + # @field_validator("submitter_plate_num", mode="before") + # @classmethod + # def rescue_submitter_id(cls, value): + # if value == None: + # return dict(value=None, parsed=False) + # return value @field_validator("submitter_plate_num") @classmethod diff --git a/src/submissions/frontend/all_window_functions.py b/src/submissions/frontend/all_window_functions.py index 4afe9fc..1ce9945 100644 --- a/src/submissions/frontend/all_window_functions.py +++ b/src/submissions/frontend/all_window_functions.py @@ -28,6 +28,8 @@ def select_open_file(obj:QMainWindow, file_extension:str) -> Path: except FileNotFoundError: home_dir = Path.home().resolve().__str__() fname = Path(QFileDialog.getOpenFileName(obj, 'Open file', home_dir, filter = f"{file_extension}(*.{file_extension})")[0]) + # fname = Path(QFileDialog.getOpenFileName(obj, 'Open file', filter = f"{file_extension}(*.{file_extension})")[0]) + return fname def select_save_file(obj:QMainWindow, default_name:str, extension:str) -> Path: @@ -48,6 +50,8 @@ def select_save_file(obj:QMainWindow, default_name:str, extension:str) -> Path: except FileNotFoundError: home_dir = Path.home().joinpath(default_name).resolve().__str__() fname = Path(QFileDialog.getSaveFileName(obj, "Save File", home_dir, filter = 
f"{extension}(*.{extension})")[0]) + # fname = Path(QFileDialog.getSaveFileName(obj, "Save File", filter = f"{extension}(*.{extension})")[0]) + return fname def extract_form_info(object) -> dict: diff --git a/src/submissions/frontend/custom_widgets/sub_details.py b/src/submissions/frontend/custom_widgets/sub_details.py index c89cbd0..1b10aca 100644 --- a/src/submissions/frontend/custom_widgets/sub_details.py +++ b/src/submissions/frontend/custom_widgets/sub_details.py @@ -17,6 +17,7 @@ from PyQt6.QtCore import Qt, QAbstractTableModel, QSortFilterProxyModel from PyQt6.QtGui import QAction, QCursor, QPixmap, QPainter from backend.db import submissions_to_df, lookup_submission_by_id, delete_submission_by_id, lookup_submission_by_rsl_num, hitpick_plate from backend.excel import make_hitpicks +from tools import check_if_app from tools import jinja_template_loading from xhtml2pdf import pisa from pathlib import Path @@ -291,10 +292,14 @@ class SubmissionDetails(QDialog): # interior.resize(w,900) # txt_editor.setText(text) # interior.setWidget(txt_editor) - self.base_dict['barcode'] = base64.b64encode(make_plate_barcode(self.base_dict['Plate Number'], width=120, height=30)).decode('utf-8') + logger.debug(f"Creating barcode.") + if not check_if_app(): + self.base_dict['barcode'] = base64.b64encode(make_plate_barcode(self.base_dict['Plate Number'], width=120, height=30)).decode('utf-8') sub = lookup_submission_by_rsl_num(ctx=self.ctx, rsl_num=self.base_dict['Plate Number']) # plate_dicto = hitpick_plate(sub) + logger.debug(f"Hitpicking plate...") plate_dicto = sub.hitpick_plate() + logger.debug(f"Making platemap...") platemap = make_plate_map(plate_dicto) logger.debug(f"platemap: {platemap}") image_io = BytesIO() diff --git a/src/submissions/frontend/main_window_functions.py b/src/submissions/frontend/main_window_functions.py index f9a37be..96f4ef1 100644 --- a/src/submissions/frontend/main_window_functions.py +++ b/src/submissions/frontend/main_window_functions.py @@ -25,7 
+25,7 @@ from backend.db.functions import ( lookup_all_orgs, lookup_kittype_by_use, lookup_kittype_by_name, construct_submission_info, lookup_reagent, store_submission, lookup_submissions_by_date_range, create_kit_from_yaml, create_org_from_yaml, get_control_subtypes, get_all_controls_by_type, - lookup_all_submissions_by_type, get_all_controls, lookup_submission_by_rsl_num, update_ww_sample, + lookup_all_submissions_by_type, get_all_controls, lookup_submission_by_rsl_num, update_subsampassoc_with_pcr, check_kit_integrity ) from backend.excel.parser import SheetParser, PCRParser @@ -133,7 +133,7 @@ def import_submission_function(obj:QMainWindow) -> Tuple[QMainWindow, dict|None] # lookup existing kits by 'submission_type' decided on by sheetparser # uses = [item.__str__() for item in lookup_kittype_by_use(ctx=obj.ctx, used_by=pyd.submission_type['value'].lower())] logger.debug(f"Looking up kits used for {pyd.submission_type['value']}") - uses = [item.__str__() for item in lookup_kittype_by_use(ctx=obj.ctx, used_by=pyd.submission_type['value'])] + uses = [item.__str__() for item in lookup_kittype_by_use(ctx=obj.ctx, used_for=pyd.submission_type['value'])] logger.debug(f"Kits received for {pyd.submission_type['value']}: {uses}") if check_not_nan(value['value']): logger.debug(f"The extraction kit in parser was: {value['value']}") @@ -365,7 +365,7 @@ def submit_new_sample_function(obj:QMainWindow) -> Tuple[QMainWindow, dict]: if kit_integrity != None: return obj, dict(message=kit_integrity['message'], status="critical") logger.debug(f"Sending submission: {base_submission.rsl_plate_num} to database.") - result = store_submission(ctx=obj.ctx, base_submission=base_submission, samples=obj.samples) + result = store_submission(ctx=obj.ctx, base_submission=base_submission) # check result of storing for issues # update summary sheet obj.table_widget.sub_wid.setData() @@ -383,7 +383,8 @@ def submit_new_sample_function(obj:QMainWindow) -> Tuple[QMainWindow, dict]: excel_map = 
extraction_kit.construct_xl_map_for_use(obj.current_submission_type) logger.debug(f"Extraction kit map:\n\n{pprint.pformat(excel_map)}") # excel_map.update(extraction_kit.used_for[obj.current_submission_type.replace('_', ' ').title()]) - input_reagents = [item.to_reagent_dict() for item in parsed_reagents] + input_reagents = [item.to_reagent_dict(extraction_kit=base_submission.extraction_kit) for item in parsed_reagents] + logger.debug(f"Parsed reagents going into autofile: {pprint.pformat(input_reagents)}") autofill_excel(obj=obj, xl_map=excel_map, reagents=input_reagents, missing_reagents=obj.missing_reagents, info=info, missing_info=obj.missing_info) if hasattr(obj, 'csv'): dlg = QuestionAsker("Export CSV?", "Would you like to export the csv file?") @@ -844,10 +845,16 @@ def import_pcr_results_function(obj:QMainWindow) -> Tuple[QMainWindow, dict]: obj.ctx.database_session.commit() logger.debug(f"Got {len(parser.samples)} samples to update!") logger.debug(f"Parser samples: {parser.samples}") - for sample in parser.samples: - logger.debug(f"Running update on: {sample['sample']}") - sample['plate_rsl'] = sub.rsl_plate_num - update_ww_sample(ctx=obj.ctx, sample_obj=sample) + for sample in sub.samples: + logger.debug(f"Running update on: {sample}") + try: + sample_dict = [item for item in parser.samples if item['sample']==sample.rsl_number][0] + except IndexError: + continue + # sample['plate_rsl'] = sub.rsl_plate_num + # update_ww_sample(ctx=obj.ctx, sample_obj=sample) + update_subsampassoc_with_pcr(ctx=obj.ctx, submission=sub, sample=sample, input_dict=sample_dict) + result = dict(message=f"We added PCR info to {sub.rsl_plate_num}.", status='information') return obj, result @@ -872,7 +879,16 @@ def autofill_excel(obj:QMainWindow, xl_map:dict, reagents:List[dict], missing_re # pare down the xl map to only the missing data. 
relevant_reagent_map = {k:v for k,v in xl_map.items() if k in [reagent.type for reagent in missing_reagents]} # pare down reagents to only what's missing + logger.debug(f"Checking {[item['type'] for item in reagents]} against {[reagent.type for reagent in missing_reagents]}") relevant_reagents = [item for item in reagents if item['type'] in [reagent.type for reagent in missing_reagents]] + # relevant_reagents = [] + # for item in reagents: + # logger.debug(f"Checking {item['type']} in {[reagent.type for reagent in missing_reagents]}") + # if item['type'] in [reagent.type for reagent in missing_reagents]: + # logger.debug("Hit!") + # relevant_reagents.append(item) + # else: + # logger.debug('Miss.') logger.debug(f"Here are the relevant reagents: {pprint.pformat(relevant_reagents)}") # hacky manipulation of submission type so it looks better. # info['submission_type'] = info['submission_type'].replace("_", " ").title() diff --git a/src/submissions/templates/submission_details.html b/src/submissions/templates/submission_details.html index 0d411fb..a2f50d9 100644 --- a/src/submissions/templates/submission_details.html +++ b/src/submissions/templates/submission_details.html @@ -5,7 +5,7 @@ {% set excluded = ['reagents', 'samples', 'controls', 'ext_info', 'pcr_info', 'comments', 'barcode', 'platemap'] %} -

Submission Details for {{ sub['Plate Number'] }}

    +

Submission Details for {{ sub['Plate Number'] }}

   {% if sub['barcode'] %}{% endif %}

{% for key, value in sub.items() if key not in excluded %}     {{ key }}: {% if key=='Cost' %} {{ "${:,.2f}".format(value) }}{% else %}{{ value }}{% endif %}
{% endfor %}

diff --git a/src/submissions/tools/__init__.py b/src/submissions/tools/__init__.py index 8654dcd..02507cb 100644 --- a/src/submissions/tools/__init__.py +++ b/src/submissions/tools/__init__.py @@ -195,7 +195,7 @@ class RSLNamer(object): # (?PRSL(?:-|_)?WW(?:-|_)?20\d{2}-?\d{2}-?\d{2}(?:(?:_|-)\d?((?!\d)|R)?\d(?!\d))?)| (?PRSL(?:-|_)?WW(?:-|_)?20\d{2}-?\d{2}-?\d{2}(?:(_|-)\d?(\D|$)R?\d?)?)| (?PRSL-?\d{2}-?\d{4})| - (?P(\d{4}-\d{2}-\d{2}_(?:\d_)?artic)|(RSL(?:-|_)?AR(?:-|_)?20\d{2}-?\d{2}-?\d{2}(?:(_|-)\d?(\D|$)R?\d?)?)) + (?P(\d{4}-\d{2}-\d{2}(?:-|_)(?:\d_)?artic)|(RSL(?:-|_)?AR(?:-|_)?20\d{2}-?\d{2}-?\d{2}(?:(_|-)\d?(\D|$)R?\d?)?)) """, flags = re.IGNORECASE | re.VERBOSE) m = regex.search(self.out_str) if m != None: @@ -308,10 +308,10 @@ class RSLNamer(object): except AttributeError: self.parsed_name = construct() try: - plate_number = int(re.search(r"_\d?_", self.parsed_name).group().strip("_")) - except AttributeError as e: + plate_number = int(re.search(r"_|-\d?_", self.parsed_name).group().strip("_").strip("-")) + except (AttributeError, ValueError) as e: plate_number = 1 - self.parsed_name = re.sub(r"(_\d)?_ARTIC", f"-{plate_number}", self.parsed_name) + self.parsed_name = re.sub(r"(_|-\d)?_ARTIC", f"-{plate_number}", self.parsed_name) class GroupWriteRotatingFileHandler(handlers.RotatingFileHandler): @@ -611,7 +611,6 @@ def jinja_template_loading(): loader_path = Path(sys._MEIPASS).joinpath("files", "templates") else: loader_path = Path(__file__).parents[1].joinpath('templates').absolute().__str__() - # jinja template loading loader = FileSystemLoader(loader_path) env = Environment(loader=loader)