""" Models for the main submission and sample types. """ from __future__ import annotations from getpass import getuser import logging, uuid, tempfile, re, yaml, base64 from zipfile import ZipFile from tempfile import TemporaryDirectory from operator import attrgetter, itemgetter from pprint import pformat from . import BaseClass, Reagent, SubmissionType, KitType, Organization from sqlalchemy import Column, String, TIMESTAMP, INTEGER, ForeignKey, JSON, FLOAT, case from sqlalchemy.orm import relationship, validates, Query from sqlalchemy.orm.attributes import flag_modified from sqlalchemy.ext.associationproxy import association_proxy from sqlalchemy.exc import OperationalError as AlcOperationalError, IntegrityError as AlcIntegrityError, StatementError from sqlite3 import OperationalError as SQLOperationalError, IntegrityError as SQLIntegrityError import pandas as pd from openpyxl import Workbook, load_workbook from openpyxl.worksheet.worksheet import Worksheet from openpyxl.drawing.image import Image as OpenpyxlImage from tools import check_not_nan, row_map, setup_lookup, jinja_template_loading, rreplace from datetime import datetime, date from typing import List, Any, Tuple, Literal from dateutil.parser import parse from dateutil.parser import ParserError from pathlib import Path from jinja2.exceptions import TemplateNotFound from jinja2 import Template logger = logging.getLogger(f"submissions.{__name__}") class BasicSubmission(BaseClass): """ Concrete of basic submission which polymorphs into BacterialCulture and Wastewater """ id = Column(INTEGER, primary_key=True) #: primary key rsl_plate_num = Column(String(32), unique=True, nullable=False) #: RSL name (e.g. RSL-22-0012) submitter_plate_num = Column(String(127), unique=True) #: The number given to the submission by the submitting lab submitted_date = Column(TIMESTAMP) #: Date submission received submitting_lab = relationship("Organization", back_populates="submissions") #: client org submitting_lab_id = Column(INTEGER, ForeignKey("_organization.id", ondelete="SET NULL", name="fk_BS_sublab_id")) #: client lab id from _organizations sample_count = Column(INTEGER) #: Number of samples in the submission extraction_kit = relationship("KitType", back_populates="submissions") #: The extraction kit used extraction_kit_id = Column(INTEGER, ForeignKey("_kittype.id", ondelete="SET NULL", name="fk_BS_extkit_id")) #: id of joined extraction kit submission_type_name = Column(String, ForeignKey("_submissiontype.name", ondelete="SET NULL", name="fk_BS_subtype_name")) #: name of joined submission type technician = Column(String(64)) #: initials of processing tech(s) # Move this into custom types? reagents_id = Column(String, ForeignKey("_reagent.id", ondelete="SET NULL", name="fk_BS_reagents_id")) #: id of used reagents extraction_info = Column(JSON) #: unstructured output from the extraction table logger. run_cost = Column( FLOAT(2)) #: total cost of running the plate. Set from constant and mutable kit costs at time of creation. signed_by = Column(String(32)) #: user name of person who submitted the submission to the database. comment = Column(JSON) #: user notes submission_category = Column( String(64)) #: ["Research", "Diagnostic", "Surveillance", "Validation"], else defaults to submission_type_name cost_centre = Column( String(64)) #: Permanent storage of used cost centre in case organization field changed in the future. submission_sample_associations = relationship( "SubmissionSampleAssociation", back_populates="submission", cascade="all, delete-orphan", ) #: Relation to SubmissionSampleAssociation samples = association_proxy("submission_sample_associations", "sample") #: Association proxy to SubmissionSampleAssociation.samples submission_reagent_associations = relationship( "SubmissionReagentAssociation", back_populates="submission", cascade="all, delete-orphan", ) #: Relation to SubmissionReagentAssociation reagents = association_proxy("submission_reagent_associations", "reagent") #: Association proxy to SubmissionReagentAssociation.reagent submission_equipment_associations = relationship( "SubmissionEquipmentAssociation", back_populates="submission", cascade="all, delete-orphan" ) #: Relation to Equipment equipment = association_proxy("submission_equipment_associations", "equipment") #: Association proxy to SubmissionEquipmentAssociation.equipment # Allows for subclassing into ex. BacterialCulture, Wastewater, etc. __mapper_args__ = { "polymorphic_identity": "Basic Submission", "polymorphic_on": submission_type_name, "with_polymorphic": "*", } def __repr__(self) -> str: """ Returns: str: Representation of this BasicSubmission """ submission_type = self.submission_type or "Basic" return f"{submission_type}Submission({self.rsl_plate_num})" @classmethod def jsons(cls) -> List[str]: """ Get list of JSON db columns Returns: List[str]: List of column names """ output = [item.name for item in cls.__table__.columns if isinstance(item.type, JSON)] if issubclass(cls, BasicSubmission) and not cls.__name__ == "BasicSubmission": output += BasicSubmission.jsons() return output @classmethod def timestamps(cls) -> List[str]: """ Get list of TIMESTAMP columns Returns: List[str]: List of column names """ output = [item.name for item in cls.__table__.columns if isinstance(item.type, TIMESTAMP)] if issubclass(cls, BasicSubmission) and not cls.__name__ == "BasicSubmission": output += BasicSubmission.timestamps() return output # TODO: Beef up this to include info_map from DB @classmethod def get_default_info(cls, *args): # NOTE: Create defaults for all submission_types parent_defs = super().get_default_info() recover = ['filepath', 'samples', 'csv', 'comment', 'equipment'] dicto = dict( details_ignore=['excluded', 'reagents', 'samples', 'extraction_info', 'comment', 'barcode', 'platemap', 'export_map', 'equipment'], # NOTE: Fields not placed in ui form form_ignore=['reagents', 'ctx', 'id', 'cost', 'extraction_info', 'signed_by', 'comment'] + recover, # NOTE: Fields not placed in ui form to be moved to pydantic form_recover=recover ) # logger.debug(dicto['singles']) # NOTE: Singles tells the query which fields to set limit to 1 dicto['singles'] = parent_defs['singles'] # logger.debug(dicto['singles']) # NOTE: Grab subtype specific info. output = {} for k, v in dicto.items(): if len(args) > 0 and k not in args: # logger.debug(f"Don't want {k}") continue else: output[k] = v st = cls.get_submission_type() if st is None: logger.error("No default info for BasicSubmission.") else: output['submission_type'] = st.name for k, v in st.defaults.items(): if len(args) > 0 and k not in args: # logger.debug(f"Don't want {k}") continue else: match v: case list(): output[k] += v case _: output[k] = v if len(args) == 1: return output[args[0]] return output @classmethod def get_submission_type(cls) -> SubmissionType: """ Gets the SubmissionType associated with this class Returns: SubmissionType: SubmissionType with name equal to this polymorphic identity """ name = cls.__mapper_args__['polymorphic_identity'] return SubmissionType.query(name=name) @classmethod def construct_info_map(cls, mode:Literal["read", "write"]) -> dict: """ Method to call submission type's construct info map. Args: mode (Literal["read", "write"]): Which map to construct. Returns: dict: Map of info locations. """ return cls.get_submission_type().construct_info_map(mode=mode) @classmethod def construct_sample_map(cls) -> dict: """ Method to call submission type's construct_sample_map Returns: dict: sample location map """ return cls.get_submission_type().construct_sample_map() def to_dict(self, full_data: bool = False, backup: bool = False, report: bool = False) -> dict: """ Constructs dictionary used in submissions summary Args: full_data (bool, optional): indicates if sample dicts to be constructed. Defaults to False. backup (bool, optional): passed to adjust_to_dict_samples. Defaults to False. Returns: dict: dictionary used in submissions summary and details """ # NOTE: get lab from nested organization object # logger.debug(f"Converting {self.rsl_plate_num} to dict...") try: sub_lab = self.submitting_lab.name except AttributeError: sub_lab = None try: sub_lab = sub_lab.replace("_", " ").title() except AttributeError: pass # NOTE: get extraction kit name from nested kit object try: ext_kit = self.extraction_kit.name except AttributeError: ext_kit = None # NOTE: load scraped extraction info try: ext_info = self.extraction_info except TypeError: ext_info = None output = { "id": self.id, "Plate Number": self.rsl_plate_num, "Submission Type": self.submission_type_name, "Submitter Plate Number": self.submitter_plate_num, "Submitted Date": self.submitted_date.strftime("%Y-%m-%d"), "Submitting Lab": sub_lab, "Sample Count": self.sample_count, "Extraction Kit": ext_kit, "Cost": self.run_cost, } if report: return output if full_data: # logger.debug(f"Attempting reagents.") try: reagents = [item.to_sub_dict(extraction_kit=self.extraction_kit) for item in self.submission_reagent_associations] for k in self.extraction_kit.construct_xl_map_for_use(self.submission_type): if k == 'info': continue if not any([item['type'] == k for item in reagents]): reagents.append( dict(type=k, name="Not Applicable", lot="NA", expiry=date(year=1970, month=1, day=1), missing=True)) except Exception as e: logger.error(f"We got an error retrieving reagents: {e}") reagents = None # logger.debug(f"Running samples.") samples = self.adjust_to_dict_samples(backup=backup) # logger.debug("Running equipment") try: equipment = [item.to_sub_dict() for item in self.submission_equipment_associations] if len(equipment) == 0: equipment = None except Exception as e: logger.error(f"Error setting equipment: {e}") equipment = None cost_centre = self.cost_centre else: reagents = None samples = None equipment = None cost_centre = None # logger.debug("Getting comments") try: comments = self.comment except Exception as e: logger.error(f"Error setting comment: {self.comment}") comments = None output["Submission Category"] = self.submission_category output["Technician"] = self.technician output["reagents"] = reagents output["samples"] = samples output["extraction_info"] = ext_info output["comment"] = comments output["equipment"] = equipment output["Cost Centre"] = cost_centre output["Signed By"] = self.signed_by return output def calculate_column_count(self) -> int: """ Calculate the number of columns in this submission Returns: int: Number of unique columns. """ # logger.debug(f"Here's the samples: {self.samples}") columns = set([assoc.column for assoc in self.submission_sample_associations]) # logger.debug(f"Here are the columns for {self.rsl_plate_num}: {columns}") return len(columns) def calculate_base_cost(self): """ Calculates cost of the plate """ # Calculate number of columns based on largest column number try: cols_count_96 = self.calculate_column_count() except Exception as e: logger.error(f"Column count error: {e}") # Get kit associated with this submission assoc = [item for item in self.extraction_kit.kit_submissiontype_associations if item.submission_type == self.submission_type][0] # logger.debug(f"Came up with association: {assoc}") # If every individual cost is 0 this is probably an old plate. if all(item == 0.0 for item in [assoc.constant_cost, assoc.mutable_cost_column, assoc.mutable_cost_sample]): try: self.run_cost = self.extraction_kit.cost_per_run except Exception as e: logger.error(f"Calculation error: {e}") else: try: self.run_cost = assoc.constant_cost + (assoc.mutable_cost_column * cols_count_96) + ( assoc.mutable_cost_sample * int(self.sample_count)) except Exception as e: logger.error(f"Calculation error: {e}") self.run_cost = round(self.run_cost, 2) def hitpick_plate(self) -> list: """ Returns positve sample locations for plate Returns: list: list of htipick dictionaries for each sample """ output_list = [assoc.to_hitpick() for assoc in self.submission_sample_associations] return output_list def make_plate_map(self, plate_rows: int = 8, plate_columns=12) -> str: """ Constructs an html based plate map for submission details. Args: sample_list (list): List of submission samples plate_rows (int, optional): Number of rows in the plate. Defaults to 8. plate_columns (int, optional): Number of columns in the plate. Defaults to 12. Returns: str: html output string. """ # logger.debug("Creating basic hitpick") sample_list = self.hitpick_plate() # logger.debug("Setting background colours") for sample in sample_list: if sample['positive']: sample['background_color'] = "#f10f07" else: if "colour" in sample.keys(): sample['background_color'] = "#69d84f" else: sample['background_color'] = "#80cbc4" output_samples = [] # logger.debug("Setting locations.") for column in range(1, plate_columns + 1): for row in range(1, plate_rows + 1): try: well = [item for item in sample_list if item['Row'] == row and item['Column'] == column][0] except IndexError: well = dict(name="", row=row, column=column, background_color="#ffffff") output_samples.append(well) env = jinja_template_loading() template = env.get_template("plate_map.html") html = template.render(samples=output_samples, PLATE_ROWS=plate_rows, PLATE_COLUMNS=plate_columns) return html + "
" def get_used_equipment(self) -> List[str]: """ Gets EquipmentRole names associated with this BasicSubmission Returns: List[str]: List of names """ return [item.role for item in self.submission_equipment_associations] @classmethod def submissions_to_df(cls, submission_type: str | None = None, limit: int = 0, chronologic:bool=True) -> pd.DataFrame: """ Convert all submissions to dataframe Args: submission_type (str | None, optional): Filter by SubmissionType. Defaults to None. limit (int, optional): Maximum number of results to return. Defaults to 0. Returns: pd.DataFrame: Pandas Dataframe of all relevant submissions """ # logger.debug(f"Querying Type: {submission_type}") # logger.debug(f"Using limit: {limit}") # use lookup function to create list of dicts subs = [item.to_dict() for item in cls.query(submission_type=submission_type, limit=limit, chronologic=chronologic)] # logger.debug(f"Got {len(subs)} submissions.") df = pd.DataFrame.from_records(subs) # NOTE: Exclude sub information for item in ['controls', 'extraction_info', 'pcr_info', 'comment', 'comments', 'samples', 'reagents', 'equipment', 'gel_info', 'gel_image', 'dna_core_submission_number', 'gel_controls']: try: df = df.drop(item, axis=1) except: logger.warning(f"Couldn't drop '{item}' column from submissionsheet df.") if chronologic: df.sort_values(by="Submitted Date", axis=0, inplace=True, ascending=False) return df def set_attribute(self, key: str, value): """ Performs custom attribute setting based on values. Args: key (str): name of attribute value (_type_): value of attribute """ match key: case "extraction_kit": # logger.debug(f"Looking up kit {value}") field_value = KitType.query(name=value) # logger.debug(f"Got {field_value} for kit {value}") case "submitting_lab": # logger.debug(f"Looking up organization: {value}") field_value = Organization.query(name=value) # logger.debug(f"Got {field_value} for organization {value}") case "samples": for sample in value: # logger.debug(f"Parsing {sample} to sql.") sample, _ = sample.to_sql(submission=self) return case "reagents": logger.debug(f"Reagents coming into SQL: {value}") field_value = [reagent['value'].to_sql()[0] if isinstance(reagent, dict) else reagent.to_sql()[0] for reagent in value] logger.debug(f"Reagents coming out of SQL: {field_value}") case "submission_type": field_value = SubmissionType.query(name=value) case "sample_count": if value == None: field_value = len(self.samples) else: field_value = value case "ctx" | "csv" | "filepath" | "equipment": return case item if item in self.jsons(): logger.debug(f"Setting JSON attribute.") existing = self.__getattribute__(key) if value is None or value in ['', 'null']: logger.error(f"No value given, not setting.") return if existing is None: existing = [] if value in existing: logger.warning("Value already exists. Preventing duplicate addition.") return else: if isinstance(value, list): existing += value else: if value is not None: existing.append(value) self.__setattr__(key, existing) flag_modified(self, key) return case _: try: field_value = value.strip() except AttributeError: field_value = value # insert into field try: self.__setattr__(key, field_value) except AttributeError as e: logger.error(f"Could not set {self} attribute {key} to {value} due to \n{e}") def update_subsampassoc(self, sample: BasicSample, input_dict: dict): """ Update a joined submission sample association. Args: sample (BasicSample): Associated sample. input_dict (dict): values to be updated Returns: Result: _description_ """ assoc = [item for item in self.submission_sample_associations if item.sample == sample][0] for k, v in input_dict.items(): try: setattr(assoc, k, v) except AttributeError: logger.error(f"Can't set {k} to {v}") result = assoc.save() return result def to_pydantic(self, backup: bool = False) -> "PydSubmission": """ Converts this instance into a PydSubmission Returns: PydSubmission: converted object. """ from backend.validators import PydSubmission, PydSample, PydReagent, PydEquipment dicto = self.to_dict(full_data=True, backup=backup) # logger.debug("To dict complete") new_dict = {} for key, value in dicto.items(): # start = time() # logger.debug(f"Checking {key}") missing = value is None or value in ['', 'None'] match key: case "reagents": new_dict[key] = [PydReagent(**reagent) for reagent in value] case "samples": new_dict[key] = [PydSample(**{k.lower().replace(" ", "_"): v for k, v in sample.items()}) for sample in dicto['samples']] case "equipment": try: new_dict[key] = [PydEquipment(**equipment) for equipment in dicto['equipment']] except TypeError as e: logger.error(f"Possible no equipment error: {e}") case "Plate Number": new_dict['rsl_plate_num'] = dict(value=value, missing=missing) case "Submitter Plate Number": new_dict['submitter_plate_num'] = dict(value=value, missing=missing) case "id": pass case _: # logger.debug(f"Setting dict {key} to {value}") new_dict[key.lower().replace(" ", "_")] = dict(value=value, missing=missing) # logger.debug(f"{key} complete after {time()-start}") new_dict['filepath'] = Path(tempfile.TemporaryFile().name) # logger.debug("Done converting fields.") return PydSubmission(**new_dict) def save(self, original: bool = True): """ Adds this instance to database and commits. Args: original (bool, optional): Is this the first save. Defaults to True. """ logger.debug("Saving submission.") if original: self.uploaded_by = getuser() super().save() # Polymorphic functions @classmethod def construct_regex(cls) -> re.Pattern: """ Constructs catchall regex. Returns: re.Pattern: Regular expression pattern to discriminate between submission types. """ rstring = rf'{"|".join([item.get_regex() for item in cls.__subclasses__()])}' regex = re.compile(rstring, flags=re.IGNORECASE | re.VERBOSE) return regex @classmethod def find_polymorphic_subclass(cls, polymorphic_identity: str | SubmissionType | None = None, attrs: dict | None = None): """ Find subclass based on polymorphic identity or relevant attributes. Args: polymorphic_identity (str | None, optional): String representing polymorphic identity. Defaults to None. attrs (str | SubmissionType | None, optional): Attributes of the relevant class. Defaults to None. Returns: _type_: Subclass of interest. """ # logger.debug(f"Controlling for dict value") if isinstance(polymorphic_identity, dict): polymorphic_identity = polymorphic_identity['value'] if isinstance(polymorphic_identity, SubmissionType): polymorphic_identity = polymorphic_identity.name # if polymorphic_identity != None: model = cls match polymorphic_identity: case str(): try: logger.info(f"Recruiting: {cls}") model = [item for item in cls.__subclasses__() if item.__mapper_args__['polymorphic_identity'] == polymorphic_identity][0] except Exception as e: logger.error(f"Could not get polymorph {polymorphic_identity} of {cls} due to {e}") case _: pass if attrs is None or len(attrs) == 0: return model if any([not hasattr(cls, attr) for attr in attrs.keys()]): # looks for first model that has all included kwargs try: model = [subclass for subclass in cls.__subclasses__() if all([hasattr(subclass, attr) for attr in attrs.keys()])][0] except IndexError as e: raise AttributeError( f"Couldn't find existing class/subclass of {cls} with all attributes:\n{pformat(attrs.keys())}") logger.info(f"Recruiting model: {model}") return model # Child class custom functions # @classmethod # def custom_platemap(cls, xl: pd.ExcelFile, plate_map: pd.DataFrame) -> pd.DataFrame: # """ # Stupid stopgap solution to there being an issue with the Bacterial Culture plate map # # Args: # xl (pd.ExcelFile): original xl workbook, used for child classes mostly # plate_map (pd.DataFrame): original plate map # # Returns: # pd.DataFrame: updated plate map. # """ # logger.info(f"Calling {cls.__mapper_args__['polymorphic_identity']} plate mapper.") # return plate_map @classmethod def custom_info_parser(cls, input_dict: dict, xl: Workbook | None = None) -> dict: """ Update submission dictionary with type specific information Args: input_dict (dict): Input sample dictionary xl (pd.ExcelFile): original xl workbook, used for child classes mostly Returns: dict: Updated sample dictionary """ logger.info(f"Calling {cls.__mapper_args__['polymorphic_identity']} info parser.") return input_dict @classmethod def parse_samples(cls, input_dict: dict) -> dict: """ Update sample dictionary with type specific information Args: input_dict (dict): Input sample dictionary Returns: dict: Updated sample dictionary """ logger.info(f"Called {cls.__mapper_args__['polymorphic_identity']} sample parser") return input_dict @classmethod def finalize_parse(cls, input_dict: dict, xl: pd.ExcelFile | None = None, info_map: dict | None = None) -> dict: """ Performs any final custom parsing of the excel file. Args: input_dict (dict): Parser product up to this point. xl (pd.ExcelFile | None, optional): Excel submission form. Defaults to None. info_map (dict | None, optional): Map of information locations from SubmissionType. Defaults to None. plate_map (dict | None, optional): Constructed plate map of samples. Defaults to None. Returns: dict: Updated parser product. """ logger.info(f"Called {cls.__mapper_args__['polymorphic_identity']} finalizer") return input_dict @classmethod def custom_info_writer(cls, input_excel: Workbook, info: dict | None = None, backup: bool = False) -> Workbook: """ Adds custom autofill methods for submission Args: input_excel (Workbook): initial workbook. info (dict | None, optional): dictionary of additional info. Defaults to None. backup (bool, optional): Whether this is part of a backup operation. Defaults to False. Returns: Workbook: Updated workbook """ logger.info(f"Hello from {cls.__mapper_args__['polymorphic_identity']} autofill") return input_excel @classmethod def enforce_name(cls, instr: str, data: dict | None = {}) -> str: """ Custom naming method for this class. Args: instr (str): Initial name. data (dict | None, optional): Additional parameters for name. Defaults to None. Returns: str: Updated name. """ # logger.info(f"Hello from {cls.__mapper_args__['polymorphic_identity']} Enforcer!") # return instr from backend.validators import RSLNamer # logger.debug(f"instr coming into {cls}: {instr}") # logger.debug(f"data coming into {cls}: {data}") defaults = cls.get_default_info("abbreviation", "submission_type") data['abbreviation'] = defaults['abbreviation'] if 'submission_type' not in data.keys() or data['submission_type'] in [None, ""]: data['submission_type'] = defaults['submission_type'] # outstr = super().enforce_name(instr=instr, data=data) if instr in [None, ""]: # logger.debug("Sending to RSLNamer to make new plate name.") outstr = RSLNamer.construct_new_plate_name(data=data) else: outstr = instr if re.search(rf"{data['abbreviation']}", outstr, flags=re.IGNORECASE) is None: outstr = re.sub(rf"RSL-?", rf"RSL-{data['abbreviation']}-", outstr, flags=re.IGNORECASE) try: outstr = re.sub(r"(\d{4})-(\d{2})-(\d{2})", r"\1\2\3", outstr) outstr = re.sub(rf"{data['abbreviation']}(\d{6})", rf"{data['abbreviation']}-\1", outstr, flags=re.IGNORECASE).upper() except (AttributeError, TypeError) as e: logger.error(f"Error making outstr: {e}, sending to RSLNamer to make new plate name.") outstr = RSLNamer.construct_new_plate_name(data=data) try: plate_number = re.search(r"(?:(-|_)\d)(?!\d)", outstr).group().strip("_").strip("-") # logger.debug(f"Plate number is: {plate_number}") except AttributeError as e: plate_number = "1" outstr = re.sub(r"(\d{8})(-|_)?\d?(R\d?)?", rf"\1-{plate_number}\3", outstr) # logger.debug(f"After addition of plate number the plate name is: {outstr}") try: repeat = re.search(r"-\dR(?P\d)?", outstr).groupdict()['repeat'] if repeat == None: repeat = "1" except AttributeError as e: repeat = "" outstr = re.sub(r"(-\dR)\d?", rf"\1 {repeat}", outstr).replace(" ", "") abb = cls.get_default_info('abbreviation') return re.sub(rf"{abb}(\d)", rf"{abb}-\1", outstr) # return outstr # @classmethod # def parse_pcr(cls, xl: pd.DataFrame, rsl_number: str) -> list: # """ # Perform custom parsing of pcr info. # # Args: # xl (pd.DataFrame): pcr info form # rsl_number (str): rsl plate num of interest # # Returns: # list: _description_ # """ # logger.debug(f"Hello from {cls.__mapper_args__['polymorphic_identity']} PCR parser!") # return [] @classmethod def parse_pcr(cls, xl: Workbook, rsl_plate_num: str) -> list: """ Perform custom parsing of pcr info. Args: xl (pd.DataFrame): pcr info form rsl_plate_number (str): rsl plate num of interest Returns: list: _description_ """ # logger.debug(f"Hello from {cls.__mapper_args__['polymorphic_identity']} PCR parser!") pcr_sample_map = cls.get_submission_type().sample_map['pcr_samples'] # logger.debug(f'sample map: {pcr_sample_map}') main_sheet = xl[pcr_sample_map['main_sheet']] samples = [] fields = {k: v for k, v in pcr_sample_map.items() if k not in ['main_sheet', 'start_row']} for row in main_sheet.iter_rows(min_row=pcr_sample_map['start_row']): idx = row[0].row sample = {} for k, v in fields.items(): sheet = xl[v['sheet']] sample[k] = sheet.cell(row=idx, column=v['column']).value samples.append(sample) return samples @classmethod def filename_template(cls) -> str: """ Constructs template for filename of this class. Note: This is meant to be used with the dictionary constructed in self.to_dict(). Keys need to have spaces removed Returns: str: filename template in jinja friendly format. """ return "{{ rsl_plate_num }}" @classmethod def custom_sample_autofill_row(cls, sample, worksheet: Worksheet) -> int: """ _summary_ Args: sample (_type_): _description_ worksheet (Workbook): _description_ Returns: int: _description_ """ return None @classmethod def adjust_autofill_samples(cls, samples: List[Any]) -> List[Any]: logger.info(f"Hello from {cls.__mapper_args__['polymorphic_identity']} sampler") return samples def adjust_to_dict_samples(self, backup: bool = False) -> List[dict]: """ Updates sample dictionaries with custom values Args: backup (bool, optional): Whether to perform backup. Defaults to False. Returns: List[dict]: Updated dictionaries """ # logger.debug(f"Hello from {self.__class__.__name__} dictionary sample adjuster.") return [item.to_sub_dict() for item in self.submission_sample_associations] @classmethod def get_details_template(cls, base_dict: dict) -> Tuple[dict, Template]: """ Get the details jinja template for the correct class Args: base_dict (dict): incoming dictionary of Submission fields Returns: Tuple(dict, Template): (Updated dictionary, Template to be rendered) """ base_dict['excluded'] = cls.get_default_info('details_ignore') env = jinja_template_loading() temp_name = f"{cls.__name__.lower()}_details.html" # logger.debug(f"Returning template: {temp_name}") try: template = env.get_template(temp_name) except TemplateNotFound as e: logger.error(f"Couldn't find template due to {e}") template = env.get_template("basicsubmission_details.html") return base_dict, template # Query functions @classmethod @setup_lookup def query(cls, submission_type: str | SubmissionType | None = None, id: int | str | None = None, rsl_plate_num: str | None = None, start_date: date | str | int | None = None, end_date: date | str | int | None = None, reagent: Reagent | str | None = None, chronologic: bool = False, limit: int = 0, **kwargs ) -> BasicSubmission | List[BasicSubmission]: """ Lookup submissions based on a number of parameters. Overrides parent. Args: submission_type (str | models.SubmissionType | None, optional): Submission type of interest. Defaults to None. id (int | str | None, optional): Submission id in the database (limits results to 1). Defaults to None. rsl_plate_num (str | None, optional): Submission name in the database (limits results to 1). Defaults to None. start_date (date | str | int | None, optional): Beginning date to search by. Defaults to None. end_date (date | str | int | None, optional): Ending date to search by. Defaults to None. reagent (models.Reagent | str | None, optional): A reagent used in the submission. Defaults to None. chronologic (bool, optional): Return results in chronologic order. Defaults to False. limit (int, optional): Maximum number of results to return. Defaults to 0. Returns: models.BasicSubmission | List[models.BasicSubmission]: Submission(s) of interest """ # logger.debug(f"Incoming kwargs: {kwargs}") # NOTE: if you go back to using 'model' change the appropriate cls to model in the query filters if submission_type is not None: # if isinstance(submission_type, SubmissionType): # model = cls.find_subclasses(submission_type=submission_type.name) model = cls.find_polymorphic_subclass(polymorphic_identity=submission_type) # else: # model = cls.find_subclasses(submission_type=submission_type) elif len(kwargs) > 0: # find the subclass containing the relevant attributes # logger.debug(f"Attributes for search: {kwargs}") # model = cls.find_subclasses(attrs=kwargs) model = cls.find_polymorphic_subclass(attrs=kwargs) else: model = cls query: Query = cls.__database_session__.query(model) if start_date is not None and end_date is None: logger.warning(f"Start date with no end date, using today.") end_date = date.today() if end_date is not None and start_date is None: logger.warning(f"End date with no start date, using Jan 1, 2023") start_date = date(2023, 1, 1) if start_date is not None: # logger.debug(f"Querying with start date: {start_date} and end date: {end_date}") match start_date: case date(): # logger.debug(f"Lookup BasicSubmission by start_date({start_date})") start_date = start_date.strftime("%Y-%m-%d") case int(): # logger.debug(f"Lookup BasicSubmission by ordinal start_date {start_date}") start_date = datetime.fromordinal( datetime(1900, 1, 1).toordinal() + start_date - 2).date().strftime("%Y-%m-%d") case _: # logger.debug(f"Lookup BasicSubmission by parsed str start_date {start_date}") start_date = parse(start_date).strftime("%Y-%m-%d") match end_date: case date() | datetime(): # logger.debug(f"Lookup BasicSubmission by end_date({end_date})") end_date = end_date.strftime("%Y-%m-%d") case int(): # logger.debug(f"Lookup BasicSubmission by ordinal end_date {end_date}") end_date = datetime.fromordinal(datetime(1900, 1, 1).toordinal() + end_date - 2).date().strftime( "%Y-%m-%d") case _: # logger.debug(f"Lookup BasicSubmission by parsed str end_date {end_date}") end_date = parse(end_date).strftime("%Y-%m-%d") # logger.debug(f"Looking up BasicSubmissions from start date: {start_date} and end date: {end_date}") # logger.debug(f"Start date {start_date} == End date {end_date}: {start_date == end_date}") # logger.debug(f"Compensating for same date by using time") if start_date == end_date: start_date = datetime.strptime(start_date, "%Y-%m-%d").strftime("%Y-%m-%d %H:%M:%S.%f") query = query.filter(model.submitted_date == start_date) else: query = query.filter(model.submitted_date.between(start_date, end_date)) # by reagent (for some reason) match reagent: case str(): # logger.debug(f"Looking up BasicSubmission with reagent: {reagent}") query = query.join(model.submission_reagent_associations).filter( SubmissionSampleAssociation.reagent.lot == reagent) case Reagent(): # logger.debug(f"Looking up BasicSubmission with reagent: {reagent}") query = query.join(model.submission_reagent_associations).join( SubmissionSampleAssociation.reagent).filter(Reagent.lot == reagent) case _: pass # by rsl number (returns only a single value) match rsl_plate_num: case str(): query = query.filter(model.rsl_plate_num == rsl_plate_num) # logger.debug(f"At this point the query gets: {query.all()}") limit = 1 case _: pass # by id (returns only a single value) match id: case int(): # logger.debug(f"Looking up BasicSubmission with id: {id}") query = query.filter(model.id == id) limit = 1 case str(): # logger.debug(f"Looking up BasicSubmission with id: {id}") query = query.filter(model.id == int(id)) limit = 1 case _: pass # for k, v in kwargs.items(): # logger.debug(f"Looking up attribute: {k}") # attr = getattr(model, k) # logger.debug(f"Got attr: {attr}") # query = query.filter(attr==v) # if len(kwargs) > 0: # limit = 1 # query = cls.query_by_keywords(query=query, model=model, **kwargs) # if any(x in kwargs.keys() for x in cls.get_default_info('singles')): # logger.debug(f"There's a singled out item in kwargs") # limit = 1 if chronologic: query.order_by(cls.submitted_date) return cls.execute_query(query=query, model=model, limit=limit, **kwargs) @classmethod def query_or_create(cls, submission_type: str | SubmissionType | None = None, **kwargs) -> BasicSubmission: """ Returns object from db if exists, else, creates new. Due to need for user input, doesn't see much use ATM. Args: submission_type (str | SubmissionType | None, optional): Submission type to be created. Defaults to None. Raises: ValueError: Raised if no kwargs passed. ValueError: Raised if disallowed key is passed. Returns: cls: _description_ """ code = 0 msg = "" disallowed = ["id"] if kwargs == {}: raise ValueError("Need to narrow down query or the first available instance will be returned.") for key in kwargs.keys(): if key in disallowed: raise ValueError( f"{key} is not allowed as a query argument as it could lead to creation of duplicate objects. Use .query() instead.") instance = cls.query(submission_type=submission_type, limit=1, **kwargs) # logger.debug(f"Retrieved instance: {instance}") if instance is None: used_class = cls.find_polymorphic_subclass(attrs=kwargs, polymorphic_identity=submission_type) instance = used_class(**kwargs) match submission_type: case str(): submission_type = SubmissionType.query(name=submission_type) case _: pass instance.submission_type = submission_type instance.submission_type_name = submission_type.name if "submitted_date" not in kwargs.keys(): instance.submitted_date = date.today() else: code = 1 msg = "This submission already exists.\nWould you like to overwrite?" return instance, code, msg # Custom context events for the ui def custom_context_events(self) -> dict: """ Creates dictionary of str:function to be passed to context menu Returns: dict: dictionary of functions """ names = ["Delete", "Details", "Edit", "Add Comment", "Add Equipment", "Export"] funcs = [self.delete, self.show_details, self.edit, self.add_comment, self.add_equipment, self.backup] dicto = {item[0]: item[1] for item in zip(names, funcs)} return dicto def delete(self, obj=None): """ Performs backup and deletes this instance from database. Args: obj (_type_, optional): Parent Widget. Defaults to None. Raises: e: _description_ """ from frontend.widgets.pop_ups import QuestionAsker # logger.debug("Hello from delete") fname = self.__backup_path__.joinpath(f"{self.rsl_plate_num}-backup({date.today().strftime('%Y%m%d')})") msg = QuestionAsker(title="Delete?", message=f"Are you sure you want to delete {self.rsl_plate_num}?\n") if msg.exec(): self.backup(fname=fname, full_backup=True) self.__database_session__.delete(self) try: self.__database_session__.commit() except (SQLIntegrityError, SQLOperationalError, AlcIntegrityError, AlcOperationalError) as e: self.__database_session__.rollback() raise e obj.setData() def show_details(self, obj): """ Creates Widget for showing submission details. Args: obj (_type_): parent widget """ # logger.debug("Hello from details") from frontend.widgets.submission_details import SubmissionDetails dlg = SubmissionDetails(parent=obj, sub=self) if dlg.exec(): pass def edit(self, obj): from frontend.widgets.submission_widget import SubmissionFormWidget for widg in obj.app.table_widget.formwidget.findChildren(SubmissionFormWidget): # logger.debug(widg) widg.setParent(None) pyd = self.to_pydantic(backup=True) form = pyd.to_form(parent=obj) obj.app.table_widget.formwidget.layout().addWidget(form) def add_comment(self, obj): """ Creates widget for adding comments to submissions Args: obj (_type_): parent widget """ from frontend.widgets.submission_details import SubmissionComment dlg = SubmissionComment(parent=obj, submission=self) if dlg.exec(): comment = dlg.parse_form() self.set_attribute(key='comment', value=comment) # logger.debug(self.comment) self.save(original=False) def add_equipment(self, obj): """ Creates widget for adding equipment to this submission Args: obj (_type_): parent widget """ from frontend.widgets.equipment_usage import EquipmentUsage dlg = EquipmentUsage(parent=obj, submission=self) if dlg.exec(): equipment = dlg.parse_form() # logger.debug(f"We've got equipment: {equipment}") for equip in equipment: # logger.debug(f"Processing: {equip}") _, assoc = equip.toSQL(submission=self) # logger.debug(f"Appending SubmissionEquipmentAssociation: {assoc}") assoc.save() else: pass def backup(self, obj=None, fname: Path | None = None, full_backup: bool = False): """ Exports xlsx and yml info files for this instance. Args: obj (_type_, optional): _description_. Defaults to None. fname (Path | None, optional): Filename of xlsx file. Defaults to None. full_backup (bool, optional): Whether or not to make yaml file. Defaults to False. """ # logger.debug("Hello from backup.") pyd = self.to_pydantic(backup=True) if fname is None: from frontend.widgets.functions import select_save_file fname = select_save_file(default_name=pyd.construct_filename(), extension="xlsx", obj=obj) # logger.debug(fname.name) if fname.name == "": # logger.debug(f"export cancelled.") return # pyd.filepath = fname if full_backup: backup = self.to_dict(full_data=True) try: with open(self.__backup_path__.joinpath(fname.with_suffix(".yml")), "w") as f: yaml.dump(backup, f) except KeyError as e: logger.error(f"Problem saving yml backup file: {e}") # wb = pyd.autofill_excel() # wb = pyd.autofill_samples(wb) # wb = pyd.autofill_equipment(wb) writer = pyd.to_writer() # wb.save(filename=fname.with_suffix(".xlsx")) writer.xl.save(filename=fname.with_suffix(".xlsx")) # Below are the custom submission types class BacterialCulture(BasicSubmission): """ derivative submission type from BasicSubmission """ id = Column(INTEGER, ForeignKey('_basicsubmission.id'), primary_key=True) controls = relationship("Control", back_populates="submission", uselist=True) #: A control sample added to submission __mapper_args__ = dict(polymorphic_identity="Bacterial Culture", polymorphic_load="inline", inherit_condition=(id == BasicSubmission.id)) def to_dict(self, full_data: bool = False, backup: bool = False, report: bool = False) -> dict: """ Extends parent class method to add controls to dict Returns: dict: dictionary used in submissions summary """ output = super().to_dict(full_data=full_data, backup=backup, report=report) if report: return output if full_data: output['controls'] = [item.to_sub_dict() for item in self.controls] return output # @classmethod # def custom_platemap(cls, xl: pd.ExcelFile, plate_map: pd.DataFrame) -> pd.DataFrame: # """ # Stupid stopgap solution to there being an issue with the Bacterial Culture plate map. Extends parent. # # Args: # xl (pd.ExcelFile): original xl workbook # plate_map (pd.DataFrame): original plate map # # Returns: # pd.DataFrame: updated plate map. # """ # plate_map = super().custom_platemap(xl, plate_map) # num1 = xl.parse("Sample List").iloc[40, 1] # num2 = xl.parse("Sample List").iloc[41, 1] # # logger.debug(f"Broken: {plate_map.iloc[5, 0]}, {plate_map.iloc[6, 0]}") # # logger.debug(f"Replace: {num1}, {num2}") # if not check_not_nan(plate_map.iloc[5, 0]): # plate_map.iloc[5, 0] = num1 # if not check_not_nan(plate_map.iloc[6, 0]): # plate_map.iloc[6, 0] = num2 # return plate_map # @classmethod # def custom_writer(cls, input_excel: Workbook, info: dict | None = None, backup: bool = False) -> Workbook: # """ # Stupid stopgap solution to there being an issue with the Bacterial Culture plate map. Extends parent. # # Args: # input_excel (Workbook): Input openpyxl workbook # # Returns: # Workbook: Updated openpyxl workbook # """ # input_excel = super().custom_writer(input_excel) # sheet = input_excel['Plate Map'] # if sheet.cell(12, 2).value == None: # sheet.cell(row=12, column=2, value="=IF(ISBLANK('Sample List'!$B42),\"\",'Sample List'!$B42)") # if sheet.cell(13, 2).value == None: # sheet.cell(row=13, column=2, value="=IF(ISBLANK('Sample List'!$B43),\"\",'Sample List'!$B43)") # input_excel["Sample List"].cell(row=15, column=2, value=getuser()) # return input_excel @classmethod def get_regex(cls) -> str: """ Retrieves string for regex construction. Returns: str: string for regex construction """ return "(?PRSL(?:-|_)?BC(?:-|_)?20\d{2}-?\d{2}-?\d{2}(?:(_|-)?\d?([^_0123456789\sA-QS-Z]|$)?R?\d?)?)" @classmethod def filename_template(cls): """ extends parent """ template = super().filename_template() template += "_{{ submitting_lab }}_{{ submitter_plate_num }}" return template @classmethod def finalize_parse(cls, input_dict: dict, xl: pd.ExcelFile | None = None, info_map: dict | None = None) -> dict: """ Extends parent. Currently finds control sample and adds to reagents. Args: input_dict (dict): _description_ xl (pd.ExcelFile | None, optional): _description_. Defaults to None. info_map (dict | None, optional): _description_. Defaults to None. plate_map (dict | None, optional): _description_. Defaults to None. Returns: dict: _description_ """ from . import ControlType input_dict = super().finalize_parse(input_dict, xl, info_map) # build regex for all control types that have targets regex = ControlType.build_positive_regex() # search samples for match for sample in input_dict['samples']: matched = regex.match(sample['submitter_id']) if bool(matched): # logger.debug(f"Control match found: {sample['submitter_id']}") new_lot = matched.group() try: pos_control_reg = \ [reg for reg in input_dict['reagents'] if reg['type'] == "Bacterial-Positive Control"][0] except IndexError: logger.error(f"No positive control reagent listed") return input_dict pos_control_reg['lot'] = new_lot pos_control_reg['missing'] = False return input_dict @classmethod def custom_sample_autofill_row(cls, sample, worksheet: Worksheet) -> int: """ Extends parent """ # logger.debug(f"Checking {sample.well}") # logger.debug(f"here's the worksheet: {worksheet}") row = super().custom_sample_autofill_row(sample, worksheet) df = pd.DataFrame(list(worksheet.values)) # logger.debug(f"Here's the dataframe: {df}") idx = df[df[0] == sample.well] if idx.empty: new = f"{sample.well[0]}{sample.well[1:].zfill(2)}" logger.debug(f"Checking: {new}") idx = df[df[0] == new] # logger.debug(f"Here is the row: {idx}") row = idx.index.to_list()[0] return row + 1 class Wastewater(BasicSubmission): """ derivative submission type from BasicSubmission """ id = Column(INTEGER, ForeignKey('_basicsubmission.id'), primary_key=True) ext_technician = Column(String(64)) #: Name of technician doing extraction pcr_technician = Column(String(64)) #: Name of technician doing pcr pcr_info = Column(JSON) #: unstructured output from pcr table logger or user(Artic) __mapper_args__ = __mapper_args__ = dict(polymorphic_identity="Wastewater", polymorphic_load="inline", inherit_condition=(id == BasicSubmission.id)) def to_dict(self, full_data: bool = False, backup: bool = False, report: bool = False) -> dict: """ Extends parent class method to add controls to dict Returns: dict: dictionary used in submissions summary """ output = super().to_dict(full_data=full_data, backup=backup, report=report) if report: return output try: output['pcr_info'] = self.pcr_info except TypeError as e: pass if self.ext_technician is None or self.ext_technician == "None": output['Ext Technician'] = self.technician else: output["Ext Technician"] = self.ext_technician if self.pcr_technician is None or self.pcr_technician == "None": output["PCR Technician"] = self.technician else: output['PCR Technician'] = self.pcr_technician return output @classmethod def custom_info_parser(cls, input_dict: dict, xl: Workbook | None = None) -> dict: """ Update submission dictionary with type specific information. Extends parent Args: input_dict (dict): Input sample dictionary Returns: dict: Updated sample dictionary """ input_dict = super().custom_info_parser(input_dict) if xl != None: input_dict['csv'] = xl["Copy to import file"] return input_dict # @classmethod # def parse_pcr(cls, xl: pd.ExcelFile, rsl_number: str) -> list: # """ # Parse specific to wastewater samples. # """ # samples = super().parse_pcr(xl=xl, rsl_number=rsl_number) # df = xl.parse(sheet_name="Results", dtype=object).fillna("") # column_names = ["Well", "Well Position", "Omit", "Sample", "Target", "Task", " Reporter", "Quencher", # "Amp Status", "Amp Score", "Curve Quality", "Result Quality Issues", "Cq", "Cq Confidence", # "Cq Mean", "Cq SD", "Auto Threshold", "Threshold", "Auto Baseline", "Baseline Start", # "Baseline End"] # samples_df = df.iloc[23:][0:] # logger.debug(f"Dataframe of PCR results:\n\t{samples_df}") # samples_df.columns = column_names # logger.debug(f"Samples columns: {samples_df.columns}") # well_call_df = xl.parse(sheet_name="Well Call").iloc[24:][0:].iloc[:, -1:] # try: # samples_df['Assessment'] = well_call_df.values # except ValueError: # logger.error("Well call number doesn't match sample number") # logger.debug(f"Well call df: {well_call_df}") # for _, row in samples_df.iterrows(): # try: # sample_obj = [sample for sample in samples if sample['sample'] == row[3]][0] # except IndexError: # sample_obj = dict( # sample=row['Sample'], # plate_rsl=rsl_number, # ) # logger.debug(f"Got sample obj: {sample_obj}") # if isinstance(row['Cq'], float): # sample_obj[f"ct_{row['Target'].lower()}"] = row['Cq'] # else: # sample_obj[f"ct_{row['Target'].lower()}"] = 0.0 # try: # sample_obj[f"{row['Target'].lower()}_status"] = row['Assessment'] # except KeyError: # logger.error(f"No assessment for {sample_obj['sample']}") # samples.append(sample_obj) # return samples @classmethod def parse_pcr(cls, xl: Workbook, rsl_plate_num: str) -> list: """ Parse specific to wastewater samples. """ samples = super().parse_pcr(xl=xl, rsl_plate_num=rsl_plate_num) # logger.debug(f'Samples from parent pcr parser: {pformat(samples)}') output = [] for sample in samples: # NOTE: remove '-{target}' from controls sample['sample'] = re.sub('-N\\d$', '', sample['sample']) # NOTE: if sample is already in output skip if sample['sample'] in [item['sample'] for item in output]: continue # NOTE: Set ct values sample[f"ct_{sample['target'].lower()}"] = sample['ct'] if isinstance(sample['ct'], float) else 0.0 # NOTE: Set assessment sample[f"{sample['target'].lower()}_status"] = sample['assessment'] # NOTE: Get sample having other target other_targets = [s for s in samples if re.sub('-N\\d$', '', s['sample']) == sample['sample']] for s in other_targets: sample[f"ct_{s['target'].lower()}"] = s['ct'] if isinstance(s['ct'], float) else 0.0 sample[f"{s['target'].lower()}_status"] = s['assessment'] try: del sample['ct'] except KeyError: pass try: del sample['assessment'] except KeyError: pass output.append(sample) return output @classmethod def enforce_name(cls, instr: str, data: dict | None = {}) -> str: """ Extends parent """ try: # Deal with PCR file. instr = re.sub(r"PCR(-|_)", "", instr) except (AttributeError, TypeError) as e: logger.error(f"Problem using regex: {e}") outstr = super().enforce_name(instr=instr, data=data) return outstr @classmethod def get_regex(cls) -> str: """ Retrieves string for regex construction Returns: str: String for regex construction """ return "(?PRSL(?:-|_)?WW(?:-|_)?20\d{2}-?\d{2}-?\d{2}(?:(_|-)?\d?([^_0123456789\sA-QS-Z]|$)?R?\d?)?)" @classmethod def adjust_autofill_samples(cls, samples: List[Any]) -> List[Any]: """ Extends parent """ samples = super().adjust_autofill_samples(samples) samples = [item for item in samples if not item.submitter_id.startswith("EN")] return samples @classmethod def custom_sample_autofill_row(cls, sample, worksheet: Worksheet) -> int: """ Extends parent """ # logger.debug(f"Checking {sample.well}") # logger.debug(f"here's the worksheet: {worksheet}") row = super().custom_sample_autofill_row(sample, worksheet) df = pd.DataFrame(list(worksheet.values)) # logger.debug(f"Here's the dataframe: {df}") idx = df[df[1] == sample.sample_location] # logger.debug(f"Here is the row: {idx}") row = idx.index.to_list()[0] return row + 1 def custom_context_events(self) -> dict: events = super().custom_context_events() events['Link PCR'] = self.link_pcr return events def link_pcr(self, obj): from backend.excel import PCRParser from frontend.widgets import select_open_file fname = select_open_file(obj=obj, file_extension="xlsx") parser = PCRParser(filepath=fname) self.set_attribute("pcr_info", parser.pcr) self.save(original=False) # logger.debug(f"Got {len(parser.samples)} samples to update!") # logger.debug(f"Parser samples: {parser.samples}") for sample in self.samples: # logger.debug(f"Running update on: {sample}") try: sample_dict = [item for item in parser.samples if item['sample'] == sample.rsl_number][0] except IndexError: continue self.update_subsampassoc(sample=sample, input_dict=sample_dict) # self.report.add_result(Result(msg=f"We added PCR info to {sub.rsl_plate_num}.", status='Information')) class WastewaterArtic(BasicSubmission): """ derivative submission type for artic wastewater """ id = Column(INTEGER, ForeignKey('_basicsubmission.id'), primary_key=True) artic_technician = Column(String(64)) #: Name of technician performing artic dna_core_submission_number = Column(String(64)) #: Number used by core as id pcr_info = Column(JSON) #: unstructured output from pcr table logger or user(Artic) gel_image = Column(String(64)) #: file name of gel image in zip file gel_info = Column(JSON) #: unstructured data from gel. gel_controls = Column(JSON) #: locations of controls on the gel source_plates = Column(JSON) #: wastewater plates that samples come from __mapper_args__ = dict(polymorphic_identity="Wastewater Artic", polymorphic_load="inline", inherit_condition=(id == BasicSubmission.id)) def to_dict(self, full_data: bool = False, backup: bool = False, report: bool = False) -> dict: """ Extends parent class method to add controls to dict Returns: dict: dictionary used in submissions summary """ output = super().to_dict(full_data=full_data, backup=backup, report=report) if report: return output output['gel_info'] = self.gel_info output['gel_image'] = self.gel_image output['dna_core_submission_number'] = self.dna_core_submission_number output['source_plates'] = self.source_plates return output @classmethod def custom_info_parser(cls, input_dict: dict, xl: Workbook | None = None) -> dict: """ Update submission dictionary with type specific information Args: input_dict (dict): Input sample dictionary xl (pd.ExcelFile): original xl workbook, used for child classes mostly Returns: dict: Updated sample dictionary """ input_dict = super().custom_info_parser(input_dict) # workbook = load_workbook(xl.io, data_only=True) ws = xl['Egel results'] data = [ws.cell(row=ii, column=jj) for jj in range(15, 27) for ii in range(10, 18)] data = [cell for cell in data if cell.value is not None and "NTC" in cell.value] input_dict['gel_controls'] = [ dict(sample_id=cell.value, location=f"{row_map[cell.row - 9]}{str(cell.column - 14).zfill(2)}") for cell in data] ws = xl['First Strand List'] data = [dict(plate=ws.cell(row=ii, column=3).value, starting_sample=ws.cell(row=ii, column=4).value) for ii in range(8, 11)] for datum in data: if datum['plate'] in ["None", None, ""]: continue else: from backend.validators import RSLNamer datum['plate'] = RSLNamer(filename=datum['plate'], sub_type="Wastewater").parsed_name input_dict['source_plates'] = data return input_dict @classmethod def enforce_name(cls, instr: str, data: dict = {}) -> str: """ Extends parent """ try: # NOTE: Deal with PCR file. instr = re.sub(r"Artic", "", instr, flags=re.IGNORECASE) except (AttributeError, TypeError) as e: logger.error(f"Problem using regex: {e}") # logger.debug(f"Before RSL addition: {instr}") try: instr = instr.replace("-", "") except AttributeError: instr = date.today().strftime("%Y%m%d") instr = re.sub(r"^(\d{6})", f"RSL-AR-\\1", instr) # logger.debug(f"name coming out of Artic namer: {instr}") outstr = super().enforce_name(instr=instr, data=data) return outstr @classmethod def parse_samples(cls, input_dict: dict) -> dict: """ Update sample dictionary with type specific information. Extends parent. Args: input_dict (dict): Input sample dictionary Returns: dict: Updated sample dictionary """ input_dict = super().parse_samples(input_dict) input_dict['sample_type'] = "Wastewater Sample" # Because generate_sample_object needs the submitter_id and the artic has the "({origin well})" # at the end, this has to be done here. No moving to sqlalchemy object :( input_dict['submitter_id'] = re.sub(r"\s\(.+\)\s?$", "", str(input_dict['submitter_id'])).strip() try: input_dict['ww_processing_num'] = input_dict['sample_name_(lims)'] del input_dict['sample_name_(lims)'] except KeyError: logger.error(f"Unable to set ww_processing_num for sample {input_dict['submitter_id']}") try: input_dict['ww_full_sample_id'] = input_dict['sample_name_(ww)'] del input_dict['sample_name_(ww)'] except KeyError: logger.error(f"Unable to set ww_processing_num for sample {input_dict['submitter_id']}") year = str(date.today().year)[-2:] # if "ENC" in input_dict['submitter_id']: if re.search(rf"^{year}-(ENC)", input_dict['submitter_id']): input_dict['rsl_number'] = cls.en_adapter(input_str=input_dict['submitter_id']) if re.search(rf"^{year}-(RSL)", input_dict['submitter_id']): input_dict['rsl_number'] = cls.pbs_adapter(input_str=input_dict['submitter_id']) return input_dict @classmethod def en_adapter(cls, input_str: str) -> str: """ Stopgap solution because WW names their ENs different Args: input_str (str): input name Returns: str: output name """ # logger.debug(f"input string raw: {input_str}") # Remove letters. processed = input_str.replace("RSL", "") processed = re.sub(r"\(.*\)$", "", processed).strip() processed = re.sub(r"[A-QS-Z]+\d*", "", processed) # Remove trailing '-' if any processed = processed.strip("-") # logger.debug(f"Processed after stripping letters: {processed}") try: en_num = re.search(r"\-\d{1}$", processed).group() processed = rreplace(processed, en_num, "") except AttributeError: en_num = "1" en_num = en_num.strip("-") # logger.debug(f"Processed after en_num: {processed}") try: plate_num = re.search(r"\-\d{1}R?\d?$", processed).group() processed = rreplace(processed, plate_num, "") except AttributeError: plate_num = "1" plate_num = plate_num.strip("-") # logger.debug(f"Processed after plate-num: {processed}") day = re.search(r"\d{2}$", processed).group() processed = rreplace(processed, day, "") # logger.debug(f"Processed after day: {processed}") month = re.search(r"\d{2}$", processed).group() processed = rreplace(processed, month, "") processed = processed.replace("--", "") # logger.debug(f"Processed after month: {processed}") year = re.search(r'^(?:\d{2})?\d{2}', processed).group() year = f"20{year}" final_en_name = f"EN{en_num}-{year}{month}{day}" # logger.debug(f"Final EN name: {final_en_name}") return final_en_name @classmethod def pbs_adapter(cls, input_str): """ Stopgap solution because WW names their ENs different Args: input_str (str): input name Returns: str: output name """ # logger.debug(f"input string raw: {input_str}") # Remove letters. processed = input_str.replace("RSL", "") processed = re.sub(r"\(.*\)$", "", processed).strip() processed = re.sub(r"[A-QS-Z]+\d*", "", processed) # Remove trailing '-' if any processed = processed.strip("-") # logger.debug(f"Processed after stripping letters: {processed}") # try: # en_num = re.search(r"\-\d{1}$", processed).group() # processed = rreplace(processed, en_num, "") # except AttributeError: # en_num = "1" # en_num = en_num.strip("-") # logger.debug(f"Processed after en_num: {processed}") try: plate_num = re.search(r"\-\d{1}R?\d?$", processed).group() processed = rreplace(processed, plate_num, "") except AttributeError: plate_num = "1" plate_num = plate_num.strip("-") # logger.debug(f"Plate num: {plate_num}") repeat_num = re.search(r"R(?P\d)?$", "PBS20240426-2R").groups()[0] if repeat_num is None and "R" in plate_num: repeat_num = "1" plate_num = re.sub(r"R", rf"R{repeat_num}", plate_num) # logger.debug(f"Processed after plate-num: {processed}") day = re.search(r"\d{2}$", processed).group() processed = rreplace(processed, day, "") # logger.debug(f"Processed after day: {processed}") month = re.search(r"\d{2}$", processed).group() processed = rreplace(processed, month, "") processed = processed.replace("--", "") # logger.debug(f"Processed after month: {processed}") year = re.search(r'^(?:\d{2})?\d{2}', processed).group() year = f"20{year}" final_en_name = f"PBS{year}{month}{day}-{plate_num}" # logger.debug(f"Final EN name: {final_en_name}") return final_en_name @classmethod def get_regex(cls) -> str: """ Retrieves string for regex construction Returns: str: string for regex construction. """ return "(?P(\\d{4}-\\d{2}-\\d{2}(?:-|_)(?:\\d_)?artic)|(RSL(?:-|_)?AR(?:-|_)?20\\d{2}-?\\d{2}-?\\d{2}(?:(_|-)\\d?(\\D|$)R?\\d?)?))" @classmethod def finalize_parse(cls, input_dict: dict, xl: pd.ExcelFile | None = None, info_map: dict | None = None) -> dict: """ Performs any final custom parsing of the excel file. Extends parent Args: input_dict (dict): Parser product up to this point. xl (pd.ExcelFile | None, optional): Excel submission form. Defaults to None. info_map (dict | None, optional): Map of information locations from SubmissionType. Defaults to None. plate_map (dict | None, optional): Constructed plate map of samples. Defaults to None. Returns: dict: Updated parser product. """ input_dict = super().finalize_parse(input_dict, xl, info_map) # logger.debug(f"Incoming input_dict: {pformat(input_dict)}") # TODO: Move to validator? for sample in input_dict['samples']: # logger.debug(f"Sample: {sample}") if re.search(r"^NTC", sample['submitter_id']): sample['submitter_id'] = f"{sample['submitter_id']}-WWG-{input_dict['rsl_plate_num']['value']}" input_dict['csv'] = xl["hitpicks_csv_to_export"] return input_dict @classmethod def custom_info_writer(cls, input_excel: Workbook, info: dict | None = None, backup: bool = False) -> Workbook: """ Adds custom autofill methods for submission. Extends Parent Args: input_excel (Workbook): initial workbook. info (dict | None, optional): dictionary of additional info. Defaults to None. backup (bool, optional): Whether this is part of a backup operation. Defaults to False. Returns: Workbook: Updated workbook """ input_excel = super().custom_info_writer(input_excel, info, backup) # worksheet = input_excel["First Strand List"] # samples = cls.query(rsl_number=info['rsl_plate_num']['value']).submission_sample_associations # samples = sorted(samples, key=attrgetter('column', 'row')) # logger.debug(f"Info:\n{pformat(info)}") check = 'source_plates' in info.keys() and info['source_plates'] is not None if check: worksheet = input_excel['First Strand List'] start_row = 8 for iii, plate in enumerate(info['source_plates']['value']): # logger.debug(f"Plate: {plate}") row = start_row + iii try: worksheet.cell(row=row, column=3, value=plate['plate']) except TypeError: pass try: worksheet.cell(row=row, column=4, value=plate['starting_sample']) except TypeError: pass check = 'gel_info' in info.keys() and info['gel_info']['value'] is not None if check: # logger.debug(f"Gel info check passed.") if info['gel_info'] != None: # logger.debug(f"Gel info not none.") worksheet = input_excel['Egel results'] start_row = 21 start_column = 15 for row, ki in enumerate(info['gel_info']['value'], start=1): # logger.debug(f"ki: {ki}") # logger.debug(f"vi: {vi}") row = start_row + row worksheet.cell(row=row, column=start_column, value=ki['name']) for jjj, kj in enumerate(ki['values'], start=1): # logger.debug(f"kj: {kj}") # logger.debug(f"vj: {vj}") column = start_column + 2 + jjj worksheet.cell(row=start_row, column=column, value=kj['name']) # logger.debug(f"Writing {kj['name']} with value {kj['value']} to row {row}, column {column}") try: worksheet.cell(row=row, column=column, value=kj['value']) except AttributeError: logger.error(f"Failed {kj['name']} with value {kj['value']} to row {row}, column {column}") check = 'gel_image' in info.keys() and info['gel_image']['value'] is not None if check: if info['gel_image'] != None: worksheet = input_excel['Egel results'] # logger.debug(f"We got an image: {info['gel_image']}") with ZipFile(cls.__directory_path__.joinpath("submission_imgs.zip")) as zipped: z = zipped.extract(info['gel_image']['value'], Path(TemporaryDirectory().name)) img = OpenpyxlImage(z) img.height = 400 # insert image height in pixels as float or int (e.g. 305.5) img.width = 600 img.anchor = 'B9' worksheet.add_image(img) return input_excel @classmethod def get_details_template(cls, base_dict: dict) -> Tuple[dict, Template]: """ Get the details jinja template for the correct class. Extends parent Args: base_dict (dict): incoming dictionary of Submission fields Returns: Tuple[dict, Template]: (Updated dictionary, Template to be rendered) """ base_dict, template = super().get_details_template(base_dict=base_dict) base_dict['excluded'] += ['gel_info', 'gel_image', 'headers', "dna_core_submission_number", "source_plates", "gel_controls"] base_dict['DNA Core ID'] = base_dict['dna_core_submission_number'] check = 'gel_info' in base_dict.keys() and base_dict['gel_info'] != None if check: headers = [item['name'] for item in base_dict['gel_info'][0]['values']] base_dict['headers'] = [''] * (4 - len(headers)) base_dict['headers'] += headers # logger.debug(f"Gel info: {pformat(base_dict['headers'])}") check = 'gel_image' in base_dict.keys() and base_dict['gel_image'] != None if check: with ZipFile(cls.__directory_path__.joinpath("submission_imgs.zip")) as zipped: base_dict['gel_image'] = base64.b64encode(zipped.read(base_dict['gel_image'])).decode('utf-8') return base_dict, template def adjust_to_dict_samples(self, backup: bool = False) -> List[dict]: """ Updates sample dictionaries with custom values Args: backup (bool, optional): Whether to perform backup. Defaults to False. Returns: List[dict]: Updated dictionaries """ # logger.debug(f"Hello from {self.__class__.__name__} dictionary sample adjuster.") output = [] set_plate = None for assoc in self.submission_sample_associations: dicto = assoc.to_sub_dict() for item in self.source_plates: old_plate = WastewaterAssociation.query(submission=item['plate'], sample=assoc.sample, limit=1) if old_plate is not None: set_plate = old_plate.submission.rsl_plate_num break dicto['plate_name'] = set_plate output.append(dicto) return output def custom_context_events(self) -> dict: """ Creates dictionary of str:function to be passed to context menu. Extends parent Returns: dict: dictionary of functions """ events = super().custom_context_events() events['Gel Box'] = self.gel_box return events def set_attribute(self, key: str, value): super().set_attribute(key=key, value=value) if key == 'gel_info': if len(self.gel_info) > 3: self.gel_info = self.gel_info[-3:] def gel_box(self, obj): """ Creates widget to perform gel viewing operations Args: obj (_type_): parent widget """ from frontend.widgets.gel_checker import GelBox from frontend.widgets import select_open_file fname = select_open_file(obj=obj, file_extension="jpg") dlg = GelBox(parent=obj, img_path=fname, submission=self) if dlg.exec(): self.dna_core_submission_number, img_path, output, comment = dlg.parse_form() self.gel_image = img_path.name self.gel_info = output dt = datetime.strftime(datetime.now(), "%Y-%m-%d %H:%M:%S") com = dict(text=comment, name=getuser(), time=dt) if com['text'] != None and com['text'] != "": if self.comment is not None: self.comment.append(com) else: self.comment = [com] # logger.debug(pformat(self.gel_info)) with ZipFile(self.__directory_path__.joinpath("submission_imgs.zip"), 'a') as zipf: # Add a file located at the source_path to the destination within the zip # file. It will overwrite existing files if the names collide, but it # will give a warning zipf.write(img_path, self.gel_image) self.save() # Sample Classes class BasicSample(BaseClass): """ Base of basic sample which polymorphs into BCSample and WWSample """ id = Column(INTEGER, primary_key=True) #: primary key submitter_id = Column(String(64), nullable=False, unique=True) #: identification from submitter sample_type = Column(String(32)) #: subtype of sample sample_submission_associations = relationship( "SubmissionSampleAssociation", back_populates="sample", cascade="all, delete-orphan", ) #: associated submissions __mapper_args__ = { "polymorphic_identity": "Basic Sample", "polymorphic_on": case( (sample_type == "Wastewater Sample", "Wastewater Sample"), (sample_type == "Wastewater Artic Sample", "Wastewater Sample"), (sample_type == "Bacterial Culture Sample", "Bacterial Culture Sample"), else_="Basic Sample" ), "with_polymorphic": "*", } submissions = association_proxy("sample_submission_associations", "submission") #: proxy of associated submissions @validates('submitter_id') def create_id(self, key: str, value: str): """ Creates a random string as a submitter id. Args: key (str): name of attribute value (str): submitter id Returns: str: new (or unchanged) submitter id """ if value == None: return uuid.uuid4().hex.upper() else: return value def __repr__(self) -> str: try: return f"<{self.sample_type.replace('_', ' ').title().replace(' ', '')}({self.submitter_id})>" except AttributeError: return f" List[str]: output = [item.name for item in cls.__table__.columns if isinstance(item.type, TIMESTAMP)] if issubclass(cls, BasicSample) and not cls.__name__ == "BasicSample": output += BasicSample.timestamps() return output def to_sub_dict(self, full_data: bool = False) -> dict: """ gui friendly dictionary, extends parent method. Returns: dict: well location and name (sample id, organism) NOTE: keys must sync with WWSample to_sub_dict above """ # logger.debug(f"Converting {self} to dict.") # start = time() sample = {} sample['Submitter ID'] = self.submitter_id sample['Sample Type'] = self.sample_type if full_data: sample['submissions'] = sorted([item.to_sub_dict() for item in self.sample_submission_associations], key=itemgetter('submitted_date')) # logger.debug(f"Done converting {self} after {time()-start}") return sample def set_attribute(self, name: str, value): """ Custom attribute setter (depreciated over built-in __setattr__) Args: name (str): name of attribute value (_type_): value to be set to attribute """ try: setattr(self, name, value) except AttributeError: logger.error(f"Attribute {name} not found") @classmethod def find_polymorphic_subclass(cls, polymorphic_identity: str | None = None, attrs: dict | None = None) -> BasicSample: """ Retrieves subclasses of BasicSample based on type name. Args: attrs (dict | None, optional): name: value of attributes in the wanted subclass polymorphic_identity (str | None, optional): Name of subclass fed to polymorphic identity. Defaults to None. Returns: BasicSample: Subclass of interest. """ if isinstance(polymorphic_identity, dict): polymorphic_identity = polymorphic_identity['value'] if polymorphic_identity is not None: try: return [item for item in cls.__subclasses__() if item.__mapper_args__['polymorphic_identity'] == polymorphic_identity][0] except Exception as e: logger.error(f"Could not get polymorph {polymorphic_identity} of {cls} due to {e}") model = cls else: model = cls if attrs is None or len(attrs) == 0: return model if any([not hasattr(cls, attr) for attr in attrs.keys()]): # looks for first model that has all included kwargs try: model = [subclass for subclass in cls.__subclasses__() if all([hasattr(subclass, attr) for attr in attrs.keys()])][0] except IndexError as e: raise AttributeError( f"Couldn't find existing class/subclass of {cls} with all attributes:\n{pformat(attrs.keys())}") logger.info(f"Recruiting model: {model}") return model @classmethod def sql_enforcer(cls, pyd_sample:"PydSample"): return pyd_sample @classmethod def parse_sample(cls, input_dict: dict) -> dict: f""" Custom sample parser Args: input_dict (dict): Basic parser results. Returns: dict: Updated parser results. """ # logger.debug(f"Hello from {cls.__name__} sample parser!") return input_dict @classmethod def get_details_template(cls, base_dict: dict) -> Tuple[dict, Template]: """ Get the details jinja template for the correct class Args: base_dict (dict): incoming dictionary of Submission fields Returns: Tuple(dict, Template): (Updated dictionary, Template to be rendered) """ base_dict['excluded'] = ['submissions', 'excluded', 'colour', 'tooltip'] env = jinja_template_loading() temp_name = f"{cls.__name__.lower()}_details.html" # logger.debug(f"Returning template: {temp_name}") try: template = env.get_template(temp_name) except TemplateNotFound as e: logger.error(f"Couldn't find template {e}") template = env.get_template("basicsample_details.html") return base_dict, template @classmethod @setup_lookup def query(cls, submitter_id: str | None = None, sample_type: str | None = None, limit: int = 0, **kwargs ) -> BasicSample | List[BasicSample]: """ Lookup samples in the database by a number of parameters. Args: submitter_id (str | None, optional): Name of the sample (limits results to 1). Defaults to None. sample_type (str | None, optional): Sample type. Defaults to None. limit (int, optional): Maximum number of results to return (0 = all). Defaults to 0. Returns: models.BasicSample|List[models.BasicSample]: Sample(s) of interest. """ if sample_type is None: # model = cls.find_subclasses(attrs=kwargs) model = cls.find_polymorphic_subclass(attrs=kwargs) else: model = cls.find_polymorphic_subclass(polymorphic_identity=sample_type) # logger.debug(f"Length of kwargs: {len(kwargs)}") # model = models.BasicSample.find_subclasses(ctx=ctx, attrs=kwargs) # query: Query = setup_lookup(ctx=ctx, locals=locals()).query(model) query: Query = cls.__database_session__.query(model) match submitter_id: case str(): # logger.debug(f"Looking up {model} with submitter id: {submitter_id}") query = query.filter(model.submitter_id == submitter_id) limit = 1 case _: pass # match sample_type: # case str(): # logger.warning(f"Looking up samples with sample_type is disabled.") # # query = query.filter(models.BasicSample.sample_type==sample_type) # case _: # pass # for k, v in kwargs.items(): # attr = getattr(model, k) # # logger.debug(f"Got attr: {attr}") # query = query.filter(attr==v) # if len(kwargs) > 0: # limit = 1 return cls.execute_query(query=query, model=model, limit=limit, **kwargs) # return cls.execute_query(query=query, limit=limit) @classmethod def query_or_create(cls, sample_type: str | None = None, **kwargs) -> BasicSample: """ Queries for a sample, if none found creates a new one. Args: sample_type (str): sample subclass name Raises: ValueError: Raised if no kwargs are passed to narrow down instances ValueError: Raised if unallowed key is given. Returns: _type_: _description_ """ disallowed = ["id"] if kwargs == {}: raise ValueError("Need to narrow down query or the first available instance will be returned.") # for key in kwargs.keys(): # if key in disallowed: # raise ValueError( # f"{key} is not allowed as a query argument as it could lead to creation of duplicate objects.") sanitized_kwargs = {k:v for k,v in kwargs.items() if k not in disallowed} instance = cls.query(sample_type=sample_type, limit=1, **kwargs) # logger.debug(f"Retrieved instance: {instance}") if instance is None: used_class = cls.find_polymorphic_subclass(attrs=sanitized_kwargs, polymorphic_identity=sample_type) instance = used_class(**sanitized_kwargs) instance.sample_type = sample_type logger.debug(f"Creating instance: {instance}") return instance def delete(self): raise AttributeError(f"Delete not implemented for {self.__class__}") #Below are the custom sample types class WastewaterSample(BasicSample): """ Derivative wastewater sample """ id = Column(INTEGER, ForeignKey('_basicsample.id'), primary_key=True) ww_processing_num = Column(String(64)) #: wastewater processing number ww_full_sample_id = Column(String(64)) #: full id given by entrics rsl_number = Column(String(64)) #: rsl plate identification number collection_date = Column(TIMESTAMP) #: Date sample collected received_date = Column(TIMESTAMP) #: Date sample received notes = Column(String(2000)) #: notes from submission form sample_location = Column(String(8)) #: location on 24 well plate __mapper_args__ = dict(polymorphic_identity="Wastewater Sample", polymorphic_load="inline", inherit_condition=(id == BasicSample.id)) @classmethod def get_default_info(cls, *args): dicto = super().get_default_info(*args) match dicto: case dict(): dicto['singles'] += ['ww_processing_num'] output = {} for k, v in dicto.items(): if len(args) > 0 and k not in args: # logger.debug(f"Don't want {k}") continue else: output[k] = v if len(args) == 1: return output[args[0]] case list(): if "singles" in args: dicto += ['ww_processing_num'] return dicto case _: pass def to_sub_dict(self, full_data: bool = False) -> dict: """ gui friendly dictionary, extends parent method. Returns: dict: well location and name (sample id, organism) NOTE: keys must sync with WWSample to_sub_dict above """ sample = super().to_sub_dict(full_data=full_data) sample['WW Processing Num'] = self.ww_processing_num sample['Sample Location'] = self.sample_location sample['Received Date'] = self.received_date sample['Collection Date'] = self.collection_date return sample @classmethod def parse_sample(cls, input_dict: dict) -> dict: """ Custom sample parser. Extends parent Args: input_dict (dict): Basic parser results. Returns: dict: Updated parser results. """ output_dict = super().parse_sample(input_dict) # logger.debug(f"Initial sample dict: {pformat(output_dict)}") disallowed = ["", None, "None"] try: check = output_dict['rsl_number'] in [None, "None"] except KeyError: check = True if check: output_dict['rsl_number'] = "RSL-WW-" + output_dict['ww_processing_num'] if output_dict['ww_full_sample_id'] is not None and output_dict["submitter_id"] in disallowed: output_dict["submitter_id"] = output_dict['ww_full_sample_id'] # if re.search(r"^NTC", output_dict['submitter_id']): # output_dict['submitter_id'] = "Artic-" + output_dict['submitter_id'] # Ad hoc repair method for WW (or possibly upstream) not formatting some dates properly. # NOTE: Should be handled by validator. # match output_dict['collection_date']: # case str(): # try: # output_dict['collection_date'] = parse(output_dict['collection_date']).date() # except ParserError: # logger.error(f"Problem parsing collection_date: {output_dict['collection_date']}") # output_dict['collection_date'] = date(1970, 1, 1) # case datetime(): # output_dict['collection_date'] = output_dict['collection_date'].date() # case date(): # pass # case _: # del output_dict['collection_date'] return output_dict def get_previous_ww_submission(self, current_artic_submission: WastewaterArtic): # assocs = [assoc for assoc in self.sample_submission_associations if assoc.submission.submission_type_name=="Wastewater"] # subs = self.submissions[:self.submissions.index(current_artic_submission)] try: plates = [item['plate'] for item in current_artic_submission.source_plates] except TypeError as e: logger.error(f"source_plates must not be present") plates = [item.rsl_plate_num for item in self.submissions[:self.submissions.index(current_artic_submission)]] subs = [sub for sub in self.submissions if sub.rsl_plate_num in plates] # logger.debug(f"Submissions: {subs}") try: return subs[-1] except IndexError: return None class BacterialCultureSample(BasicSample): """ base of bacterial culture sample """ id = Column(INTEGER, ForeignKey('_basicsample.id'), primary_key=True) organism = Column(String(64)) #: bacterial specimen concentration = Column(String(16)) #: sample concentration control = relationship("Control", back_populates="sample", uselist=False) __mapper_args__ = dict(polymorphic_identity="Bacterial Culture Sample", polymorphic_load="inline", inherit_condition=(id == BasicSample.id)) def to_sub_dict(self, full_data: bool = False) -> dict: """ gui friendly dictionary, extends parent method. Returns: dict: well location and name (sample id, organism) NOTE: keys must sync with WWSample to_sub_dict above """ # start = time() sample = super().to_sub_dict(full_data=full_data) sample['Name'] = self.submitter_id sample['Organism'] = self.organism sample['Concentration'] = self.concentration if self.control != None: sample['colour'] = [0, 128, 0] sample['tooltip'] = f"Control: {self.control.controltype.name} - {self.control.controltype.targets}" # logger.debug(f"Done converting to {self} to dict after {time()-start}") return sample # Submission to Sample Associations class SubmissionSampleAssociation(BaseClass): """ table containing submission/sample associations DOC: https://docs.sqlalchemy.org/en/14/orm/extensions/associationproxy.html """ id = Column(INTEGER, unique=True, nullable=False) #: id to be used for inheriting purposes sample_id = Column(INTEGER, ForeignKey("_basicsample.id"), nullable=False) #: id of associated sample submission_id = Column(INTEGER, ForeignKey("_basicsubmission.id"), primary_key=True) #: id of associated submission row = Column(INTEGER, primary_key=True) #: row on the 96 well plate column = Column(INTEGER, primary_key=True) #: column on the 96 well plate submission_rank = Column(INTEGER, nullable=False, default=0) #: Location in sample list # reference to the Submission object submission = relationship(BasicSubmission, back_populates="submission_sample_associations") #: associated submission # reference to the Sample object sample = relationship(BasicSample, back_populates="sample_submission_associations") #: associated sample base_sub_type = Column(String) #: string of subtype name # Refers to the type of parent. # Hooooooo boy, polymorphic association type, now we're getting into the weeds! __mapper_args__ = { "polymorphic_identity": "Basic Association", "polymorphic_on": base_sub_type, "with_polymorphic": "*", } def __init__(self, submission: BasicSubmission = None, sample: BasicSample = None, row: int = 1, column: int = 1, id: int | None = None, submission_rank: int = 0): self.submission = submission self.sample = sample self.row = row self.column = column self.submission_rank = submission_rank if id is not None: self.id = id else: self.id = self.__class__.autoincrement_id() # logger.debug(f"Using submission sample association id: {self.id}") def __repr__(self) -> str: try: return f"<{self.__class__.__name__}({self.submission.rsl_plate_num} & {self.sample.submitter_id})" except AttributeError as e: logger.error(f"Unable to construct __repr__ due to: {e}") return super().__repr__() def to_sub_dict(self) -> dict: """ Returns a sample dictionary updated with instance information Returns: dict: Updated dictionary with row, column and well updated """ # Get sample info # logger.debug(f"Running {self.__repr__()}") sample = self.sample.to_sub_dict() # logger.debug("Sample conversion complete.") sample['Name'] = self.sample.submitter_id sample['Row'] = self.row sample['Column'] = self.column try: sample['Well'] = f"{row_map[self.row]}{self.column}" except KeyError as e: logger.error(f"Unable to find row {self.row} in row_map.") sample['Well'] = None sample['Plate Name'] = self.submission.rsl_plate_num sample['positive'] = False sample['submitted_date'] = self.submission.submitted_date sample['submission_rank'] = self.submission_rank return sample def to_hitpick(self) -> dict | None: """ Outputs a dictionary usable for html plate maps. Returns: dict: dictionary of sample id, row and column in elution plate """ # Since there is no PCR, negliable result is necessary. sample = self.to_sub_dict() # logger.debug(f"Sample dict to hitpick: {sample}") env = jinja_template_loading() template = env.get_template("tooltip.html") tooltip_text = template.render(fields=sample) try: tooltip_text += sample['tooltip'] except KeyError: pass sample.update(dict(Name=self.sample.submitter_id[:10], tooltip=tooltip_text)) return sample @classmethod def autoincrement_id(cls) -> int: """ Increments the association id automatically Returns: int: incremented id """ try: return max([item.id for item in cls.query()]) + 1 except ValueError as e: logger.error(f"Problem incrementing id: {e}") return 1 @classmethod def find_polymorphic_subclass(cls, polymorphic_identity: str | None = None) -> SubmissionSampleAssociation: """ Retrieves subclasses of SubmissionSampleAssociation based on type name. Args: polymorphic_identity (str | None, optional): Name of subclass fed to polymorphic identity. Defaults to None. Returns: SubmissionSampleAssociation: Subclass of interest. """ if isinstance(polymorphic_identity, dict): polymorphic_identity = polymorphic_identity['value'] if polymorphic_identity == None: output = cls else: try: output = [item for item in cls.__subclasses__() if item.__mapper_args__['polymorphic_identity'] == polymorphic_identity][0] except Exception as e: logger.error(f"Could not get polymorph {polymorphic_identity} of {cls} due to {e}") output = cls # logger.debug(f"Using SubmissionSampleAssociation subclass: {output}") return output @classmethod @setup_lookup def query(cls, submission: BasicSubmission | str | None = None, exclude_submission_type: str | None = None, sample: BasicSample | str | None = None, row: int = 0, column: int = 0, limit: int = 0, chronologic: bool = False, reverse: bool = False, **kwargs ) -> SubmissionSampleAssociation | List[SubmissionSampleAssociation]: """ Lookup junction of Submission and Sample in the database Args: submission (models.BasicSubmission | str | None, optional): Submission of interest. Defaults to None. sample (models.BasicSample | str | None, optional): Sample of interest. Defaults to None. row (int, optional): Row of the sample location on submission plate. Defaults to 0. column (int, optional): Column of the sample location on the submission plate. Defaults to 0. limit (int, optional): Maximum number of results to return (0 = all). Defaults to 0. chronologic (bool, optional): Return results in chronologic order. Defaults to False. Returns: models.SubmissionSampleAssociation|List[models.SubmissionSampleAssociation]: Junction(s) of interest """ query: Query = cls.__database_session__.query(cls) match submission: case BasicSubmission(): # logger.debug(f"Lookup SampleSubmissionAssociation with submission BasicSubmission {submission}") query = query.filter(cls.submission == submission) case str(): # logger.debug(f"Lookup SampleSubmissionAssociation with submission str {submission}") query = query.join(BasicSubmission).filter(BasicSubmission.rsl_plate_num == submission) case _: pass match sample: case BasicSample(): # logger.debug(f"Lookup SampleSubmissionAssociation with sample BasicSample {sample}") query = query.filter(cls.sample == sample) case str(): # logger.debug(f"Lookup SampleSubmissionAssociation with sample str {sample}") query = query.join(BasicSample).filter(BasicSample.submitter_id == sample) case _: pass if row > 0: query = query.filter(cls.row == row) if column > 0: query = query.filter(cls.column == column) match exclude_submission_type: case str(): # logger.debug(f"filter SampleSubmissionAssociation to exclude submission type {exclude_submission_type}") query = query.join(BasicSubmission).filter( BasicSubmission.submission_type_name != exclude_submission_type) case _: pass # logger.debug(f"Query count: {query.count()}") if reverse and not chronologic: query = query.order_by(BasicSubmission.id.desc()) if chronologic: if reverse: query = query.order_by(BasicSubmission.submitted_date.desc()) else: query = query.order_by(BasicSubmission.submitted_date) return cls.execute_query(query=query, limit=limit, **kwargs) @classmethod def query_or_create(cls, association_type: str = "Basic Association", submission: BasicSubmission | str | None = None, sample: BasicSample | str | None = None, id:int|None=None, **kwargs) -> SubmissionSampleAssociation: """ Queries for an association, if none exists creates a new one. Args: association_type (str, optional): Subclass name. Defaults to "Basic Association". submission (BasicSubmission | str | None, optional): associated submission. Defaults to None. sample (BasicSample | str | None, optional): associated sample. Defaults to None. id (int | None, optional): association id. Defaults to None. Returns: SubmissionSampleAssociation: Queried or new association. """ # logger.debug(f"Attempting create or query with {kwargs}") match submission: case BasicSubmission(): pass case str(): submission = BasicSubmission.query(rsl_plate_num=submission) case _: raise ValueError() match sample: case BasicSample(): pass case str(): sample = BasicSample.query(submitter_id=sample) case _: raise ValueError() try: row = kwargs['row'] except KeyError: row = None try: column = kwargs['column'] except KeyError: column = None try: instance = cls.query(submission=submission, sample=sample, row=row, column=column, limit=1) except StatementError: instance = None if instance is None: # sanitized_kwargs = {k:v for k,v in kwargs.items() if k not in ['id']} used_cls = cls.find_polymorphic_subclass(polymorphic_identity=association_type) # instance = used_cls(submission=submission, sample=sample, id=id, **kwargs) instance = used_cls(submission=submission, sample=sample, id=id, **kwargs) return instance def delete(self): raise AttributeError(f"Delete not implemented for {self.__class__}") class WastewaterAssociation(SubmissionSampleAssociation): id = Column(INTEGER, ForeignKey("_submissionsampleassociation.id"), primary_key=True) ct_n1 = Column(FLOAT(2)) #: AKA ct for N1 ct_n2 = Column(FLOAT(2)) #: AKA ct for N2 n1_status = Column(String(32)) #: positive or negative for N1 n2_status = Column(String(32)) #: positive or negative for N2 pcr_results = Column(JSON) #: imported PCR status from QuantStudio __mapper_args__ = dict(polymorphic_identity="Wastewater Association", polymorphic_load="inline", inherit_condition=(id == SubmissionSampleAssociation.id)) def to_sub_dict(self) -> dict: """ Returns a sample dictionary updated with instance information. Extends parent Returns: dict: Updated dictionary with row, column and well updated """ sample = super().to_sub_dict() sample['ct'] = f"({self.ct_n1}, {self.ct_n2})" try: sample['positive'] = any(["positive" in item for item in [self.n1_status, self.n2_status]]) except (TypeError, AttributeError) as e: logger.error(f"Couldn't check positives for {self.sample.rsl_number}. Looks like there isn't PCR data.") return sample def to_hitpick(self) -> dict | None: """ Outputs a dictionary usable for html plate maps. Extends parent Returns: dict: dictionary of sample id, row and column in elution plate """ sample = super().to_hitpick() try: sample[ 'tooltip'] += f"
- ct N1: {'{:.2f}'.format(self.ct_n1)} ({self.n1_status})
- ct N2: {'{:.2f}'.format(self.ct_n2)} ({self.n2_status})" except (TypeError, AttributeError) as e: logger.error(f"Couldn't set tooltip for {self.sample.rsl_number}. Looks like there isn't PCR data.") return sample @classmethod def autoincrement_id(cls) -> int: """ Increments the association id automatically. Overrides parent Returns: int: incremented id """ try: parent = [base for base in cls.__bases__ if base.__name__ == "SubmissionSampleAssociation"][0] return max([item.id for item in parent.query()]) + 1 except ValueError as e: logger.error(f"Problem incrementing id: {e}") return 1