diff --git a/CHANGELOG.md b/CHANGELOG.md index bed9412..12c914e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +## 202306.01 + +- Large scale shake up of import and scraper functions. +- Addition of Artic scrapers. + ## 202305.05 - Hitpicking now creates source plate map image. diff --git a/alembic.ini b/alembic.ini index 37e6f8c..b8f47d4 100644 --- a/alembic.ini +++ b/alembic.ini @@ -55,9 +55,9 @@ version_path_separator = os # Use os.pathsep. Default configuration used for ne # are written from script.py.mako # output_encoding = utf-8 -sqlalchemy.url = sqlite:///L:\Robotics Laboratory Support\Submissions\submissions.db -; sqlalchemy.url = sqlite:///C:\Users\lwark\Documents\Archives\DB_backups\submissions-20230427.db -; msqlalchemy.url = sqlite:///C:\Users\lwark\Documents\python\submissions\tests\test_assets\submissions_test.db +; sqlalchemy.url = sqlite:///L:\Robotics Laboratory Support\Submissions\submissions.db +sqlalchemy.url = sqlite:///C:\Users\lwark\Documents\Archives\DB_backups\submissions-20230605.db +; sqlalchemy.url = sqlite:///C:\Users\lwark\Documents\python\submissions\tests\test_assets\submissions_test.db [post_write_hooks] diff --git a/alembic/versions/8d32abdafe2b_moved_artic_info_to_ww_samples.py b/alembic/versions/8d32abdafe2b_moved_artic_info_to_ww_samples.py new file mode 100644 index 0000000..d0288fb --- /dev/null +++ b/alembic/versions/8d32abdafe2b_moved_artic_info_to_ww_samples.py @@ -0,0 +1,30 @@ +"""moved artic info to ww_samples + +Revision ID: 8d32abdafe2b +Revises: aac569c672de +Create Date: 2023-06-05 10:10:37.650733 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = '8d32abdafe2b' +down_revision = 'aac569c672de' +branch_labels = None +depends_on = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! 
### + with op.batch_alter_table('_ww_samples', schema=None) as batch_op: + batch_op.add_column(sa.Column('artic_well_number', sa.String(length=8), nullable=True)) + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + with op.batch_alter_table('_ww_samples', schema=None) as batch_op: + batch_op.drop_column('artic_well_number') + # ### end Alembic commands ### diff --git a/alembic/versions/aac569c672de_added_in_artic_information.py b/alembic/versions/aac569c672de_added_in_artic_information.py new file mode 100644 index 0000000..b3d1f6c --- /dev/null +++ b/alembic/versions/aac569c672de_added_in_artic_information.py @@ -0,0 +1,63 @@ +"""added in artic information + +Revision ID: aac569c672de +Revises: 64fec6271a50 +Create Date: 2023-06-02 15:14:13.726489 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import sqlite + +# revision identifiers, used by Alembic. +revision = 'aac569c672de' +down_revision = '64fec6271a50' +branch_labels = None +depends_on = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + # op.create_table('_artic_samples', + # sa.Column('id', sa.INTEGER(), nullable=False), + # sa.Column('well_number', sa.String(length=8), nullable=True), + # sa.Column('rsl_plate_id', sa.INTEGER(), nullable=True), + # sa.Column('ww_sample_full_id', sa.String(length=64), nullable=False), + # sa.Column('lims_sample_id', sa.String(length=64), nullable=False), + # sa.Column('ct_1', sa.FLOAT(precision=2), nullable=True), + # sa.Column('ct_2', sa.FLOAT(precision=2), nullable=True), + # sa.ForeignKeyConstraint(['rsl_plate_id'], ['_submissions.id'], name='fk_WWA_submission_id', ondelete='SET NULL'), + # sa.PrimaryKeyConstraint('id') + # ) + op.drop_table('_alembic_tmp__submissions') + + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! 
### + op.create_table('_alembic_tmp__submissions', + sa.Column('id', sa.INTEGER(), nullable=False), + sa.Column('rsl_plate_num', sa.VARCHAR(length=32), nullable=False), + sa.Column('submitter_plate_num', sa.VARCHAR(length=127), nullable=True), + sa.Column('submitted_date', sa.TIMESTAMP(), nullable=True), + sa.Column('submitting_lab_id', sa.INTEGER(), nullable=True), + sa.Column('sample_count', sa.INTEGER(), nullable=True), + sa.Column('extraction_kit_id', sa.INTEGER(), nullable=True), + sa.Column('submission_type', sa.VARCHAR(length=32), nullable=True), + sa.Column('technician', sa.VARCHAR(length=64), nullable=True), + sa.Column('reagents_id', sa.VARCHAR(), nullable=True), + sa.Column('extraction_info', sqlite.JSON(), nullable=True), + sa.Column('run_cost', sa.FLOAT(), nullable=True), + sa.Column('uploaded_by', sa.VARCHAR(length=32), nullable=True), + sa.Column('pcr_info', sqlite.JSON(), nullable=True), + sa.Column('comment', sqlite.JSON(), nullable=True), + sa.ForeignKeyConstraint(['extraction_kit_id'], ['_kits.id'], ondelete='SET NULL'), + sa.ForeignKeyConstraint(['reagents_id'], ['_reagents.id'], ondelete='SET NULL'), + sa.ForeignKeyConstraint(['submitting_lab_id'], ['_organizations.id'], ondelete='SET NULL'), + sa.PrimaryKeyConstraint('id'), + sa.UniqueConstraint('rsl_plate_num'), + sa.UniqueConstraint('submitter_plate_num') + ) + # op.drop_table('_artic_samples') + # ### end Alembic commands ### diff --git a/src/submissions/__init__.py b/src/submissions/__init__.py index e3a8397..3b7d213 100644 --- a/src/submissions/__init__.py +++ b/src/submissions/__init__.py @@ -4,7 +4,7 @@ from pathlib import Path # Version of the realpython-reader package __project__ = "submissions" -__version__ = "202305.4b" +__version__ = "202306.1b" __author__ = {"name":"Landon Wark", "email":"Landon.Wark@phac-aspc.gc.ca"} __copyright__ = "2022-2023, Government of Canada" @@ -27,4 +27,8 @@ class bcolors: # for the submission itself as well as for any samples you can pull out of that 
same workbook. # Second, you will have to update the model in backend.db.models.submissions and provide a new polymorph to the BasicSubmission object. # The BSO should hold the majority of the general info. -# You can also update any of the parsers to pull out any custom info you need, like enforcing RSL plate numbers, scraping PCR results, etc. \ No newline at end of file +# You can also update any of the parsers to pull out any custom info you need, like enforcing RSL plate numbers, scraping PCR results, etc. + +# Landon, this is your slightly less past self here. For the most part, Past Landon has not screwed us. I've been able to add in the +# Wastewater Artic with minimal difficulties, except that the parser of the non-standard, user-generated excel sheets required slightly +# more work. \ No newline at end of file diff --git a/src/submissions/backend/db/functions.py b/src/submissions/backend/db/functions.py index b8fcf2c..cc769c3 100644 --- a/src/submissions/backend/db/functions.py +++ b/src/submissions/backend/db/functions.py @@ -21,6 +21,7 @@ import numpy as np import yaml from pathlib import Path + logger = logging.getLogger(f"submissions.{__name__}") # The below _should_ allow automatic creation of foreign keys in the database @@ -41,12 +42,19 @@ def store_submission(ctx:dict, base_submission:models.BasicSubmission) -> None|d Returns: None|dict : object that indicates issue raised for reporting in gui """ - from tools import format_rsl_number + from tools import RSLNamer logger.debug(f"Hello from store_submission") # Add all samples to sample table - base_submission.rsl_plate_num = format_rsl_number(base_submission.rsl_plate_num) + typer = RSLNamer(base_submission.rsl_plate_num) + base_submission.rsl_plate_num = typer.parsed_name for sample in base_submission.samples: - sample.rsl_plate = base_submission + logger.debug(f"Typer: {typer.submission_type}") + # Suuuuuper hacky way to be sure that the artic doesn't overwrite the ww plate in a ww sample + # 
need something more elegant + if "_artic" not in typer.submission_type: + sample.rsl_plate = base_submission + else: + sample.artic_rsl_plate = base_submission logger.debug(f"Attempting to add sample: {sample.to_string()}") try: ctx['database_session'].add(sample) @@ -152,7 +160,7 @@ def construct_submission_info(ctx:dict, info_dict:dict) -> models.BasicSubmissio # Because of unique constraint, there will be problems with # multiple submissions named 'None', so... logger.debug(f"Submitter plate id: {info_dict[item]}") - if info_dict[item] == None or info_dict[item] == "None": + if info_dict[item] == None or info_dict[item] == "None" or info_dict[item] == "": logger.debug(f"Got None as a submitter plate number, inserting random string to preserve database unique constraint.") info_dict[item] = uuid.uuid4().hex.upper() field_value = info_dict[item] @@ -170,8 +178,6 @@ def construct_submission_info(ctx:dict, info_dict:dict) -> models.BasicSubmissio # ceil(instance.sample_count / 8) will get number of columns # the cost of a full run multiplied by (that number / 12) is x twelfths the cost of a full run logger.debug(f"Calculating costs for procedure...") - # cols_count = ceil(int(instance.sample_count) / 8) - # instance.run_cost = instance.extraction_kit.constant_cost + (instance.extraction_kit.mutable_cost * (cols_count / 12)) instance.calculate_base_cost() except (TypeError, AttributeError) as e: logger.debug(f"Looks like that kit doesn't have cost breakdown yet due to: {e}, using full plate cost.") @@ -471,7 +477,7 @@ def create_kit_from_yaml(ctx:dict, exp:dict) -> dict: Returns: dict: a dictionary containing results of db addition """ - from tools import check_is_power_user + from tools import check_is_power_user, massage_common_reagents # Don't want just anyone adding kits if not check_is_power_user(ctx=ctx): logger.debug(f"{getuser()} does not have permission to add kits.") @@ -491,6 +497,7 @@ def create_kit_from_yaml(ctx:dict, exp:dict) -> dict: # A kit contains 
multiple reagent types. for r in exp[type]['kits'][kt]['reagenttypes']: # check if reagent type already exists. + r = massage_common_reagents(r) look_up = ctx['database_session'].query(models.ReagentType).filter(models.ReagentType.name==r).first() if look_up == None: rt = models.ReagentType(name=r.replace(" ", "_").lower(), eol_ext=timedelta(30*exp[type]['kits'][kt]['reagenttypes'][r]['eol_ext']), kits=[kit]) @@ -689,7 +696,10 @@ def delete_submission_by_id(ctx:dict, id:int) -> None: pass sub.reagents = [] for sample in sub.samples: - ctx['database_session'].delete(sample) + if sample.rsl_plate == sub: + ctx['database_session'].delete(sample) + else: + logger.warning(f"Not deleting sample {sample.ww_sample_full_id} because it belongs to another plate.") ctx["database_session"].delete(sub) ctx["database_session"].commit() @@ -706,6 +716,19 @@ def lookup_ww_sample_by_rsl_sample_number(ctx:dict, rsl_number:str) -> models.WW """ return ctx['database_session'].query(models.WWSample).filter(models.WWSample.rsl_number==rsl_number).first() +def lookup_ww_sample_by_ww_sample_num(ctx:dict, sample_number:str) -> models.WWSample: + """ + Retrieves wastewater sample from database by ww sample number + + Args: + ctx (dict): settings passed down from gui + sample_number (str): sample number assigned by wastewater + + Returns: + models.WWSample: instance of wastewater sample + """ + return ctx['database_session'].query(models.WWSample).filter(models.WWSample.ww_sample_full_id==sample_number).first() + def lookup_ww_sample_by_sub_sample_rsl(ctx:dict, sample_rsl:str, plate_rsl:str) -> models.WWSample: """ Retrieves a wastewater sample from the database by its rsl sample number and parent rsl plate number. 
@@ -775,7 +798,6 @@ def lookup_discounts_by_org_and_kit(ctx:dict, kit_id:int, lab_id:int): models.Organization.id==lab_id )).all() - def hitpick_plate(submission:models.BasicSubmission, plate_number:int=0) -> list: plate_dicto = [] for sample in submission.samples: diff --git a/src/submissions/backend/db/models/__init__.py b/src/submissions/backend/db/models/__init__.py index 7524ff3..ac2d028 100644 --- a/src/submissions/backend/db/models/__init__.py +++ b/src/submissions/backend/db/models/__init__.py @@ -10,4 +10,4 @@ from .controls import Control, ControlType from .kits import KitType, ReagentType, Reagent, Discount from .organizations import Organization, Contact from .samples import WWSample, BCSample -from .submissions import BasicSubmission, BacterialCulture, Wastewater +from .submissions import BasicSubmission, BacterialCulture, Wastewater, WastewaterArtic diff --git a/src/submissions/backend/db/models/samples.py b/src/submissions/backend/db/models/samples.py index ccf7f68..9409c27 100644 --- a/src/submissions/backend/db/models/samples.py +++ b/src/submissions/backend/db/models/samples.py @@ -37,6 +37,8 @@ class WWSample(Base): sample_type = Column(String(8)) pcr_results = Column(JSON) elution_well = Column(String(8)) #: location on 96 well plate + artic_rsl_plate = relationship("WastewaterArtic", back_populates="samples") + artic_well_number = Column(String(8)) def to_string(self) -> str: @@ -131,3 +133,41 @@ class BCSample(Base): "well": self.well_number, "name": f"{self.sample_id} - ({self.organism})", } + + +# class ArticSample(Base): +# """ +# base of artic sample +# """ +# __tablename__ = "_artic_samples" + +# id = Column(INTEGER, primary_key=True) #: primary key +# well_number = Column(String(8)) #: location on parent plate +# rsl_plate = relationship("WastewaterArtic", back_populates="samples") #: relationship to parent plate +# rsl_plate_id = Column(INTEGER, ForeignKey("_submissions.id", ondelete="SET NULL", name="fk_WWA_submission_id")) +# 
ww_sample_full_id = Column(String(64), nullable=False) +# lims_sample_id = Column(String(64), nullable=False) +# ct_1 = Column(FLOAT(2)) #: first ct value in column +# ct_2 = Column(FLOAT(2)) #: second ct value in column + +# def to_string(self) -> str: +# """ +# string representing sample object + +# Returns: +# str: string representing location and sample id +# """ +# return f"{self.well_number}: {self.ww_sample_full_id}" + +# def to_sub_dict(self) -> dict: +# """ +# gui friendly dictionary + +# Returns: +# dict: well location and name (sample id, organism) NOTE: keys must sync with WWSample to_sub_dict above +# """ +# return { +# "well": self.well_number, +# "name": self.ww_sample_full_id, +# } + diff --git a/src/submissions/backend/db/models/submissions.py b/src/submissions/backend/db/models/submissions.py index 560bf9e..fd81312 100644 --- a/src/submissions/backend/db/models/submissions.py +++ b/src/submissions/backend/db/models/submissions.py @@ -161,7 +161,16 @@ class BasicSubmission(Base): } return output - + def calculate_base_cost(self): + try: + cols_count_96 = ceil(int(self.sample_count) / 8) + except Exception as e: + logger.error(f"Column count error: {e}") + # cols_count_24 = ceil(int(self.sample_count) / 3) + try: + self.run_cost = self.extraction_kit.constant_cost + (self.extraction_kit.mutable_cost_column * cols_count_96) + (self.extraction_kit.mutable_cost_sample * int(self.sample_count)) + except Exception as e: + logger.error(f"Calculation error: {e}") # Below are the custom submission types @@ -185,16 +194,16 @@ class BacterialCulture(BasicSubmission): return output - def calculate_base_cost(self): - try: - cols_count_96 = ceil(int(self.sample_count) / 8) - except Exception as e: - logger.error(f"Column count error: {e}") - # cols_count_24 = ceil(int(self.sample_count) / 3) - try: - self.run_cost = self.extraction_kit.constant_cost + (self.extraction_kit.mutable_cost_column * cols_count_96) + (self.extraction_kit.mutable_cost_sample * 
int(self.sample_count)) - except Exception as e: - logger.error(f"Calculation error: {e}") + # def calculate_base_cost(self): + # try: + # cols_count_96 = ceil(int(self.sample_count) / 8) + # except Exception as e: + # logger.error(f"Column count error: {e}") + # # cols_count_24 = ceil(int(self.sample_count) / 3) + # try: + # self.run_cost = self.extraction_kit.constant_cost + (self.extraction_kit.mutable_cost_column * cols_count_96) + (self.extraction_kit.mutable_cost_sample * int(self.sample_count)) + # except Exception as e: + # logger.error(f"Calculation error: {e}") class Wastewater(BasicSubmission): @@ -220,14 +229,22 @@ class Wastewater(BasicSubmission): pass return output - def calculate_base_cost(self): - try: - cols_count_96 = ceil(int(self.sample_count) / 8) + 1 #: Adding in one column to account for 24 samples + ext negatives - except Exception as e: - logger.error(f"Column count error: {e}") - # cols_count_24 = ceil(int(self.sample_count) / 3) - try: - self.run_cost = self.extraction_kit.constant_cost + (self.extraction_kit.mutable_cost_column * cols_count_96) + (self.extraction_kit.mutable_cost_sample * int(self.sample_count)) - except Exception as e: - logger.error(f"Calculation error: {e}") - \ No newline at end of file + # def calculate_base_cost(self): + # try: + # cols_count_96 = ceil(int(self.sample_count) / 8) + 1 #: Adding in one column to account for 24 samples + ext negatives + # except Exception as e: + # logger.error(f"Column count error: {e}") + # # cols_count_24 = ceil(int(self.sample_count) / 3) + # try: + # self.run_cost = self.extraction_kit.constant_cost + (self.extraction_kit.mutable_cost_column * cols_count_96) + (self.extraction_kit.mutable_cost_sample * int(self.sample_count)) + # except Exception as e: + # logger.error(f"Calculation error: {e}") + + +class WastewaterArtic(BasicSubmission): + """ + derivative submission type for artic wastewater + """ + samples = relationship("WWSample", back_populates="artic_rsl_plate", 
uselist=True) + # Can in use the pcr_info from the wastewater? Cause I can't define pcr_info here due to conflicts with that + __mapper_args__ = {"polymorphic_identity": "wastewater_artic", "polymorphic_load": "inline"} \ No newline at end of file diff --git a/src/submissions/backend/excel/parser.py b/src/submissions/backend/excel/parser.py index 3fe35e9..28b868e 100644 --- a/src/submissions/backend/excel/parser.py +++ b/src/submissions/backend/excel/parser.py @@ -2,18 +2,19 @@ contains parser object for pulling values from client generated submission sheets. ''' from getpass import getuser +import math from typing import Tuple import pandas as pd from pathlib import Path from backend.db.models import WWSample, BCSample -# from backend.db import lookup_ww_sample_by_rsl_sample_number +from backend.db import lookup_ww_sample_by_ww_sample_num import logging from collections import OrderedDict import re import numpy as np from datetime import date, datetime import uuid -from tools import check_not_nan, RSLNamer +from tools import check_not_nan, RSLNamer, massage_common_reagents logger = logging.getLogger(f"submissions.{__name__}") @@ -21,20 +22,22 @@ class SheetParser(object): """ object to pull and contain data from excel file """ - def __init__(self, filepath:Path|None = None, **kwargs): + def __init__(self, ctx:dict, filepath:Path|None = None): """ Args: filepath (Path | None, optional): file path to excel sheet. Defaults to None. 
""" + self.ctx = ctx logger.debug(f"Parsing {filepath.__str__()}") # set attributes based on kwargs from gui ctx - for kwarg in kwargs: - setattr(self, f"_{kwarg}", kwargs[kwarg]) + # for kwarg in kwargs: + # setattr(self, f"_{kwarg}", kwargs[kwarg]) # self.__dict__.update(kwargs) if filepath == None: logger.error(f"No filepath given.") self.xl = None else: + self.filepath = filepath try: self.xl = pd.ExcelFile(filepath.__str__()) except ValueError as e: @@ -55,8 +58,8 @@ class SheetParser(object): str: submission type name """ try: - for type in self._submission_types: - if self.xl.sheet_names == self._submission_types[type]['excel_map']: + for type in self.ctx['submission_types']: + if self.xl.sheet_names == self.ctx['submission_types'][type]['excel_map']: return type.title() return "Unknown" except Exception as e: @@ -74,7 +77,7 @@ class SheetParser(object): def parse_generic(self, sheet_name:str) -> pd.DataFrame: """ - Pulls information common to all submission types and passes on dataframe + Pulls information common to all wasterwater/bacterial culture types and passes on dataframe Args: sheet_name (str): name of excel worksheet to pull from @@ -107,8 +110,6 @@ class SheetParser(object): """ for ii, row in df.iterrows(): # skip positive control - # if ii == 12: - # continue logger.debug(f"Running reagent parse for {row[1]} with type {type(row[1])} and value: {row[2]} with type {type(row[2])}") if not isinstance(row[2], float) and check_not_nan(row[1]): # must be prefixed with 'lot_' to be recognized by gui @@ -156,7 +157,7 @@ class SheetParser(object): logger.debug(reagent_range) parse_reagents(reagent_range) # get individual sample info - sample_parser = SampleParser(submission_info.iloc[16:112]) + sample_parser = SampleParser(self.ctx, submission_info.iloc[16:112]) sample_parse = getattr(sample_parser, f"parse_{self.sub['submission_type'].lower()}_samples") logger.debug(f"Parser result: {self.sub}") self.sub['samples'] = sample_parse() @@ -181,6 +182,7 @@ 
class SheetParser(object): # regex below will remove 80% from 80% ethanol in the Wastewater kit. output_key = re.sub(r"^\d{1,3}%\s?", "", row[0].lower().strip().replace(' ', '_')) output_key = output_key.strip("_") + # output_var is the lot number try: output_var = row[5].upper() except AttributeError: @@ -214,24 +216,97 @@ class SheetParser(object): parse_reagents(ext_reagent_range) parse_reagents(pcr_reagent_range) # parse samples - sample_parser = SampleParser(submission_info.iloc[16:]) + sample_parser = SampleParser(self.ctx, submission_info.iloc[16:]) sample_parse = getattr(sample_parser, f"parse_{self.sub['submission_type'].lower()}_samples") self.sub['samples'] = sample_parse() self.sub['csv'] = self.xl.parse("Copy to import file", dtype=object) + def parse_wastewater_artic(self) -> None: + """ + pulls info specific to wastewater_arctic submission type + """ + def parse_reagents(df:pd.DataFrame): + logger.debug(df) + for ii, row in df.iterrows(): + if check_not_nan(row[0]): + try: + output_key = re.sub(r"\(.+?\)", "", row[0].lower().strip().replace(' ', '_')) + except AttributeError: + continue + output_key = output_key.strip("_") + output_key = massage_common_reagents(output_key) + try: + output_var = row[1].upper() + except AttributeError: + logger.debug(f"Couldn't upperize {row[1]}, must be a number") + output_var = row[1] + logger.debug(f"Output variable is {output_var}") + logger.debug(f"Expiry date for imported reagent: {row[2]}") + if check_not_nan(row[2]): + try: + expiry = row[2].date() + except AttributeError as e: + try: + expiry = datetime.strptime(row[2], "%Y-%m-%d") + except TypeError as e: + expiry = datetime.fromordinal(datetime(1900, 1, 1).toordinal() + row[2] - 2) + except ValueError as e: + continue + else: + logger.debug(f"Date: {row[2]}") + expiry = date.today() + self.sub[f"lot_{output_key}"] = {'lot':output_var, 'exp':expiry} + else: + continue + def massage_samples(df:pd.DataFrame) -> pd.DataFrame: + df.set_index(df.columns[0], 
inplace=True) + df.columns = df.iloc[0] + logger.debug(f"df to massage\n: {df}") + return_list = [] + for _, ii in df.iloc[1:,1:].iterrows(): + for c in df.columns.to_list(): + logger.debug(f"Checking {ii.name}{c}") + if check_not_nan(df.loc[ii.name, int(c)]) and df.loc[ii.name, int(c)] != "EMPTY": + + return_list.append(dict(sample_name=re.sub(r"\s?\(.*\)", "", df.loc[ii.name, int(c)]), \ + well=f"{ii.name}{c}", + artic_plate=self.sub['rsl_plate_num'])) + logger.debug(f"massaged sample list for {self.sub['rsl_plate_num']}: {return_list}") + return return_list + submission_info = self.xl.parse("First Strand", dtype=object) + biomek_info = self.xl.parse("ArticV4 Biomek", dtype=object) + sub_reagent_range = submission_info.iloc[56:, 1:4].dropna(how='all') + biomek_reagent_range = biomek_info.iloc[60:, 0:3].dropna(how='all') + self.sub['submitter_plate_num'] = "" + self.sub['rsl_plate_num'] = RSLNamer(self.filepath.__str__()).parsed_name + self.sub['submitted_date'] = submission_info.iloc[0][2] + self.sub['submitting_lab'] = "Enterics Wastewater Genomics" + self.sub['sample_count'] = submission_info.iloc[4][6] + self.sub['extraction_kit'] = "ArticV4.1" + self.sub['technician'] = f"MM: {biomek_info.iloc[2][1]}, Bio: {biomek_info.iloc[3][1]}" + parse_reagents(sub_reagent_range) + parse_reagents(biomek_reagent_range) + samples = massage_samples(biomek_info.iloc[22:31, 0:]) + sample_parser = SampleParser(self.ctx, pd.DataFrame.from_records(samples)) + sample_parse = getattr(sample_parser, f"parse_{self.sub['submission_type'].lower()}_samples") + self.sub['samples'] = sample_parse() + + + class SampleParser(object): """ object to pull data for samples in excel sheet and construct individual sample objects """ - def __init__(self, df:pd.DataFrame) -> None: + def __init__(self, ctx:dict, df:pd.DataFrame) -> None: """ convert sample sub-dataframe to dictionary of records Args: df (pd.DataFrame): input sample dataframe """ + self.ctx = ctx self.samples = df.to_dict("records") 
@@ -295,6 +370,29 @@ class SampleParser(object): new.well_number = sample['Unnamed: 1'] new_list.append(new) return new_list + + def parse_wastewater_artic_samples(self) -> list[WWSample]: + """ + The artic samples are the wastewater samples that are to be sequenced + So we will need to lookup existing ww samples and append Artic well # and plate relation + + Returns: + list[WWSample]: list of wastewater samples to be updated + """ + new_list = [] + for sample in self.samples: + with self.ctx['database_session'].no_autoflush: + instance = lookup_ww_sample_by_ww_sample_num(ctx=self.ctx, sample_number=sample['sample_name']) + logger.debug(f"Checking: {sample['sample_name']}") + if instance == None: + logger.error(f"Unable to find match for: {sample['sample_name']}") + continue + logger.debug(f"Got instance: {instance.ww_sample_full_id}") + instance.artic_well_number = sample['well'] + new_list.append(instance) + return new_list + + class PCRParser(object): diff --git a/src/submissions/frontend/custom_widgets/sub_details.py b/src/submissions/frontend/custom_widgets/sub_details.py index aaf8de4..cfb43e3 100644 --- a/src/submissions/frontend/custom_widgets/sub_details.py +++ b/src/submissions/frontend/custom_widgets/sub_details.py @@ -423,7 +423,7 @@ class SubmissionComment(QDialog): def add_comment(self): commenter = getuser() comment = self.txt_editor.toPlainText() - dt = datetime.strftime(datetime.now(), "%Y-%m-d %H:%M:%S") + dt = datetime.strftime(datetime.now(), "%Y-%m-%d %H:%M:%S") full_comment = {"name":commenter, "time": dt, "text": comment} logger.debug(f"Full comment: {full_comment}") sub = lookup_submission_by_rsl_num(ctx = self.ctx, rsl_num=self.rsl) diff --git a/src/submissions/frontend/main_window_functions.py b/src/submissions/frontend/main_window_functions.py index ee43724..108fe0e 100644 --- a/src/submissions/frontend/main_window_functions.py +++ b/src/submissions/frontend/main_window_functions.py @@ -53,7 +53,7 @@ def 
import_submission_function(obj:QMainWindow) -> Tuple[QMainWindow, dict|None] return obj, result # create sheetparser using excel sheet and context from gui try: - prsr = SheetParser(fname, **obj.ctx) + prsr = SheetParser(ctx=obj.ctx, filepath=fname) except PermissionError: logger.error(f"Couldn't get permission to access file: {fname}") return diff --git a/src/submissions/templates/submission_details.html b/src/submissions/templates/submission_details.html index 99e6f6a..d3b66cb 100644 --- a/src/submissions/templates/submission_details.html +++ b/src/submissions/templates/submission_details.html @@ -94,7 +94,7 @@ {% endfor %}

{% endif %} {% if sub['platemap'] %} -

>Plate map:

+

Plate map:

{% endif %} diff --git a/src/submissions/tools/__init__.py b/src/submissions/tools/__init__.py index 422eab5..420a125 100644 --- a/src/submissions/tools/__init__.py +++ b/src/submissions/tools/__init__.py @@ -83,7 +83,10 @@ def check_kit_integrity(sub:BasicSubmission|KitType, reagenttypes:list|None=None case BasicSubmission(): ext_kit_rtypes = [reagenttype.name for reagenttype in sub.extraction_kit.reagent_types] # Overwrite function parameter reagenttypes - reagenttypes = [reagent.type.name for reagent in sub.reagents] + try: + reagenttypes = [reagent.type.name for reagent in sub.reagents] + except AttributeError as e: + logger.error(f"Problem parsing reagents: {[f'{reagent.lot}, {reagent.type}' for reagent in sub.reagents]}") case KitType(): ext_kit_rtypes = [reagenttype.name for reagenttype in sub.reagent_types] logger.debug(f"Kit reagents: {ext_kit_rtypes}") @@ -191,9 +194,12 @@ class RSLNamer(object): self.submission_type = None return logger.debug(f"Attempting match of {in_str}") + print(f"The initial plate name is: {in_str}") regex = re.compile(r""" - (?P<wastewater>RSL(?:-|_)?WW(?:-|_)?20\d{2}-?\d{2}-?\d{2}(?:(?:_|-)\d?(?!\d)R?\d(?!\d))?)| - (?P<bacterial_culture>RSL-?\d{2}-?\d{4}) + # (?P<wastewater>RSL(?:-|_)?WW(?:-|_)?20\d{2}-?\d{2}-?\d{2}(?:(?:_|-)\d?((?!\d)|R)?\d(?!\d))?)| + (?P<wastewater>RSL(?:-|_)?WW(?:-|_)?20\d{2}-?\d{2}-?\d{2}(?:(_|-)\d?(\D|$)R?\d?)?)| + (?P<bacterial_culture>RSL-?\d{2}-?\d{4})| + (?P<wastewater_artic>(\d{4}-\d{2}-\d{2}_(?:\d_)?artic)|(RSL(?:-|_)?AR(?:-|_)?20\d{2}-?\d{2}-?\d{2}(?:(_|-)\d?(\D|$)R?\d?)?)) """, flags = re.IGNORECASE | re.VERBOSE) m = regex.search(in_str) try: @@ -212,6 +218,25 @@ class RSLNamer(object): self.parsed_name = self.parsed_name.replace("RSLWW", "RSL-WW") self.parsed_name = re.sub(r"WW(\d{4})", r"WW-\1", self.parsed_name, flags=re.IGNORECASE) self.parsed_name = re.sub(r"(\d{4})-(\d{2})-(\d{2})", r"\1\2\3", self.parsed_name) + print(f"Coming out of the preliminary parsing, the plate name is {self.parsed_name}") + try: + plate_number = re.search(r"(?:(-|_)\d)(?!\d)", 
self.parsed_name).group().strip("_").strip("-") + print(f"Plate number is: {plate_number}") + except AttributeError as e: + plate_number = "1" + # self.parsed_name = re.sub(r"(\d{8})(-|_\d)?(R\d)?", fr"\1-{plate_number}\3", self.parsed_name) + self.parsed_name = re.sub(r"(\d{8})(-|_)?\d?(R\d?)?", rf"\1-{plate_number}\3", self.parsed_name) + print(f"After addition of plate number the plate name is: {self.parsed_name}") + try: + repeat = re.search(r"-\dR(?P<repeat>\d)?", self.parsed_name).groupdict()['repeat'] + if repeat == None: + repeat = "1" + except AttributeError as e: + repeat = "" + self.parsed_name = re.sub(r"(-\dR)\d?", rf"\1 {repeat}", self.parsed_name).replace(" ", "") + + + def enforce_bacterial_culture(self): """ @@ -219,4 +244,20 @@ class RSLNamer(object): """ self.parsed_name = re.sub(r"RSL(\d{2})", r"RSL-\1", self.parsed_name, flags=re.IGNORECASE) self.parsed_name = re.sub(r"RSL-(\d{2})(\d{4})", r"RSL-\1-\2", self.parsed_name, flags=re.IGNORECASE) + + def enforce_wastewater_artic(self): + self.parsed_name = re.sub(r"(\d{4})-(\d{2})-(\d{2})", r"RSL-AR-\1\2\3", self.parsed_name, flags=re.IGNORECASE) + try: + plate_number = int(re.search(r"_\d?_", self.parsed_name).group().strip("_")) + except AttributeError as e: + plate_number = 1 + self.parsed_name = re.sub(r"(_\d)?_ARTIC", f"-{plate_number}", self.parsed_name) + + +def massage_common_reagents(reagent_name:str): + logger.debug(f"Attempting to massage {reagent_name}") + if reagent_name.endswith("water") or "H2O" in reagent_name: + reagent_name = "molecular_grade_water" + reagent_name = reagent_name.replace("µ", "u") + return reagent_name \ No newline at end of file