Addition of WW artic parsers, large-scale shake-up of parser structure.

This commit is contained in:
Landon Wark
2023-06-08 14:43:36 -05:00
parent 1d6823705c
commit a7132cd1b4
14 changed files with 376 additions and 56 deletions

View File

@@ -1,3 +1,8 @@
## 202306.01
- Large scale shake up of import and scraper functions.
- Addition of Artic scrapers.
## 202305.05 ## 202305.05
- Hitpicking now creates source plate map image. - Hitpicking now creates source plate map image.

View File

@@ -55,9 +55,9 @@ version_path_separator = os # Use os.pathsep. Default configuration used for ne
# are written from script.py.mako # are written from script.py.mako
# output_encoding = utf-8 # output_encoding = utf-8
sqlalchemy.url = sqlite:///L:\Robotics Laboratory Support\Submissions\submissions.db ; sqlalchemy.url = sqlite:///L:\Robotics Laboratory Support\Submissions\submissions.db
; sqlalchemy.url = sqlite:///C:\Users\lwark\Documents\Archives\DB_backups\submissions-20230427.db sqlalchemy.url = sqlite:///C:\Users\lwark\Documents\Archives\DB_backups\submissions-20230605.db
; msqlalchemy.url = sqlite:///C:\Users\lwark\Documents\python\submissions\tests\test_assets\submissions_test.db ; sqlalchemy.url = sqlite:///C:\Users\lwark\Documents\python\submissions\tests\test_assets\submissions_test.db
[post_write_hooks] [post_write_hooks]

View File

@@ -0,0 +1,30 @@
"""moved artic info to ww_samples
Revision ID: 8d32abdafe2b
Revises: aac569c672de
Create Date: 2023-06-05 10:10:37.650733
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = '8d32abdafe2b'
down_revision = 'aac569c672de'
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Add the nullable ``artic_well_number`` string column to ``_ww_samples``."""
    # ### commands auto generated by Alembic - please adjust! ###
    artic_well_column = sa.Column('artic_well_number', sa.String(length=8), nullable=True)
    # Batch mode is used for the alteration of the existing table.
    with op.batch_alter_table('_ww_samples', schema=None) as batch_op:
        batch_op.add_column(artic_well_column)
    # ### end Alembic commands ###
def downgrade() -> None:
    """Remove the ``artic_well_number`` column from ``_ww_samples``."""
    # ### commands auto generated by Alembic - please adjust! ###
    batch_context = op.batch_alter_table('_ww_samples', schema=None)
    with batch_context as batch_op:
        batch_op.drop_column('artic_well_number')
    # ### end Alembic commands ###

View File

@@ -0,0 +1,63 @@
"""added in artic information
Revision ID: aac569c672de
Revises: 64fec6271a50
Create Date: 2023-06-02 15:14:13.726489
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import sqlite
# revision identifiers, used by Alembic.
revision = 'aac569c672de'
down_revision = '64fec6271a50'
branch_labels = None
depends_on = None
def upgrade() -> None:
    """
    Remove the stray ``_alembic_tmp__submissions`` table when it is present.

    Autogenerate also proposed a separate ``_artic_samples`` table; that part
    is kept below as a comment only and is not executed by this revision.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    # op.create_table('_artic_samples',
    # sa.Column('id', sa.INTEGER(), nullable=False),
    # sa.Column('well_number', sa.String(length=8), nullable=True),
    # sa.Column('rsl_plate_id', sa.INTEGER(), nullable=True),
    # sa.Column('ww_sample_full_id', sa.String(length=64), nullable=False),
    # sa.Column('lims_sample_id', sa.String(length=64), nullable=False),
    # sa.Column('ct_1', sa.FLOAT(precision=2), nullable=True),
    # sa.Column('ct_2', sa.FLOAT(precision=2), nullable=True),
    # sa.ForeignKeyConstraint(['rsl_plate_id'], ['_submissions.id'], name='fk_WWA_submission_id', ondelete='SET NULL'),
    # sa.PrimaryKeyConstraint('id')
    # )
    # '_alembic_tmp_<table>' is the scratch table name Alembic's batch mode uses,
    # so this table is only left behind where an earlier batch migration did not
    # finish. A plain op.drop_table() raises on every database that does not have
    # it, so the drop is made conditional.
    op.execute("DROP TABLE IF EXISTS _alembic_tmp__submissions")
    # ### end Alembic commands ###
def downgrade() -> None:
    """Recreate the ``_alembic_tmp__submissions`` table removed by this revision's upgrade."""
    # ### commands auto generated by Alembic - please adjust! ###
    # Column definitions, in the order they are created in the table.
    tmp_columns = (
        sa.Column('id', sa.INTEGER(), nullable=False),
        sa.Column('rsl_plate_num', sa.VARCHAR(length=32), nullable=False),
        sa.Column('submitter_plate_num', sa.VARCHAR(length=127), nullable=True),
        sa.Column('submitted_date', sa.TIMESTAMP(), nullable=True),
        sa.Column('submitting_lab_id', sa.INTEGER(), nullable=True),
        sa.Column('sample_count', sa.INTEGER(), nullable=True),
        sa.Column('extraction_kit_id', sa.INTEGER(), nullable=True),
        sa.Column('submission_type', sa.VARCHAR(length=32), nullable=True),
        sa.Column('technician', sa.VARCHAR(length=64), nullable=True),
        sa.Column('reagents_id', sa.VARCHAR(), nullable=True),
        sa.Column('extraction_info', sqlite.JSON(), nullable=True),
        sa.Column('run_cost', sa.FLOAT(), nullable=True),
        sa.Column('uploaded_by', sa.VARCHAR(length=32), nullable=True),
        sa.Column('pcr_info', sqlite.JSON(), nullable=True),
        sa.Column('comment', sqlite.JSON(), nullable=True),
    )
    # Foreign keys, primary key and unique constraints follow the columns.
    tmp_constraints = (
        sa.ForeignKeyConstraint(['extraction_kit_id'], ['_kits.id'], ondelete='SET NULL'),
        sa.ForeignKeyConstraint(['reagents_id'], ['_reagents.id'], ondelete='SET NULL'),
        sa.ForeignKeyConstraint(['submitting_lab_id'], ['_organizations.id'], ondelete='SET NULL'),
        sa.PrimaryKeyConstraint('id'),
        sa.UniqueConstraint('rsl_plate_num'),
        sa.UniqueConstraint('submitter_plate_num'),
    )
    op.create_table('_alembic_tmp__submissions', *tmp_columns, *tmp_constraints)
    # op.drop_table('_artic_samples')
    # ### end Alembic commands ###

View File

@@ -4,7 +4,7 @@ from pathlib import Path
# Version of the realpython-reader package # Version of the realpython-reader package
__project__ = "submissions" __project__ = "submissions"
__version__ = "202305.4b" __version__ = "202306.1b"
__author__ = {"name":"Landon Wark", "email":"Landon.Wark@phac-aspc.gc.ca"} __author__ = {"name":"Landon Wark", "email":"Landon.Wark@phac-aspc.gc.ca"}
__copyright__ = "2022-2023, Government of Canada" __copyright__ = "2022-2023, Government of Canada"
@@ -28,3 +28,7 @@ class bcolors:
# Second, you will have to update the model in backend.db.models.submissions and provide a new polymorph to the BasicSubmission object. # Second, you will have to update the model in backend.db.models.submissions and provide a new polymorph to the BasicSubmission object.
# The BSO should hold the majority of the general info. # The BSO should hold the majority of the general info.
# You can also update any of the parsers to pull out any custom info you need, like enforcing RSL plate numbers, scraping PCR results, etc. # You can also update any of the parsers to pull out any custom info you need, like enforcing RSL plate numbers, scraping PCR results, etc.
# Landon, this is your slightly less past self here. For the most part, Past Landon has not screwed us. I've been able to add in the
# Wastewater Artic with minimal difficulties, except that the parser of the non-standard, user-generated excel sheets required slightly
# more work.

View File

@@ -21,6 +21,7 @@ import numpy as np
import yaml import yaml
from pathlib import Path from pathlib import Path
logger = logging.getLogger(f"submissions.{__name__}") logger = logging.getLogger(f"submissions.{__name__}")
# The below _should_ allow automatic creation of foreign keys in the database # The below _should_ allow automatic creation of foreign keys in the database
@@ -41,12 +42,19 @@ def store_submission(ctx:dict, base_submission:models.BasicSubmission) -> None|d
Returns: Returns:
None|dict : object that indicates issue raised for reporting in gui None|dict : object that indicates issue raised for reporting in gui
""" """
from tools import format_rsl_number from tools import RSLNamer
logger.debug(f"Hello from store_submission") logger.debug(f"Hello from store_submission")
# Add all samples to sample table # Add all samples to sample table
base_submission.rsl_plate_num = format_rsl_number(base_submission.rsl_plate_num) typer = RSLNamer(base_submission.rsl_plate_num)
base_submission.rsl_plate_num = typer.parsed_name
for sample in base_submission.samples: for sample in base_submission.samples:
logger.debug(f"Typer: {typer.submission_type}")
# Suuuuuper hacky way to be sure that the artic doesn't overwrite the ww plate in a ww sample
# need something more elegant
if "_artic" not in typer.submission_type:
sample.rsl_plate = base_submission sample.rsl_plate = base_submission
else:
sample.artic_rsl_plate = base_submission
logger.debug(f"Attempting to add sample: {sample.to_string()}") logger.debug(f"Attempting to add sample: {sample.to_string()}")
try: try:
ctx['database_session'].add(sample) ctx['database_session'].add(sample)
@@ -152,7 +160,7 @@ def construct_submission_info(ctx:dict, info_dict:dict) -> models.BasicSubmissio
# Because of unique constraint, there will be problems with # Because of unique constraint, there will be problems with
# multiple submissions named 'None', so... # multiple submissions named 'None', so...
logger.debug(f"Submitter plate id: {info_dict[item]}") logger.debug(f"Submitter plate id: {info_dict[item]}")
if info_dict[item] == None or info_dict[item] == "None": if info_dict[item] == None or info_dict[item] == "None" or info_dict[item] == "":
logger.debug(f"Got None as a submitter plate number, inserting random string to preserve database unique constraint.") logger.debug(f"Got None as a submitter plate number, inserting random string to preserve database unique constraint.")
info_dict[item] = uuid.uuid4().hex.upper() info_dict[item] = uuid.uuid4().hex.upper()
field_value = info_dict[item] field_value = info_dict[item]
@@ -170,8 +178,6 @@ def construct_submission_info(ctx:dict, info_dict:dict) -> models.BasicSubmissio
# ceil(instance.sample_count / 8) will get number of columns # ceil(instance.sample_count / 8) will get number of columns
# the cost of a full run multiplied by (that number / 12) is x twelfths the cost of a full run # the cost of a full run multiplied by (that number / 12) is x twelfths the cost of a full run
logger.debug(f"Calculating costs for procedure...") logger.debug(f"Calculating costs for procedure...")
# cols_count = ceil(int(instance.sample_count) / 8)
# instance.run_cost = instance.extraction_kit.constant_cost + (instance.extraction_kit.mutable_cost * (cols_count / 12))
instance.calculate_base_cost() instance.calculate_base_cost()
except (TypeError, AttributeError) as e: except (TypeError, AttributeError) as e:
logger.debug(f"Looks like that kit doesn't have cost breakdown yet due to: {e}, using full plate cost.") logger.debug(f"Looks like that kit doesn't have cost breakdown yet due to: {e}, using full plate cost.")
@@ -471,7 +477,7 @@ def create_kit_from_yaml(ctx:dict, exp:dict) -> dict:
Returns: Returns:
dict: a dictionary containing results of db addition dict: a dictionary containing results of db addition
""" """
from tools import check_is_power_user from tools import check_is_power_user, massage_common_reagents
# Don't want just anyone adding kits # Don't want just anyone adding kits
if not check_is_power_user(ctx=ctx): if not check_is_power_user(ctx=ctx):
logger.debug(f"{getuser()} does not have permission to add kits.") logger.debug(f"{getuser()} does not have permission to add kits.")
@@ -491,6 +497,7 @@ def create_kit_from_yaml(ctx:dict, exp:dict) -> dict:
# A kit contains multiple reagent types. # A kit contains multiple reagent types.
for r in exp[type]['kits'][kt]['reagenttypes']: for r in exp[type]['kits'][kt]['reagenttypes']:
# check if reagent type already exists. # check if reagent type already exists.
r = massage_common_reagents(r)
look_up = ctx['database_session'].query(models.ReagentType).filter(models.ReagentType.name==r).first() look_up = ctx['database_session'].query(models.ReagentType).filter(models.ReagentType.name==r).first()
if look_up == None: if look_up == None:
rt = models.ReagentType(name=r.replace(" ", "_").lower(), eol_ext=timedelta(30*exp[type]['kits'][kt]['reagenttypes'][r]['eol_ext']), kits=[kit]) rt = models.ReagentType(name=r.replace(" ", "_").lower(), eol_ext=timedelta(30*exp[type]['kits'][kt]['reagenttypes'][r]['eol_ext']), kits=[kit])
@@ -689,7 +696,10 @@ def delete_submission_by_id(ctx:dict, id:int) -> None:
pass pass
sub.reagents = [] sub.reagents = []
for sample in sub.samples: for sample in sub.samples:
if sample.rsl_plate == sub:
ctx['database_session'].delete(sample) ctx['database_session'].delete(sample)
else:
logger.warning(f"Not deleting sample {sample.ww_sample_full_id} because it belongs to another plate.")
ctx["database_session"].delete(sub) ctx["database_session"].delete(sub)
ctx["database_session"].commit() ctx["database_session"].commit()
@@ -706,6 +716,19 @@ def lookup_ww_sample_by_rsl_sample_number(ctx:dict, rsl_number:str) -> models.WW
""" """
return ctx['database_session'].query(models.WWSample).filter(models.WWSample.rsl_number==rsl_number).first() return ctx['database_session'].query(models.WWSample).filter(models.WWSample.rsl_number==rsl_number).first()
def lookup_ww_sample_by_ww_sample_num(ctx:dict, sample_number:str) -> models.WWSample:
    """
    Fetches the first wastewater sample whose full ww sample id equals the given sample number

    Args:
        ctx (dict): settings passed down from gui
        sample_number (str): sample number assigned by wastewater

    Returns:
        models.WWSample: first matching wastewater sample (result of the query's .first())
    """
    session = ctx['database_session']
    matching_samples = session.query(models.WWSample).filter(models.WWSample.ww_sample_full_id==sample_number)
    return matching_samples.first()
def lookup_ww_sample_by_sub_sample_rsl(ctx:dict, sample_rsl:str, plate_rsl:str) -> models.WWSample: def lookup_ww_sample_by_sub_sample_rsl(ctx:dict, sample_rsl:str, plate_rsl:str) -> models.WWSample:
""" """
Retrieves a wastewater sample from the database by its rsl sample number and parent rsl plate number. Retrieves a wastewater sample from the database by its rsl sample number and parent rsl plate number.
@@ -775,7 +798,6 @@ def lookup_discounts_by_org_and_kit(ctx:dict, kit_id:int, lab_id:int):
models.Organization.id==lab_id models.Organization.id==lab_id
)).all() )).all()
def hitpick_plate(submission:models.BasicSubmission, plate_number:int=0) -> list: def hitpick_plate(submission:models.BasicSubmission, plate_number:int=0) -> list:
plate_dicto = [] plate_dicto = []
for sample in submission.samples: for sample in submission.samples:

View File

@@ -10,4 +10,4 @@ from .controls import Control, ControlType
from .kits import KitType, ReagentType, Reagent, Discount from .kits import KitType, ReagentType, Reagent, Discount
from .organizations import Organization, Contact from .organizations import Organization, Contact
from .samples import WWSample, BCSample from .samples import WWSample, BCSample
from .submissions import BasicSubmission, BacterialCulture, Wastewater from .submissions import BasicSubmission, BacterialCulture, Wastewater, WastewaterArtic

View File

@@ -37,6 +37,8 @@ class WWSample(Base):
sample_type = Column(String(8)) sample_type = Column(String(8))
pcr_results = Column(JSON) pcr_results = Column(JSON)
elution_well = Column(String(8)) #: location on 96 well plate elution_well = Column(String(8)) #: location on 96 well plate
artic_rsl_plate = relationship("WastewaterArtic", back_populates="samples")
artic_well_number = Column(String(8))
def to_string(self) -> str: def to_string(self) -> str:
@@ -131,3 +133,41 @@ class BCSample(Base):
"well": self.well_number, "well": self.well_number,
"name": f"{self.sample_id} - ({self.organism})", "name": f"{self.sample_id} - ({self.organism})",
} }
# class ArticSample(Base):
# """
# base of artic sample
# """
# __tablename__ = "_artic_samples"
# id = Column(INTEGER, primary_key=True) #: primary key
# well_number = Column(String(8)) #: location on parent plate
# rsl_plate = relationship("WastewaterArtic", back_populates="samples") #: relationship to parent plate
# rsl_plate_id = Column(INTEGER, ForeignKey("_submissions.id", ondelete="SET NULL", name="fk_WWA_submission_id"))
# ww_sample_full_id = Column(String(64), nullable=False)
# lims_sample_id = Column(String(64), nullable=False)
# ct_1 = Column(FLOAT(2)) #: first ct value in column
# ct_2 = Column(FLOAT(2)) #: second ct value in column
# def to_string(self) -> str:
# """
# string representing sample object
# Returns:
# str: string representing location and sample id
# """
# return f"{self.well_number}: {self.ww_sample_full_id}"
# def to_sub_dict(self) -> dict:
# """
# gui friendly dictionary
# Returns:
# dict: well location and name (sample id, organism) NOTE: keys must sync with WWSample to_sub_dict above
# """
# return {
# "well": self.well_number,
# "name": self.ww_sample_full_id,
# }

View File

@@ -161,7 +161,16 @@ class BasicSubmission(Base):
} }
return output return output
def calculate_base_cost(self):
    """
    Calculates the cost of this run from the extraction kit's cost breakdown and stores it in self.run_cost

    run_cost = constant_cost + (mutable_cost_column * columns used) + (mutable_cost_sample * sample_count),
    where columns used is sample_count / 8 rounded up.

    Errors are logged rather than raised; in that case self.run_cost is not assigned.
    """
    try:
        cols_count_96 = ceil(int(self.sample_count) / 8)
    except Exception as e:
        logger.error(f"Column count error: {e}")
        # Without a column count the formula below cannot be evaluated; stop here
        # instead of falling through and logging a second, misleading error
        # about an unassigned local variable.
        return
    # cols_count_24 = ceil(int(self.sample_count) / 3)
    # NOTE(review): the Wastewater-specific version of this method added one extra
    # column for extraction negatives; that adjustment is not applied here - confirm intended.
    try:
        self.run_cost = self.extraction_kit.constant_cost + (self.extraction_kit.mutable_cost_column * cols_count_96) + (self.extraction_kit.mutable_cost_sample * int(self.sample_count))
    except Exception as e:
        logger.error(f"Calculation error: {e}")
# Below are the custom submission types # Below are the custom submission types
@@ -185,16 +194,16 @@ class BacterialCulture(BasicSubmission):
return output return output
def calculate_base_cost(self): # def calculate_base_cost(self):
try: # try:
cols_count_96 = ceil(int(self.sample_count) / 8) # cols_count_96 = ceil(int(self.sample_count) / 8)
except Exception as e: # except Exception as e:
logger.error(f"Column count error: {e}") # logger.error(f"Column count error: {e}")
# cols_count_24 = ceil(int(self.sample_count) / 3) # # cols_count_24 = ceil(int(self.sample_count) / 3)
try: # try:
self.run_cost = self.extraction_kit.constant_cost + (self.extraction_kit.mutable_cost_column * cols_count_96) + (self.extraction_kit.mutable_cost_sample * int(self.sample_count)) # self.run_cost = self.extraction_kit.constant_cost + (self.extraction_kit.mutable_cost_column * cols_count_96) + (self.extraction_kit.mutable_cost_sample * int(self.sample_count))
except Exception as e: # except Exception as e:
logger.error(f"Calculation error: {e}") # logger.error(f"Calculation error: {e}")
class Wastewater(BasicSubmission): class Wastewater(BasicSubmission):
@@ -220,14 +229,22 @@ class Wastewater(BasicSubmission):
pass pass
return output return output
def calculate_base_cost(self): # def calculate_base_cost(self):
try: # try:
cols_count_96 = ceil(int(self.sample_count) / 8) + 1 #: Adding in one column to account for 24 samples + ext negatives # cols_count_96 = ceil(int(self.sample_count) / 8) + 1 #: Adding in one column to account for 24 samples + ext negatives
except Exception as e: # except Exception as e:
logger.error(f"Column count error: {e}") # logger.error(f"Column count error: {e}")
# cols_count_24 = ceil(int(self.sample_count) / 3) # # cols_count_24 = ceil(int(self.sample_count) / 3)
try: # try:
self.run_cost = self.extraction_kit.constant_cost + (self.extraction_kit.mutable_cost_column * cols_count_96) + (self.extraction_kit.mutable_cost_sample * int(self.sample_count)) # self.run_cost = self.extraction_kit.constant_cost + (self.extraction_kit.mutable_cost_column * cols_count_96) + (self.extraction_kit.mutable_cost_sample * int(self.sample_count))
except Exception as e: # except Exception as e:
logger.error(f"Calculation error: {e}") # logger.error(f"Calculation error: {e}")
class WastewaterArtic(BasicSubmission):
    """
    artic wastewater flavour of BasicSubmission, stored under the "wastewater_artic" polymorphic identity
    """
    # wastewater samples placed on this artic plate; paired with WWSample.artic_rsl_plate
    samples = relationship(
        "WWSample",
        uselist=True,
        back_populates="artic_rsl_plate",
    )
    # Can I use the pcr_info from the wastewater? Cause I can't define pcr_info here due to conflicts with that
    __mapper_args__ = {
        "polymorphic_identity": "wastewater_artic",
        "polymorphic_load": "inline",
    }

View File

@@ -2,18 +2,19 @@
contains parser object for pulling values from client generated submission sheets. contains parser object for pulling values from client generated submission sheets.
''' '''
from getpass import getuser from getpass import getuser
import math
from typing import Tuple from typing import Tuple
import pandas as pd import pandas as pd
from pathlib import Path from pathlib import Path
from backend.db.models import WWSample, BCSample from backend.db.models import WWSample, BCSample
# from backend.db import lookup_ww_sample_by_rsl_sample_number from backend.db import lookup_ww_sample_by_ww_sample_num
import logging import logging
from collections import OrderedDict from collections import OrderedDict
import re import re
import numpy as np import numpy as np
from datetime import date, datetime from datetime import date, datetime
import uuid import uuid
from tools import check_not_nan, RSLNamer from tools import check_not_nan, RSLNamer, massage_common_reagents
logger = logging.getLogger(f"submissions.{__name__}") logger = logging.getLogger(f"submissions.{__name__}")
@@ -21,20 +22,22 @@ class SheetParser(object):
""" """
object to pull and contain data from excel file object to pull and contain data from excel file
""" """
def __init__(self, filepath:Path|None = None, **kwargs): def __init__(self, ctx:dict, filepath:Path|None = None):
""" """
Args: Args:
filepath (Path | None, optional): file path to excel sheet. Defaults to None. filepath (Path | None, optional): file path to excel sheet. Defaults to None.
""" """
self.ctx = ctx
logger.debug(f"Parsing {filepath.__str__()}") logger.debug(f"Parsing {filepath.__str__()}")
# set attributes based on kwargs from gui ctx # set attributes based on kwargs from gui ctx
for kwarg in kwargs: # for kwarg in kwargs:
setattr(self, f"_{kwarg}", kwargs[kwarg]) # setattr(self, f"_{kwarg}", kwargs[kwarg])
# self.__dict__.update(kwargs) # self.__dict__.update(kwargs)
if filepath == None: if filepath == None:
logger.error(f"No filepath given.") logger.error(f"No filepath given.")
self.xl = None self.xl = None
else: else:
self.filepath = filepath
try: try:
self.xl = pd.ExcelFile(filepath.__str__()) self.xl = pd.ExcelFile(filepath.__str__())
except ValueError as e: except ValueError as e:
@@ -55,8 +58,8 @@ class SheetParser(object):
str: submission type name str: submission type name
""" """
try: try:
for type in self._submission_types: for type in self.ctx['submission_types']:
if self.xl.sheet_names == self._submission_types[type]['excel_map']: if self.xl.sheet_names == self.ctx['submission_types'][type]['excel_map']:
return type.title() return type.title()
return "Unknown" return "Unknown"
except Exception as e: except Exception as e:
@@ -74,7 +77,7 @@ class SheetParser(object):
def parse_generic(self, sheet_name:str) -> pd.DataFrame: def parse_generic(self, sheet_name:str) -> pd.DataFrame:
""" """
Pulls information common to all submission types and passes on dataframe Pulls information common to all wastewater/bacterial culture types and passes on dataframe
Args: Args:
sheet_name (str): name of excel worksheet to pull from sheet_name (str): name of excel worksheet to pull from
@@ -107,8 +110,6 @@ class SheetParser(object):
""" """
for ii, row in df.iterrows(): for ii, row in df.iterrows():
# skip positive control # skip positive control
# if ii == 12:
# continue
logger.debug(f"Running reagent parse for {row[1]} with type {type(row[1])} and value: {row[2]} with type {type(row[2])}") logger.debug(f"Running reagent parse for {row[1]} with type {type(row[1])} and value: {row[2]} with type {type(row[2])}")
if not isinstance(row[2], float) and check_not_nan(row[1]): if not isinstance(row[2], float) and check_not_nan(row[1]):
# must be prefixed with 'lot_' to be recognized by gui # must be prefixed with 'lot_' to be recognized by gui
@@ -156,7 +157,7 @@ class SheetParser(object):
logger.debug(reagent_range) logger.debug(reagent_range)
parse_reagents(reagent_range) parse_reagents(reagent_range)
# get individual sample info # get individual sample info
sample_parser = SampleParser(submission_info.iloc[16:112]) sample_parser = SampleParser(self.ctx, submission_info.iloc[16:112])
sample_parse = getattr(sample_parser, f"parse_{self.sub['submission_type'].lower()}_samples") sample_parse = getattr(sample_parser, f"parse_{self.sub['submission_type'].lower()}_samples")
logger.debug(f"Parser result: {self.sub}") logger.debug(f"Parser result: {self.sub}")
self.sub['samples'] = sample_parse() self.sub['samples'] = sample_parse()
@@ -181,6 +182,7 @@ class SheetParser(object):
# regex below will remove 80% from 80% ethanol in the Wastewater kit. # regex below will remove 80% from 80% ethanol in the Wastewater kit.
output_key = re.sub(r"^\d{1,3}%\s?", "", row[0].lower().strip().replace(' ', '_')) output_key = re.sub(r"^\d{1,3}%\s?", "", row[0].lower().strip().replace(' ', '_'))
output_key = output_key.strip("_") output_key = output_key.strip("_")
# output_var is the lot number
try: try:
output_var = row[5].upper() output_var = row[5].upper()
except AttributeError: except AttributeError:
@@ -214,24 +216,97 @@ class SheetParser(object):
parse_reagents(ext_reagent_range) parse_reagents(ext_reagent_range)
parse_reagents(pcr_reagent_range) parse_reagents(pcr_reagent_range)
# parse samples # parse samples
sample_parser = SampleParser(submission_info.iloc[16:]) sample_parser = SampleParser(self.ctx, submission_info.iloc[16:])
sample_parse = getattr(sample_parser, f"parse_{self.sub['submission_type'].lower()}_samples") sample_parse = getattr(sample_parser, f"parse_{self.sub['submission_type'].lower()}_samples")
self.sub['samples'] = sample_parse() self.sub['samples'] = sample_parse()
self.sub['csv'] = self.xl.parse("Copy to import file", dtype=object) self.sub['csv'] = self.xl.parse("Copy to import file", dtype=object)
def parse_wastewater_artic(self) -> None:
    """
    pulls info specific to wastewater_artic submission type

    Reads the "First Strand" and "ArticV4 Biomek" worksheets, fills self.sub with the
    submission details and reagent lots, then turns the Biomek plate map into sample
    records which are handed to the SampleParser.
    """
    def parse_reagents(df:pd.DataFrame) -> None:
        """
        Adds a 'lot_<reagent>' entry to self.sub for every named row of a reagent table

        Args:
            df (pd.DataFrame): rows holding reagent name, lot number and expiry date in that order
        """
        logger.debug(df)
        for _, row in df.iterrows():
            if not check_not_nan(row[0]):
                continue
            try:
                # strip any bracketed note out of the reagent name
                output_key = re.sub(r"\(.+?\)", "", row[0].lower().strip().replace(' ', '_'))
            except AttributeError:
                # name cell is not a string, so this is not a reagent row
                continue
            output_key = output_key.strip("_")
            output_key = massage_common_reagents(output_key)
            # output_var is the lot number
            try:
                output_var = row[1].upper()
            except AttributeError:
                logger.debug(f"Couldn't upperize {row[1]}, must be a number")
                output_var = row[1]
            logger.debug(f"Output variable is {output_var}")
            logger.debug(f"Expiry date for imported reagent: {row[2]}")
            if check_not_nan(row[2]):
                try:
                    expiry = row[2].date()
                except AttributeError:
                    try:
                        expiry = datetime.strptime(row[2], "%Y-%m-%d")
                    except TypeError:
                        # not a string either; handled as a day count offset from 1900-01-01
                        expiry = datetime.fromordinal(datetime(1900, 1, 1).toordinal() + row[2] - 2)
                    except ValueError:
                        # string that is not in YYYY-MM-DD form; skip this reagent
                        continue
            else:
                logger.debug(f"Date: {row[2]}")
                expiry = date.today()
            self.sub[f"lot_{output_key}"] = {'lot':output_var, 'exp':expiry}

    def massage_samples(df:pd.DataFrame) -> list[dict]:
        """
        Flattens the plate map grid into one record per occupied well

        Args:
            df (pd.DataFrame): plate map; first column holds row labels, first row holds column numbers

        Returns:
            list[dict]: records with keys sample_name, well and artic_plate
        """
        # Re-index a new frame rather than mutating the slice handed in by the caller
        df = df.set_index(df.columns[0])
        df.columns = df.iloc[0]
        logger.debug(f"df to massage\n: {df}")
        return_list = []
        # first row is the header that was just promoted to column labels
        for row_label in df.index[1:]:
            for c in df.columns.to_list():
                logger.debug(f"Checking {row_label}{c}")
                if not check_not_nan(c):
                    # unlabelled column beyond the plate map; int() would fail on it
                    continue
                column = int(c)
                cell = df.loc[row_label, column]
                if check_not_nan(cell) and cell != "EMPTY":
                    return_list.append(dict(
                        # str() so a purely numeric sample id does not break the regex
                        sample_name=re.sub(r"\s?\(.*\)", "", str(cell)),
                        # same integer as used for the lookup, so the well never reads like 'A1.0'
                        well=f"{row_label}{column}",
                        artic_plate=self.sub['rsl_plate_num']))
        logger.debug(f"massaged sample list for {self.sub['rsl_plate_num']}: {return_list}")
        return return_list

    submission_info = self.xl.parse("First Strand", dtype=object)
    biomek_info = self.xl.parse("ArticV4 Biomek", dtype=object)
    sub_reagent_range = submission_info.iloc[56:, 1:4].dropna(how='all')
    biomek_reagent_range = biomek_info.iloc[60:, 0:3].dropna(how='all')
    # no submitter plate number is read from these sheets
    self.sub['submitter_plate_num'] = ""
    # plate name comes from the file name rather than from a cell
    self.sub['rsl_plate_num'] = RSLNamer(self.filepath.__str__()).parsed_name
    self.sub['submitted_date'] = submission_info.iloc[0][2]
    self.sub['submitting_lab'] = "Enterics Wastewater Genomics"
    self.sub['sample_count'] = submission_info.iloc[4][6]
    self.sub['extraction_kit'] = "ArticV4.1"
    self.sub['technician'] = f"MM: {biomek_info.iloc[2][1]}, Bio: {biomek_info.iloc[3][1]}"
    parse_reagents(sub_reagent_range)
    parse_reagents(biomek_reagent_range)
    samples = massage_samples(biomek_info.iloc[22:31, 0:])
    sample_parser = SampleParser(self.ctx, pd.DataFrame.from_records(samples))
    sample_parse = getattr(sample_parser, f"parse_{self.sub['submission_type'].lower()}_samples")
    self.sub['samples'] = sample_parse()
class SampleParser(object): class SampleParser(object):
""" """
object to pull data for samples in excel sheet and construct individual sample objects object to pull data for samples in excel sheet and construct individual sample objects
""" """
def __init__(self, df:pd.DataFrame) -> None: def __init__(self, ctx:dict, df:pd.DataFrame) -> None:
""" """
convert sample sub-dataframe to dictionary of records convert sample sub-dataframe to dictionary of records
Args: Args:
df (pd.DataFrame): input sample dataframe df (pd.DataFrame): input sample dataframe
""" """
self.ctx = ctx
self.samples = df.to_dict("records") self.samples = df.to_dict("records")
@@ -296,6 +371,29 @@ class SampleParser(object):
new_list.append(new) new_list.append(new)
return new_list return new_list
def parse_wastewater_artic_samples(self) -> list[WWSample]:
    """
    The artic samples are the wastewater samples that are to be sequenced,
    so each one is looked up among the existing ww samples and given its Artic well number.
    Samples with no match in the database are logged and left out of the result.

    Returns:
        list[WWSample]: list of wastewater samples to be updated
    """
    new_list = []
    for sample in self.samples:
        sample_name = sample['sample_name']
        # run the lookup query with the session's autoflush switched off
        with self.ctx['database_session'].no_autoflush:
            instance = lookup_ww_sample_by_ww_sample_num(ctx=self.ctx, sample_number=sample_name)
        logger.debug(f"Checking: {sample_name}")
        if instance is None:
            logger.error(f"Unable to find match for: {sample_name}")
            continue
        logger.debug(f"Got instance: {instance.ww_sample_full_id}")
        instance.artic_well_number = sample['well']
        new_list.append(instance)
    return new_list
class PCRParser(object): class PCRParser(object):
""" """

View File

@@ -423,7 +423,7 @@ class SubmissionComment(QDialog):
def add_comment(self): def add_comment(self):
commenter = getuser() commenter = getuser()
comment = self.txt_editor.toPlainText() comment = self.txt_editor.toPlainText()
dt = datetime.strftime(datetime.now(), "%Y-%m-d %H:%M:%S") dt = datetime.strftime(datetime.now(), "%Y-%m-%d %H:%M:%S")
full_comment = {"name":commenter, "time": dt, "text": comment} full_comment = {"name":commenter, "time": dt, "text": comment}
logger.debug(f"Full comment: {full_comment}") logger.debug(f"Full comment: {full_comment}")
sub = lookup_submission_by_rsl_num(ctx = self.ctx, rsl_num=self.rsl) sub = lookup_submission_by_rsl_num(ctx = self.ctx, rsl_num=self.rsl)

View File

@@ -53,7 +53,7 @@ def import_submission_function(obj:QMainWindow) -> Tuple[QMainWindow, dict|None]
return obj, result return obj, result
# create sheetparser using excel sheet and context from gui # create sheetparser using excel sheet and context from gui
try: try:
prsr = SheetParser(fname, **obj.ctx) prsr = SheetParser(ctx=obj.ctx, filepath=fname)
except PermissionError: except PermissionError:
logger.error(f"Couldn't get permission to access file: {fname}") logger.error(f"Couldn't get permission to access file: {fname}")
return return

View File

@@ -94,7 +94,7 @@
{% endfor %}</p> {% endfor %}</p>
{% endif %} {% endif %}
{% if sub['platemap'] %} {% if sub['platemap'] %}
<h3><u>>Plate map:</u></h3> <h3><u>Plate map:</u></h3>
<img height="300px" width="650px" src="data:image/jpeg;base64,{{ sub['platemap'] | safe }}"> <img height="300px" width="650px" src="data:image/jpeg;base64,{{ sub['platemap'] | safe }}">
{% endif %} {% endif %}
</body> </body>

View File

@@ -83,7 +83,10 @@ def check_kit_integrity(sub:BasicSubmission|KitType, reagenttypes:list|None=None
case BasicSubmission(): case BasicSubmission():
ext_kit_rtypes = [reagenttype.name for reagenttype in sub.extraction_kit.reagent_types] ext_kit_rtypes = [reagenttype.name for reagenttype in sub.extraction_kit.reagent_types]
# Overwrite function parameter reagenttypes # Overwrite function parameter reagenttypes
try:
reagenttypes = [reagent.type.name for reagent in sub.reagents] reagenttypes = [reagent.type.name for reagent in sub.reagents]
except AttributeError as e:
logger.error(f"Problem parsing reagents: {[f'{reagent.lot}, {reagent.type}' for reagent in sub.reagents]}")
case KitType(): case KitType():
ext_kit_rtypes = [reagenttype.name for reagenttype in sub.reagent_types] ext_kit_rtypes = [reagenttype.name for reagenttype in sub.reagent_types]
logger.debug(f"Kit reagents: {ext_kit_rtypes}") logger.debug(f"Kit reagents: {ext_kit_rtypes}")
@@ -191,9 +194,12 @@ class RSLNamer(object):
self.submission_type = None self.submission_type = None
return return
logger.debug(f"Attempting match of {in_str}") logger.debug(f"Attempting match of {in_str}")
print(f"The initial plate name is: {in_str}")
regex = re.compile(r""" regex = re.compile(r"""
(?P<wastewater>RSL(?:-|_)?WW(?:-|_)?20\d{2}-?\d{2}-?\d{2}(?:(?:_|-)\d?(?!\d)R?\d(?!\d))?)| # (?P<wastewater>RSL(?:-|_)?WW(?:-|_)?20\d{2}-?\d{2}-?\d{2}(?:(?:_|-)\d?((?!\d)|R)?\d(?!\d))?)|
(?P<bacterial_culture>RSL-?\d{2}-?\d{4}) (?P<wastewater>RSL(?:-|_)?WW(?:-|_)?20\d{2}-?\d{2}-?\d{2}(?:(_|-)\d?(\D|$)R?\d?)?)|
(?P<bacterial_culture>RSL-?\d{2}-?\d{4})|
(?P<wastewater_artic>(\d{4}-\d{2}-\d{2}_(?:\d_)?artic)|(RSL(?:-|_)?AR(?:-|_)?20\d{2}-?\d{2}-?\d{2}(?:(_|-)\d?(\D|$)R?\d?)?))
""", flags = re.IGNORECASE | re.VERBOSE) """, flags = re.IGNORECASE | re.VERBOSE)
m = regex.search(in_str) m = regex.search(in_str)
try: try:
@@ -212,6 +218,25 @@ class RSLNamer(object):
self.parsed_name = self.parsed_name.replace("RSLWW", "RSL-WW") self.parsed_name = self.parsed_name.replace("RSLWW", "RSL-WW")
self.parsed_name = re.sub(r"WW(\d{4})", r"WW-\1", self.parsed_name, flags=re.IGNORECASE) self.parsed_name = re.sub(r"WW(\d{4})", r"WW-\1", self.parsed_name, flags=re.IGNORECASE)
self.parsed_name = re.sub(r"(\d{4})-(\d{2})-(\d{2})", r"\1\2\3", self.parsed_name) self.parsed_name = re.sub(r"(\d{4})-(\d{2})-(\d{2})", r"\1\2\3", self.parsed_name)
print(f"Coming out of the preliminary parsing, the plate name is {self.parsed_name}")
try:
plate_number = re.search(r"(?:(-|_)\d)(?!\d)", self.parsed_name).group().strip("_").strip("-")
print(f"Plate number is: {plate_number}")
except AttributeError as e:
plate_number = "1"
# self.parsed_name = re.sub(r"(\d{8})(-|_\d)?(R\d)?", fr"\1-{plate_number}\3", self.parsed_name)
self.parsed_name = re.sub(r"(\d{8})(-|_)?\d?(R\d?)?", rf"\1-{plate_number}\3", self.parsed_name)
print(f"After addition of plate number the plate name is: {self.parsed_name}")
try:
repeat = re.search(r"-\dR(?P<repeat>\d)?", self.parsed_name).groupdict()['repeat']
if repeat == None:
repeat = "1"
except AttributeError as e:
repeat = ""
self.parsed_name = re.sub(r"(-\dR)\d?", rf"\1 {repeat}", self.parsed_name).replace(" ", "")
def enforce_bacterial_culture(self): def enforce_bacterial_culture(self):
""" """
@@ -220,3 +245,19 @@ class RSLNamer(object):
self.parsed_name = re.sub(r"RSL(\d{2})", r"RSL-\1", self.parsed_name, flags=re.IGNORECASE) self.parsed_name = re.sub(r"RSL(\d{2})", r"RSL-\1", self.parsed_name, flags=re.IGNORECASE)
self.parsed_name = re.sub(r"RSL-(\d{2})(\d{4})", r"RSL-\1-\2", self.parsed_name, flags=re.IGNORECASE) self.parsed_name = re.sub(r"RSL-(\d{2})(\d{4})", r"RSL-\1-\2", self.parsed_name, flags=re.IGNORECASE)
def enforce_wastewater_artic(self):
    """
    Uses regex to enforce proper formatting of wastewater artic plate names,
    i.e. RSL-AR-<YYYYMMDD>-<plate number>, and stores the result in self.parsed_name

    The plate number is taken from a single digit between underscores (e.g. the
    2 in 2023-06-05_2_ARTIC) and falls back to 1 when none is present.
    """
    # Collapse the dashed date and add the prefix. An existing RSL-AR prefix is
    # consumed by the same match so that it is replaced rather than duplicated.
    name = re.sub(r"(?:RSL[-_]?AR[-_]?)?(\d{4})-(\d{2})-(\d{2})", r"RSL-AR-\1\2\3", self.parsed_name, flags=re.IGNORECASE)
    # Require an actual digit between the underscores; an empty '__' has no plate number.
    plate_match = re.search(r"_(\d)_", name)
    plate_number = int(plate_match.group(1)) if plate_match else 1
    # The ARTIC suffix is matched case-insensitively, like the date/prefix above.
    self.parsed_name = re.sub(r"(_\d)?_ARTIC", f"-{plate_number}", name, flags=re.IGNORECASE)
def massage_common_reagents(reagent_name:str):
    """
    Maps common reagent name variants onto one shared name

    Any name ending in "water" or containing "H2O" becomes "molecular_grade_water";
    the micro sign is replaced with a plain "u" in whatever name is returned.

    Args:
        reagent_name (str): reagent name as parsed from a sheet or kit definition

    Returns:
        str: normalised reagent name
    """
    logger.debug(f"Attempting to massage {reagent_name}")
    # NOTE(review): both checks are case sensitive, so a lowercased "h2o" is not treated as water - confirm intended.
    is_water = "H2O" in reagent_name or reagent_name.endswith("water")
    normalised = "molecular_grade_water" if is_water else reagent_name
    return normalised.replace("µ", "u")