Missing sample message after Artic parsing.

This commit is contained in:
Landon Wark
2023-06-16 13:58:28 -05:00
parent a7132cd1b4
commit 0bdcad0eee
11 changed files with 199 additions and 45 deletions

View File

@@ -31,6 +31,7 @@ def set_sqlite_pragma(dbapi_connection, connection_record):
cursor.execute("PRAGMA foreign_keys=ON")
cursor.close()
def store_submission(ctx:dict, base_submission:models.BasicSubmission) -> None|dict:
"""
Upserts submissions into database
@@ -799,9 +800,21 @@ def lookup_discounts_by_org_and_kit(ctx:dict, kit_id:int, lab_id:int):
)).all()
def hitpick_plate(submission:models.BasicSubmission, plate_number:int=0) -> list:
"""
Creates a list of sample positions and statuses to be used by plate mapping and csv output to biomek software.
Args:
submission (models.BasicSubmission): Input submission
plate_number (int, optional): plate position in the series of selected plates. Defaults to 0.
Returns:
list: list of sample dictionaries.
"""
plate_dicto = []
for sample in submission.samples:
# have sample report back its info if it's positive, otherwise, None
method_list = [func for func in dir(sample) if callable(getattr(sample, func))]
logger.debug(f"Method list of sample: {method_list}")
samp = sample.to_hitpick()
if samp == None:
continue
@@ -811,6 +824,43 @@ def hitpick_plate(submission:models.BasicSubmission, plate_number:int=0) -> list
# if len(dicto) < 88:
this_sample = dict(
plate_number = plate_number,
sample_name = samp['name'],
column = samp['col'],
row = samp['row'],
positive = samp['positive'],
plate_name = submission.rsl_plate_num
)
# append to plate samples
plate_dicto.append(this_sample)
# append to all samples
# image = make_plate_map(plate_dicto)
return plate_dicto
def platemap_plate(submission:models.BasicSubmission) -> list:
"""
Deprecated. Replaced by new functionality in hitpick_plate
Args:
submission (models.BasicSubmission): Input submission
Returns:
list: list of sample dictionaries
"""
plate_dicto = []
for sample in submission.samples:
# have sample report back its info if it's positive, otherwise, None
try:
samp = sample.to_platemap()
except AttributeError:
continue
if samp == None:
continue
else:
logger.debug(f"Item name: {samp['name']}")
# plate can handle 88 samples to leave column for controls
# if len(dicto) < 88:
this_sample = dict(
sample_name = samp['name'],
column = samp['col'],
row = samp['row'],

View File

@@ -62,8 +62,10 @@ class WWSample(Base):
# if well_col > 4:
# well
if self.ct_n1 != None and self.ct_n2 != None:
# logger.debug(f"Using well info in name.")
name = f"{self.ww_sample_full_id}\n\t- ct N1: {'{:.2f}'.format(self.ct_n1)} ({self.n1_status})\n\t- ct N2: {'{:.2f}'.format(self.ct_n2)} ({self.n2_status})"
else:
# logger.debug(f"NOT using well info in name for: {self.ww_sample_full_id}")
name = self.ww_sample_full_id
return {
"well": self.well_number,
@@ -85,18 +87,23 @@ class WWSample(Base):
except TypeError as e:
logger.error(f"Couldn't check positives for {self.rsl_number}. Looks like there isn't PCR data.")
return None
if positive:
try:
# The first character of the elution well is the row
well_row = row_dict[self.elution_well[0]]
# The remaining characters are the columns
well_col = self.elution_well[1:]
except TypeError as e:
logger.error(f"This sample doesn't have elution plate info.")
return None
return dict(name=self.ww_sample_full_id, row=well_row, col=well_col)
else:
return None
well_row = row_dict[self.elution_well[0]]
well_col = self.elution_well[1:]
# if positive:
# try:
# # The first character of the elution well is the row
# well_row = row_dict[self.elution_well[0]]
# # The remaining charagers are the columns
# well_col = self.elution_well[1:]
# except TypeError as e:
# logger.error(f"This sample doesn't have elution plate info.")
# return None
return dict(name=self.ww_sample_full_id,
row=well_row,
col=well_col,
positive=positive)
# else:
# return None
class BCSample(Base):
@@ -134,7 +141,24 @@ class BCSample(Base):
"name": f"{self.sample_id} - ({self.organism})",
}
def to_hitpick(self) -> dict|None:
"""
Outputs a dictionary of locations
Returns:
dict: dictionary of sample id, row and column in elution plate
"""
# dictionary to translate row letters into numbers
row_dict = dict(A=1, B=2, C=3, D=4, E=5, F=6, G=7, H=8)
# if either n1 or n2 is positive, include this sample
well_row = row_dict[self.well_number[0]]
# The remaining charagers are the columns
well_col = self.well_number[1:]
return dict(name=self.sample_id,
row=well_row,
col=well_col,
positive=False)
# class ArticSample(Base):
# """
# base of artic sample

View File

@@ -1,6 +1,7 @@
'''
Models for the main submission types.
'''
import math
from . import Base
from sqlalchemy import Column, String, TIMESTAMP, INTEGER, ForeignKey, Table, JSON, FLOAT
from sqlalchemy.orm import relationship
@@ -246,5 +247,24 @@ class WastewaterArtic(BasicSubmission):
derivative submission type for artic wastewater
"""
samples = relationship("WWSample", back_populates="artic_rsl_plate", uselist=True)
# Can in use the pcr_info from the wastewater? Cause I can't define pcr_info here due to conflicts with that
__mapper_args__ = {"polymorphic_identity": "wastewater_artic", "polymorphic_load": "inline"}
# Can it use the pcr_info from the wastewater? Cause I can't define pcr_info here due to conflicts with that
# Not necessary because we don't get any results for this procedure.
__mapper_args__ = {"polymorphic_identity": "wastewater_artic", "polymorphic_load": "inline"}
def calculate_base_cost(self):
    """
    Calculates the cost of this run and stores it in self.run_cost.

    This method overrides parent method due to multiple output plates from a single submission.
    The kit's constant cost is charged once per output plate (one plate per
    16 samples, rounded up); the per-column cost (8 samples per column,
    rounded up) and the per-sample cost are added on top.

    Raises:
        TypeError | ValueError: if self.sample_count can't be converted to an int.
    """
    logger.debug(f"Hello from calculate base cost in WWArtic")
    try:
        sample_count = int(self.sample_count)
    except (TypeError, ValueError) as e:
        logger.error(f"Column count error: {e}")
        # Nothing below can be calculated without a sample count.
        raise
    # math.ceil, not bare ceil: only the math module itself is imported.
    cols_count_96 = math.ceil(sample_count / 8)
    # Since we have multiple output plates per submission form, the constant cost will have to reflect this.
    output_plate_count = math.ceil(sample_count / 16)
    logger.debug(f"Looks like we have {output_plate_count} output plates.")
    const_cost = self.extraction_kit.constant_cost * output_plate_count
    try:
        self.run_cost = const_cost + (self.extraction_kit.mutable_cost_column * cols_count_96) + (self.extraction_kit.mutable_cost_sample * sample_count)
    except Exception as e:
        # Best effort: a kit with missing cost values leaves run_cost untouched.
        logger.error(f"Calculation error: {e}")

View File

@@ -3,6 +3,7 @@ contains parser object for pulling values from client generated submission sheet
'''
from getpass import getuser
import math
import pprint
from typing import Tuple
import pandas as pd
from pathlib import Path
@@ -160,14 +161,19 @@ class SheetParser(object):
sample_parser = SampleParser(self.ctx, submission_info.iloc[16:112])
sample_parse = getattr(sample_parser, f"parse_{self.sub['submission_type'].lower()}_samples")
logger.debug(f"Parser result: {self.sub}")
self.sub['samples'] = sample_parse()
self.sample_result, self.sub['samples'] = sample_parse()
def parse_wastewater(self) -> None:
"""
pulls info specific to wastewater sample type
"""
def retrieve_elution_map():
    """
    Pulls the elution plate layout out of the "Extraction Worksheet" sheet.

    Returns:
        pd.DataFrame: rows 9-17 and columns 5 onward of the sheet, indexed by
        the first of those columns and with column labels taken from the
        first of those rows.
    """
    worksheet = self.xl.parse("Extraction Worksheet")
    plate_grid = worksheet.iloc[9:18, 5:]
    # the first column of the slice becomes the index (presumably the row letters - TODO confirm)
    row_label_column = plate_grid.columns[0]
    plate_grid.set_index(row_label_column, inplace=True)
    # the first row of the slice supplies the column labels
    plate_grid.columns = plate_grid.iloc[0]
    return plate_grid
def parse_reagents(df:pd.DataFrame) -> None:
"""
Pulls reagents from the bacterial sub-dataframe
@@ -216,9 +222,9 @@ class SheetParser(object):
parse_reagents(ext_reagent_range)
parse_reagents(pcr_reagent_range)
# parse samples
sample_parser = SampleParser(self.ctx, submission_info.iloc[16:])
sample_parser = SampleParser(self.ctx, submission_info.iloc[16:], elution_map=retrieve_elution_map())
sample_parse = getattr(sample_parser, f"parse_{self.sub['submission_type'].lower()}_samples")
self.sub['samples'] = sample_parse()
self.sample_result, self.sub['samples'] = sample_parse()
self.sub['csv'] = self.xl.parse("Copy to import file", dtype=object)
@@ -272,7 +278,7 @@ class SheetParser(object):
return_list.append(dict(sample_name=re.sub(r"\s?\(.*\)", "", df.loc[ii.name, int(c)]), \
well=f"{ii.name}{c}",
artic_plate=self.sub['rsl_plate_num']))
logger.debug(f"massaged sample list for {self.sub['rsl_plate_num']}: {return_list}")
logger.debug(f"massaged sample list for {self.sub['rsl_plate_num']}: {pprint.pprint(return_list)}")
return return_list
submission_info = self.xl.parse("First Strand", dtype=object)
biomek_info = self.xl.parse("ArticV4 Biomek", dtype=object)
@@ -280,7 +286,7 @@ class SheetParser(object):
biomek_reagent_range = biomek_info.iloc[60:, 0:3].dropna(how='all')
self.sub['submitter_plate_num'] = ""
self.sub['rsl_plate_num'] = RSLNamer(self.filepath.__str__()).parsed_name
self.sub['submitted_date'] = submission_info.iloc[0][2]
self.sub['submitted_date'] = biomek_info.iloc[1][1]
self.sub['submitting_lab'] = "Enterics Wastewater Genomics"
self.sub['sample_count'] = submission_info.iloc[4][6]
self.sub['extraction_kit'] = "ArticV4.1"
@@ -290,7 +296,7 @@ class SheetParser(object):
samples = massage_samples(biomek_info.iloc[22:31, 0:])
sample_parser = SampleParser(self.ctx, pd.DataFrame.from_records(samples))
sample_parse = getattr(sample_parser, f"parse_{self.sub['submission_type'].lower()}_samples")
self.sub['samples'] = sample_parse()
self.sample_result, self.sub['samples'] = sample_parse()
@@ -299,18 +305,21 @@ class SampleParser(object):
object to pull data for samples in excel sheet and construct individual sample objects
"""
def __init__(self, ctx:dict, df:pd.DataFrame) -> None:
def __init__(self, ctx:dict, df:pd.DataFrame, elution_map:pd.DataFrame|None=None) -> None:
"""
convert sample sub-dataframe to dictionary of records
Args:
ctx (dict): setting passed down from gui
df (pd.DataFrame): input sample dataframe
elution_map (pd.DataFrame | None, optional): optional map of elution plate. Defaults to None.
"""
self.ctx = ctx
self.samples = df.to_dict("records")
self.elution_map = elution_map
def parse_bacterial_culture_samples(self) -> list[BCSample]:
def parse_bacterial_culture_samples(self) -> Tuple[str|None, list[BCSample]]:
"""
construct bacterial culture specific sample objects
@@ -334,16 +343,28 @@ class SampleParser(object):
not_a_nan = True
if not_a_nan:
new_list.append(new)
return new_list
return None, new_list
def parse_wastewater_samples(self) -> list[WWSample]:
def parse_wastewater_samples(self) -> Tuple[str|None, list[WWSample]]:
"""
construct wastewater specific sample objects
Returns:
list[WWSample]: list of sample objects
"""
def search_df_for_sample(sample_rsl:str):
    """
    Finds the elution well holding a sample and marks that well as used.

    The first cell (row by row) of the elution map equal to sample_rsl is
    located and blanked out (set to NaN), so a sample that appears more than
    once resolves to its next well on the following call.

    Args:
        sample_rsl (str): sample number to search for in the elution map

    Returns:
        str|None: row label followed by column label of the matching cell,
        or None if there is no elution map, the sample is not in it, or the
        column label can't be parsed.
    """
    if self.elution_map is None:
        logger.error(f"No elution map supplied, can't find well for sample {sample_rsl}")
        return None
    logger.debug(f"Attempting to find sample {sample_rsl} in \n {self.elution_map}")
    # (row, column) positions of every matching cell, in row-major order
    hits = np.argwhere((self.elution_map == sample_rsl).to_numpy())
    if len(hits) == 0:
        logger.error(f"Sample {sample_rsl} not found in elution map.")
        return None
    row_pos, col_pos = (int(pos) for pos in hits[0])
    row_label = self.elution_map.index[row_pos]
    col_label = self.elution_map.columns[col_pos]
    # positional write: labels in the map may be duplicated or NaN
    self.elution_map.iat[row_pos, col_pos] = np.nan
    try:
        # numeric labels may arrive as floats (1.0), report them as "1"
        col = str(int(col_label))
    except ValueError:
        col = str(col_label)
    except TypeError as e:
        logger.error(f"Problem parsing out column number for {sample_rsl}:\n {e}")
        return None
    return f"{row_label}{col}"
new_list = []
for sample in self.samples:
new = WWSample()
@@ -368,10 +389,11 @@ class SampleParser(object):
# new.site_status = sample['Unnamed: 7']
new.notes = str(sample['Unnamed: 6']) # previously Unnamed: 8
new.well_number = sample['Unnamed: 1']
new.elution_well = search_df_for_sample(new.rsl_number)
new_list.append(new)
return new_list
return None, new_list
def parse_wastewater_artic_samples(self) -> list[WWSample]:
def parse_wastewater_artic_samples(self) -> Tuple[str|None, list[WWSample]]:
    """
    The artic samples are the wastewater samples that are to be sequenced
    So we will need to lookup existing ww samples and append Artic well # and plate relation

    Returns:
        Tuple[str|None, list[WWSample]]: message listing the sample names that
        had no matching wastewater sample (None when every sample was matched),
        and the list of wastewater samples to be updated
    """
    new_list = []
    missed_samples = []
    for sample in self.samples:
        with self.ctx['database_session'].no_autoflush:
            instance = lookup_ww_sample_by_ww_sample_num(ctx=self.ctx, sample_number=sample['sample_name'])
            logger.debug(f"Checking: {sample['sample_name']}")
            if instance is None:
                logger.error(f"Unable to find match for: {sample['sample_name']}")
                missed_samples.append(sample['sample_name'])
                continue
            logger.debug(f"Got instance: {instance.ww_sample_full_id}")
            instance.artic_well_number = sample['well']
            new_list.append(instance)
    # Only report a message when something was actually missed, so callers
    # can treat None as "nothing to tell the user".
    if not missed_samples:
        return None, new_list
    missed_str = "\n\t".join(str(name) for name in missed_samples)
    return f"Could not find matches for the following samples:\n\t {missed_str}", new_list
@@ -472,6 +497,7 @@ class PCRParser(object):
df = self.parse_general(sheet_name="Results")
column_names = ["Well", "Well Position", "Omit","Sample","Target","Task"," Reporter","Quencher","Amp Status","Amp Score","Curve Quality","Result Quality Issues","Cq","Cq Confidence","Cq Mean","Cq SD","Auto Threshold","Threshold", "Auto Baseline", "Baseline Start", "Baseline End"]
self.samples_df = df.iloc[23:][0:]
logger.debug(f"Dataframe of PCR results:\n\t{self.samples_df}")
self.samples_df.columns = column_names
logger.debug(f"Samples columns: {self.samples_df.columns}")
well_call_df = self.xl.parse(sheet_name="Well Call").iloc[24:][0:].iloc[:,-1:]
@@ -488,7 +514,7 @@ class PCRParser(object):
sample_obj = dict(
sample = row['Sample'],
plate_rsl = self.plate_num,
elution_well = row['Well Position']
# elution_well = row['Well Position']
)
logger.debug(f"Got sample obj: {sample_obj}")
# logger.debug(f"row: {row}")