Working new version.

This commit is contained in:
Landon Wark
2023-09-11 13:45:10 -05:00
parent e0b80f6c7a
commit 5a978c9bff
15 changed files with 417 additions and 743 deletions

View File

@@ -6,7 +6,7 @@ import pprint
from typing import List
import pandas as pd
from pathlib import Path
from backend.db import lookup_ww_sample_by_ww_sample_num, lookup_sample_by_submitter_id, get_reagents_in_extkit, lookup_kittype_by_name, lookup_submissiontype_by_name, models
from backend.db import lookup_sample_by_submitter_id, get_reagents_in_extkit, lookup_kittype_by_name, lookup_submissiontype_by_name, models
from backend.pydant import PydSubmission, PydReagent
import logging
from collections import OrderedDict
@@ -14,8 +14,6 @@ import re
import numpy as np
from datetime import date
from dateutil.parser import parse, ParserError
import uuid
# from submissions.backend.db.functions import
from tools import check_not_nan, RSLNamer, convert_nans_to_nones, Settings
from frontend.custom_widgets.pop_ups import SubmissionTypeSelector, KitSelector
@@ -69,7 +67,7 @@ class SheetParser(object):
# Check metadata for category, return first category
if self.xl.book.properties.category != None:
logger.debug("Using file properties to find type...")
categories = [item.strip().title() for item in self.xl.book.properties.category.split(";")]
categories = [item.strip().replace("_", " ").title() for item in self.xl.book.properties.category.split(";")]
return dict(value=categories[0], parsed=False)
else:
# This code is going to be depreciated once there is full adoption of the client sheets
@@ -95,7 +93,13 @@ class SheetParser(object):
"""
_summary_
"""
info = InfoParser(ctx=self.ctx, xl=self.xl, submission_type=self.sub['submission_type']).parse_info()
info = InfoParser(ctx=self.ctx, xl=self.xl, submission_type=self.sub['submission_type']['value']).parse_info()
parser_query = f"parse_{self.sub['submission_type']['value'].replace(' ', '_').lower()}"
try:
custom_parser = getattr(self, parser_query)
info = custom_parser(info)
except AttributeError:
logger.error(f"Couldn't find submission parser: {parser_query}")
for k,v in info.items():
if k != "sample":
self.sub[k] = v
@@ -107,288 +111,41 @@ class SheetParser(object):
def parse_samples(self):
self.sample_result, self.sub['samples'] = SampleParser(ctx=self.ctx, xl=self.xl, submission_type=self.sub['submission_type']['value']).parse_samples()
def parse_bacterial_culture(self) -> None:
"""
pulls info specific to bacterial culture sample type
def parse_bacterial_culture(self, input_dict) -> dict:
"""
Update submission dictionary with type specific information
# def parse_reagents(df:pd.DataFrame) -> None:
# """
# Pulls reagents from the bacterial sub-dataframe
Args:
input_dict (dict): Input sample dictionary
# Args:
# df (pd.DataFrame): input sub dataframe
# """
# for ii, row in df.iterrows():
# # skip positive control
# logger.debug(f"Running reagent parse for {row[1]} with type {type(row[1])} and value: {row[2]} with type {type(row[2])}")
# # if the lot number isn't a float and the reagent type isn't blank
# # if not isinstance(row[2], float) and check_not_nan(row[1]):
# if check_not_nan(row[1]):
# # must be prefixed with 'lot_' to be recognized by gui
# # This is no longer true since reagents are loaded into their own key in dictionary
# try:
# reagent_type = row[1].replace(' ', '_').lower().strip()
# except AttributeError:
# pass
# # If there is a double slash in the type field, such as ethanol/iso
# # Use the cell to the left for reagent type.
# if reagent_type == "//":
# if check_not_nan(row[2]):
# reagent_type = row[0].replace(' ', '_').lower().strip()
# else:
# continue
# try:
# output_var = convert_nans_to_nones(str(row[2]).upper())
# except AttributeError:
# logger.debug(f"Couldn't upperize {row[2]}, must be a number")
# output_var = convert_nans_to_nones(str(row[2]))
# logger.debug(f"Output variable is {output_var}")
# logger.debug(f"Expiry date for imported reagent: {row[3]}")
# if check_not_nan(row[3]):
# try:
# expiry = row[3].date()
# except AttributeError as e:
# try:
# expiry = datetime.strptime(row[3], "%Y-%m-%d")
# except TypeError as e:
# expiry = datetime.fromordinal(datetime(1900, 1, 1).toordinal() + row[3] - 2)
# else:
# logger.debug(f"Date: {row[3]}")
# # expiry = date.today()
# expiry = date(year=1970, month=1, day=1)
# # self.sub[f"lot_{reagent_type}"] = {'lot':output_var, 'exp':expiry}
# # self.sub['reagents'].append(dict(type=reagent_type, lot=output_var, exp=expiry))
# self.sub['reagents'].append(PydReagent(type=reagent_type, lot=output_var, exp=expiry))
# submission_info = self.xl.parse(sheet_name="Sample List", dtype=object)
# self.sub['extraction_kit'] = submission_info.iloc[3][3]
# submission_info = self.parse_generic("Sample List")
# # iloc is [row][column] and the first row is set as header row so -2
# self.sub['technician'] = str(submission_info.iloc[11][1])
# # reagents
# # must be prefixed with 'lot_' to be recognized by gui
# # This is no longer true wince the creation of self.sub['reagents']
# self.sub['reagents'] = []
# reagent_range = submission_info.iloc[1:14, 4:8]
# logger.debug(reagent_range)
# parse_reagents(reagent_range)
# get individual sample info
sample_parser = SampleParser(self.ctx, submission_info.iloc[16:112])
logger.debug(f"Sample type: {self.sub['submission_type']}")
if isinstance(self.sub['submission_type'], dict):
getter = self.sub['submission_type']['value']
else:
getter = self.sub['submission_type']
sample_parse = getattr(sample_parser, f"parse_{getter.replace(' ', '_').lower()}_samples")
logger.debug(f"Parser result: {self.sub}")
self.sample_result, self.sub['samples'] = sample_parse()
def parse_wastewater(self) -> None:
"""
pulls info specific to wastewater sample type
Returns:
dict: Updated sample dictionary
"""
def retrieve_elution_map():
full = self.xl.parse("Extraction Worksheet")
elu_map = full.iloc[9:18, 5:]
elu_map.set_index(elu_map.columns[0], inplace=True)
elu_map.columns = elu_map.iloc[0]
elu_map = elu_map.tail(-1)
return elu_map
# def parse_reagents(df:pd.DataFrame) -> None:
# """
# Pulls reagents from the bacterial sub-dataframe
# Args:
# df (pd.DataFrame): input sub dataframe
# """
# # iterate through sub-df rows
# for ii, row in df.iterrows():
# # logger.debug(f"Parsing this row for reagents: {row}")
# if check_not_nan(row[5]):
# # must be prefixed with 'lot_' to be recognized by gui
# # regex below will remove 80% from 80% ethanol in the Wastewater kit.
# output_key = re.sub(r"^\d{1,3}%\s?", "", row[0].lower().strip().replace(' ', '_'))
# output_key = output_key.strip("_")
# # output_var is the lot number
# try:
# output_var = convert_nans_to_nones(str(row[5].upper()))
# except AttributeError:
# logger.debug(f"Couldn't upperize {row[5]}, must be a number")
# output_var = convert_nans_to_nones(str(row[5]))
# if check_not_nan(row[7]):
# try:
# expiry = row[7].date()
# except AttributeError:
# expiry = date.today()
# else:
# expiry = date.today()
# logger.debug(f"Expiry date for {output_key}: {expiry} of type {type(expiry)}")
# # self.sub[f"lot_{output_key}"] = {'lot':output_var, 'exp':expiry}
# # self.sub['reagents'].append(dict(type=output_key, lot=output_var, exp=expiry))
# reagent = PydReagent(type=output_key, lot=output_var, exp=expiry)
# logger.debug(f"Here is the created reagent: {reagent}")
# self.sub['reagents'].append(reagent)
# parse submission sheet
submission_info = self.parse_generic("WW Submissions (ENTER HERE)")
# parse enrichment sheet
enrichment_info = self.xl.parse("Enrichment Worksheet", dtype=object)
# set enrichment reagent range
enr_reagent_range = enrichment_info.iloc[0:4, 9:20]
# parse extraction sheet
extraction_info = self.xl.parse("Extraction Worksheet", dtype=object)
# set extraction reagent range
ext_reagent_range = extraction_info.iloc[0:5, 9:20]
# parse qpcr sheet
qprc_info = self.xl.parse("qPCR Worksheet", dtype=object)
# set qpcr reagent range
pcr_reagent_range = qprc_info.iloc[0:5, 9:20]
# compile technician info from all sheets
if all(map(check_not_nan, [enrichment_info.columns[2], extraction_info.columns[2], qprc_info.columns[2]])):
parsed = True
else:
parsed = False
self.sub['technician'] = dict(value=f"Enr: {enrichment_info.columns[2]}, Ext: {extraction_info.columns[2]}, PCR: {qprc_info.columns[2]}", parsed=parsed)
self.sub['reagents'] = []
# parse_reagents(enr_reagent_range)
# parse_reagents(ext_reagent_range)
# parse_reagents(pcr_reagent_range)
# parse samples
sample_parser = SampleParser(self.ctx, submission_info.iloc[16:], elution_map=retrieve_elution_map())
sample_parse = getattr(sample_parser, f"parse_{self.sub['submission_type']['value'].lower()}_samples")
self.sample_result, self.sub['samples'] = sample_parse()
self.sub['csv'] = self.xl.parse("Copy to import file", dtype=object)
def parse_wastewater_artic(self) -> None:
return input_dict
def parse_wastewater(self, input_dict) -> dict:
"""
pulls info specific to wastewater_arctic submission type
Update submission dictionary with type specific information
Args:
input_dict (dict): Input sample dictionary
Returns:
dict: Updated sample dictionary
"""
return input_dict
def parse_wastewater_artic(self, input_dict:dict) -> dict:
"""
if isinstance(self.sub['submission_type'], str):
self.sub['submission_type'] = dict(value=self.sub['submission_type'], parsed=True)
# def parse_reagents(df:pd.DataFrame):
# logger.debug(df)
# for ii, row in df.iterrows():
# if check_not_nan(row[1]):
# try:
# output_key = re.sub(r"\(.+?\)", "", row[0].lower().strip().replace(' ', '_'))
# except AttributeError:
# continue
# output_key = output_key.strip("_")
# output_key = massage_common_reagents(output_key)
# try:
# output_var = convert_nans_to_nones(str(row[1].upper()))
# except AttributeError:
# logger.debug(f"Couldn't upperize {row[1]}, must be a number")
# output_var = convert_nans_to_nones(str(row[1]))
# logger.debug(f"Output variable is {output_var}")
# logger.debug(f"Expiry date for imported reagent: {row[2]}")
# if check_not_nan(row[2]):
# try:
# expiry = row[2].date()
# except AttributeError as e:
# try:
# expiry = datetime.strptime(row[2], "%Y-%m-%d")
# except TypeError as e:
# expiry = datetime.fromordinal(datetime(1900, 1, 1).toordinal() + row[2] - 2)
# except ValueError as e:
# continue
# else:
# logger.debug(f"Date: {row[2]}")
# expiry = date.today()
# # self.sub['reagents'].append(dict(type=output_key, lot=output_var, exp=expiry))
# self.sub['reagents'].append(PydReagent(type=output_key, lot=output_var, exp=expiry))
# else:
# continue
def massage_samples(df:pd.DataFrame, lookup_table:pd.DataFrame) -> pd.DataFrame:
"""
Takes sample info from Artic sheet format and converts to regular formate
Update submission dictionary with type specific information
Args:
df (pd.DataFrame): Elution plate map
lookup_table (pd.DataFrame): Sample submission form map.
Args:
input_dict (dict): Input sample dictionary
Returns:
pd.DataFrame: _description_
"""
lookup_table.set_index(lookup_table.columns[0], inplace=True)
lookup_table.columns = lookup_table.iloc[0]
logger.debug(f"Massaging samples from {lookup_table}")
df.set_index(df.columns[0], inplace=True)
df.columns = df.iloc[0]
logger.debug(f"df to massage\n: {df}")
return_list = []
for _, ii in df.iloc[1:,1:].iterrows():
for c in df.columns.to_list():
if not check_not_nan(c):
continue
logger.debug(f"Checking {ii.name}{c}")
if check_not_nan(df.loc[ii.name, int(c)]) and df.loc[ii.name, int(c)] != "EMPTY":
sample_name = df.loc[ii.name, int(c)]
row = lookup_table.loc[lookup_table['Sample Name (WW)'] == sample_name]
logger.debug(f"Looking up {row['Sample Name (LIMS)'][-1]}")
try:
return_list.append(dict(submitter_id=re.sub(r"\s?\(.*\)", "", df.loc[ii.name, int(c)]), \
# well=f"{ii.name}{c}",
row = row_keys[ii.name],
column = c,
artic_plate=self.sub['rsl_plate_num'],
sample_name=row['Sample Name (LIMS)'][-1]
))
except TypeError as e:
logger.error(f"Got an int for {c}, skipping.")
continue
logger.debug(f"massaged sample list for {self.sub['rsl_plate_num']}: {pprint.pprint(return_list)}")
return return_list
submission_info = self.xl.parse("First Strand", dtype=object)
biomek_info = self.xl.parse("ArticV4 Biomek", dtype=object)
sub_reagent_range = submission_info.iloc[56:, 1:4].dropna(how='all')
biomek_reagent_range = biomek_info.iloc[60:, 0:3].dropna(how='all')
# submission_info = self.xl.parse("cDNA", dtype=object)
# biomek_info = self.xl.parse("ArticV4_1 Biomek", dtype=object)
# # Reminder that the iloc uses row, column ordering
# # sub_reagent_range = submission_info.iloc[56:, 1:4].dropna(how='all')
# sub_reagent_range = submission_info.iloc[7:15, 5:9].dropna(how='all')
# biomek_reagent_range = biomek_info.iloc[62:, 0:3].dropna(how='all')
self.sub['submitter_plate_num'] = ""
self.sub['rsl_plate_num'] = RSLNamer(ctx=self.ctx, instr=self.filepath.__str__()).parsed_name
self.sub['submitted_date'] = biomek_info.iloc[1][1]
self.sub['submitting_lab'] = "Enterics Wastewater Genomics"
self.sub['sample_count'] = submission_info.iloc[4][6]
# self.sub['sample_count'] = submission_info.iloc[34][6]
self.sub['extraction_kit'] = "ArticV4.1"
self.sub['technician'] = f"MM: {biomek_info.iloc[2][1]}, Bio: {biomek_info.iloc[3][1]}"
self.sub['reagents'] = []
# parse_reagents(sub_reagent_range)
# parse_reagents(biomek_reagent_range)
samples = massage_samples(biomek_info.iloc[22:31, 0:], submission_info.iloc[4:37, 1:5])
# samples = massage_samples(biomek_info.iloc[25:33, 0:])
sample_parser = SampleParser(self.ctx, pd.DataFrame.from_records(samples))
sample_parse = getattr(sample_parser, f"parse_{self.sub['submission_type']['value'].lower()}_samples")
self.sample_result, self.sub['samples'] = sample_parse()
# def parse_reagents(self):
# ext_kit = lookup_kittype_by_name(ctx=self.ctx, name=self.sub['extraction_kit'])
# if ext_kit != None:
# logger.debug(f"Querying extraction kit: {self.sub['submission_type']}")
# reagent_map = ext_kit.construct_xl_map_for_use(use=self.sub['submission_type']['value'])
# logger.debug(f"Reagent map: {pprint.pformat(reagent_map)}")
# else:
# raise AttributeError("No extraction kit found, unable to parse reagents")
# for sheet in self.xl.sheet_names:
# df = self.xl.parse(sheet)
# relevant = {k:v for k,v in reagent_map.items() if sheet in reagent_map[k]['sheet']}
# logger.debug(f"relevant map for {sheet}: {pprint.pformat(relevant)}")
# if relevant == {}:
# continue
# for item in relevant:
# try:
# # role = item
# name = df.iat[relevant[item]['name']['row']-2, relevant[item]['name']['column']-1]
# lot = df.iat[relevant[item]['lot']['row']-2, relevant[item]['lot']['column']-1]
# expiry = df.iat[relevant[item]['expiry']['row']-2, relevant[item]['expiry']['column']-1]
# except (KeyError, IndexError):
# continue
# # self.sub['reagents'].append(dict(name=name, lot=lot, expiry=expiry, role=role))
# self.sub['reagents'].append(PydReagent(type=item, lot=lot, exp=expiry, name=name))
Returns:
dict: Updated sample dictionary
"""
return input_dict
def import_kit_validation_check(self):
@@ -411,9 +168,6 @@ class SheetParser(object):
else:
if isinstance(self.sub['extraction_kit'], str):
self.sub['extraction_kit'] = dict(value=self.sub['extraction_kit'], parsed=False)
# logger.debug(f"Here is the validated parser dictionary:\n\n{pprint.pformat(self.sub)}\n\n")
# return parser_sub
def import_reagent_validation_check(self):
"""
@@ -439,20 +193,16 @@ class InfoParser(object):
def __init__(self, ctx:Settings, xl:pd.ExcelFile, submission_type:str):
self.ctx = ctx
# self.submission_type = submission_type
# self.extraction_kit = extraction_kit
self.map = self.fetch_submission_info_map(submission_type=submission_type)
self.xl = xl
logger.debug(f"Info map for InfoParser: {pprint.pformat(self.map)}")
def fetch_submission_info_map(self, submission_type:dict) -> dict:
if isinstance(submission_type, str):
submission_type = dict(value=submission_type, parsed=False)
logger.debug(f"Looking up submission type: {submission_type['value']}")
submission_type = lookup_submissiontype_by_name(ctx=self.ctx, type_name=submission_type['value'])
info_map = submission_type.info_map
# try:
# del info_map['samples']
# except KeyError:
# pass
return info_map
def parse_info(self) -> dict:
@@ -461,11 +211,13 @@ class InfoParser(object):
df = self.xl.parse(sheet, header=None)
relevant = {}
for k, v in self.map.items():
if isinstance(v, str):
dicto[k] = dict(value=v, parsed=True)
continue
if k == "samples":
continue
if sheet in self.map[k]['sheets']:
relevant[k] = v
# relevant = {k:v for k,v in self.map.items() if sheet in self.map[k]['sheets']}
logger.debug(f"relevant map for {sheet}: {pprint.pformat(relevant)}")
if relevant == {}:
continue
@@ -485,8 +237,6 @@ class InfoParser(object):
continue
else:
dicto[item] = dict(value=convert_nans_to_nones(value), parsed=False)
# if "submitter_plate_num" not in dicto.keys():
# dicto['submitter_plate_num'] = dict(value=None, parsed=False)
return dicto
class ReagentParser(object):
@@ -515,7 +265,6 @@ class ReagentParser(object):
for item in relevant:
logger.debug(f"Attempting to scrape: {item}")
try:
# role = item
name = df.iat[relevant[item]['name']['row']-1, relevant[item]['name']['column']-1]
lot = df.iat[relevant[item]['lot']['row']-1, relevant[item]['lot']['column']-1]
expiry = df.iat[relevant[item]['expiry']['row']-1, relevant[item]['expiry']['column']-1]
@@ -526,7 +275,6 @@ class ReagentParser(object):
parsed = True
else:
parsed = False
# self.sub['reagents'].append(dict(name=name, lot=lot, expiry=expiry, role=role))
logger.debug(f"Got lot for {item}-{name}: {lot} as {type(lot)}")
lot = str(lot)
listo.append(dict(value=PydReagent(type=item.strip(), lot=lot, exp=expiry, name=name), parsed=parsed))
@@ -556,8 +304,9 @@ class SampleParser(object):
self.lookup_table = self.construct_lookup_table(lookup_table_location=sample_info_map['lookup_table'])
self.excel_to_db_map = sample_info_map['xl_db_translation']
self.create_basic_dictionaries_from_plate_map()
self.parse_lookup_table()
if isinstance(self.lookup_table, pd.DataFrame):
self.parse_lookup_table()
def fetch_sample_info_map(self, submission_type:dict) -> dict:
logger.debug(f"Looking up submission type: {submission_type}")
submission_type = lookup_submissiontype_by_name(ctx=self.ctx, type_name=submission_type)
@@ -575,7 +324,10 @@ class SampleParser(object):
return df
def construct_lookup_table(self, lookup_table_location) -> pd.DataFrame:
df = self.xl.parse(lookup_table_location['sheet'], header=None, dtype=object)
try:
df = self.xl.parse(lookup_table_location['sheet'], header=None, dtype=object)
except KeyError:
return None
df = df.iloc[lookup_table_location['start_row']-1:lookup_table_location['end_row']]
df = pd.DataFrame(df.values[1:], columns=df.iloc[0])
df = df.reset_index(drop=True)
@@ -583,12 +335,16 @@ class SampleParser(object):
return df
def create_basic_dictionaries_from_plate_map(self):
invalids = [0, "0"]
new_df = self.plate_map.dropna(axis=1, how='all')
columns = new_df.columns.tolist()
for _, iii in new_df.iterrows():
for c in columns:
# logger.debug(f"Checking sample {iii[c]}")
if check_not_nan(iii[c]):
if iii[c] in invalids:
logger.debug(f"Invalid sample name: {iii[c]}, skipping.")
continue
id = iii[c]
logger.debug(f"Adding sample {iii[c]}")
try:
@@ -600,8 +356,9 @@ class SampleParser(object):
def parse_lookup_table(self):
def determine_if_date(input_str) -> str|date:
# logger.debug(f"Looks like we have a str: {input_str}")
regex = re.compile(r"\d{4}-?\d{2}-?\d{2}")
regex = re.compile(r"^\d{4}-?\d{2}-?\d{2}")
if bool(regex.search(input_str)):
logger.warning(f"{input_str} is a date!")
try:
return parse(input_str)
except ParserError:
@@ -610,6 +367,7 @@ class SampleParser(object):
return input_str
for sample in self.samples:
addition = self.lookup_table[self.lookup_table.isin([sample['submitter_id']]).any(axis=1)].squeeze().to_dict()
logger.debug(f"Lookuptable info: {addition}")
for k,v in addition.items():
# logger.debug(f"Checking {k} in lookup table.")
if check_not_nan(k) and isinstance(k, str):
@@ -645,193 +403,89 @@ class SampleParser(object):
case _:
v = v
try:
translated_dict[self.excel_to_db_map[k]] = v
translated_dict[self.excel_to_db_map[k]] = convert_nans_to_nones(v)
except KeyError:
translated_dict[k] = convert_nans_to_nones(v)
# translated_dict['sample_type'] = f"{self.submission_type.replace(' ', '_').lower()}_sample"
translated_dict['sample_type'] = f"{self.submission_type} Sample"
parser_query = f"parse_{translated_dict['sample_type'].replace(' ', '_').lower()}"
# logger.debug(f"New sample dictionary going into object creation:\n{translated_dict}")
try:
custom_parser = getattr(self, parser_query)
translated_dict = custom_parser(translated_dict)
except AttributeError:
logger.error(f"Couldn't get custom parser: {parser_query}")
new_samples.append(self.generate_sample_object(translated_dict))
return result, new_samples
def generate_sample_object(self, input_dict) -> models.BasicSample:
# query = input_dict['sample_type'].replace('_sample', '').replace("_", " ").title().replace(" ", "")
query = input_dict['sample_type'].replace(" ", "")
database_obj = getattr(models, query)
try:
database_obj = getattr(models, query)
except AttributeError as e:
logger.error(f"Could not find the model {query}. Using generic.")
database_obj = models.BasicSample
logger.debug(f"Searching database for {input_dict['submitter_id']}...")
instance = lookup_sample_by_submitter_id(ctx=self.ctx, submitter_id=input_dict['submitter_id'])
if instance == None:
logger.debug(f"Couldn't find sample {input_dict['submitter_id']}. Creating new sample.")
instance = database_obj()
for k,v in input_dict.items():
try:
setattr(instance, k, v)
# setattr(instance, k, v)
instance.set_attribute(k, v)
except Exception as e:
logger.error(f"Failed to set {k} due to {type(e).__name__}: {e}")
else:
logger.debug(f"Sample already exists, will run update.")
logger.debug(f"Sample {instance.submitter_id} already exists, will run update.")
return dict(sample=instance, row=input_dict['row'], column=input_dict['column'])
# def parse_bacterial_culture_samples(self) -> Tuple[str|None, list[dict]]:
def parse_bacterial_culture_sample(self, input_dict:dict) -> dict:
"""
construct bacterial culture specific sample objects
Update sample dictionary with bacterial culture specific information
Args:
input_dict (dict): Input sample dictionary
Returns:
list[BCSample]: list of sample objects
"""
# logger.debug(f"Samples: {self.samples}")
new_list = []
for sample in self.samples:
logger.debug(f"Well info: {sample['This section to be filled in completely by submittor']}")
instance = lookup_sample_by_submitter_id(ctx=self.ctx, submitter_id=sample['Unnamed: 1'])
if instance == None:
instance = BacterialCultureSample()
well_number = sample['This section to be filled in completely by submittor']
row = row_keys[well_number[0]]
column = int(well_number[1:])
instance.submitter_id = sample['Unnamed: 1']
instance.organism = sample['Unnamed: 2']
instance.concentration = sample['Unnamed: 3']
# logger.debug(f"Sample object: {new.sample_id} = {type(new.sample_id)}")
logger.debug(f"Got sample_id: {instance.submitter_id}")
# need to exclude empties and blanks
if check_not_nan(instance.submitter_id):
new_list.append(dict(sample=instance, row=row, column=column))
return None, new_list
# def parse_wastewater_samples(self) -> Tuple[str|None, list[dict]]:
"""
construct wastewater specific sample objects
Returns:
list[WWSample]: list of sample objects
dict: Updated sample dictionary
"""
def search_df_for_sample(sample_rsl:str):
# logger.debug(f"Attempting to find sample {sample_rsl} in \n {self.elution_map}")
well = self.elution_map.where(self.elution_map==sample_rsl)
# logger.debug(f"Well: {well}")
well = well.dropna(how='all').dropna(axis=1, how="all")
if well.size > 1:
well = well.iloc[0].to_frame().dropna().T
logger.debug(f"well {sample_rsl} post processing: {well.size}: {type(well)}")#, {well.index[0]}, {well.columns[0]}")
try:
self.elution_map.at[well.index[0], well.columns[0]] = np.nan
except IndexError as e:
logger.error(f"Couldn't find the well for {sample_rsl}")
return 0, 0
try:
column = int(well.columns[0])
except TypeError as e:
logger.error(f"Problem parsing out column number for {well}:\n {e}")
row = row_keys[well.index[0]]
return row, column
new_list = []
return_val = None
for sample in self.samples:
logger.debug(f"Sample: {sample}")
instance = lookup_ww_sample_by_ww_sample_num(ctx=self.ctx, sample_number=sample['Unnamed: 3'])
if instance == None:
instance = WastewaterSample()
if check_not_nan(sample["Unnamed: 7"]):
if sample["Unnamed: 7"] != "Fixed" and sample['Unnamed: 7'] != "Flex":
instance.rsl_number = sample['Unnamed: 7'] # previously Unnamed: 9
elif check_not_nan(sample['Unnamed: 9']):
instance.rsl_number = sample['Unnamed: 9'] # previously Unnamed: 9
else:
logger.error(f"No RSL sample number found for this sample.")
continue
else:
logger.error(f"No RSL sample number found for this sample.")
continue
instance.ww_processing_num = sample['Unnamed: 2']
# need to ensure we have a sample id for database integrity
# if we don't have a sample full id, make one up
if check_not_nan(sample['Unnamed: 3']):
logger.debug(f"Sample name: {sample['Unnamed: 3']}")
instance.submitter_id = sample['Unnamed: 3']
else:
instance.submitter_id = uuid.uuid4().hex.upper()
# logger.debug(f"The Submitter sample id is: {instance.submitter_id}")
# need to ensure we get a collection date
if check_not_nan(sample['Unnamed: 5']):
instance.collection_date = sample['Unnamed: 5']
else:
instance.collection_date = date.today()
# new.testing_type = sample['Unnamed: 6']
# new.site_status = sample['Unnamed: 7']
instance.notes = str(sample['Unnamed: 6']) # previously Unnamed: 8
instance.well_24 = sample['Unnamed: 1']
else:
# What to do if the sample already exists
assert isinstance(instance, WastewaterSample)
if instance.rsl_number == None:
if check_not_nan(sample["Unnamed: 7"]):
if sample["Unnamed: 7"] != "Fixed" and sample['Unnamed: 7'] != "Flex":
instance.rsl_number = sample['Unnamed: 7'] # previously Unnamed: 9
elif check_not_nan(sample['Unnamed: 9']):
instance.rsl_number = sample['Unnamed: 9'] # previously Unnamed: 9
else:
logger.error(f"No RSL sample number found for this sample.")
if instance.collection_date == None:
if check_not_nan(sample['Unnamed: 5']):
instance.collection_date = sample['Unnamed: 5']
else:
instance.collection_date = date.today()
if instance.notes == None:
instance.notes = str(sample['Unnamed: 6']) # previously Unnamed: 8
if instance.well_24 == None:
instance.well_24 = sample['Unnamed: 1']
logger.debug(f"Already have that sample, going to add association to this plate.")
row, column = search_df_for_sample(instance.rsl_number)
# if elu_well != None:
# row = elu_well[0]
# col = elu_well[1:].zfill(2)
# # new.well_number = f"{row}{col}"
# else:
# # try:
# return_val += f"{new.rsl_number}\n"
# # except TypeError:
# # return_val = f"{new.rsl_number}\n"
new_list.append(dict(sample=instance, row=row, column=column))
return return_val, new_list
logger.debug("Called bacterial culture sample parser")
return input_dict
def parse_wastewater_sample(self, input_dict:dict) -> dict:
"""
Update sample dictionary with wastewater specific information
Args:
input_dict (dict): Input sample dictionary
Returns:
dict: Updated sample dictionary
"""
logger.debug(f"Called wastewater sample parser")
# def parse_wastewater_artic_samples(self) -> Tuple[str|None, list[WastewaterSample]]:
def parse_wastewater_artic_sample(self, input_dict:dict) -> dict:
"""
The artic samples are the wastewater samples that are to be sequenced
So we will need to lookup existing ww samples and append Artic well # and plate relation
Update sample dictionary with artic specific information
Args:
input_dict (dict): Input sample dictionary
Returns:
list[WWSample]: list of wastewater samples to be updated
dict: Updated sample dictionary
"""
logger.debug("Called wastewater artic sample parser")
input_dict['sample_type'] = "Wastewater Sample"
# Because generate_sample_object needs the submitter_id and the artic has the "({origin well})"
# at the end, this has to be done here. No moving to sqlalchemy object :(
input_dict['submitter_id'] = re.sub(r"\s\(.+\)$", "", str(input_dict['submitter_id'])).strip()
return input_dict
new_list = []
missed_samples = []
for sample in self.samples:
with self.ctx.database_session.no_autoflush:
instance = lookup_ww_sample_by_ww_sample_num(ctx=self.ctx, sample_number=sample['sample_name'])
logger.debug(f"Checking: {sample}")
if instance == None:
logger.error(f"Unable to find match for: {sample['sample_name']}. Making new instance using {sample['submitter_id']}.")
instance = WastewaterSample()
instance.ww_processing_num = sample['sample_name']
instance.submitter_id = sample['submitter_id']
missed_samples.append(sample['sample_name'])
# continue
logger.debug(f"Got instance: {instance.submitter_id}")
# if sample['row'] != None:
# row = int(row_keys[sample['well'][0]])
# if sample['column'] != None:
# column = int(sample['well'][1:])
# sample['well'] = f"{row}{col}"
# instance.artic_well_number = sample['well']
if instance.submitter_id != "NTC1" and instance.submitter_id != "NTC2":
new_list.append(dict(sample=instance, row=sample['row'], column=sample['column']))
missed_str = "\n\t".join(missed_samples)
return f"Could not find matches for the following samples:\n\t {missed_str}", new_list
class PCRParser(object):
"""
Object to pull data from Design and Analysis PCR export file.
TODO: Generify this object.
"""
def __init__(self, ctx:dict, filepath:Path|None = None) -> None:
"""