Pydantic added for validation.

Landon Wark
2023-07-07 14:27:26 -05:00
parent 0c81c74f70
commit 1c804bfc6a
12 changed files with 497 additions and 141 deletions
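The backend.pydant module that supplies PydSubmission is not among the files shown below. A minimal sketch of what such a model might look like, assuming pydantic v1, with field names inferred from the SheetParser.sub keys used in this commit; the technician clean-up mirrors the logic this commit removes from the parser (this is an illustration, not the actual contents of backend/pydant.py):

    # hypothetical sketch only -- not the real backend/pydant.py
    import re
    from datetime import date
    from pathlib import Path
    from typing import List
    from pydantic import BaseModel, validator

    class PydReagent(BaseModel):
        type: str
        lot: str
        exp: date

    class PydSubmission(BaseModel):
        filepath: Path
        technician: str
        reagents: List[PydReagent] = []

        class Config:
            # keep keys the model does not declare (e.g. 'samples', 'sample_count')
            extra = "allow"

        @validator("technician")
        def clean_technician(cls, value):
            # pandas renders an empty technician cell as the string "nan"
            if value == "nan":
                return "Unknown"
            if len(value.split(",")) > 1:
                # keep only two-letter initials, e.g. "AB, CD"
                return ", ".join(re.findall(r"[A-Z]{2}", value))
            return value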

View File

@@ -8,6 +8,7 @@ import pandas as pd
from pathlib import Path
from backend.db.models import WWSample, BCSample
from backend.db import lookup_ww_sample_by_ww_sample_num
from backend.pydant import PydSubmission
import logging
from collections import OrderedDict
import re
@@ -149,19 +150,22 @@ class SheetParser(object):
else:
logger.debug(f"Date: {row[3]}")
expiry = date.today()
self.sub[f"lot_{reagent_type}"] = {'lot':output_var, 'exp':expiry}
# self.sub[f"lot_{reagent_type}"] = {'lot':output_var, 'exp':expiry}
self.sub['reagents'].append(dict(type=reagent_type, lot=output_var, exp=expiry))
submission_info = self.parse_generic("Sample List")
# iloc is [row][column] and the first row is set as header row so -2
tech = str(submission_info.iloc[11][1])
if tech == "nan":
tech = "Unknown"
elif len(tech.split(",")) > 1:
tech_reg = re.compile(r"[A-Z]{2}")
tech = ", ".join(tech_reg.findall(tech))
# moved to pydantic model
# if tech == "nan":
# tech = "Unknown"
# elif len(tech.split(",")) > 1:
# tech_reg = re.compile(r"[A-Z]{2}")
# tech = ", ".join(tech_reg.findall(tech))
self.sub['technician'] = tech
# reagents
# must be prefixed with 'lot_' to be recognized by gui
# Todo: find a more adaptable way to read reagents.
# TODO: find a more adaptable way to read reagents.
self.sub['reagents'] = []
reagent_range = submission_info.iloc[1:14, 4:8]
logger.debug(reagent_range)
parse_reagents(reagent_range)
@@ -210,7 +214,8 @@ class SheetParser(object):
expiry = date.today()
else:
expiry = date.today()
self.sub[f"lot_{output_key}"] = {'lot':output_var, 'exp':expiry}
# self.sub[f"lot_{output_key}"] = {'lot':output_var, 'exp':expiry}
self.sub['reagents'].append(dict(type=output_key, lot=output_var, exp=expiry))
# parse submission sheet
submission_info = self.parse_generic("WW Submissions (ENTER HERE)")
# parse enrichment sheet
@@ -227,6 +232,7 @@ class SheetParser(object):
pcr_reagent_range = qprc_info.iloc[0:5, 9:20]
# compile technician info
self.sub['technician'] = f"Enr: {enrichment_info.columns[2]}, Ext: {extraction_info.columns[2]}, PCR: {qprc_info.columns[2]}"
self.sub['reagents'] = []
parse_reagents(enr_reagent_range)
parse_reagents(ext_reagent_range)
parse_reagents(pcr_reagent_range)
@@ -271,7 +277,7 @@ class SheetParser(object):
else:
logger.debug(f"Date: {row[2]}")
expiry = date.today()
self.sub[f"lot_{output_key}"] = {'lot':output_var, 'exp':expiry}
self.sub['reagents'].append(dict(type=output_key, lot=output_var, exp=expiry))
else:
continue
def massage_samples(df:pd.DataFrame) -> pd.DataFrame:
@@ -303,6 +309,7 @@ class SheetParser(object):
self.sub['sample_count'] = submission_info.iloc[4][6]
self.sub['extraction_kit'] = "ArticV4.1"
self.sub['technician'] = f"MM: {biomek_info.iloc[2][1]}, Bio: {biomek_info.iloc[3][1]}"
self.sub['reagents'] = []
parse_reagents(sub_reagent_range)
parse_reagents(biomek_reagent_range)
samples = massage_samples(biomek_info.iloc[22:31, 0:])
@@ -311,6 +318,18 @@ class SheetParser(object):
self.sample_result, self.sub['samples'] = sample_parse()
def to_pydantic(self) -> PydSubmission:
"""
Generates a pydantic model of scraped data for validation
Returns:
PydSubmission: output pydantic model
"""
psm = PydSubmission(filepath=self.filepath, **self.sub)
delattr(psm, "filepath")
return psm
class SampleParser(object):
"""
@@ -366,7 +385,7 @@ class SampleParser(object):
list[WWSample]: list of sample objects
"""
def search_df_for_sample(sample_rsl:str):
logger.debug(f"Attempting to find sample {sample_rsl} in \n {self.elution_map}")
# logger.debug(f"Attempting to find sample {sample_rsl} in \n {self.elution_map}")
well = self.elution_map.where(self.elution_map==sample_rsl)
# logger.debug(f"Well: {well}")
well = well.dropna(how='all').dropna(axis=1, how="all")

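Taken together, the parser changes in this file replace the per-reagent "lot_<type>" keys with a single "reagents" list, which is what PydSubmission receives from to_pydantic. A small illustration of the shape change, with made-up reagent values:

    from datetime import date

    sub = {"reagents": []}
    # before this commit: one dict per reagent, keyed with a 'lot_' prefix for the GUI
    # sub["lot_wash"] = {"lot": "ABC123", "exp": date(2023, 7, 7)}
    # after this commit: a single list with one entry per reagent
    sub["reagents"].append(dict(type="wash", lot="ABC123", exp=date(2023, 7, 7)))
    print(sub["reagents"])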
View File

@@ -9,6 +9,7 @@ import sys
from pathlib import Path
import re
from tools import check_if_app
from typing import Tuple
logger = logging.getLogger(f"submissions.{__name__}")
@@ -154,23 +155,61 @@ def displace_date(df:DataFrame) -> DataFrame:
# get submitted dates for each control
dict_list = [dict(name=item, date=df[df.name == item].iloc[0]['submitted_date']) for item in sorted(df['name'].unique())]
previous_dates = []
for ii, item in enumerate(dict_list):
try:
# check = item['date'] == dict_list[ii-1]['date']
check = item['date'] in previous_dates
except IndexError:
check = False
if check:
# occurences = previous_dates.count(item['date'])
logger.debug(f"We found one! Increment date!\n\t{item['date'] - timedelta(days=1)}")
# get df locations where name == item name
mask = df['name'] == item['name']
# increment date in dataframe
df.loc[mask, 'submitted_date'] = df.loc[mask, 'submitted_date'].apply(lambda x: x + timedelta(days=1))
previous_dates.append(item['date'] + timedelta(days=1))
else:
previous_dates.append(item['date'])
for _, item in enumerate(dict_list):
# try:
# # check = item['date'] == dict_list[ii-1]['date']
# check = item['date'] in previous_dates
# except IndexError:
# check = False
# if check:
# # occurences = previous_dates.count(item['date'])
# logger.debug(f"We found one! Increment date!\n\t{item['date']} to {item['date'] + timedelta(days=1)}")
# # get df locations where name == item name
# mask = df['name'] == item['name']
# # increment date in dataframe
# df.loc[mask, 'submitted_date'] = df.loc[mask, 'submitted_date'].apply(lambda x: x + timedelta(days=1))
# outdate = item['date'] + timedelta(days=1)
# # previous_dates.append(item['date'] + timedelta(days=1))
# else:
# outdate = item['date']
# previous_dates.append(outdate)
# logger.debug(f"\n\tCurrent date: {outdate}\n\tPrevious dates:{previous_dates}")
# logger.debug(type(item))
df, previous_dates = check_date(df=df, item=item, previous_dates=previous_dates)
return df
def check_date(df:DataFrame, item:dict, previous_dates:list) -> Tuple[DataFrame, list]:
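"""
Checks whether an item's date collides with one already in previous_dates;
if so, pushes that control's submitted_date forward one day and re-checks.
Returns:
Tuple[DataFrame, list]: adjusted dataframe and updated list of used dates
"""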
try:
# check = item['date'] == dict_list[ii-1]['date']
check = item['date'] in previous_dates
except IndexError:
check = False
previous_dates.append(item['date'])
if check:
# occurences = previous_dates.count(item['date'])
logger.debug(f"We found one! Increment date!\n\t{item['date']} to {item['date'] + timedelta(days=1)}")
# get df locations where name == item name
mask = df['name'] == item['name']
# increment date in dataframe
df.loc[mask, 'submitted_date'] = df.loc[mask, 'submitted_date'].apply(lambda x: x + timedelta(days=1))
item['date'] += timedelta(days=1)
# previous_dates.append(item['date'] + timedelta(days=1))
passed = False
else:
passed = True
logger.debug(f"\n\tCurrent date: {item['date']}\n\tPrevious dates:{previous_dates}")
logger.debug(f"DF: {type(df)}, previous_dates: {type(previous_dates)}")
# if run didn't lead to changed date, return values
if passed:
logger.debug(f"Date check passed, returning.")
return df, previous_dates
# if date was changed, rerun with new date
else:
logger.warning(f"Date check failed, running recursion")
df, previous_dates = check_date(df, item, previous_dates)
return df, previous_dates
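The same displacement idea, stripped of the dataframe bookkeeping, in a standalone sketch; check_date recurses, while the sketch below uses a loop for brevity, and the function and variable names here are illustrative rather than part of the project:

    from datetime import date, timedelta
    from typing import List

    def push_until_unique(d: date, taken: List[date]) -> date:
        # keep shifting the date forward a day until it no longer collides
        while d in taken:
            d += timedelta(days=1)
        taken.append(d)
        return d

    submitted = [date(2023, 7, 7), date(2023, 7, 7), date(2023, 7, 8)]
    used: List[date] = []
    resolved = [push_until_unique(d, used) for d in submitted]
    print([d.isoformat() for d in resolved])
    # ['2023-07-07', '2023-07-08', '2023-07-09']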
def get_unique_values_in_df_column(df: DataFrame, column_name: str) -> list: