Pydantic added for validation.

Landon Wark
2023-07-07 14:27:26 -05:00
parent 0c81c74f70
commit 1c804bfc6a
12 changed files with 497 additions and 141 deletions
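As a quick orientation before the per-file diffs, here is a minimal stand-in sketch of the validation pattern this commit introduces: scraped spreadsheet values are normalized by pydantic validators instead of ad-hoc checks inside the parser. The SketchSubmission model and the field values below are hypothetical illustrations, not the actual backend.pydant.PydSubmission added in this commit (which also carries the filepath, dates, samples, etc.).

import uuid
from typing import List
from pydantic import BaseModel, validator  # pydantic v1 style, as used in this commit

class SketchSubmission(BaseModel):
    submitter_plate_num: str | None
    technician: str
    reagents: List[dict]

    @validator("submitter_plate_num")
    @classmethod
    def enforce_with_uuid(cls, value):
        # blank or missing plate numbers get a generated placeholder
        if value in (None, "", "None"):
            return uuid.uuid4().hex.upper()
        return value

    @validator("technician")
    @classmethod
    def enforce_tech(cls, value):
        # spreadsheet blanks arrive as the string "nan"
        return "Unknown" if value in ("nan", "None") else value

    @validator("reagents")
    @classmethod
    def remove_atcc(cls, value):
        # drop control reagents that should not be attached to the kit
        return [r for r in value if r["type"] != "atcc"]

sub = SketchSubmission(
    submitter_plate_num="",
    technician="nan",
    reagents=[{"type": "atcc", "lot": "0001"}, {"type": "lysis_buffer", "lot": "0002"}],
)
print(sub.technician)  # Unknown
print(sub.reagents)    # [{'type': 'lysis_buffer', 'lot': '0002'}]

The validator-per-field layout mirrors what the new backend.pydant module below does for the full submission.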

View File

@@ -233,19 +233,19 @@ def construct_reagent(ctx:dict, info_dict:dict) -> models.Reagent:
# pass
return reagent
def lookup_reagent(ctx:dict, reagent_lot:str) -> models.Reagent:
"""
Query db for reagent based on lot number
# def lookup_reagent(ctx:dict, reagent_lot:str) -> models.Reagent:
# """
# Query db for reagent based on lot number
Args:
ctx (dict): settings passed down from gui
reagent_lot (str): lot number to query
# Args:
# ctx (dict): settings passed down from gui
# reagent_lot (str): lot number to query
Returns:
models.Reagent: looked up reagent
"""
lookedup = ctx['database_session'].query(models.Reagent).filter(models.Reagent.lot==reagent_lot).first()
return lookedup
# Returns:
# models.Reagent: looked up reagent
# """
# lookedup = ctx['database_session'].query(models.Reagent).filter(models.Reagent.lot==reagent_lot).first()
# return lookedup
def get_all_reagenttype_names(ctx:dict) -> list[str]:
"""
@@ -501,7 +501,7 @@ def create_kit_from_yaml(ctx:dict, exp:dict) -> dict:
r = massage_common_reagents(r)
look_up = ctx['database_session'].query(models.ReagentType).filter(models.ReagentType.name==r).first()
if look_up == None:
rt = models.ReagentType(name=r.replace(" ", "_").lower(), eol_ext=timedelta(30*exp[type]['kits'][kt]['reagenttypes'][r]['eol_ext']), kits=[kit])
rt = models.ReagentType(name=r.replace(" ", "_").lower(), eol_ext=timedelta(30*exp[type]['kits'][kt]['reagenttypes'][r]['eol_ext']), kits=[kit], required=1)
else:
rt = look_up
rt.kits.append(kit)
@@ -609,7 +609,7 @@ def get_all_controls_by_type(ctx:dict, con_type:str, start_date:date|None=None,
output = ctx['database_session'].query(models.Control).join(models.ControlType).filter_by(name=con_type).filter(models.Control.submitted_date.between(start_date, end_date)).all()
else:
output = ctx['database_session'].query(models.Control).join(models.ControlType).filter_by(name=con_type).all()
logger.debug(f"Returned controls between dates: {output}")
logger.debug(f"Returned controls between dates: {[item.submitted_date for item in output]}")
return output
def get_control_subtypes(ctx:dict, type:str, mode:str) -> list[str]:
@@ -870,4 +870,21 @@ def platemap_plate(submission:models.BasicSubmission) -> list:
plate_dicto.append(this_sample)
# append to all samples
# image = make_plate_map(plate_dicto)
return plate_dicto
return plate_dicto
def lookup_reagent(ctx:dict, reagent_lot:str|None=None, type_name:str|None=None) -> models.Reagent|list[models.Reagent]:
"""
Query db for reagent based on lot number and/or reagent type name
Args:
ctx (dict): settings passed down from gui
reagent_lot (str | None, optional): lot number to query. Defaults to None.
type_name (str | None, optional): reagent type name to query. Defaults to None.
Returns:
models.Reagent | list[models.Reagent]: single reagent for a lot-only lookup, list of matches when a type name is also given
"""
if reagent_lot != None and type_name != None:
return ctx['database_session'].query(models.Reagent).join(models.Reagent.type, aliased=True).filter(models.ReagentType.name==type_name).filter(models.Reagent.lot==reagent_lot).all()
elif type_name == None:
return ctx['database_session'].query(models.Reagent).filter(models.Reagent.lot==reagent_lot).first()
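A hedged usage note on the new signature: with both arguments the query ends in .all() and returns a list, while a lot-only lookup still returns a single Reagent (or None) via .first(). The lot number and type name below are made-up examples; ctx is the same settings dict carrying the open database session used throughout this module.

single = lookup_reagent(ctx, reagent_lot="0001")                             # Reagent or None
matches = lookup_reagent(ctx, reagent_lot="0001", type_name="lysis_buffer")  # list of Reagent rows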

View File

@@ -2,8 +2,8 @@
All kit and reagent related models
'''
from . import Base
from sqlalchemy import Column, String, TIMESTAMP, JSON, INTEGER, ForeignKey, Interval, Table, FLOAT
from sqlalchemy.orm import relationship
from sqlalchemy import Column, String, TIMESTAMP, JSON, INTEGER, ForeignKey, Interval, Table, FLOAT, CheckConstraint
from sqlalchemy.orm import relationship, validates
from datetime import date
import logging
@@ -54,6 +54,17 @@ class ReagentType(Base):
kits = relationship("KitType", back_populates="reagent_types", uselist=True, foreign_keys=[kit_id]) #: kits this reagent is used in
instances = relationship("Reagent", back_populates="type") #: concrete instances of this reagent type
eol_ext = Column(Interval()) #: extension of life interval
required = Column(INTEGER, server_default="1") #: sqlite boolean to determine if reagent type is essential for the kit
# __table_args__ = (
# CheckConstraint(required >= 0, name='check_required_positive'),
# CheckConstraint(required < 2, name='check_required_less_2'),
# {})
@validates('required')
def validate_required(self, key, value):
# enforce sqlite-style boolean: 'required' must be 0 or 1
if not 0 <= value < 2:
raise ValueError(f'Invalid required value {value}')
return value
def __str__(self) -> str:
"""

View File

@@ -8,6 +8,7 @@ import pandas as pd
from pathlib import Path
from backend.db.models import WWSample, BCSample
from backend.db import lookup_ww_sample_by_ww_sample_num
from backend.pydant import PydSubmission
import logging
from collections import OrderedDict
import re
@@ -149,19 +150,22 @@ class SheetParser(object):
else:
logger.debug(f"Date: {row[3]}")
expiry = date.today()
self.sub[f"lot_{reagent_type}"] = {'lot':output_var, 'exp':expiry}
# self.sub[f"lot_{reagent_type}"] = {'lot':output_var, 'exp':expiry}
self.sub['reagents'].append(dict(type=reagent_type, lot=output_var, exp=expiry))
submission_info = self.parse_generic("Sample List")
# iloc is [row][column] and the first row is set as header row so -2
tech = str(submission_info.iloc[11][1])
if tech == "nan":
tech = "Unknown"
elif len(tech.split(",")) > 1:
tech_reg = re.compile(r"[A-Z]{2}")
tech = ", ".join(tech_reg.findall(tech))
# moved to pydantic model
# if tech == "nan":
# tech = "Unknown"
# elif len(tech.split(",")) > 1:
# tech_reg = re.compile(r"[A-Z]{2}")
# tech = ", ".join(tech_reg.findall(tech))
self.sub['technician'] = tech
# reagents
# must be prefixed with 'lot_' to be recognized by gui
# Todo: find a more adaptable way to read reagents.
# TODO: find a more adaptable way to read reagents.
self.sub['reagents'] = []
reagent_range = submission_info.iloc[1:14, 4:8]
logger.debug(reagent_range)
parse_reagents(reagent_range)
@@ -210,7 +214,8 @@ class SheetParser(object):
expiry = date.today()
else:
expiry = date.today()
self.sub[f"lot_{output_key}"] = {'lot':output_var, 'exp':expiry}
# self.sub[f"lot_{output_key}"] = {'lot':output_var, 'exp':expiry}
self.sub['reagents'].append(dict(type=output_key, lot=output_var, exp=expiry))
# parse submission sheet
submission_info = self.parse_generic("WW Submissions (ENTER HERE)")
# parse enrichment sheet
@@ -227,6 +232,7 @@ class SheetParser(object):
pcr_reagent_range = qprc_info.iloc[0:5, 9:20]
# compile technician info
self.sub['technician'] = f"Enr: {enrichment_info.columns[2]}, Ext: {extraction_info.columns[2]}, PCR: {qprc_info.columns[2]}"
self.sub['reagents'] = []
parse_reagents(enr_reagent_range)
parse_reagents(ext_reagent_range)
parse_reagents(pcr_reagent_range)
@@ -271,7 +277,7 @@ class SheetParser(object):
else:
logger.debug(f"Date: {row[2]}")
expiry = date.today()
self.sub[f"lot_{output_key}"] = {'lot':output_var, 'exp':expiry}
self.sub['reagents'].append(dict(type=output_key, lot=output_var, exp=expiry))
else:
continue
def massage_samples(df:pd.DataFrame) -> pd.DataFrame:
@@ -303,6 +309,7 @@ class SheetParser(object):
self.sub['sample_count'] = submission_info.iloc[4][6]
self.sub['extraction_kit'] = "ArticV4.1"
self.sub['technician'] = f"MM: {biomek_info.iloc[2][1]}, Bio: {biomek_info.iloc[3][1]}"
self.sub['reagents'] = []
parse_reagents(sub_reagent_range)
parse_reagents(biomek_reagent_range)
samples = massage_samples(biomek_info.iloc[22:31, 0:])
@@ -311,6 +318,18 @@ class SheetParser(object):
self.sample_result, self.sub['samples'] = sample_parse()
def to_pydantic(self) -> PydSubmission:
"""
Generates a pydantic model of scraped data for validation
Returns:
PydSubmission: output pydantic model
"""
psm = PydSubmission(filepath=self.filepath, **self.sub)
delattr(psm, "filepath")
return psm
class SampleParser(object):
"""
@@ -366,7 +385,7 @@ class SampleParser(object):
list[WWSample]: list of sample objects
"""
def search_df_for_sample(sample_rsl:str):
logger.debug(f"Attempting to find sample {sample_rsl} in \n {self.elution_map}")
# logger.debug(f"Attempting to find sample {sample_rsl} in \n {self.elution_map}")
well = self.elution_map.where(self.elution_map==sample_rsl)
# logger.debug(f"Well: {well}")
well = well.dropna(how='all').dropna(axis=1, how="all")

View File

@@ -9,6 +9,7 @@ import sys
from pathlib import Path
import re
from tools import check_if_app
from typing import Tuple
logger = logging.getLogger(f"submissions.{__name__}")
@@ -154,23 +155,61 @@ def displace_date(df:DataFrame) -> DataFrame:
# get submitted dates for each control
dict_list = [dict(name=item, date=df[df.name == item].iloc[0]['submitted_date']) for item in sorted(df['name'].unique())]
previous_dates = []
for ii, item in enumerate(dict_list):
try:
# check = item['date'] == dict_list[ii-1]['date']
check = item['date'] in previous_dates
except IndexError:
check = False
if check:
# occurences = previous_dates.count(item['date'])
logger.debug(f"We found one! Increment date!\n\t{item['date'] - timedelta(days=1)}")
# get df locations where name == item name
mask = df['name'] == item['name']
# increment date in dataframe
df.loc[mask, 'submitted_date'] = df.loc[mask, 'submitted_date'].apply(lambda x: x + timedelta(days=1))
previous_dates.append(item['date'] + timedelta(days=1))
else:
previous_dates.append(item['date'])
for _, item in enumerate(dict_list):
# try:
# # check = item['date'] == dict_list[ii-1]['date']
# check = item['date'] in previous_dates
# except IndexError:
# check = False
# if check:
# # occurences = previous_dates.count(item['date'])
# logger.debug(f"We found one! Increment date!\n\t{item['date']} to {item['date'] + timedelta(days=1)}")
# # get df locations where name == item name
# mask = df['name'] == item['name']
# # increment date in dataframe
# df.loc[mask, 'submitted_date'] = df.loc[mask, 'submitted_date'].apply(lambda x: x + timedelta(days=1))
# outdate = item['date'] + timedelta(days=1)
# # previous_dates.append(item['date'] + timedelta(days=1))
# else:
# outdate = item['date']
# previous_dates.append(outdate)
# logger.debug(f"\n\tCurrent date: {outdate}\n\tPrevious dates:{previous_dates}")
# logger.debug(type(item))
df, previous_dates = check_date(df=df, item=item, previous_dates=previous_dates)
return df
def check_date(df:DataFrame, item:dict, previous_dates:list) -> Tuple[DataFrame, list]:
"""
Shifts a control's submitted date forward one day if it collides with an already used date, recursing until the date is unique
Args:
df (DataFrame): controls dataframe
item (dict): dict of control name and submitted date
previous_dates (list): submitted dates already claimed by other controls
Returns:
Tuple[DataFrame, list]: updated dataframe and updated list of used dates
"""
try:
# check = item['date'] == dict_list[ii-1]['date']
check = item['date'] in previous_dates
except IndexError:
check = False
previous_dates.append(item['date'])
if check:
# occurences = previous_dates.count(item['date'])
logger.debug(f"We found one! Increment date!\n\t{item['date']} to {item['date'] + timedelta(days=1)}")
# get df locations where name == item name
mask = df['name'] == item['name']
# increment date in dataframe
df.loc[mask, 'submitted_date'] = df.loc[mask, 'submitted_date'].apply(lambda x: x + timedelta(days=1))
item['date'] += timedelta(days=1)
# previous_dates.append(item['date'] + timedelta(days=1))
passed = False
else:
passed = True
logger.debug(f"\n\tCurrent date: {item['date']}\n\tPrevious dates:{previous_dates}")
logger.debug(f"DF: {type(df)}, previous_dates: {type(previous_dates)}")
# if run didn't lead to changed date, return values
if passed:
logger.debug(f"Date check passed, returning.")
return df, previous_dates
# if date was changed, rerun with new date
else:
logger.warning(f"Date check failed, running recursion")
df, previous_dates = check_date(df, item, previous_dates)
return df, previous_dates
def get_unique_values_in_df_column(df: DataFrame, column_name: str) -> list:

View File

@@ -0,0 +1,65 @@
import uuid
from pydantic import BaseModel, validator
from datetime import date
from typing import List, Any
from tools import RSLNamer
from pathlib import Path
import re
import logging
logger = logging.getLogger(f"submissions.{__name__}")
class PydSubmission(BaseModel):
filepath: Path
submission_type: str
submitter_plate_num: str|None
rsl_plate_num: str
submitted_date: date
submitting_lab: str
sample_count: int
extraction_kit: str
technician: str
reagents: List[dict]
samples: List[Any]
@validator("submitted_date", pre=True)
@classmethod
def strip_datetime_string(cls, value):
return re.sub(r"_\d$", "", value)
@validator("submitter_plate_num")
@classmethod
def enforce_with_uuid(cls, value):
if value == None or value == "" or value == "None":
return uuid.uuid4().hex.upper()
return value
@validator("rsl_plate_num", pre=True)
@classmethod
def rsl_from_file(cls, value, values):
if value == None:
logger.debug(f"Pydant values:\n{values}")
return RSLNamer(values['filepath'].__str__()).parsed_name
else:
return value
@validator("technician")
@classmethod
def enforce_tech(cls, value):
if value == "nan" or value == "None":
value = "Unknown"
# elif len(value.split(",")) > 1:
# tech_reg = re.compile(r"\b[A-Z]{2}\b")
# value = ", ".join(tech_reg.findall(value))
return value
@validator("reagents")
@classmethod
def remove_atcc(cls, value):
return_val = []
for reagent in value:
match reagent['type']:
case 'atcc':
continue
case _:
return_val.append(reagent)
return return_val
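
Finally, a hedged end-to-end sketch of how the parser changes above are expected to hand data to this model; `parser` is assumed to be an already-initialized SheetParser, whose construction is not shown in this diff.

psm = parser.to_pydantic()   # PydSubmission with the validators above applied
print(psm.rsl_plate_num)     # derived from the filename by RSLNamer when the sheet left it blank
print(psm.technician)        # "Unknown" when the technician cell was empty ("nan")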