Code cleanup and documentation

This commit is contained in:
Landon Wark
2024-02-09 14:03:35 -06:00
parent eda62fba5a
commit a534d229a8
30 changed files with 1558 additions and 1347 deletions

View File

@@ -2,7 +2,7 @@
Contains all models for sqlalchemy
'''
import sys
from sqlalchemy.orm import DeclarativeMeta, declarative_base
from sqlalchemy.orm import DeclarativeMeta, declarative_base, Query
from sqlalchemy.ext.declarative import declared_attr
if 'pytest' in sys.modules:
from pathlib import Path
@@ -23,10 +23,16 @@ class BaseClass(Base):
@declared_attr
def __tablename__(cls) -> str:
    """
    Set tablename to the lowercase class name prefixed with an underscore
    (e.g. a class named ControlType gets the table name "_controltype").

    Returns:
        str: table name for this class
    """
    return f"_{cls.__name__.lower()}"
@declared_attr
def __database_session__(cls):
"""
Pull db session from ctx
"""
if not 'pytest' in sys.modules:
from tools import ctx
else:
@@ -35,6 +41,9 @@ class BaseClass(Base):
@declared_attr
def __directory_path__(cls):
"""
Pull submission directory from ctx
"""
if not 'pytest' in sys.modules:
from tools import ctx
else:
@@ -43,14 +52,39 @@ class BaseClass(Base):
@declared_attr
def __backup_path__(cls):
    """
    Pull backup directory from ctx

    The ctx object is imported from test_settings when pytest is loaded
    in this interpreter, otherwise from tools.

    Returns:
        The backup_path attribute of the selected ctx object.
    """
    running_tests = 'pytest' in sys.modules
    if running_tests:
        from test_settings import ctx
    else:
        from tools import ctx
    return ctx.backup_path
def query_return(query:Query, limit:int=0):
"""
Execute sqlalchemy query.
Args:
query (Query): Query object
limit (int, optional): Maximum number of results to return (0 = all). Defaults to 0.
Returns:
_type_: Query result.
"""
with query.session.no_autoflush:
match limit:
case 0:
return query.all()
case 1:
return query.first()
case _:
return query.limit(limit).all()
def save(self):
# logger.debug(f"Saving {self}")
"""
Add the object to the database and commit
"""
try:
self.__database_session__.add(self)
self.__database_session__.commit()

View File

@@ -7,7 +7,7 @@ from sqlalchemy.orm import relationship, Query
import logging, json
from operator import itemgetter
from . import BaseClass
from tools import setup_lookup, query_return
from tools import setup_lookup
from datetime import date, datetime
from typing import List
from dateutil.parser import parse
@@ -18,7 +18,6 @@ class ControlType(BaseClass):
"""
Base class of a control archetype.
"""
# __tablename__ = '_control_types'
id = Column(INTEGER, primary_key=True) #: primary key
name = Column(String(255), unique=True) #: controltype name (e.g. MCS)
@@ -48,7 +47,7 @@ class ControlType(BaseClass):
limit = 1
case _:
pass
return query_return(query=query, limit=limit)
return cls.query_return(query=query, limit=limit)
def get_subtypes(self, mode:str) -> List[str]:
"""
@@ -60,10 +59,13 @@ class ControlType(BaseClass):
Returns:
List[str]: list of subtypes available
"""
# Get first instance since all should have same subtypes
outs = self.instances[0]
# Get mode of instance
jsoner = json.loads(getattr(outs, mode))
logger.debug(f"JSON out: {jsoner.keys()}")
try:
# Pick genera (all should have same subtypes)
genera = list(jsoner.keys())[0]
except IndexError:
return []
@@ -74,8 +76,6 @@ class Control(BaseClass):
"""
Base class of a control sample.
"""
# __tablename__ = '_control_samples'
id = Column(INTEGER, primary_key=True) #: primary key
parent_id = Column(String, ForeignKey("_controltype.id", name="fk_control_parent_id")) #: primary key of control type
@@ -90,10 +90,14 @@ class Control(BaseClass):
refseq_version = Column(String(16)) #: version of refseq used in fastq parsing
kraken2_version = Column(String(16)) #: version of kraken2 used in fastq parsing
kraken2_db_version = Column(String(32)) #: folder name of kraken2 db
sample = relationship("BacterialCultureSample", back_populates="control")
sample_id = Column(INTEGER, ForeignKey("_basicsample.id", ondelete="SET NULL", name="cont_BCS_id"))
sample = relationship("BacterialCultureSample", back_populates="control") #: This control's submission sample
sample_id = Column(INTEGER, ForeignKey("_basicsample.id", ondelete="SET NULL", name="cont_BCS_id")) #: sample id key
def __repr__(self) -> str:
    """
    Build the representation of this control from its name.

    Returns:
        str: Representation of self, in the form "<Control(name)>"
    """
    return f"<Control({self.name})>"
def to_sub_dict(self) -> dict:
@@ -103,25 +107,25 @@ class Control(BaseClass):
Returns:
dict: output dictionary containing: Name, Type, Targets, Top Kraken results
"""
# load json string into dict
# logger.debug("loading json string into dict")
try:
kraken = json.loads(self.kraken)
except TypeError:
kraken = {}
# calculate kraken count total to use in percentage
# logger.debug("calculating kraken count total to use in percentage")
kraken_cnt_total = sum([kraken[item]['kraken_count'] for item in kraken])
new_kraken = []
for item in kraken:
# calculate kraken percent (overwrites what's already been scraped)
# logger.debug("calculating kraken percent (overwrites what's already been scraped)")
kraken_percent = kraken[item]['kraken_count'] / kraken_cnt_total
new_kraken.append({'name': item, 'kraken_count':kraken[item]['kraken_count'], 'kraken_percent':"{0:.0%}".format(kraken_percent)})
new_kraken = sorted(new_kraken, key=itemgetter('kraken_count'), reverse=True)
# set targets
# logger.debug("setting targets")
if self.controltype.targets == []:
targets = ["None"]
else:
targets = self.controltype.targets
# construct output dictionary
# logger.debug("constructing output dictionary")
output = {
"name" : self.name,
"type" : self.controltype.name,
@@ -141,49 +145,28 @@ class Control(BaseClass):
list[dict]: list of records
"""
output = []
# load json string for mode (i.e. contains, matches, kraken2)
# logger.debug("load json string for mode (i.e. contains, matches, kraken2)")
try:
data = json.loads(getattr(self, mode))
except TypeError:
data = {}
logger.debug(f"Length of data: {len(data)}")
# dict keys are genera of bacteria, e.g. 'Streptococcus'
# logger.debug("dict keys are genera of bacteria, e.g. 'Streptococcus'")
for genus in data:
_dict = {}
_dict['name'] = self.name
_dict['submitted_date'] = self.submitted_date
_dict['genus'] = genus
# get Target or Off-target of genus
# logger.debug("get Target or Off-target of genus")
_dict['target'] = 'Target' if genus.strip("*") in self.controltype.targets else "Off-target"
# set 'contains_hashes', etc for genus,
# logger.debug("set 'contains_hashes', etc for genus")
for key in data[genus]:
_dict[key] = data[genus][key]
output.append(_dict)
# Have to triage kraken data to keep program from getting overwhelmed
# logger.debug("Have to triage kraken data to keep program from getting overwhelmed")
if "kraken" in mode:
output = sorted(output, key=lambda d: d[f"{mode}_count"], reverse=True)[:49]
return output
def create_dummy_data(self, mode:str) -> dict:
    """
    Create non-zero length data to maintain entry of zero length 'contains' (deprecated)

    Args:
        mode (str): analysis type, 'contains', etc

    Returns:
        dict: dictionary holding a single 'Nothing' genus with zeroed values
        for the given mode, or an empty dictionary for an unrecognised mode
    """
    if mode == "contains":
        return {"Nothing": {"contains_hashes":"0/400", "contains_ratio":0.0}}
    if mode == "matches":
        return {"Nothing": {"matches_hashes":"0/400", "matches_ratio":0.0}}
    if mode == "kraken":
        return {"Nothing": {"kraken_percent":0.0, "kraken_count":0}}
    # Any other mode has no placeholder entry.
    return {}
@classmethod
def get_modes(cls) -> List[str]:
@@ -194,6 +177,7 @@ class Control(BaseClass):
List[str]: List of control mode names.
"""
try:
# logger.debug("Creating a list of JSON columns in _controls table")
cols = [item.name for item in list(cls.__table__.columns) if isinstance(item.type, JSON)]
except AttributeError as e:
logger.error(f"Failed to get available modes from db: {e}")
@@ -243,25 +227,32 @@ class Control(BaseClass):
if start_date != None:
match start_date:
case date():
# logger.debug(f"Lookup control by start date({start_date})")
start_date = start_date.strftime("%Y-%m-%d")
case int():
# logger.debug(f"Lookup control by ordinal start date {start_date}")
start_date = datetime.fromordinal(datetime(1900, 1, 1).toordinal() + start_date - 2).date().strftime("%Y-%m-%d")
case _:
# logger.debug(f"Lookup control with parsed start date {start_date}")
start_date = parse(start_date).strftime("%Y-%m-%d")
match end_date:
case date():
# logger.debug(f"Lookup control by end date({end_date})")
end_date = end_date.strftime("%Y-%m-%d")
case int():
# logger.debug(f"Lookup control by ordinal end date {end_date}")
end_date = datetime.fromordinal(datetime(1900, 1, 1).toordinal() + end_date - 2).date().strftime("%Y-%m-%d")
case _:
# logger.debug(f"Lookup control with parsed end date {end_date}")
end_date = parse(end_date).strftime("%Y-%m-%d")
# logger.debug(f"Looking up BasicSubmissions from start date: {start_date} and end date: {end_date}")
query = query.filter(cls.submitted_date.between(start_date, end_date))
match control_name:
case str():
# logger.debug(f"Lookup control by name {control_name}")
query = query.filter(cls.name.startswith(control_name))
limit = 1
case _:
pass
return query_return(query=query, limit=limit)
return cls.query_return(query=query, limit=limit)

File diff suppressed because it is too large Load Diff

View File

@@ -5,7 +5,7 @@ from __future__ import annotations
from sqlalchemy import Column, String, INTEGER, ForeignKey, Table
from sqlalchemy.orm import relationship, Query
from . import Base, BaseClass
from tools import check_authorization, setup_lookup, query_return, Settings
from tools import check_authorization, setup_lookup
from typing import List
import logging
@@ -25,8 +25,7 @@ class Organization(BaseClass):
"""
Base of organization
"""
# __tablename__ = "_organizations"
id = Column(INTEGER, primary_key=True) #: primary key
name = Column(String(64)) #: organization name
submissions = relationship("BasicSubmission", back_populates="submitting_lab") #: submissions this organization has submitted
@@ -34,11 +33,12 @@ class Organization(BaseClass):
contacts = relationship("Contact", back_populates="organization", secondary=orgs_contacts) #: contacts involved with this org
def __repr__(self) -> str:
    """
    Build the representation of this organization from its name.

    Returns:
        str: Representation of this Organization, in the form "<Organization(name)>"
    """
    return f"<Organization({self.name})>"
def set_attribute(self, name:str, value) -> None:
    """
    Set an attribute on this instance by name.

    Args:
        name (str): name of the attribute to set
        value: value to assign to the attribute
    """
    setattr(self, name, value)
@classmethod
@setup_lookup
def query(cls,
@@ -63,24 +63,17 @@ class Organization(BaseClass):
limit = 1
case _:
pass
return query_return(query=query, limit=limit)
return cls.query_return(query=query, limit=limit)
@check_authorization
def save(self, ctx:Settings):
"""
Adds this instance to the database and commits
Args:
ctx (Settings): Settings object passed down from GUI. Necessary to check authorization
"""
def save(self):
super().save()
class Contact(BaseClass):
"""
Base of Contact
"""
# __tablename__ = "_contacts"
id = Column(INTEGER, primary_key=True) #: primary key
name = Column(String(64)) #: contact name
email = Column(String(64)) #: contact email
@@ -88,6 +81,10 @@ class Contact(BaseClass):
organization = relationship("Organization", back_populates="contacts", uselist=True, secondary=orgs_contacts) #: relationship to joined organization
def __repr__(self) -> str:
    """
    Build the representation of this contact from its name.

    Returns:
        str: Representation of this Contact, in the form "<Contact(name)>"
    """
    return f"<Contact({self.name})>"
@classmethod
@@ -133,5 +130,5 @@ class Contact(BaseClass):
limit = 1
case _:
pass
return query_return(query=query, limit=limit)
return cls.query_return(query=query, limit=limit)

File diff suppressed because it is too large Load Diff

View File

@@ -13,23 +13,21 @@ import logging, re
from collections import OrderedDict
from datetime import date
from dateutil.parser import parse, ParserError
from tools import check_not_nan, convert_nans_to_nones, Settings, is_missing
from tools import check_not_nan, convert_nans_to_nones, is_missing, row_map
logger = logging.getLogger(f"submissions.{__name__}")
row_keys = dict(A=1, B=2, C=3, D=4, E=5, F=6, G=7, H=8)
row_keys = {v:k for k,v in row_map.items()}
class SheetParser(object):
"""
object to pull and contain data from excel file
"""
def __init__(self, ctx:Settings, filepath:Path|None = None):
def __init__(self, filepath:Path|None = None):
"""
Args:
ctx (Settings): Settings object passed down from gui. Necessary for Bacterial to get directory path.
filepath (Path | None, optional): file path to excel sheet. Defaults to None.
"""
self.ctx = ctx
logger.debug(f"\n\nParsing {filepath.__str__()}\n\n")
match filepath:
case Path():
@@ -46,7 +44,7 @@ class SheetParser(object):
raise FileNotFoundError(f"Couldn't parse file {self.filepath}")
self.sub = OrderedDict()
# make decision about type of sample we have
self.sub['submission_type'] = dict(value=RSLNamer.retrieve_submission_type(instr=self.filepath), missing=True)
self.sub['submission_type'] = dict(value=RSLNamer.retrieve_submission_type(filename=self.filepath), missing=True)
# # grab the info map from the submission type in database
self.parse_info()
self.import_kit_validation_check()
@@ -144,7 +142,6 @@ class InfoParser(object):
def __init__(self, xl:pd.ExcelFile, submission_type:str):
logger.info(f"\n\Hello from InfoParser!\n\n")
# self.ctx = ctx
self.map = self.fetch_submission_info_map(submission_type=submission_type)
self.xl = xl
logger.debug(f"Info map for InfoParser: {pformat(self.map)}")
@@ -209,7 +206,6 @@ class ReagentParser(object):
def __init__(self, xl:pd.ExcelFile, submission_type:str, extraction_kit:str):
logger.debug("\n\nHello from ReagentParser!\n\n")
# self.ctx = ctx
self.map = self.fetch_kit_info_map(extraction_kit=extraction_kit, submission_type=submission_type)
logger.debug(f"Reagent Parser map: {self.map}")
self.xl = xl
@@ -227,7 +223,6 @@ class ReagentParser(object):
"""
if isinstance(extraction_kit, dict):
extraction_kit = extraction_kit['value']
# kit = lookup_kit_types(ctx=self.ctx, name=extraction_kit)
kit = KitType.query(name=extraction_kit)
if isinstance(submission_type, dict):
submission_type = submission_type['value']
@@ -272,7 +267,6 @@ class ReagentParser(object):
lot = str(lot)
logger.debug(f"Going into pydantic: name: {name}, lot: {lot}, expiry: {expiry}, type: {item.strip()}, comment: {comment}")
listo.append(PydReagent(type=item.strip(), lot=lot, expiry=expiry, name=name, comment=comment, missing=missing))
# logger.debug(f"Returning listo: {listo}")
return listo
class SampleParser(object):
@@ -290,7 +284,6 @@ class SampleParser(object):
"""
logger.debug("\n\nHello from SampleParser!\n\n")
self.samples = []
# self.ctx = ctx
self.xl = xl
self.submission_type = submission_type
sample_info_map = self.fetch_sample_info_map(submission_type=submission_type)
@@ -316,11 +309,9 @@ class SampleParser(object):
dict: Info locations.
"""
logger.debug(f"Looking up submission type: {submission_type}")
# submission_type = lookup_submission_type(ctx=self.ctx, name=submission_type)
submission_type = SubmissionType.query(name=submission_type)
logger.debug(f"info_map: {pformat(submission_type.info_map)}")
sample_info_map = submission_type.info_map['samples']
# self.custom_parser = get_polymorphic_subclass(models.BasicSubmission, submission_type.name).parse_samples
self.custom_sub_parser = BasicSubmission.find_polymorphic_subclass(polymorphic_identity=submission_type.name).parse_samples
self.custom_sample_parser = BasicSample.find_polymorphic_subclass(polymorphic_identity=f"{submission_type.name} Sample").parse_sample
return sample_info_map
@@ -341,7 +332,6 @@ class SampleParser(object):
df = pd.DataFrame(df.values[1:], columns=df.iloc[0])
df = df.set_index(df.columns[0])
logger.debug(f"Vanilla platemap: {df}")
# custom_mapper = get_polymorphic_subclass(models.BasicSubmission, self.submission_type)
custom_mapper = BasicSubmission.find_polymorphic_subclass(polymorphic_identity=self.submission_type)
df = custom_mapper.custom_platemap(self.xl, df)
logger.debug(f"Custom platemap:\n{df}")
@@ -402,7 +392,6 @@ class SampleParser(object):
else:
return input_str
for sample in self.samples:
# addition = self.lookup_table[self.lookup_table.isin([sample['submitter_id']]).any(axis=1)].squeeze().to_dict()
addition = self.lookup_table[self.lookup_table.isin([sample['submitter_id']]).any(axis=1)].squeeze()
# logger.debug(addition)
if isinstance(addition, pd.DataFrame) and not addition.empty:
@@ -433,25 +422,17 @@ class SampleParser(object):
# logger.debug(f"Output sample dict: {sample}")
logger.debug(f"Final lookup_table: \n\n {self.lookup_table}")
def parse_samples(self, generate:bool=True) -> List[dict]|List[BasicSample]:
def parse_samples(self) -> List[dict]|List[BasicSample]:
"""
Parse merged platemap/lookup info into dicts/samples
Args:
generate (bool, optional): Indicates if sample objects to be generated from dicts. Defaults to True.
Returns:
List[dict]|List[models.BasicSample]: List of samples
"""
result = None
new_samples = []
logger.debug(f"Starting samples: {pformat(self.samples)}")
for ii, sample in enumerate(self.samples):
# try:
# if sample['submitter_id'] in [check_sample['sample'].submitter_id for check_sample in new_samples]:
# sample['submitter_id'] = f"{sample['submitter_id']}-{ii}"
# except KeyError as e:
# logger.error(f"Sample obj: {sample}, error: {e}")
for sample in self.samples:
translated_dict = {}
for k, v in sample.items():
match v:
@@ -483,7 +464,7 @@ class SampleParser(object):
for plate in self.plates:
df = self.xl.parse(plate['sheet'], header=None)
if isinstance(df.iat[plate['row']-1, plate['column']-1], str):
output = RSLNamer.retrieve_rsl_number(instr=df.iat[plate['row']-1, plate['column']-1])
output = RSLNamer.retrieve_rsl_number(filename=df.iat[plate['row']-1, plate['column']-1])
else:
continue
plates.append(output)
@@ -495,25 +476,43 @@ class EquipmentParser(object):
self.submission_type = submission_type
self.xl = xl
self.map = self.fetch_equipment_map()
# self.equipment = self.parse_equipment()
def fetch_equipment_map(self) -> List[dict]:
    """
    Look up this parser's submission type and return its equipment map,
    i.e. the equipment locations in the submission type's spreadsheet.

    Returns:
        List[dict]: List of locations
    """
    return SubmissionType.query(name=self.submission_type).construct_equipment_map()
def get_asset_number(self, input:str) -> str:
    """
    Scrape the asset number out of a string using the Equipment regex.

    Args:
        input (str): String to be scraped

    Returns:
        str: the matched text with leading/trailing "-" removed, or the
        input unchanged when an AttributeError occurs (e.g. nothing matched)
    """
    pattern = Equipment.get_regex()
    logger.debug(f"Using equipment regex: {pattern} on {input}")
    try:
        found = pattern.search(input)
        # found is None when nothing matched, so .group() raises AttributeError
        return found.group().strip("-")
    except AttributeError:
        return input
def parse_equipment(self):
def parse_equipment(self) -> List[PydEquipment]:
"""
Scrapes equipment from xl sheet
Returns:
List[PydEquipment]: list of equipment
"""
logger.debug(f"Equipment parser going into parsing: {pformat(self.__dict__)}")
output = []
# sheets = list(set([item['sheet'] for item in self.map]))
# logger.debug(f"Sheets: {sheets}")
for sheet in self.xl.sheet_names:
df = self.xl.parse(sheet, header=None, dtype=object)
@@ -550,7 +549,6 @@ class PCRParser(object):
Args:
filepath (Path | None, optional): file to parse. Defaults to None.
"""
# self.ctx = ctx
logger.debug(f"Parsing {filepath.__str__()}")
if filepath == None:
logger.error(f"No filepath given.")
@@ -564,9 +562,8 @@ class PCRParser(object):
except PermissionError:
logger.error(f"Couldn't get permissions for {filepath.__str__()}. Operation might have been cancelled.")
return
# self.pcr = OrderedDict()
self.parse_general(sheet_name="Results")
namer = RSLNamer(instr=filepath.__str__())
namer = RSLNamer(filename=filepath.__str__())
self.plate_num = namer.parsed_name
self.submission_type = namer.submission_type
logger.debug(f"Set plate number to {self.plate_num} and type to {self.submission_type}")

View File

@@ -219,7 +219,7 @@ def drop_reruns_from_df(ctx:Settings, df: DataFrame) -> DataFrame:
def make_hitpicks(input:List[dict]) -> DataFrame:
"""
Converts lsit of dictionaries constructed by hitpicking to dataframe
Converts list of dictionaries constructed by hitpicking to dataframe
Args:
input (List[dict]): list of hitpicked dictionaries

View File

@@ -2,8 +2,8 @@ import logging, re
from pathlib import Path
from openpyxl import load_workbook
from backend.db.models import BasicSubmission, SubmissionType
from datetime import date
from tools import jinja_template_loading
from jinja2 import Template
logger = logging.getLogger(f"submissions.{__name__}")
@@ -11,14 +11,16 @@ class RSLNamer(object):
"""
Object that will enforce proper formatting on RSL plate names.
"""
def __init__(self, instr:str, sub_type:str|None=None, data:dict|None=None):
def __init__(self, filename:str, sub_type:str|None=None, data:dict|None=None):
self.submission_type = sub_type
if self.submission_type == None:
self.submission_type = self.retrieve_submission_type(instr=instr)
# logger.debug("Creating submission type because none exists")
self.submission_type = self.retrieve_submission_type(filename=filename)
logger.debug(f"got submission type: {self.submission_type}")
if self.submission_type != None:
# logger.debug("Retrieving BasicSubmission subclass")
enforcer = BasicSubmission.find_polymorphic_subclass(polymorphic_identity=self.submission_type)
self.parsed_name = self.retrieve_rsl_number(instr=instr, regex=enforcer.get_regex())
self.parsed_name = self.retrieve_rsl_number(filename=filename, regex=enforcer.get_regex())
if data == None:
data = dict(submission_type=self.submission_type)
if "submission_type" not in data.keys():
@@ -26,26 +28,25 @@ class RSLNamer(object):
self.parsed_name = enforcer.enforce_name(instr=self.parsed_name, data=data)
@classmethod
def retrieve_submission_type(cls, instr:str|Path) -> str:
def retrieve_submission_type(cls, filename:str|Path) -> str:
"""
Gets submission type from excel file properties or sheet names or regex pattern match or user input
Args:
instr (str | Path): filename
filename (str | Path): filename
Returns:
str: parsed submission type
"""
match instr:
match filename:
case Path():
logger.debug(f"Using path method for {instr}.")
if instr.exists():
wb = load_workbook(instr)
logger.debug(f"Using path method for {filename}.")
if filename.exists():
wb = load_workbook(filename)
try:
submission_type = [item.strip().title() for item in wb.properties.category.split(";")][0]
except AttributeError:
try:
# sts = {item.name:item.info_map['all_sheets'] for item in SubmissionType.query(key="all_sheets")}
sts = {item.name:item.get_template_file_sheets() for item in SubmissionType.query()}
for k,v in sts.items():
# This gets the *first* submission type that matches the sheet names in the workbook
@@ -54,13 +55,13 @@ class RSLNamer(object):
break
except:
# On failure recurse using filename as string for string method
submission_type = cls.retrieve_submission_type(instr=instr.stem.__str__())
submission_type = cls.retrieve_submission_type(filename=filename.stem.__str__())
else:
submission_type = cls.retrieve_submission_type(instr=instr.stem.__str__())
submission_type = cls.retrieve_submission_type(filename=filename.stem.__str__())
case str():
regex = BasicSubmission.construct_regex()
logger.debug(f"Using string method for {instr}.")
m = regex.search(instr)
logger.debug(f"Using string method for {filename}.")
m = regex.search(filename)
try:
submission_type = m.lastgroup
except AttributeError as e:
@@ -72,6 +73,7 @@ class RSLNamer(object):
except UnboundLocalError:
check = True
if check:
# logger.debug("Final option, ask the user for submission type")
from frontend.widgets import SubmissionTypeSelector
dlg = SubmissionTypeSelector(title="Couldn't parse submission type.", message="Please select submission type from list below.")
if dlg.exec():
@@ -80,25 +82,25 @@ class RSLNamer(object):
return submission_type
@classmethod
def retrieve_rsl_number(cls, instr:str|Path, regex:str|None=None):
def retrieve_rsl_number(cls, filename:str|Path, regex:str|None=None):
"""
Uses regex to retrieve the plate number and submission type from an input string
Args:
filename (str | Path): string or file path to be parsed
"""
logger.debug(f"Input string to be parsed: {instr}")
logger.debug(f"Input string to be parsed: {filename}")
if regex == None:
regex = BasicSubmission.construct_regex()
else:
regex = re.compile(rf'{regex}', re.IGNORECASE | re.VERBOSE)
logger.debug(f"Using regex: {regex}")
match instr:
match filename:
case Path():
m = regex.search(instr.stem)
m = regex.search(filename.stem)
case str():
logger.debug(f"Using string method.")
m = regex.search(instr)
m = regex.search(filename)
case _:
pass
if m != None:
@@ -113,6 +115,15 @@ class RSLNamer(object):
@classmethod
def construct_new_plate_name(cls, data:dict) -> str:
"""
Make a brand new plate name from submission data.
Args:
data (dict): incoming submission data
Returns:
str: Output filename
"""
if "submitted_date" in data.keys():
if isinstance(data['submitted_date'], dict):
if data['submitted_date']['value'] != None:
@@ -135,12 +146,20 @@ class RSLNamer(object):
return f"RSL-{data['abbreviation']}-{today.year}{str(today.month).zfill(2)}{str(today.day).zfill(2)}-{plate_number}"
@classmethod
def construct_export_name(cls, template, **kwargs):
def construct_export_name(cls, template:Template, **kwargs) -> str:
"""
Make export file name from jinja template. (currently unused)
Args:
template (jinja2.Template): Template stored in BasicSubmission
Returns:
str: output file name.
"""
logger.debug(f"Kwargs: {kwargs}")
logger.debug(f"Template: {template}")
environment = jinja_template_loading()
template = environment.from_string(template)
return template.render(**kwargs)
from .pydant import *
from .pydant import *

View File

@@ -11,17 +11,17 @@ from dateutil.parser._parser import ParserError
from typing import List, Tuple
from . import RSLNamer
from pathlib import Path
from tools import check_not_nan, convert_nans_to_nones, jinja_template_loading, Report, Result, row_map
from tools import check_not_nan, convert_nans_to_nones, Report, Result, row_map
from backend.db.models import *
from sqlalchemy.exc import StatementError, IntegrityError
from PyQt6.QtWidgets import QComboBox, QWidget
# from pprint import pformat
from openpyxl import load_workbook, Workbook
from io import BytesIO
logger = logging.getLogger(f"submissions.{__name__}")
class PydReagent(BaseModel):
lot: str|None
type: str|None
expiry: date|None
@@ -103,6 +103,7 @@ class PydReagent(BaseModel):
Tuple[Reagent, Report]: Reagent instance and result of function
"""
report = Report()
# logger.debug("Adding extra fields.")
if self.model_extra != None:
self.__dict__.update(self.model_extra)
logger.debug(f"Reagent SQL constructor is looking up type: {self.type}, lot: {self.lot}")
@@ -118,16 +119,17 @@ class PydReagent(BaseModel):
match key:
case "lot":
reagent.lot = value.upper()
case "expiry":
reagent.expiry = value
case "type":
reagent_type = ReagentType.query(name=value)
if reagent_type != None:
reagent.type.append(reagent_type)
case "name":
reagent.name = value
case "comment":
continue
case _:
try:
reagent.__setattr__(key, value)
except AttributeError:
logger.error(f"Couldn't set {key} to {value}")
if submission != None:
assoc = SubmissionReagentAssociation(reagent=reagent, submission=submission)
assoc.comments = self.comment
@@ -190,7 +192,8 @@ class PydSample(BaseModel, extra='allow'):
case "row" | "column":
continue
case _:
instance.set_attribute(name=key, value=value)
# instance.set_attribute(name=key, value=value)
instance.__setattr__(key, value)
out_associations = []
if submission != None:
assoc_type = self.sample_type.replace("Sample", "").strip()
@@ -228,11 +231,16 @@ class PydEquipment(BaseModel, extra='ignore'):
value=['']
return value
# def toForm(self, parent):
# from frontend.widgets.equipment_usage import EquipmentCheckBox
# return EquipmentCheckBox(parent=parent, equipment=self)
def toSQL(self, submission:BasicSubmission|str=None):
def toSQL(self, submission:BasicSubmission|str=None) -> Tuple[Equipment, SubmissionEquipmentAssociation]:
"""
Creates Equipment and SubmissionEquipmentAssociation for this PydEquipment
Args:
submission ( BasicSubmission | str ): BasicSubmission of interest
Returns:
Tuple[Equipment, SubmissionEquipmentAssociation]: SQL objects
"""
if isinstance(submission, str):
submission = BasicSubmission.query(rsl_number=submission)
equipment = Equipment.query(asset_number=self.asset_number)
@@ -242,6 +250,7 @@ class PydEquipment(BaseModel, extra='ignore'):
assoc = SubmissionEquipmentAssociation(submission=submission, equipment=equipment)
process = Process.query(name=self.processes[0])
if process == None:
# logger.debug("Adding in unknown process.")
from frontend.widgets.pop_ups import QuestionAsker
dlg = QuestionAsker(title="Add Process?", message=f"Unable to find {self.processes[0]} in the database.\nWould you like to add it?")
if dlg.exec():
@@ -254,8 +263,6 @@ class PydEquipment(BaseModel, extra='ignore'):
process.save()
assoc.process = process
assoc.role = self.role
# equipment.equipment_submission_associations.append(assoc)
# equipment.equipment_submission_associations.append(assoc)
else:
assoc = None
return equipment, assoc
@@ -357,7 +364,7 @@ class PydSubmission(BaseModel, extra='allow'):
if check_not_nan(value['value']):
return value
else:
output = RSLNamer(instr=values.data['filepath'].__str__(), sub_type=sub_type, data=values.data).parsed_name
output = RSLNamer(filename=values.data['filepath'].__str__(), sub_type=sub_type, data=values.data).parsed_name
return dict(value=output, missing=True)
@field_validator("technician", mode="before")
@@ -407,9 +414,10 @@ class PydSubmission(BaseModel, extra='allow'):
return dict(value=value, missing=False)
else:
# return dict(value=RSLNamer(instr=values.data['filepath'].__str__()).submission_type.title(), missing=True)
return dict(value=RSLNamer.retrieve_submission_type(instr=values.data['filepath']).title(), missing=True)
return dict(value=RSLNamer.retrieve_submission_type(filename=values.data['filepath']).title(), missing=True)
@field_validator("submission_category", mode="before")
@classmethod
def create_category(cls, value):
if not isinstance(value, dict):
return dict(value=value, missing=True)
@@ -423,6 +431,7 @@ class PydSubmission(BaseModel, extra='allow'):
return value
@field_validator("samples")
@classmethod
def assign_ids(cls, value, values):
starting_id = SubmissionSampleAssociation.autoincrement_id()
output = []
@@ -431,7 +440,6 @@ class PydSubmission(BaseModel, extra='allow'):
output.append(sample)
return output
def handle_duplicate_samples(self):
"""
Collapses multiple samples with same submitter id into one with lists for rows, columns.
@@ -439,7 +447,7 @@ class PydSubmission(BaseModel, extra='allow'):
"""
submitter_ids = list(set([sample.submitter_id for sample in self.samples]))
output = []
for iii, id in enumerate(submitter_ids, start=1):
for id in submitter_ids:
relevants = [item for item in self.samples if item.submitter_id==id]
if len(relevants) <= 1:
output += relevants
@@ -447,9 +455,6 @@ class PydSubmission(BaseModel, extra='allow'):
rows = [item.row[0] for item in relevants]
columns = [item.column[0] for item in relevants]
ids = [item.assoc_id[0] for item in relevants]
# for jjj, rel in enumerate(relevants, start=1):
# starting_id += jjj
# ids.append(starting_id)
dummy = relevants[0]
dummy.assoc_id = ids
dummy.row = rows
@@ -471,6 +476,7 @@ class PydSubmission(BaseModel, extra='allow'):
if dictionaries:
output = {k:getattr(self, k) for k in fields}
else:
# logger.debug("Extracting 'value' from attributes")
output = {k:(getattr(self, k) if not isinstance(getattr(self, k), dict) else getattr(self, k)['value']) for k in fields}
return output
@@ -493,12 +499,14 @@ class PydSubmission(BaseModel, extra='allow'):
Returns:
Tuple[BasicSubmission, Result]: BasicSubmission instance, result object
"""
self.__dict__.update(self.model_extra)
# self.__dict__.update(self.model_extra)
dicto = self.improved_dict()
instance, code, msg = BasicSubmission.query_or_create(submission_type=self.submission_type['value'], rsl_plate_num=self.rsl_plate_num['value'])
result = Result(msg=msg, code=code)
self.handle_duplicate_samples()
logger.debug(f"Here's our list of duplicate removed samples: {self.samples}")
for key, value in self.__dict__.items():
# for key, value in self.__dict__.items():
for key, value in dicto.items():
if isinstance(value, dict):
value = value['value']
logger.debug(f"Setting {key} to {value}")
@@ -600,6 +608,7 @@ class PydSubmission(BaseModel, extra='allow'):
info = {k:v for k,v in self.improved_dict().items() if isinstance(v, dict)}
reagents = self.reagents
if len(reagents + list(info.keys())) == 0:
# logger.warning("No info to fill in, returning")
return None
logger.debug(f"We have blank info and/or reagents in the excel sheet.\n\tLet's try to fill them in.")
# extraction_kit = lookup_kit_types(ctx=self.ctx, name=self.extraction_kit['value'])
@@ -610,6 +619,7 @@ class PydSubmission(BaseModel, extra='allow'):
# logger.debug(f"Missing reagents going into autofile: {pformat(reagents)}")
# logger.debug(f"Missing info going into autofile: {pformat(info)}")
new_reagents = []
# logger.debug("Constructing reagent map and values")
for reagent in reagents:
new_reagent = {}
new_reagent['type'] = reagent.type
@@ -626,6 +636,7 @@ class PydSubmission(BaseModel, extra='allow'):
logger.error(f"Couldn't get name due to {e}")
new_reagents.append(new_reagent)
new_info = []
# logger.debug("Constructing info map and values")
for k,v in info.items():
try:
new_item = {}
@@ -678,6 +689,7 @@ class PydSubmission(BaseModel, extra='allow'):
logger.debug(f"Sample info: {pformat(sample_info)}")
logger.debug(f"Workbook sheets: {workbook.sheetnames}")
worksheet = workbook[sample_info["lookup_table"]['sheet']]
# logger.debug("Sorting samples by row/column")
samples = sorted(self.samples, key=attrgetter('column', 'row'))
submission_obj = BasicSubmission.find_polymorphic_subclass(polymorphic_identity=self.submission_type)
samples = submission_obj.adjust_autofill_samples(samples=samples)
@@ -704,6 +716,15 @@ class PydSubmission(BaseModel, extra='allow'):
return workbook
def autofill_equipment(self, workbook:Workbook) -> Workbook:
"""
Fill in equipment on the excel sheet
Args:
workbook (Workbook): Input excel workbook
Returns:
Workbook: Updated excel workbook
"""
equipment_map = SubmissionType.query(name=self.submission_type['value']).construct_equipment_map()
logger.debug(f"Equipment map: {equipment_map}")
# See if all equipment has a location map
@@ -712,6 +733,7 @@ class PydSubmission(BaseModel, extra='allow'):
logger.warning("Creating 'Equipment' sheet to hold unmapped equipment")
workbook.create_sheet("Equipment")
equipment = []
# logger.debug("Constructing equipment info map/values")
for ii, equip in enumerate(self.equipment, start=1):
loc = [item for item in equipment_map if item['role'] == equip.role][0]
try:
@@ -746,12 +768,10 @@ class PydSubmission(BaseModel, extra='allow'):
Returns:
str: Output filename
"""
env = jinja_template_loading()
template = BasicSubmission.find_polymorphic_subclass(polymorphic_identity=self.submission_type).filename_template()
logger.debug(f"Using template string: {template}")
template = env.from_string(template)
render = template.render(**self.improved_dict(dictionaries=False)).replace("/", "")
logger.debug(f"Template rendered as: {render}")
# logger.debug(f"Using template string: {template}")
render = RSLNamer.construct_export_name(template=template, **self.improved_dict(dictionaries=False)).replace("/", "")
# logger.debug(f"Template rendered as: {render}")
return render
def check_kit_integrity(self, reagenttypes:list=[]) -> Report:
@@ -785,6 +805,7 @@ class PydSubmission(BaseModel, extra='allow'):
return report
class PydContact(BaseModel):
name: str
phone: str|None
email: str|None
@@ -818,7 +839,8 @@ class PydOrganization(BaseModel):
value = [item.toSQL() for item in getattr(self, field)]
case _:
value = getattr(self, field)
instance.set_attribute(name=field, value=value)
# instance.set_attribute(name=field, value=value)
instance.__setattr__(name=field, value=value)
return instance
class PydReagentType(BaseModel):
@@ -845,19 +867,16 @@ class PydReagentType(BaseModel):
Returns:
ReagentType: ReagentType instance
"""
# instance: ReagentType = lookup_reagent_types(ctx=ctx, name=self.name)
instance: ReagentType = ReagentType.query(name=self.name)
if instance == None:
instance = ReagentType(name=self.name, eol_ext=self.eol_ext)
logger.debug(f"This is the reagent type instance: {instance.__dict__}")
try:
# assoc = lookup_reagenttype_kittype_association(ctx=ctx, reagent_type=instance, kit_type=kit)
assoc = KitTypeReagentTypeAssociation.query(reagent_type=instance, kit_type=kit)
except StatementError:
assoc = None
if assoc == None:
assoc = KitTypeReagentTypeAssociation(kit_type=kit, reagent_type=instance, uses=self.uses, required=self.required)
# kit.kit_reagenttype_associations.append(assoc)
return instance
class PydKit(BaseModel):
@@ -872,13 +891,10 @@ class PydKit(BaseModel):
Returns:
Tuple[KitType, Report]: KitType instance and report of results.
"""
# result = dict(message=None, status='Information')
report = Report()
# instance = lookup_kit_types(ctx=ctx, name=self.name)
instance = KitType.query(name=self.name)
if instance == None:
instance = KitType(name=self.name)
# instance.reagent_types = [item.toSQL(ctx, instance) for item in self.reagent_types]
[item.toSQL(instance) for item in self.reagent_types]
return instance, report
@@ -888,7 +904,17 @@ class PydEquipmentRole(BaseModel):
equipment: List[PydEquipment]
processes: List[str]|None
def toForm(self, parent, submission_type, used):
def toForm(self, parent, used:list) -> "RoleComboBox":
"""
Creates a widget for user input into this class.
Args:
parent: parent widget, passed through to RoleComboBox
used (list): list of equipment already added to submission
Returns:
RoleComboBox: widget
"""
from frontend.widgets.equipment_usage import RoleComboBox
return RoleComboBox(parent=parent, role=self, submission_type=submission_type, used=used)
return RoleComboBox(parent=parent, role=self, used=used)