Large scale refactor to improve db efficiency

This commit is contained in:
Landon Wark
2023-09-27 14:16:28 -05:00
parent 82ab06efad
commit e484eabb22
37 changed files with 1782 additions and 1697 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,91 @@
'''Contains or imports all database convenience functions'''
from tools import Settings, package_dir
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, event
from sqlalchemy.engine import Engine
from sqlalchemy.exc import OperationalError as AlcOperationalError, IntegrityError as AlcIntegrityError
from sqlite3 import OperationalError as SQLOperationalError, IntegrityError as SQLIntegrityError
from pathlib import Path
import logging
# Use the dotted "submissions." prefix, as the other database modules do, so
# this logger sits under the same parent in the logging hierarchy.
logger = logging.getLogger(f"submissions.{__name__}")
@event.listens_for(Engine, "connect")
def set_sqlite_pragma(dbapi_connection, connection_record):
    """
    Turn on foreign key enforcement for each newly opened connection.

    Registered as a listener for the "connect" event on Engine, so it runs
    every time a new DBAPI connection is created. SQLite does not enforce
    foreign key constraints on a connection unless the pragma is issued.

    Args:
        dbapi_connection: Raw DBAPI connection that was just opened.
        connection_record: Connection record passed by the event (unused).
    """
    cursor = dbapi_connection.cursor()
    try:
        cursor.execute("PRAGMA foreign_keys=ON")
    finally:
        # release the cursor even if the pragma statement fails
        cursor.close()
def create_database_session(ctx:Settings) -> Session:
    """
    Create database session for app.

    The sqlite location is resolved as follows:
    - a path from settings: used as-is if it is ":memory:" or an existing
      file; if it is a directory, submissions.db inside it is used.
    - no path in settings: ~/.submissions/submissions.db if it exists,
      otherwise submissions.db in the package directory.

    Args:
        ctx (Settings): settings passed down from gui

    Raises:
        FileNotFoundError: Raised if the path from settings is neither an
            existing file nor an existing directory

    Returns:
        Session: Sqlalchemy session object.
    """
    database_path = ctx.database_path
    if database_path is None:
        # check in user's .submissions directory for submissions.db
        home_db = Path.home().joinpath(".submissions", "submissions.db")
        if home_db.exists():
            database_path = home_db
        # finally, look in the local dir
        else:
            database_path = package_dir.joinpath("submissions.db")
    elif database_path != ":memory:":
        # accept plain strings as well as Path objects
        database_path = Path(database_path)
        if database_path.is_dir():
            database_path = database_path.joinpath("submissions.db")
        elif not database_path.is_file():
            raise FileNotFoundError("No database file found. Exiting program.")
    logger.debug(f"Using {database_path} for database file.")
    engine = create_engine(f"sqlite:///{database_path}", echo=True, future=True)
    return Session(engine)
def store_object(ctx:Settings, object) -> dict|None:
    """
    Store an object in the database

    The object is merged into the session held by ctx and committed. On an
    integrity or operational error the transaction is rolled back and a
    result dictionary describing the failure is returned instead of raising.

    Args:
        ctx (Settings): Settings object passed down from gui
        object (_type_): Object to be stored

    Returns:
        dict|None: None on success, otherwise a dictionary with a "message"
            key (and "status" for integrity errors).
    """
    dbs = ctx.database_session
    dbs.merge(object)
    try:
        dbs.commit()
    # use the aliases imported at the top of the module; the bare
    # sqlite3 / sqlalchemy module names are not in scope here
    except (SQLIntegrityError, AlcIntegrityError) as e:
        logger.debug(f"Hit an integrity error : {e}")
        dbs.rollback()
        return {"message":f"This object {object} already exists, so we can't add it.", "status":"Critical"}
    except (SQLOperationalError, AlcOperationalError) as e:
        logger.error(f"Hit an operational error: {e}")
        dbs.rollback()
        return {"message":"The database is locked for editing."}
    return None
from .lookups import *
from .constructions import *
from .misc import *

View File

@@ -0,0 +1,276 @@
'''
Used to construct models from input dictionaries.
'''
from getpass import getuser
from tools import Settings, RSLNamer, check_regex_match
from .. import models
from .lookups import *
import logging
from datetime import date, timedelta
from dateutil.parser import parse
from typing import Tuple
from sqlalchemy.exc import IntegrityError, SAWarning
logger = logging.getLogger(f"submissions.{__name__}")
def construct_reagent(ctx:Settings, info_dict:dict) -> models.Reagent:
"""
Construct reagent object from dictionary
Args:
ctx (Settings): settings object passed down from gui
info_dict (dict): dictionary to be converted
Returns:
models.Reagent: Constructed reagent object
"""
reagent = models.Reagent()
for item in info_dict:
logger.debug(f"Reagent info item for {item}: {info_dict[item]}")
# set fields based on keys in dictionary
match item:
case "lot":
reagent.lot = info_dict[item].upper()
case "expiry":
if isinstance(info_dict[item], date):
reagent.expiry = info_dict[item]
else:
reagent.expiry = parse(info_dict[item]).date()
case "type":
reagent_type = lookup_reagent_types(ctx=ctx, name=info_dict[item])
if reagent_type != None:
reagent.type.append(reagent_type)
case "name":
if item == None:
reagent.name = reagent.type.name
else:
reagent.name = info_dict[item]
# add end-of-life extension from reagent type to expiry date
# NOTE: this will now be done only in the reporting phase to account for potential changes in end-of-life extensions
return reagent
def construct_submission_info(ctx:Settings, info_dict:dict) -> Tuple[models.BasicSubmission, dict]:
"""
Construct submission object from dictionary pulled from gui form
Args:
ctx (Settings): settings object passed down from gui
info_dict (dict): dictionary to be transformed
Returns:
models.BasicSubmission: Constructed submission object
"""
# convert submission type into model name
query = info_dict['submission_type'].replace(" ", "")
# Ensure an rsl plate number exists for the plate
if not check_regex_match("^RSL", info_dict["rsl_plate_num"]):
instance = None
msg = "A proper RSL plate number is required."
return instance, {'code': 2, 'message': "A proper RSL plate number is required."}
else:
# enforce conventions on the rsl plate number from the form
info_dict['rsl_plate_num'] = RSLNamer(ctx=ctx, instr=info_dict["rsl_plate_num"]).parsed_name
# check database for existing object
instance = lookup_submissions(ctx=ctx, rsl_number=info_dict['rsl_plate_num'])
# get model based on submission type converted above
logger.debug(f"Looking at models for submission type: {query}")
model = getattr(models, query)
logger.debug(f"We've got the model: {type(model)}")
# if query return nothing, ie doesn't already exist in db
if instance == None:
instance = model()
logger.debug(f"Submission doesn't exist yet, creating new instance: {instance}")
msg = None
code = 0
else:
code = 1
msg = "This submission already exists.\nWould you like to overwrite?"
for item in info_dict:
value = info_dict[item]
logger.debug(f"Setting {item} to {value}")
# set fields based on keys in dictionary
match item:
case "extraction_kit":
logger.debug(f"Looking up kit {value}")
field_value = lookup_kit_types(ctx=ctx, name=value)
logger.debug(f"Got {field_value} for kit {value}")
case "submitting_lab":
logger.debug(f"Looking up organization: {value}")
field_value = lookup_organizations(ctx=ctx, name=value)
logger.debug(f"Got {field_value} for organization {value}")
case "submitter_plate_num":
logger.debug(f"Submitter plate id: {value}")
field_value = value
case "samples":
instance = construct_samples(ctx=ctx, instance=instance, samples=value)
continue
case "submission_type":
field_value = lookup_submission_type(ctx=ctx, name=value)
case _:
field_value = value
# insert into field
try:
setattr(instance, item, field_value)
except AttributeError:
logger.debug(f"Could not set attribute: {item} to {info_dict[item]}")
continue
except KeyError:
continue
# calculate cost of the run: immutable cost + mutable times number of columns
# This is now attached to submission upon creation to preserve at-run costs incase of cost increase in the future.
try:
logger.debug(f"Calculating costs for procedure...")
instance.calculate_base_cost()
except (TypeError, AttributeError) as e:
logger.debug(f"Looks like that kit doesn't have cost breakdown yet due to: {e}, using full plate cost.")
instance.run_cost = instance.extraction_kit.cost_per_run
logger.debug(f"Calculated base run cost of: {instance.run_cost}")
# Apply any discounts that are applicable for client and kit.
try:
logger.debug("Checking and applying discounts...")
discounts = [item.amount for item in lookup_discounts(ctx=ctx, kit_type=instance.extraction_kit, organization=instance.submitting_lab)]
logger.debug(f"We got discounts: {discounts}")
if len(discounts) > 0:
discounts = sum(discounts)
instance.run_cost = instance.run_cost - discounts
except Exception as e:
logger.error(f"An unknown exception occurred when calculating discounts: {e}")
# We need to make sure there's a proper rsl plate number
logger.debug(f"We've got a total cost of {instance.run_cost}")
try:
logger.debug(f"Constructed instance: {instance.to_string()}")
except AttributeError as e:
logger.debug(f"Something went wrong constructing instance {info_dict['rsl_plate_num']}: {e}")
logger.debug(f"Constructed submissions message: {msg}")
return instance, {'code':code, 'message':msg}
def construct_samples(ctx:Settings, instance:models.BasicSubmission, samples:List[dict]) -> models.BasicSubmission:
    """
    Constructs sample associations and adds them to a submission.

    Each entry of samples is read through its 'sample', 'row' and 'column'
    keys. A sample already attached to the submission is skipped; otherwise
    an association object linking the submission and the sample is appended
    to instance.submission_sample_associations.

    Args:
        ctx (Settings): settings passed down from gui
        instance (models.BasicSubmission): Submission the samples are added to.
        samples (List[dict]): List of parsed samples

    Returns:
        models.BasicSubmission: Updated submission object.
    """
    for sample in samples:
        # reuse the stored sample if one with this submitter id already exists
        sample_instance = lookup_samples(ctx=ctx, submitter_id=sample['sample'].submitter_id)
        if sample_instance == None:
            sample_instance = sample['sample']
        else:
            logger.warning(f"Sample {sample} already exists, creating association.")
        logger.debug(f"Adding {sample_instance.__dict__}")
        if sample_instance in instance.samples:
            logger.error(f"Looks like there's a duplicate sample on this plate: {sample_instance.submitter_id}!")
            continue
        try:
            # autoflush is suspended while the association is being built
            with ctx.database_session.no_autoflush:
                try:
                    # e.g. a sample_type of "Wastewater Sample" gives "Wastewater"
                    sample_query = sample_instance.sample_type.replace('Sample', '').strip()
                    logger.debug(f"Here is the sample instance type: {sample_instance}")
                    try:
                        # prefer the association class named after the sample type
                        assoc = getattr(models, f"{sample_query}Association")
                    except AttributeError as e:
                        logger.error(f"Couldn't get type specific association. Getting generic.")
                        assoc = models.SubmissionSampleAssociation
                    assoc = assoc(submission=instance, sample=sample_instance, row=sample['row'], column=sample['column'])
                    instance.submission_sample_associations.append(assoc)
                except IntegrityError:
                    logger.error(f"Hit integrity error for: {sample}")
                    continue
                # NOTE(review): SAWarning is a warning class; this handler presumably
                # only fires if warnings are raised as errors - confirm
                except SAWarning:
                    logger.error(f"Looks like the association already exists for submission: {instance} and sample: {sample_instance}")
                    continue
        except IntegrityError as e:
            logger.critical(e)
            continue
    return instance
def construct_kit_from_yaml(ctx:Settings, exp:dict) -> dict:
    """
    Create and store a new kit in the database based on a .yml file
    TODO: split into create and store functions

    exp is keyed by submission type name. Each entry holds a 'kits'
    dictionary keyed by kit name, and each kit provides 'constant_cost',
    'mutable_cost_column', 'mutable_cost_sample' and a 'reagenttypes'
    dictionary whose entries carry an 'eol_ext' value.

    Args:
        ctx (Settings): Context object passed down from frontend
        exp (dict): Experiment dictionary created from yaml file

    Returns:
        dict: a dictionary containing results of db addition
    """
    from tools import check_is_power_user, massage_common_reagents
    # Don't want just anyone adding kits
    if not check_is_power_user(ctx=ctx):
        logger.debug(f"{getuser()} does not have permission to add kits.")
        return {'code':1, 'message':"This user does not have permission to add kits.", "status":"warning"}
    # iterate through submission types in dict
    for type_name, type_info in exp.items():
        # A submission type may use multiple kits.
        for kit_name, kit_info in type_info['kits'].items():
            logger.debug(f"Looking up submission type: {type_name}")
            submission_type = lookup_submission_type(ctx=ctx, name=type_name)
            logger.debug(f"Looked up submission type: {submission_type}")
            kit = models.KitType(name=kit_name)
            kt_st_assoc = models.SubmissionTypeKitTypeAssociation(kit_type=kit, submission_type=submission_type)
            kt_st_assoc.constant_cost = kit_info["constant_cost"]
            kt_st_assoc.mutable_cost_column = kit_info["mutable_cost_column"]
            kt_st_assoc.mutable_cost_sample = kit_info["mutable_cost_sample"]
            kit.kit_submissiontype_associations.append(kt_st_assoc)
            # A kit contains multiple reagent types.
            for raw_name, reagent_info in kit_info['reagenttypes'].items():
                # The massaged name is only used for the database; the yaml
                # entry is read through its original key so a renamed
                # reagent can not cause a KeyError.
                r = massage_common_reagents(raw_name)
                # check if reagent type already exists.
                look_up = ctx.database_session.query(models.ReagentType).filter(models.ReagentType.name==r).first()
                if look_up is None:
                    # eol_ext is multiplied by 30 to get the number of days
                    rt = models.ReagentType(name=r.strip(), eol_ext=timedelta(30*reagent_info['eol_ext']), last_used="")
                else:
                    rt = look_up
                assoc = models.KitTypeReagentTypeAssociation(kit_type=kit, reagent_type=rt, uses={})
                ctx.database_session.add(rt)
                kit.kit_reagenttype_associations.append(assoc)
                logger.debug(f"Kit construction reagent type: {rt.__dict__}")
            logger.debug(f"Kit construction kit: {kit.__dict__}")
            ctx.database_session.add(kit)
    ctx.database_session.commit()
    return {'code':0, 'message':'Kit has been added', 'status': 'information'}
def construct_org_from_yaml(ctx:Settings, org:dict) -> dict:
    """
    Create and store a new organization based on a .yml file

    org is keyed by client name. Each entry provides a 'cost centre' value
    and a 'contacts' list, where every contact is a one-key dictionary
    mapping the contact name to its 'phone' and 'email'.

    Args:
        ctx (Settings): Context object passed down from frontend
        org (dict): Dictionary containing organization info.

    Returns:
        dict: dictionary containing results of db addition
    """
    from tools import check_is_power_user
    # Don't want just anyone adding in clients
    if not check_is_power_user(ctx=ctx):
        logger.debug(f"{getuser()} does not have permission to add organizations.")
        return {'code':1, 'message':"This user does not have permission to add organizations."}
    # the yml can contain multiple clients
    for client, client_info in org.items():
        # organization names are stored lower case with underscores
        cli_org = models.Organization(name=client.replace(" ", "_").lower(), cost_centre=client_info['cost centre'])
        # a client can contain multiple contacts
        for contact in client_info['contacts']:
            cont_name = list(contact.keys())[0]
            # check if contact already exists
            look_up = ctx.database_session.query(models.Contact).filter(models.Contact.name==cont_name).first()
            if look_up is None:
                cli_cont = models.Contact(name=cont_name, phone=contact[cont_name]['phone'], email=contact[cont_name]['email'], organization=[cli_org])
            else:
                # existing contact: just link it to the new organization
                cli_cont = look_up
                cli_cont.organization.append(cli_org)
            ctx.database_session.add(cli_cont)
            logger.debug(f"Client creation contact: {cli_cont.__dict__}")
        logger.debug(f"Client creation client: {cli_org.__dict__}")
        ctx.database_session.add(cli_org)
    ctx.database_session.commit()
    return {"code":0, "message":"Organization has been added."}

View File

@@ -0,0 +1,483 @@
from .. import models
from tools import Settings, RSLNamer
from typing import List
import logging
from datetime import date, datetime
from dateutil.parser import parse
from sqlalchemy.orm.query import Query
from sqlalchemy import and_, JSON
from sqlalchemy.orm import Session
logger = logging.getLogger(f"submissions.{__name__}")
def query_return(query:Query, limit:int=0):
    """
    Execute a query and return as many results as limit asks for.

    Args:
        query (Query): Query to be executed.
        limit (int, optional): 0 returns every result, 1 returns only the
            first result, any other value returns a list capped at that
            many results. Defaults to 0.

    Returns:
        Result of query.all(), query.first() or query.limit(limit).all().
    """
    # autoflush is suspended while the query is executed
    with query.session.no_autoflush:
        if limit == 0:
            return query.all()
        if limit == 1:
            return query.first()
        return query.limit(limit).all()
def setup_lookup(ctx:Settings, locals:dict) -> Session:
    """
    Validate the arguments of a lookup function and hand back the session.

    Args:
        ctx (Settings): Settings object passed down from gui
        locals (dict): local variables of the calling lookup function

    Raises:
        ValueError: Raised if any argument other than "kwargs" is a dictionary

    Returns:
        Session: the database session held by ctx
    """
    has_dict_argument = any(
        isinstance(value, dict)
        for key, value in locals.items()
        if key != "kwargs"
    )
    if has_dict_argument:
        raise ValueError("Cannot use dictionary in query. Make sure you parse it first.")
    return ctx.database_session
################## Basic Lookups ####################################
def lookup_reagents(ctx:Settings,
                    reagent_type:str|models.ReagentType|None=None,
                    lot_number:str|None=None,
                    limit:int=0
                    ) -> models.Reagent|List[models.Reagent]:
    """
    Look up reagents, optionally filtered by reagent type and lot number.

    Args:
        ctx (Settings): Settings object passed down from gui
        reagent_type (str | models.ReagentType | None, optional): Reagent type name or object. Defaults to None.
        lot_number (str | None, optional): Reagent lot number; when given only one result is returned. Defaults to None.
        limit (int, optional): limit of results returned. Defaults to 0.

    Returns:
        models.Reagent | List[models.Reagent]: reagent or list of reagents matching filter.
    """
    query = setup_lookup(ctx=ctx, locals=locals()).query(models.Reagent)
    if isinstance(reagent_type, str):
        logger.debug(f"Looking up reagents by reagent type: {reagent_type}")
        query = query.join(models.Reagent.type, aliased=True).filter(models.ReagentType.name==reagent_type)
    elif isinstance(reagent_type, models.ReagentType):
        logger.debug(f"Looking up reagents by reagent type: {reagent_type}")
        query = query.filter(models.Reagent.type.contains(reagent_type))
    if isinstance(lot_number, str):
        logger.debug(f"Looking up reagent by lot number: {lot_number}")
        query = query.filter(models.Reagent.lot==lot_number)
        # a lot number lookup returns a single reagent
        limit = 1
    return query_return(query=query, limit=limit)
def lookup_kit_types(ctx:Settings,
                     name:str=None,
                     used_for:str|None=None,
                     id:int|None=None,
                     limit:int=0
                     ) -> models.KitType|List[models.KitType]:
    """
    Look up kit types, optionally filtered by use, name and id.

    Args:
        ctx (Settings): Settings object passed down from gui
        name (str, optional): Kit type name; when given only one result is returned. Defaults to None.
        used_for (str | None, optional): Name matched against the kit's used_for entries. Defaults to None.
        id (int | None, optional): Kit type id (a string is converted with int()); when given only one result is returned. Defaults to None.
        limit (int, optional): limit of results returned. Defaults to 0.

    Returns:
        models.KitType | List[models.KitType]: kit type or list of kit types matching filter.
    """
    query = setup_lookup(ctx=ctx, locals=locals()).query(models.KitType)
    if isinstance(used_for, str):
        logger.debug(f"Looking up kit type by use: {used_for}")
        query = query.filter(models.KitType.used_for.any(name=used_for))
    if isinstance(name, str):
        logger.debug(f"Looking up kit type by name: {name}")
        query = query.filter(models.KitType.name==name)
        limit = 1
    if isinstance(id, int):
        logger.debug(f"Looking up kit type by id: {id}")
        query = query.filter(models.KitType.id==id)
        limit = 1
    elif isinstance(id, str):
        logger.debug(f"Looking up kit type by id: {id}")
        query = query.filter(models.KitType.id==int(id))
        limit = 1
    return query_return(query=query, limit=limit)
def lookup_reagent_types(ctx:Settings,
                         name: str|None=None,
                         kit_type: models.KitType|str|None=None,
                         reagent: models.Reagent|str|None=None,
                         limit:int=0,
                         ) -> models.ReagentType|List[models.ReagentType]:
    """
    Look up reagent types by name, or find the one shared by a kit and a reagent.

    When both kit_type and reagent are given, the first reagent type found
    in both the kit's reagent types and the reagent's types is returned and
    name/limit are not used.

    Args:
        ctx (Settings): Settings object passed down from gui.
        name (str | None, optional): Reagent type name; when given only one result is returned. Defaults to None.
        kit_type (models.KitType | str | None, optional): Kit type object or name. Defaults to None.
        reagent (models.Reagent | str | None, optional): Reagent object or lot number. Defaults to None.
        limit (int, optional): limit of results to return. Defaults to 0.

    Raises:
        ValueError: Raised if only one of kit_type and reagent is given.

    Returns:
        models.ReagentType|List[models.ReagentType]: ReagentType or list of ReagentTypes matching filter.
    """
    query = setup_lookup(ctx=ctx, locals=locals()).query(models.ReagentType)
    # kit_type and reagent must be supplied together or not at all
    if (kit_type is None) != (reagent is None):
        raise ValueError("Cannot filter without both reagent and kit type.")
    if kit_type is not None:
        if isinstance(kit_type, str):
            kit_type = lookup_kit_types(ctx=ctx, name=kit_type)
        if isinstance(reagent, str):
            reagent = lookup_reagents(ctx=ctx, lot_number=reagent)
        shared_types = set(kit_type.reagent_types).intersection(reagent.type)
        return list(shared_types)[0]
    if isinstance(name, str):
        logger.debug(f"Looking up reagent type by name: {name}")
        query = query.filter(models.ReagentType.name==name)
        limit = 1
    return query_return(query=query, limit=limit)
def lookup_submissions(ctx:Settings,
submission_type:str|models.SubmissionType|None=None,
id:int|str|None=None,
rsl_number:str|None=None,
start_date:date|str|int|None=None,
end_date:date|str|int|None=None,
reagent:models.Reagent|str|None=None,
chronologic:bool=False, limit:int=0,
**kwargs
) -> models.BasicSubmission | List[models.BasicSubmission]:
model = models.find_subclasses(parent=models.BasicSubmission, attrs=kwargs)
query = setup_lookup(ctx=ctx, locals=locals()).query(model)
# by submission type
match submission_type:
case models.SubmissionType():
logger.debug(f"Looking up BasicSubmission with submission type: {submission_type}")
# query = query.filter(models.BasicSubmission.submission_type_name==submission_type.name)
query = query.filter(model.submission_type_name==submission_type.name)
case str():
logger.debug(f"Looking up BasicSubmission with submission type: {submission_type}")
# query = query.filter(models.BasicSubmission.submission_type_name==submission_type)
query = query.filter(model.submission_type_name==submission_type)
case _:
pass
# by date range
if start_date != None and end_date == None:
logger.warning(f"Start date with no end date, using today.")
end_date = date.today()
if end_date != None and start_date == None:
logger.warning(f"End date with no start date, using Jan 1, 2023")
start_date = date(2023, 1, 1)
if start_date != None:
match start_date:
case date():
start_date = start_date.strftime("%Y-%m-%d")
case int():
start_date = datetime.fromordinal(datetime(1900, 1, 1).toordinal() + start_date - 2).date().strftime("%Y-%m-%d")
case _:
start_date = parse(start_date).strftime("%Y-%m-%d")
match end_date:
case date():
end_date = end_date.strftime("%Y-%m-%d")
case int():
end_date = datetime.fromordinal(datetime(1900, 1, 1).toordinal() + end_date - 2).date().strftime("%Y-%m-%d")
case _:
end_date = parse(end_date).strftime("%Y-%m-%d")
logger.debug(f"Looking up BasicSubmissions from start date: {start_date} and end date: {end_date}")
# query = query.filter(models.BasicSubmission.submitted_date.between(start_date, end_date))
query = query.filter(model.submitted_date.between(start_date, end_date))
# by reagent (for some reason)
match reagent:
case str():
logger.debug(f"Looking up BasicSubmission with reagent: {reagent}")
reagent = lookup_reagents(ctx=ctx, lot_number=reagent)
query = query.join(models.submissions.reagents_submissions).filter(models.submissions.reagents_submissions.c.reagent_id==reagent.id).all()
case models.Reagent:
logger.debug(f"Looking up BasicSubmission with reagent: {reagent}")
query = query.join(models.submissions.reagents_submissions).filter(models.submissions.reagents_submissions.c.reagent_id==reagent.id).all()
case _:
pass
# by rsl number (returns only a single value)
match rsl_number:
case str():
logger.debug(f"Looking up BasicSubmission with rsl number: {rsl_number}")
rsl_number = RSLNamer(ctx=ctx, instr=rsl_number).parsed_name
# query = query.filter(models.BasicSubmission.rsl_plate_num==rsl_number)
query = query.filter(model.rsl_plate_num==rsl_number)
limit = 1
case _:
pass
# by id (returns only a single value)
match id:
case int():
logger.debug(f"Looking up BasicSubmission with id: {id}")
# query = query.filter(models.BasicSubmission.id==id)
query = query.filter(model.id==id)
limit = 1
case str():
logger.debug(f"Looking up BasicSubmission with id: {id}")
# query = query.filter(models.BasicSubmission.id==int(id))
query = query.filter(model.id==int(id))
limit = 1
case _:
pass
for k, v in kwargs.items():
attr = getattr(model, k)
logger.debug(f"Got attr: {attr}")
query = query.filter(attr==v)
if len(kwargs) > 0:
limit = 1
if chronologic:
# query.order_by(models.BasicSubmission.submitted_date)
query.order_by(model.submitted_date)
return query_return(query=query, limit=limit)
def lookup_submission_type(ctx:Settings,
                           name:str|None=None,
                           limit:int=0
                           ) -> models.SubmissionType|List[models.SubmissionType]:
    """
    Look up submission types, optionally by name.

    Args:
        ctx (Settings): Settings object passed down from gui
        name (str | None, optional): Submission type name; when given only one result is returned. Defaults to None.
        limit (int, optional): limit of results returned. Defaults to 0.

    Returns:
        models.SubmissionType | List[models.SubmissionType]: submission type or list of submission types matching filter.
    """
    query = setup_lookup(ctx=ctx, locals=locals()).query(models.SubmissionType)
    if isinstance(name, str):
        logger.debug(f"Looking up submission type by name: {name}")
        query = query.filter(models.SubmissionType.name==name)
        limit = 1
    return query_return(query=query, limit=limit)
def lookup_organizations(ctx:Settings,
                         name:str|None=None,
                         limit:int=0,
                         ) -> models.Organization|List[models.Organization]:
    """
    Look up organizations, optionally by name.

    Args:
        ctx (Settings): Settings object passed down from gui
        name (str | None, optional): Organization name; when given only one result is returned. Defaults to None.
        limit (int, optional): limit of results returned. Defaults to 0.

    Returns:
        models.Organization | List[models.Organization]: organization or list of organizations matching filter.
    """
    query = setup_lookup(ctx=ctx, locals=locals()).query(models.Organization)
    if isinstance(name, str):
        logger.debug(f"Looking up organization with name: {name}")
        query = query.filter(models.Organization.name==name)
        limit = 1
    return query_return(query=query, limit=limit)
def lookup_discounts(ctx:Settings,
organization:models.Organization|str|int,
kit_type:models.KitType|str|int,
) -> models.Discount|List[models.Discount]:
query = setup_lookup(ctx=ctx, locals=locals()).query(models.Discount)
match organization:
case models.Organization():
logger.debug(f"Looking up discount with organization: {organization}")
organization = organization.id
case str():
logger.debug(f"Looking up discount with organization: {organization}")
organization = lookup_organizations(ctx=ctx, name=organization).id
case int():
logger.debug(f"Looking up discount with organization id: {organization}")
pass
case _:
raise ValueError(f"Invalid value for organization: {organization}")
match kit_type:
case models.KitType():
logger.debug(f"Looking up discount with kit type: {kit_type}")
kit_type = kit_type.id
case str():
logger.debug(f"Looking up discount with kit type: {kit_type}")
kit_type = lookup_kit_types(ctx=ctx, name=kit_type).id
case int():
logger.debug(f"Looking up discount with kit type id: {organization}")
pass
case _:
raise ValueError(f"Invalid value for kit type: {kit_type}")
return query.join(models.KitType).join(models.Organization).filter(and_(
models.KitType.id==kit_type,
models.Organization.id==organization
)).all()
def lookup_controls(ctx:Settings,
control_type:models.ControlType|str|None=None,
start_date:date|str|int|None=None,
end_date:date|str|int|None=None,
limit:int=0
) -> models.Control|List[models.Control]:
query = setup_lookup(ctx=ctx, locals=locals()).query(models.Control)
# by control type
match control_type:
case models.ControlType():
logger.debug(f"Looking up control by control type: {control_type}")
query = query.join(models.ControlType).filter(models.ControlType==control_type)
case str():
logger.debug(f"Looking up control by control type: {control_type}")
query = query.join(models.ControlType).filter(models.ControlType.name==control_type)
case _:
pass
# by date range
if start_date != None and end_date == None:
logger.warning(f"Start date with no end date, using today.")
end_date = date.today()
if end_date != None and start_date == None:
logger.warning(f"End date with no start date, using Jan 1, 2023")
start_date = date(2023, 1, 1)
if start_date != None:
match start_date:
case date():
start_date = start_date.strftime("%Y-%m-%d")
case int():
start_date = datetime.fromordinal(datetime(1900, 1, 1).toordinal() + start_date - 2).date().strftime("%Y-%m-%d")
case _:
start_date = parse(start_date).strftime("%Y-%m-%d")
match end_date:
case date():
end_date = end_date.strftime("%Y-%m-%d")
case int():
end_date = datetime.fromordinal(datetime(1900, 1, 1).toordinal() + end_date - 2).date().strftime("%Y-%m-%d")
case _:
end_date = parse(end_date).strftime("%Y-%m-%d")
logger.debug(f"Looking up BasicSubmissions from start date: {start_date} and end date: {end_date}")
query = query.filter(models.Control.submitted_date.between(start_date, end_date))
return query_return(query=query, limit=limit)
def lookup_control_types(ctx:Settings, limit:int=0) -> models.ControlType|List[models.ControlType]:
    """
    Look up control types.

    Args:
        ctx (Settings): Settings object passed down from gui
        limit (int, optional): limit of results returned. Defaults to 0.

    Returns:
        models.ControlType | List[models.ControlType]: control type or list of control types.
    """
    session = setup_lookup(ctx=ctx, locals=locals())
    return query_return(query=session.query(models.ControlType), limit=limit)
def lookup_samples(ctx:Settings,
                   submitter_id:str|None=None,
                   sample_type:str|None=None,
                   limit:int=0,
                   **kwargs
                   ) -> models.BasicSample|models.WastewaterSample|List[models.BasicSample]:
    """
    Look up samples matching the given filters.

    Args:
        ctx (Settings): Settings object passed down from gui
        submitter_id (str | None, optional): Submitter's id for the sample; when given only one result is returned. Defaults to None.
        sample_type (str | None, optional): Sample type. Defaults to None.
        limit (int, optional): limit of results returned. Defaults to 0.
        **kwargs: Attribute names and values on the model to filter by; when any are given only one result is returned.

    Returns:
        models.BasicSample | models.WastewaterSample | List[models.BasicSample]: sample or list of samples matching filter.
    """
    logger.debug(f"Length of kwargs: {len(kwargs)}")
    model = models.find_subclasses(parent=models.BasicSample, attrs=kwargs)
    query = setup_lookup(ctx=ctx, locals=locals()).query(model)
    if isinstance(submitter_id, str):
        logger.debug(f"Looking up {model} with submitter id: {submitter_id}")
        query = query.filter(models.BasicSample.submitter_id==submitter_id)
        limit = 1
    if isinstance(sample_type, str):
        logger.debug(f"Looking up {model} with sample type: {sample_type}")
        query = query.filter(models.BasicSample.sample_type==sample_type)
    for attr_name, wanted in kwargs.items():
        attr = getattr(model, attr_name)
        logger.debug(f"Got attr: {attr}")
        query = query.filter(attr==wanted)
    if kwargs:
        limit = 1
    return query_return(query=query, limit=limit)
def lookup_reagenttype_kittype_association(ctx:Settings,
                                           kit_type:models.KitType|str|None,
                                           reagent_type:models.ReagentType|str|None,
                                           limit:int=0
                                           ) -> models.KitTypeReagentTypeAssociation|List[models.KitTypeReagentTypeAssociation]:
    """
    Look up associations between kit types and reagent types.

    Args:
        ctx (Settings): Settings object passed down from gui
        kit_type (models.KitType | str | None): Kit type object or name.
        reagent_type (models.ReagentType | str | None): Reagent type object or name.
        limit (int, optional): limit of results returned; forced to 1 when both kit_type and reagent_type are given. Defaults to 0.

    Returns:
        models.KitTypeReagentTypeAssociation | List[models.KitTypeReagentTypeAssociation]: association or list of associations matching filter.
    """
    query = setup_lookup(ctx=ctx, locals=locals()).query(models.KitTypeReagentTypeAssociation)
    if isinstance(kit_type, models.KitType):
        query = query.filter(models.KitTypeReagentTypeAssociation.kit_type==kit_type)
    elif isinstance(kit_type, str):
        query = query.join(models.KitType).filter(models.KitType.name==kit_type)
    if isinstance(reagent_type, models.ReagentType):
        query = query.filter(models.KitTypeReagentTypeAssociation.reagent_type==reagent_type)
    elif isinstance(reagent_type, str):
        query = query.join(models.ReagentType).filter(models.ReagentType.name==reagent_type)
    # a kit type / reagent type pair identifies a single association
    if not (kit_type is None or reagent_type is None):
        limit = 1
    return query_return(query=query, limit=limit)
def lookup_submission_sample_association(ctx:Settings,
submission:models.BasicSubmission|str|None=None,
sample:models.BasicSample|str|None=None,
limit:int=0
) -> models.SubmissionSampleAssociation|List[models.SubmissionSampleAssociation]:
query = setup_lookup(ctx=ctx, locals=locals()).query(models.SubmissionSampleAssociation)
match submission:
case models.BasicSubmission():
query = query.filter(models.SubmissionSampleAssociation.submission==submission)
case str():
query = query.join(models.BasicSubmission).filter(models.BasicSubmission.rsl_plate_num==submission)
case _:
pass
match sample:
case models.BasicSample():
query = query.filter(models.SubmissionSampleAssociation.sample==sample)
case str():
query = query.join(models.BasicSample).filter(models.BasicSample.submitter_id==sample)
case _:
pass
logger.debug(f"Query count: {query.count()}")
if query.count() == 1:
limit = 1
return query_return(query=query, limit=limit)
def lookup_modes(ctx:Settings) -> List[str]:
    """
    List the names of the JSON-typed columns of the control table.

    The columns are read from the first control found in the database; if
    that fails with an AttributeError an empty list is returned.

    Args:
        ctx (Settings): Settings object passed down from gui

    Returns:
        List[str]: names of the JSON columns
    """
    first_control = ctx.database_session.query(models.Control).first()
    try:
        return [
            column.name
            for column in list(first_control.__table__.columns)
            if isinstance(column.type, JSON)
        ]
    except AttributeError as e:
        logger.debug(f"Failed to get available modes from db: {e}")
        return []
############### Complex Lookups ###################################
def lookup_sub_samp_association_by_plate_sample(ctx:Settings, rsl_plate_num:str|models.BasicSample, rsl_sample_num:str|models.BasicSubmission) -> models.WastewaterAssociation:
"""
_summary_
Args:
ctx (Settings): _description_
rsl_plate_num (str): _description_
sample_submitter_id (_type_): _description_
Returns:
models.SubmissionSampleAssociation: _description_
"""
# logger.debug(f"{type(rsl_plate_num)}, {type(rsl_sample_num)}")
match rsl_plate_num:
case models.BasicSubmission()|models.Wastewater():
# logger.debug(f"Model for rsl_plate_num: {rsl_plate_num}")
first_query = ctx.database_session.query(models.SubmissionSampleAssociation)\
.filter(models.SubmissionSampleAssociation.submission==rsl_plate_num)
case str():
# logger.debug(f"String for rsl_plate_num: {rsl_plate_num}")
first_query = ctx.database_session.query(models.SubmissionSampleAssociation)\
.join(models.BasicSubmission)\
.filter(models.BasicSubmission.rsl_plate_num==rsl_plate_num)
case _:
logger.error(f"Unknown case for rsl_plate_num {rsl_plate_num}")
match rsl_sample_num:
case models.BasicSample()|models.WastewaterSample():
# logger.debug(f"Model for rsl_sample_num: {rsl_sample_num}")
second_query = first_query.filter(models.SubmissionSampleAssociation.sample==rsl_sample_num)
# case models.WastewaterSample:
# second_query = first_query.filter(models.SubmissionSampleAssociation.sample==rsl_sample_num)
case str():
# logger.debug(f"String for rsl_sample_num: {rsl_sample_num}")
second_query = first_query.join(models.BasicSample)\
.filter(models.BasicSample.submitter_id==rsl_sample_num)
case _:
logger.error(f"Unknown case for rsl_sample_num {rsl_sample_num}")
try:
return second_query.first()
except UnboundLocalError:
logger.error(f"Couldn't construct second query")
return None

View File

@@ -0,0 +1,238 @@
'''
Contains convenience functions for using database
'''
from tools import Settings
from .lookups import *
import pandas as pd
import json
from pathlib import Path
import yaml
from .. import models
from . import store_object
from sqlalchemy.exc import OperationalError as AlcOperationalError, IntegrityError as AlcIntegrityError
from sqlite3 import OperationalError as SQLOperationalError, IntegrityError as SQLIntegrityError
from pprint import pformat
def submissions_to_df(ctx:Settings, submission_type:str|None=None, limit:int=0) -> pd.DataFrame:
    """
    Convert submissions looked up by type to dataframe

    Each submission returned by lookup_submissions is converted with its
    to_dict() method and the resulting records are loaded into a dataframe.
    Nested / detail columns are then removed so only summary fields remain.

    Args:
        ctx (Settings): settings object passed by gui
        submission_type (str | None, optional): submission type (should be string in D3 of excel sheet) Defaults to None.
        limit (int): Maximum number of submissions to return. Defaults to 0.
            NOTE(review): presumably 0 means "no limit" - confirm in lookup_submissions.

    Returns:
        pd.DataFrame: dataframe constructed from retrieved submissions
    """
    logger.debug(f"Querying Type: {submission_type}")
    logger.debug(f"Using limit: {limit}")
    # use lookup function to create list of dicts
    subs = [item.to_dict() for item in lookup_submissions(ctx=ctx, submission_type=submission_type, limit=limit)]
    logger.debug(f"Got {len(subs)} results.")
    # make df from dicts (records) in list
    df = pd.DataFrame.from_records(subs)
    # Exclude sub information. A missing column is worth a warning but is not fatal;
    # DataFrame.drop signals a missing label with KeyError, so only that is caught.
    for column in ("controls", "ext_info", "pcr_info"):
        try:
            df = df.drop(column, axis=1)
        except KeyError:
            logger.warning(f"Couldn't drop '{column}' column from submissionsheet df.")
    # These columns are only present for some records; drop them silently when they exist.
    df = df.drop(columns=["samples", "reagents", "comments"], errors="ignore")
    return df
def get_control_subtypes(ctx:Settings, type:str, mode:str) -> list[str]:
    """
    Get subtypes for a control analysis mode

    Args:
        ctx (Settings): settings object passed from gui
        type (str): control type name
        mode (str): analysis mode name (attribute of the control holding a JSON string)

    Returns:
        list[str]: list of subtype names. Empty when no control, no data for
        the mode, or no organisms are available.
    """
    # Only the first control of type is necessary since they all share subtypes
    try:
        outs = lookup_controls(ctx=ctx, control_type=type, limit=1)
    except (TypeError, IndexError):
        return []
    # NOTE(review): assumes lookup_controls returns a single control (or None) when limit=1 - confirm.
    if outs is None:
        return []
    raw = getattr(outs, mode)
    # A control without stored data for this mode has nothing to report.
    if raw is None:
        return []
    # Get analysis mode data as dict
    jsoner = json.loads(raw)
    logger.debug(f"JSON out: {jsoner}")
    if not jsoner:
        return []
    # Any single top-level key will do: only the field names under it are needed.
    genera = next(iter(jsoner.keys()))
    # Bookkeeping fields ("*_hashes", "*_ratio") are not subtypes.
    return [item for item in jsoner[genera] if "_hashes" not in item and "_ratio" not in item]
def update_last_used(ctx:Settings, reagent:models.Reagent, kit:models.KitType) -> dict|None:
    """
    Updates the 'last_used' field in kittypes/reagenttypes

    Looks up the reagent type shared by the reagent and the kit, then the
    kit/reagent-type association, and stores the reagent's lot as the
    association's last_used value when it differs.

    Args:
        ctx (Settings): settings object passed down from gui
        reagent (models.Reagent): reagent to be used for update
        kit (models.KitType): kit to be used for lookup

    Returns:
        dict|None: whatever store_object returns when an update was stored,
        otherwise a dict with a 'message' explaining no update happened.
    """
    rt = lookup_reagent_types(ctx=ctx, kit_type=kit, reagent=reagent)
    if rt is not None:
        assoc = lookup_reagenttype_kittype_association(ctx=ctx, kit_type=kit, reagent_type=rt)
        # Only hit the database when the stored lot actually changes.
        if assoc is not None and assoc.last_used != reagent.lot:
            logger.debug(f"Updating {assoc} last used to {reagent.lot}")
            assoc.last_used = reagent.lot
            return store_object(ctx=ctx, object=assoc)
    return dict(message=f"Updating last used {rt} was not performed.")
def delete_submission(ctx:Settings, id:int) -> dict|None:
    """
    Deletes a submission from the database after writing a yml backup of it.

    NOTE(review): associated rows (samples, secondary tables) are only removed
    as far as the ORM cascade rules allow - confirm against the models.

    Args:
        ctx (Settings): settings object passed down from gui
        id (int): id of submission to be deleted.

    Raises:
        sqlite3.IntegrityError, sqlite3.OperationalError,
        sqlalchemy.exc.IntegrityError, sqlalchemy.exc.OperationalError:
            re-raised after rolling back when the commit fails.

    Returns:
        dict|None: None on success, or a dict with a 'message' when no
        submission with the given id exists.
    """
    # Local import: no visible import in this module brings `date` into scope explicitly.
    from datetime import date
    # In order to properly do this I'm going to have to delete all of the secondary table stuff as well.
    # Retrieve submission
    sub = lookup_submissions(ctx=ctx, id=id)
    if sub is None:
        logger.error(f"Couldn't find submission with id {id}; nothing deleted.")
        return dict(message=f"Submission with id {id} was not found; nothing was deleted.")
    # Convert to dict for storing backup as a yml
    backup = sub.to_dict()
    try:
        with open(Path(ctx.backup_path).joinpath(f"{sub.rsl_plate_num}-backup({date.today().strftime('%Y%m%d')}).yml"), "w") as f:
            yaml.dump(backup, f)
    except KeyError as e:
        # Best effort: deletion still proceeds, but the missing backup is made visible.
        logger.warning(f"Couldn't write backup for {sub.rsl_plate_num} due to {e}")
    ctx.database_session.delete(sub)
    try:
        ctx.database_session.commit()
    except (SQLIntegrityError, SQLOperationalError, AlcIntegrityError, AlcOperationalError):
        # Leave the session usable for the caller, then propagate with the original traceback.
        ctx.database_session.rollback()
        raise
    return None
def update_ww_sample(ctx:Settings, sample_obj:dict) -> dict|None:
    """
    Retrieves the submission/sample association identified by
    sample_obj['plate_rsl'] and sample_obj['sample'] and updates it with the
    remaining values of the dictionary.

    Args:
        ctx (Settings): settings object passed down from gui
        sample_obj (dict): dictionary representing new values for database object.
            Must contain the keys 'sample' and 'plate_rsl'.

    Returns:
        dict|None: result of store_object, or None if the association wasn't found.
    """
    logger.debug(f"dictionary to use for update: {pformat(sample_obj)}")
    logger.debug(f"Looking up {sample_obj['sample']} in plate {sample_obj['plate_rsl']}")
    assoc = lookup_submission_sample_association(ctx=ctx, submission=sample_obj['plate_rsl'], sample=sample_obj['sample'])
    if assoc is None:
        logger.error(f"Unable to find sample {sample_obj['sample']}")
        return None
    # These two entries only identify the association; writing them back would
    # try to assign plain strings onto it (e.g. onto the `sample` relationship).
    lookup_keys = ("sample", "plate_rsl")
    for key, value in sample_obj.items():
        if key in lookup_keys:
            continue
        try:
            check = getattr(assoc, key)
        except AttributeError as e:
            logger.error(f"Item doesn't have field {key} due to {e}")
            continue
        # Only touch attributes whose value actually changes.
        if check != value:
            logger.debug(f"Setting association key: {key} to {value}")
            try:
                setattr(assoc, key, value)
            except AttributeError as e:
                logger.error(f"Can't set field {key} to {value} due to {e}")
    result = store_object(ctx=ctx, object=assoc)
    return result
def check_kit_integrity(sub:models.BasicSubmission|models.KitType, reagenttypes:list|None=None) -> dict|None:
"""
Ensures all reagents expected in kit are listed in Submission
Args:
sub (BasicSubmission | KitType): Object containing complete list of reagent types.
reagenttypes (list | None, optional): List to check against complete list. Defaults to None.
Returns:
dict|None: Result object containing a message and any missing components.
"""
logger.debug(type(sub))
# What type is sub?
reagenttypes = []
match sub:
case models.BasicSubmission():
# Get all required reagent types for this kit.
ext_kit_rtypes = [item.name for item in sub.extraction_kit.get_reagents(required=True, submission_type=sub.submission_type_name)]
# Overwrite function parameter reagenttypes
for reagent in sub.reagents:
try:
rt = list(set(reagent.type).intersection(sub.extraction_kit.reagent_types))[0].name
logger.debug(f"Got reagent type: {rt}")
reagenttypes.append(rt)
except AttributeError as e:
logger.error(f"Problem parsing reagents: {[f'{reagent.lot}, {reagent.type}' for reagent in sub.reagents]}")
reagenttypes.append(reagent.type[0].name)
case models.KitType():
ext_kit_rtypes = [item.name for item in sub.get_reagents(required=True)]
case _:
raise ValueError(f"There was no match for the integrity object.\n\nCheck to make sure they are imported from the same place because it matters.")
logger.debug(f"Kit reagents: {ext_kit_rtypes}")
logger.debug(f"Submission reagents: {reagenttypes}")
# check if lists are equal
check = set(ext_kit_rtypes) == set(reagenttypes)
logger.debug(f"Checking if reagents match kit contents: {check}")
# what reagent types are in both lists?
missing = list(set(ext_kit_rtypes).difference(reagenttypes))
logger.debug(f"Missing reagents types: {missing}")
# if lists are equal return no problem
if len(missing)==0:
result = None
else:
result = {'message' : f"The submission you are importing is missing some reagents expected by the kit.\n\nIt looks like you are missing: {[item.upper() for item in missing]}\n\nAlternatively, you may have set the wrong extraction kit.\n\nThe program will populate lists using existing reagents.\n\nPlease make sure you check the lots carefully!", 'missing': missing}
return result
def update_subsampassoc_with_pcr(ctx:Settings, submission:models.BasicSubmission, sample:models.BasicSample, input_dict:dict) -> dict|None:
    """
    Inserts PCR results into wastewater submission/sample association

    Args:
        ctx (Settings): settings object passed down from gui
        submission (models.BasicSubmission): Submission object
        sample (models.BasicSample): Sample object
        input_dict (dict): dictionary with info to be updated; each key is set
            as an attribute on the association.

    Returns:
        dict|None: result of store_object, or None if no association was found.
    """
    assoc = lookup_submission_sample_association(ctx, submission=submission, sample=sample)
    # Without an association there is nothing to update or store.
    if assoc is None:
        logger.error(f"Unable to find association between {submission} and {sample}")
        return None
    for k,v in input_dict.items():
        try:
            setattr(assoc, k, v)
        except AttributeError:
            logger.error(f"Can't set {k} to {v}")
    result = store_object(ctx=ctx, object=assoc)
    return result

View File

@@ -1,12 +1,46 @@
'''
Contains all models for sqlalchemy
'''
from sqlalchemy.ext.declarative import declarative_base
from typing import Any
from sqlalchemy.orm import declarative_base
import logging
from pprint import pformat
Base = declarative_base()
metadata = Base.metadata
logger = logging.getLogger(f"submissions.{__name__}")
def find_subclasses(parent:Any, attrs:dict) -> Any:
    """
    Pick the class that carries every requested attribute.

    The parent itself is returned when attrs is empty or when it already has
    all of the attributes; otherwise the first direct subclass (in
    __subclasses__() order) that has all of them is returned.

    Args:
        parent (_type_): Parent class.
        attrs (dict): Key:Value dictionary of attributes; only the keys are checked.

    Raises:
        AttributeError: Raised if neither the parent nor a direct subclass has all attributes.

    Returns:
        _type_: Parent or subclass.
    """
    if not len(attrs):
        return parent
    parent_is_complete = all(hasattr(parent, name) for name in attrs)
    if parent_is_complete:
        model = parent
    else:
        # direct subclasses that have every requested attribute
        candidates = [
            child for child in parent.__subclasses__()
            if all(hasattr(child, name) for name in attrs)
        ]
        if not candidates:
            raise AttributeError(f"Couldn't find existing class/subclass of {parent} with all attributes:\n{pformat(attrs)}")
        model = candidates[0]
    logger.debug(f"Using model: {model}")
    return model
from .controls import Control, ControlType
from .kits import KitType, ReagentType, Reagent, Discount, KitTypeReagentTypeAssociation, SubmissionType, SubmissionTypeKitTypeAssociation
from .organizations import Organization, Contact
from .submissions import BasicSubmission, BacterialCulture, Wastewater, WastewaterArtic, WastewaterSample, BacterialCultureSample, BasicSample, SubmissionSampleAssociation, WastewaterAssociation

View File

@@ -19,9 +19,7 @@ class ControlType(Base):
id = Column(INTEGER, primary_key=True) #: primary key
name = Column(String(255), unique=True) #: controltype name (e.g. MCS)
targets = Column(JSON) #: organisms checked for
# instances_id = Column(INTEGER, ForeignKey("_control_samples.id", ondelete="SET NULL", name="fk_ctype_instances_id"))
instances = relationship("Control", back_populates="controltype") #: control samples created of this type.
# UniqueConstraint('name', name='uq_controltype_name')
class Control(Base):
@@ -39,13 +37,14 @@ class Control(Base):
contains = Column(JSON) #: unstructured hashes in contains.tsv for each organism
matches = Column(JSON) #: unstructured hashes in matches.tsv for each organism
kraken = Column(JSON) #: unstructured output from kraken_report
# UniqueConstraint('name', name='uq_control_name')
submission_id = Column(INTEGER, ForeignKey("_submissions.id")) #: parent submission id
submission = relationship("BacterialCulture", back_populates="controls", foreign_keys=[submission_id]) #: parent submission
refseq_version = Column(String(16)) #: version of refseq used in fastq parsing
kraken2_version = Column(String(16)) #: version of kraken2 used in fastq parsing
kraken2_db_version = Column(String(32)) #: folder name of kraken2 db
def __repr__(self) -> str:
return f"<Control({self.name})>"
def to_sub_dict(self) -> dict:
"""

View File

@@ -5,13 +5,11 @@ from . import Base
from sqlalchemy import Column, String, TIMESTAMP, JSON, INTEGER, ForeignKey, Interval, Table, FLOAT
from sqlalchemy.orm import relationship, validates
from sqlalchemy.ext.associationproxy import association_proxy
from datetime import date
import logging
logger = logging.getLogger(f'submissions.{__name__}')
reagenttypes_reagents = Table("_reagenttypes_reagents", Base.metadata, Column("reagent_id", INTEGER, ForeignKey("_reagents.id")), Column("reagenttype_id", INTEGER, ForeignKey("_reagent_types.id")))
@@ -55,22 +53,26 @@ class KitType(Base):
"""
return self.name
def get_reagents(self, required:bool=False) -> list:
def get_reagents(self, required:bool=False, submission_type:str|None=None) -> list:
"""
Return ReagentTypes linked to kit through KitTypeReagentTypeAssociation.
Args:
required (bool, optional): If true only return required types. Defaults to False.
submission_type (str | None, optional): Submission type to narrow results. Defaults to None.
Returns:
list: List of ReagentTypes
list: List of reagent types
"""
if required:
return [item.reagent_type for item in self.kit_reagenttype_associations if item.required == 1]
if submission_type != None:
relevant_associations = [item for item in self.kit_reagenttype_associations if submission_type in item.uses.keys()]
else:
return [item.reagent_type for item in self.kit_reagenttype_associations]
relevant_associations = [item for item in self.kit_reagenttype_associations]
if required:
return [item.reagent_type for item in relevant_associations if item.required == 1]
else:
return [item.reagent_type for item in relevant_associations]
def construct_xl_map_for_use(self, use:str) -> dict:
"""
Creates map of locations in excel workbook for a SubmissionType
@@ -96,8 +98,6 @@ class KitType(Base):
except IndexError as e:
map['info'] = {}
return map
class ReagentType(Base):
"""
@@ -118,13 +118,7 @@ class ReagentType(Base):
# association proxy of "user_keyword_associations" collection
# to "keyword" attribute
kit_types = association_proxy("kit_reagenttype_associations", "kit_type")
@validates('required')
def validate_age(self, key, value):
if not 0 <= value < 2:
raise ValueError(f'Invalid required value {value}. Must be 0 or 1.')
return value
kit_types = association_proxy("reagenttype_kit_associations", "kit_type")
def __str__(self) -> str:
"""
@@ -205,13 +199,17 @@ class Reagent(Base):
str: string representing this object's type and lot number
"""
return str(self.lot)
def to_sub_dict(self, extraction_kit:KitType=None) -> dict:
"""
dictionary containing values necessary for gui
Args:
extraction_kit (KitType, optional): KitType to use to get reagent type. Defaults to None.
Returns:
dict: gui friendly dictionary
dict: _description_
"""
if extraction_kit != None:
# Get the intersection of this reagent's ReagentType and all ReagentTypes in KitType
@@ -245,6 +243,9 @@ class Reagent(Base):
"""
Returns basic reagent dictionary.
Args:
extraction_kit (KitType, optional): KitType to use to get reagent type. Defaults to None.
Returns:
dict: Basic reagent dictionary of 'type', 'lot', 'expiry'
"""
@@ -268,7 +269,6 @@ class Reagent(Base):
"expiry": self.expiry.strftime("%Y-%m-%d")
}
class Discount(Base):
"""
Relationship table for client labs for certain kits.
@@ -303,7 +303,7 @@ class SubmissionType(Base):
cascade="all, delete-orphan",
)
kit_types = association_proxy("kit_submissiontype_associations", "kit_type")
kit_types = association_proxy("submissiontype_kit_associations", "kit_type")
def __repr__(self) -> str:
return f"<SubmissionType({self.name})>"
@@ -321,7 +321,7 @@ class SubmissionTypeKitTypeAssociation(Base):
kit_type = relationship(KitType, back_populates="kit_submissiontype_associations")
# reference to the "ReagentType" object
# reference to the "SubmissionType" object
submission_type = relationship(SubmissionType, back_populates="submissiontype_kit_associations")
def __init__(self, kit_type=None, submission_type=None):

View File

@@ -5,7 +5,6 @@ from . import Base
from sqlalchemy import Column, String, INTEGER, ForeignKey, Table
from sqlalchemy.orm import relationship
# table containing organization/contact relationship
orgs_contacts = Table("_orgs_contacts", Base.metadata, Column("org_id", INTEGER, ForeignKey("_organizations.id")), Column("contact_id", INTEGER, ForeignKey("_contacts.id")))

View File

@@ -74,10 +74,13 @@ class BasicSubmission(Base):
def to_dict(self, full_data:bool=False) -> dict:
"""
dictionary used in submissions summary
Constructs dictionary used in submissions summary
Args:
full_data (bool, optional): indicates if sample dicts to be constructed. Defaults to False.
Returns:
dict: dictionary used in submissions summary
dict: dictionary used in submissions summary and details
"""
# get lab from nested organization object
logger.debug(f"Converting {self.rsl_plate_num} to dict...")
@@ -113,10 +116,6 @@ class BasicSubmission(Base):
else:
reagents = None
samples = None
# Updated 2023-09 to get sample association with plate number
# for item in self.submission_sample_associations:
# sample = item.sample.to_sub_dict(submission_rsl=self.rsl_plate_num)
# samples.append(sample)
try:
comments = self.comment
except:
@@ -383,7 +382,6 @@ class BasicSample(Base):
Returns:
dict: dictionary of sample id, row and column in elution plate
"""
# self.assoc = [item for item in self.sample_submission_associations if item.submission.rsl_plate_num==submission_rsl][0]
# Since there is no PCR, a negligible result is necessary.
return dict(name=self.submitter_id, positive=False)