documentation and converted to username based exclusion of adding new kits

This commit is contained in:
Landon Wark
2023-01-30 12:07:38 -06:00
parent bbb65d3fe6
commit 1f832dccf2
16 changed files with 876 additions and 296 deletions

View File

@@ -12,6 +12,7 @@ import base64
from sqlalchemy import JSON
import json
from dateutil.relativedelta import relativedelta
from getpass import getuser
logger = logging.getLogger(f"submissions.{__name__}")
@@ -20,8 +21,19 @@ def get_kits_by_use( ctx:dict, kittype_str:str|None) -> list:
# ctx dict should contain the database session
def store_submission(ctx:dict, base_submission:models.BasicSubmission) -> None:
def store_submission(ctx:dict, base_submission:models.BasicSubmission) -> None|dict:
"""
Upserts submissions into database
Args:
ctx (dict): settings passed down from gui
base_submission (models.BasicSubmission): submission to be add to db
Returns:
None|dict : object that indicates issue raised for reporting in gui
"""
logger.debug(f"Hello from store_submission")
# Add all samples to sample table
for sample in base_submission.samples:
sample.rsl_plate = base_submission
logger.debug(f"Attempting to add sample: {sample.to_string()}")
@@ -30,6 +42,7 @@ def store_submission(ctx:dict, base_submission:models.BasicSubmission) -> None:
except (sqlite3.IntegrityError, sqlalchemy.exc.IntegrityError) as e:
logger.debug(f"Hit an integrity error : {e}")
continue
# Add submission to submission table
ctx['database_session'].add(base_submission)
logger.debug(f"Attempting to add submission: {base_submission.rsl_plate_num}")
try:
@@ -45,26 +58,51 @@ def store_submission(ctx:dict, base_submission:models.BasicSubmission) -> None:
return None
def store_reagent(ctx:dict, reagent:models.Reagent) -> None|dict:
    """
    Add a reagent to the database session and commit it.

    Args:
        ctx (dict): settings passed down from gui; the open session is read from 'database_session'
        reagent (models.Reagent): Reagent object to be added to db

    Returns:
        None|dict: None on success, otherwise an object indicating the issue to be reported in the gui
    """
    logger.debug(reagent.__dict__)
    session = ctx['database_session']
    session.add(reagent)
    try:
        session.commit()
    except (sqlite3.OperationalError, sqlalchemy.exc.OperationalError):
        # a failed commit leaves the session's transaction unusable until it
        # is rolled back, so reset it before reporting the problem to the gui
        session.rollback()
        return {"message":"The database is locked for editing."}
    return None
def construct_submission_info(ctx:dict, info_dict:dict) -> models.BasicSubmission:
"""
Construct submission object from dictionary
Args:
ctx (dict): settings passed down from gui
info_dict (dict): dictionary to be transformed
Returns:
models.BasicSubmission: Constructed submission object
"""
# convert submission type into model name
query = info_dict['submission_type'].replace(" ", "")
# check database for existing object
instance = ctx['database_session'].query(models.BasicSubmission).filter(models.BasicSubmission.rsl_plate_num==info_dict['rsl_plate_num']).first()
msg = "This submission already exists.\nWould you like to overwrite?"
# get model based on submission type converted above
model = getattr(models, query)
info_dict['submission_type'] = info_dict['submission_type'].replace(" ", "_").lower()
# if query return nothing, ie doesn't already exist in db
if instance == None:
instance = model()
msg = None
for item in info_dict:
logger.debug(f"Setting {item} to {info_dict[item]}")
# set fields based on keys in dictionary
match item:
case "extraction_kit":
q_str = info_dict[item]
@@ -86,35 +124,34 @@ def construct_submission_info(ctx:dict, info_dict:dict) -> models.BasicSubmissio
logger.debug(f"Got None as a submitter plate number, inserting random string to preserve database unique constraint.")
info_dict[item] = uuid.uuid4().hex.upper()
field_value = info_dict[item]
# case "samples":
# for sample in info_dict[item]:
# instance.samples.append(sample)
# continue
case _:
field_value = info_dict[item]
# insert into field
try:
setattr(instance, item, field_value)
except AttributeError:
logger.debug(f"Could not set attribute: {item} to {info_dict[item]}")
continue
# logger.debug(instance.__dict__)
logger.debug(f"Constructed instance: {instance.to_string()}")
logger.debug(msg)
return instance, {'message':msg}
# looked_up = []
# for reagent in reagents:
# my_reagent = lookup_reagent(reagent)
# logger.debug(my_reagent)
# looked_up.append(my_reagent)
# logger.debug(looked_up)
# instance.reagents = looked_up
# ctx['database_session'].add(instance)
# ctx['database_session'].commit()
def construct_reagent(ctx:dict, info_dict:dict) -> models.Reagent:
"""
Construct reagent object from dictionary
Args:
ctx (dict): settings passed down from gui
info_dict (dict): dictionary to be converted
Returns:
models.Reagent: Constructed reagent object
"""
reagent = models.Reagent()
for item in info_dict:
logger.debug(f"Reagent info item: {item}")
# set fields based on keys in dictionary
match item:
case "lot":
reagent.lot = info_dict[item].upper()
@@ -122,25 +159,55 @@ def construct_reagent(ctx:dict, info_dict:dict) -> models.Reagent:
reagent.expiry = info_dict[item]
case "type":
reagent.type = lookup_reagenttype_by_name(ctx=ctx, rt_name=info_dict[item].replace(" ", "_").lower())
# add end-of-life extension from reagent type to expiry date
try:
reagent.expiry = reagent.expiry + reagent.type.eol_ext
except TypeError as e:
logger.debug(f"WE got a type error: {e}.")
logger.debug(f"We got a type error: {e}.")
except AttributeError:
pass
return reagent
def lookup_reagent(ctx:dict, reagent_lot:str) -> models.Reagent:
    """
    Fetch the first reagent whose lot number matches

    Args:
        ctx (dict): settings passed down from gui
        reagent_lot (str): lot number to query

    Returns:
        models.Reagent: first reagent found with that lot number
    """
    session = ctx['database_session']
    lot_matches = session.query(models.Reagent).filter(models.Reagent.lot==reagent_lot)
    return lot_matches.first()
def get_all_reagenttype_names(ctx:dict) -> list[str]:
    """
    Collect the string form of every reagent type in the database

    Args:
        ctx (dict): settings passed from gui

    Returns:
        list[str]: reagent type names
    """
    reagent_types = ctx['database_session'].query(models.ReagentType).all()
    # __str__ is called directly (not via str()) so the value it returns is
    # passed through untouched
    return [reagent_type.__str__() for reagent_type in reagent_types]
def lookup_reagenttype_by_name(ctx:dict, rt_name:str) -> models.ReagentType:
"""
Lookup a single reagent type by name
Args:
ctx (dict): settings passed from gui
rt_name (str): reagent type name to look up
Returns:
models.ReagentType: looked up reagent type
"""
logger.debug(f"Looking up ReagentType by name: {rt_name}")
lookedup = ctx['database_session'].query(models.ReagentType).filter(models.ReagentType.name==rt_name).first()
logger.debug(f"Found ReagentType: {lookedup}")
@@ -148,27 +215,78 @@ def lookup_reagenttype_by_name(ctx:dict, rt_name:str) -> models.ReagentType:
def lookup_kittype_by_use(ctx:dict, used_by:str) -> list[models.KitType]:
    """
    Lookup kits by a sample type they are used for

    Args:
        ctx (dict): settings passed from gui
        used_by (str): sample type (should be string in D3 of excel sheet)

    Returns:
        list[models.KitType]: kittypes that have that sample type in their uses.
            NOTE(review): no `.all()` is applied, so the filtered query itself is
            handed back rather than a materialised list - confirm callers only iterate it.
    """
    kit_query = ctx['database_session'].query(models.KitType)
    return kit_query.filter(models.KitType.used_for.contains(used_by))
def lookup_kittype_by_name(ctx:dict, name:str) -> models.KitType:
    """
    Fetch the first kit type with the given name

    Args:
        ctx (dict): settings passed from gui
        name (str): name of kit to query

    Returns:
        models.KitType: retrieved kittype
    """
    logger.debug(f"Querying kittype: {name}")
    kits_named = ctx['database_session'].query(models.KitType).filter(models.KitType.name==name)
    return kits_named.first()
def lookup_regent_by_type_name(ctx:dict, type_name:str) -> list[models.Reagent]:
    """
    Fetch every reagent whose reagent type has the given name

    Args:
        ctx (dict): settings passed from gui
        type_name (str): reagent type name

    Returns:
        list[models.Reagent]: list of retrieved reagents
    """
    session = ctx['database_session']
    # join each reagent to its type so the type's name can be filtered on
    reagents_with_type = session.query(models.Reagent).join(models.Reagent.type, aliased=True)
    return reagents_with_type.filter(models.ReagentType.name==type_name).all()
def lookup_regent_by_type_name_and_kit_name(ctx:dict, type_name:str, kit_name:str) -> list[models.Reagent]:
    """
    Lookup reagents by their type name and the kit they belong to

    Args:
        ctx (dict): settings passed by gui
        type_name (str): reagent type name; matched against the END of the stored type name
        kit_name (str): kit name

    Returns:
        list[models.Reagent]: retrieved reagents.
            NOTE(review): the query is returned without `.all()`, so callers get
            the query object itself - confirm that is what they expect.
    """
    session = ctx['database_session']
    # step 1: reagents whose type name ends with the requested type name
    typed_reagents = (
        session.query(models.Reagent)
        .join(models.Reagent.type, aliased=True)
        .filter(models.ReagentType.name.endswith(type_name))
    )
    # step 2: narrow to reagent types attached to the named kit
    return typed_reagents.join(models.ReagentType.kits).filter(models.KitType.name==kit_name)
def lookup_all_submissions_by_type(ctx:dict, type:str|None=None):
def lookup_all_submissions_by_type(ctx:dict, type:str|None=None) -> list[models.BasicSubmission]:
"""
Get all submissions, filtering by type if given
Args:
ctx (dict): settings pass from gui
type (str | None, optional): submission type (should be string in D3 of excel sheet). Defaults to None.
Returns:
list[models.BasicSubmission]: list of retrieved submissions
"""
if type == None:
subs = ctx['database_session'].query(models.BasicSubmission).all()
else:
@@ -176,20 +294,60 @@ def lookup_all_submissions_by_type(ctx:dict, type:str|None=None):
return subs
def lookup_all_orgs(ctx:dict) -> list[models.Organization]:
    """
    Fetch every organization (lab) in the database

    Args:
        ctx (dict): settings passed from gui

    Returns:
        list[models.Organization]: list of retrieved organizations
    """
    session = ctx['database_session']
    return session.query(models.Organization).all()
def lookup_org_by_name(ctx:dict, name:str|None) -> models.Organization:
    """
    Fetch the first organization (lab) with the given name.

    Args:
        ctx (dict): settings passed from gui
        name (str | None): name of organization

    Returns:
        models.Organization: retrieved organization
    """
    logger.debug(f"Querying organization: {name}")
    orgs_named = ctx['database_session'].query(models.Organization).filter(models.Organization.name==name)
    return orgs_named.first()
def submissions_to_df(ctx:dict, type:str|None=None) -> pd.DataFrame:
    """
    Build a dataframe from the submissions looked up by type

    Args:
        ctx (dict): settings passed by gui
        type (str | None, optional): submission type (should be string in D3 of excel sheet) Defaults to None.

    Returns:
        pd.DataFrame: dataframe constructed from retrieved submissions
    """
    logger.debug(f"Type: {type}")
    # the lookup function does the querying; each result is flattened to a dict
    records = []
    for submission in lookup_all_submissions_by_type(ctx=ctx, type=type):
        records.append(submission.to_dict())
    return pd.DataFrame.from_records(records)
def lookup_submission_by_id(ctx:dict, id:int) -> models.BasicSubmission:
    """
    Fetch the first submission with the given id number

    Args:
        ctx (dict): settings passed from gui
        id (int): submission id number

    Returns:
        models.BasicSubmission: retrieved submission
    """
    session = ctx['database_session']
    id_matches = session.query(models.BasicSubmission).filter(models.BasicSubmission.id==id)
    return id_matches.first()
@@ -198,6 +356,17 @@ def create_submission_details(ctx:dict, sub_id:int) -> dict:
def lookup_submissions_by_date_range(ctx:dict, start_date:datetime.date, end_date:datetime.date) -> list[models.BasicSubmission]:
    """
    Fetch submissions whose submitted date falls between two dates

    Args:
        ctx (dict): settings passed from gui
        start_date (datetime.date): date to start looking (exclusive)
        end_date (datetime.date): date to end looking (exclusive)

    Returns:
        list[models.BasicSubmission]: list of retrieved submissions
    """
    submitted = models.BasicSubmission.submitted_date
    # both comparisons are strict, so a submission stamped exactly at either
    # bound is left out of the result
    inside_window = and_(submitted > start_date, submitted < end_date)
    return ctx['database_session'].query(models.BasicSubmission).filter(inside_window).all()
@@ -226,12 +395,13 @@ def create_kit_from_yaml(ctx:dict, exp:dict) -> None:
exp (dict): Experiment dictionary created from yaml file
"""
try:
exp['password'].decode()
except (UnicodeDecodeError, AttributeError):
exp['password'] = exp['password'].encode()
if base64.b64encode(exp['password']) != b'cnNsX3N1Ym1pNTVpb25z':
logger.debug(f"Not the correct password.")
return
super_users = ctx['super_users']
except KeyError:
logger.debug("This user does not have permission to add kits.")
return {'code':1,'message':"This user does not have permission to add kits."}
if getuser not in super_users:
logger.debug("This user does not have permission to add kits.")
return {'code':1, 'message':"This user does not have permission to add kits."}
for type in exp:
if type == "password":
continue
@@ -249,9 +419,19 @@ def create_kit_from_yaml(ctx:dict, exp:dict) -> None:
logger.debug(kit.__dict__)
ctx['database_session'].add(kit)
ctx['database_session'].commit()
return {'code':0, 'message':'Kit has been added'}
def lookup_all_sample_types(ctx:dict) -> list[str]:
    """
    Collect the distinct sample type names found across all kit types

    Args:
        ctx (dict): settings passed from gui

    Returns:
        list[str]: list of sample type names (duplicates removed, order not guaranteed)
    """
    kit_types = ctx['database_session'].query(models.KitType).all()
    # each kit's used_for is iterated as a collection of names; flatten them
    # all into one set to drop duplicates
    distinct_uses = {use for kit_type in kit_types for use in kit_type.used_for}
    return list(distinct_uses)
@@ -259,6 +439,15 @@ def lookup_all_sample_types(ctx:dict) -> list[str]:
def get_all_available_modes(ctx:dict) -> list[str]:
"""
Get types of analysis for controls
Args:
ctx (dict): settings passed from gui
Returns:
list[str]: list of analysis types
"""
rel = ctx['database_session'].query(models.Control).first()
try:
cols = [item.name for item in list(rel.__table__.columns) if isinstance(item.type, JSON)]
@@ -294,7 +483,18 @@ def get_all_controls_by_type(ctx:dict, con_type:str, start_date:date|None=None,
return output
def get_control_subtypes(ctx:dict, type:str, mode:str):
def get_control_subtypes(ctx:dict, type:str, mode:str) -> list[str]:
"""
Get subtypes for a control analysis type
Args:
ctx (dict): settings passed from gui
type (str): control type name
mode (str): analysis type name
Returns:
list[str]: list of subtype names
"""
try:
outs = get_all_controls_by_type(ctx=ctx, con_type=type)[0]
except TypeError:

View File

@@ -1,5 +1,5 @@
from . import Base
from sqlalchemy import Column, String, TIMESTAMP, text, JSON, INTEGER, ForeignKey, UniqueConstraint
from sqlalchemy import Column, String, TIMESTAMP, JSON, INTEGER, ForeignKey
from sqlalchemy.orm import relationship
class ControlType(Base):
@@ -32,6 +32,6 @@ class Control(Base):
matches = Column(JSON) #: unstructured hashes in matches.tsv for each organism
kraken = Column(JSON) #: unstructured output from kraken_report
# UniqueConstraint('name', name='uq_control_name')
submission_id = Column(INTEGER, ForeignKey("_submissions.id"))
submission = relationship("BacterialCulture", back_populates="controls", foreign_keys=[submission_id])
submission_id = Column(INTEGER, ForeignKey("_submissions.id")) #: parent submission id
submission = relationship("BacterialCulture", back_populates="controls", foreign_keys=[submission_id]) #: parent submission

View File

@@ -3,57 +3,88 @@ from sqlalchemy import Column, String, TIMESTAMP, JSON, INTEGER, ForeignKey, Int
from sqlalchemy.orm import relationship
# Table containing reagenttype-kittype relationships
reagenttypes_kittypes = Table("_reagentstypes_kittypes", Base.metadata, Column("reagent_types_id", INTEGER, ForeignKey("_reagent_types.id")), Column("kits_id", INTEGER, ForeignKey("_kits.id")))
class KitType(Base):
"""
Base of kits used in submission processing
"""
__tablename__ = "_kits"
id = Column(INTEGER, primary_key=True) #: primary key
name = Column(String(64), unique=True)
submissions = relationship("BasicSubmission", back_populates="extraction_kit")
used_for = Column(JSON)
cost_per_run = Column(FLOAT(2))
reagent_types = relationship("ReagentType", back_populates="kits", uselist=True, secondary=reagenttypes_kittypes)
reagent_types_id = Column(INTEGER, ForeignKey("_reagent_types.id", ondelete='SET NULL', use_alter=True, name="fk_KT_reagentstype_id"))
name = Column(String(64), unique=True) #: name of kit
submissions = relationship("BasicSubmission", back_populates="extraction_kit") #: submissions this kit was used for
used_for = Column(JSON) #: list of names of sample types this kit can process
cost_per_run = Column(FLOAT(2)) #: dollar amount for each full run of this kit
reagent_types = relationship("ReagentType", back_populates="kits", uselist=True, secondary=reagenttypes_kittypes) #: reagent types this kit contains
reagent_types_id = Column(INTEGER, ForeignKey("_reagent_types.id", ondelete='SET NULL', use_alter=True, name="fk_KT_reagentstype_id")) #: joined reagent type id
def __str__(self):
def __str__(self) -> str:
"""
a string representing this object
Returns:
str: a string representing this object's name
"""
return self.name
class ReagentType(Base):
"""
Base of reagent type abstract
"""
__tablename__ = "_reagent_types"
id = Column(INTEGER, primary_key=True) #: primary key
name = Column(String(64))
kit_id = Column(INTEGER, ForeignKey("_kits.id", ondelete="SET NULL", use_alter=True, name="fk_RT_kits_id"))
kits = relationship("KitType", back_populates="reagent_types", uselist=True, foreign_keys=[kit_id])
instances = relationship("Reagent", back_populates="type")
name = Column(String(64)) #: name of reagent type
kit_id = Column(INTEGER, ForeignKey("_kits.id", ondelete="SET NULL", use_alter=True, name="fk_RT_kits_id")) #: id of joined kit type
kits = relationship("KitType", back_populates="reagent_types", uselist=True, foreign_keys=[kit_id]) #: kits this reagent is used in
instances = relationship("Reagent", back_populates="type") #: concrete instances of this reagent type
# instances_id = Column(INTEGER, ForeignKey("_reagents.id", ondelete='SET NULL'))
eol_ext = Column(Interval())
eol_ext = Column(Interval()) #: extension of life interval
def __str__(self):
def __str__(self) -> str:
"""
string representing this object
Returns:
str: string representing this object's name
"""
return self.name
class Reagent(Base):
"""
Concrete reagent instance
"""
__tablename__ = "_reagents"
id = Column(INTEGER, primary_key=True) #: primary key
type = relationship("ReagentType", back_populates="instances")
type_id = Column(INTEGER, ForeignKey("_reagent_types.id", ondelete='SET NULL', name="fk_reagent_type_id"))
name = Column(String(64))
lot = Column(String(64))
expiry = Column(TIMESTAMP)
submissions = relationship("BasicSubmission", back_populates="reagents", uselist=True)
type = relationship("ReagentType", back_populates="instances") #: joined parent reagent type
type_id = Column(INTEGER, ForeignKey("_reagent_types.id", ondelete='SET NULL', name="fk_reagent_type_id")) #: id of parent reagent type
name = Column(String(64)) #: reagent name
lot = Column(String(64)) #: lot number of reagent
expiry = Column(TIMESTAMP) #: expiry date - extended by eol_ext of parent programmatically
submissions = relationship("BasicSubmission", back_populates="reagents", uselist=True) #: submissions this reagent is used in
def __str__(self):
def __str__(self) -> str:
"""
string representing this object
Returns:
str: string representing this object's lot number
"""
return self.lot
def to_sub_dict(self):
def to_sub_dict(self) -> dict:
"""
dictionary containing values necessary for gui
Returns:
dict: gui friendly dictionary
"""
try:
type = self.type.name.replace("_", " ").title()
except AttributeError:
@@ -62,7 +93,4 @@ class Reagent(Base):
"type": type,
"lot": self.lot,
"expiry": self.expiry.strftime("%Y-%m-%d")
}
}

View File

@@ -3,32 +3,43 @@ from sqlalchemy import Column, String, TIMESTAMP, JSON, Float, INTEGER, ForeignK
from sqlalchemy.orm import relationship, validates
# table containing organization/contact relationship
orgs_contacts = Table("_orgs_contacts", Base.metadata, Column("org_id", INTEGER, ForeignKey("_organizations.id")), Column("contact_id", INTEGER, ForeignKey("_contacts.id")))
class Organization(Base):
"""
Base of organization
"""
__tablename__ = "_organizations"
id = Column(INTEGER, primary_key=True) #: primary key
name = Column(String(64))
submissions = relationship("BasicSubmission", back_populates="submitting_lab")
cost_centre = Column(String())
contacts = relationship("Contact", back_populates="organization", secondary=orgs_contacts)
contact_ids = Column(INTEGER, ForeignKey("_contacts.id", ondelete="SET NULL", name="fk_org_contact_id"))
name = Column(String(64)) #: organization name
submissions = relationship("BasicSubmission", back_populates="submitting_lab") #: submissions this organization has submitted
cost_centre = Column(String()) #: cost centre used by org for payment
contacts = relationship("Contact", back_populates="organization", secondary=orgs_contacts) #: contacts involved with this org
contact_ids = Column(INTEGER, ForeignKey("_contacts.id", ondelete="SET NULL", name="fk_org_contact_id")) #: contact ids of this organization
def __str__(self):
def __str__(self) -> str:
"""
String representing organization
Returns:
str: string representing organization name
"""
return self.name.replace("_", " ").title()
class Contact(Base):
    """
    Base of Contact
    """
    __tablename__ = "_contacts"

    id = Column(INTEGER, primary_key=True) #: primary key
    name = Column(String(64)) #: contact name
    email = Column(String(64)) #: contact email
    phone = Column(String(32)) #: contact phone number
    organization = relationship("Organization", back_populates="contacts", uselist=True, secondary=orgs_contacts) #: relationship to joined organization
    organization_id = Column(INTEGER, ForeignKey("_organizations.id", ondelete="SET NULL", name="fk_contact_org_id")) #: joined organization ids

View File

@@ -1,18 +1,20 @@
from . import Base
from sqlalchemy import Column, String, TIMESTAMP, text, JSON, INTEGER, ForeignKey, FLOAT, BOOLEAN
from sqlalchemy.orm import relationship, relationships
from sqlalchemy.orm import relationship
class WWSample(Base):
"""
Base wastewater sample
"""
__tablename__ = "_ww_samples"
id = Column(INTEGER, primary_key=True) #: primary key
ww_processing_num = Column(String(64))
ww_sample_full_id = Column(String(64), nullable=False)
rsl_number = Column(String(64))
rsl_plate = relationship("Wastewater", back_populates="samples")
rsl_plate_id = Column(INTEGER, ForeignKey("_submissions.id", ondelete="SET NULL", name="fk_WWS_sample_id"))
rsl_number = Column(String(64)) #: rsl plate identification number
rsl_plate = relationship("Wastewater", back_populates="samples") #: relationship to parent plate
rsl_plate_id = Column(INTEGER, ForeignKey("_submissions.id", ondelete="SET NULL", name="fk_WWS_submission_id"))
collection_date = Column(TIMESTAMP) #: Date submission received
testing_type = Column(String(64))
site_status = Column(String(64))
@@ -22,12 +24,24 @@ class WWSample(Base):
seq_submitted = Column(BOOLEAN())
ww_seq_run_id = Column(String(64))
sample_type = Column(String(8))
well_number = Column(String(8))
well_number = Column(String(8)) #: location on plate
def to_string(self):
def to_string(self) -> str:
"""
string representing sample object
Returns:
str: string representing location and sample id
"""
return f"{self.well_number}: {self.ww_sample_full_id}"
def to_sub_dict(self):
def to_sub_dict(self) -> dict:
"""
gui friendly dictionary
Returns:
dict: well location and id
"""
return {
"well": self.well_number,
"name": self.ww_sample_full_id,
@@ -35,21 +49,35 @@ class WWSample(Base):
class BCSample(Base):
"""
base of bacterial culture sample
"""
__tablename__ = "_bc_samples"
id = Column(INTEGER, primary_key=True) #: primary key
well_number = Column(String(8))
sample_id = Column(String(64), nullable=False)
organism = Column(String(64))
concentration = Column(String(16))
rsl_plate_id = Column(INTEGER, ForeignKey("_submissions.id", ondelete="SET NULL", name="fk_BCS_sample_id"))
rsl_plate = relationship("BacterialCulture", back_populates="samples")
well_number = Column(String(8)) #: location on parent plate
sample_id = Column(String(64), nullable=False) #: identification from submitter
organism = Column(String(64)) #: bacterial specimen
concentration = Column(String(16)) #:
rsl_plate_id = Column(INTEGER, ForeignKey("_submissions.id", ondelete="SET NULL", name="fk_BCS_sample_id")) #: id of parent plate
rsl_plate = relationship("BacterialCulture", back_populates="samples") #: relationship to parent plate
def to_string(self):
def to_string(self) -> str:
"""
string representing object
Returns:
str: string representing well location, sample id and organism
"""
return f"{self.well_number}: {self.sample_id} - {self.organism}"
def to_sub_dict(self):
def to_sub_dict(self) -> dict:
"""
gui friendly dictionary
Returns:
dict: well location and name (sample id, organism)
"""
return {
"well": self.well_number,
"name": f"{self.sample_id} - ({self.organism})",

View File

@@ -3,26 +3,29 @@ from sqlalchemy import Column, String, TIMESTAMP, INTEGER, ForeignKey, Table
from sqlalchemy.orm import relationship
from datetime import datetime as dt
# table containing reagents/submission relationships
reagents_submissions = Table("_reagents_submissions", Base.metadata, Column("reagent_id", INTEGER, ForeignKey("_reagents.id")), Column("submission_id", INTEGER, ForeignKey("_submissions.id")))
class BasicSubmission(Base):
"""
Base of basic submission which polymorphs into BacterialCulture and Wastewater
"""
__tablename__ = "_submissions"
id = Column(INTEGER, primary_key=True) #: primary key
rsl_plate_num = Column(String(32), unique=True) #: RSL name (e.g. RSL-22-0012)
submitter_plate_num = Column(String(127), unique=True) #: The number given to the submission by the submitting lab
submitted_date = Column(TIMESTAMP) #: Date submission received
submitting_lab = relationship("Organization", back_populates="submissions") #: client
submitting_lab = relationship("Organization", back_populates="submissions") #: client org
submitting_lab_id = Column(INTEGER, ForeignKey("_organizations.id", ondelete="SET NULL", name="fk_BS_sublab_id"))
sample_count = Column(INTEGER) #: Number of samples in the submission
extraction_kit = relationship("KitType", back_populates="submissions") #: The extraction kit used
extraction_kit_id = Column(INTEGER, ForeignKey("_kits.id", ondelete="SET NULL", name="fk_BS_extkit_id"))
submission_type = Column(String(32))
technician = Column(String(64))
submission_type = Column(String(32)) #: submission type (should be string in D3 of excel sheet)
technician = Column(String(64)) #: initials of processing tech
# Move this into custom types?
reagents = relationship("Reagent", back_populates="submissions", secondary=reagents_submissions)
reagents_id = Column(String, ForeignKey("_reagents.id", ondelete="SET NULL", name="fk_BS_reagents_id"))
reagents = relationship("Reagent", back_populates="submissions", secondary=reagents_submissions) #: relationship to reagents
reagents_id = Column(String, ForeignKey("_reagents.id", ondelete="SET NULL", name="fk_BS_reagents_id")) #: id of used reagents
__mapper_args__ = {
"polymorphic_identity": "basic_submission",
@@ -30,10 +33,23 @@ class BasicSubmission(Base):
"with_polymorphic": "*",
}
def to_string(self):
def to_string(self) -> str:
"""
string presenting basic submission
Returns:
str: string representing rsl plate number and submitter plate number
"""
return f"{self.rsl_plate_num} - {self.submitter_plate_num}"
def to_dict(self):
def to_dict(self) -> dict:
"""
dictionary used in submissions summary
Returns:
dict: dictionary used in submissions summary
"""
# get lab from nested organization object
try:
sub_lab = self.submitting_lab.name
except AttributeError:
@@ -42,6 +58,7 @@ class BasicSubmission(Base):
sub_lab = sub_lab.replace("_", " ").title()
except AttributeError:
pass
# get extraction kit name from nested kit object
try:
ext_kit = self.extraction_kit.name
except AttributeError:
@@ -60,7 +77,14 @@ class BasicSubmission(Base):
return output
def report_dict(self):
def report_dict(self) -> dict:
"""
dictionary used in creating reports
Returns:
dict: dictionary used in creating reports
"""
# get lab name from nested organization object
try:
sub_lab = self.submitting_lab.name
except AttributeError:
@@ -69,10 +93,12 @@ class BasicSubmission(Base):
sub_lab = sub_lab.replace("_", " ").title()
except AttributeError:
pass
# get extraction kit name from nested kittype object
try:
ext_kit = self.extraction_kit.name
except AttributeError:
ext_kit = None
# get extraction kit cost from nested kittype object
try:
cost = self.extraction_kit.cost_per_run
except AttributeError:
@@ -93,6 +119,9 @@ class BasicSubmission(Base):
# Below are the custom submission
class BacterialCulture(BasicSubmission):
"""
derivative submission type from BasicSubmission
"""
# control_id = Column(INTEGER, ForeignKey("_control_samples.id", ondelete="SET NULL", name="fk_BC_control_id"))
controls = relationship("Control", back_populates="submission", uselist=True) #: A control sample added to submission
samples = relationship("BCSample", back_populates="rsl_plate", uselist=True)
@@ -101,6 +130,9 @@ class BacterialCulture(BasicSubmission):
class Wastewater(BasicSubmission):
"""
derivative submission type from BasicSubmission
"""
samples = relationship("WWSample", back_populates="rsl_plate", uselist=True)
# ww_sample_id = Column(String, ForeignKey("_ww_samples.id", ondelete="SET NULL", name="fk_WW_sample_id"))
__mapper_args__ = {"polymorphic_identity": "wastewater", "polymorphic_load": "inline"}

View File

@@ -1,4 +1,3 @@
from pandas import DataFrame
import re
@@ -6,14 +5,14 @@ import re
def get_unique_values_in_df_column(df: DataFrame, column_name: str) -> list:
    """
    Collect the distinct values of one dataframe column, in ascending order

    Args:
        df (DataFrame): input dataframe
        column_name (str): name of column of interest

    Returns:
        list: sorted list of unique values
    """
    distinct_values = list(df[column_name].unique())
    distinct_values.sort()
    return distinct_values
@@ -23,7 +22,7 @@ def drop_reruns_from_df(ctx:dict, df: DataFrame) -> DataFrame:
Removes semi-duplicates from dataframe after finding sequencing repeats.
Args:
settings (dict): settings passed down from click
ctx (dict): settings passed from gui
df (DataFrame): initial dataframe
Returns:

View File

@@ -11,40 +11,68 @@ import uuid
logger = logging.getLogger(f"submissions.{__name__}")
class SheetParser(object):
def __init__(self, filepath:Path|None = None, **kwargs):
"""
object to pull and contain data from excel file
"""
def __init__(self, filepath:Path|None = None, **kwargs) -> None:
    """
    Open the excel file, decide its submission type and run the matching parser.

    Args:
        filepath (Path | None, optional): file path to excel sheet. Defaults to None.
        **kwargs: settings from the gui ctx; each one is stored on the parser as `_<name>`.
    """
    logger.debug(f"Parsing {filepath}")
    # set attributes based on kwargs from gui ctx
    for key, value in kwargs.items():
        setattr(self, f"_{key}", value)
    if filepath is None:
        logger.error(f"No filepath given.")
        self.xl = None
    else:
        try:
            self.xl = pd.ExcelFile(str(filepath))
        except ValueError as e:
            logger.error(f"Incorrect value: {e}")
            self.xl = None
    self.sub = OrderedDict()
    # make decision about type of sample we have
    self.sub['submission_type'] = self._type_decider()
    # select proper parser based on sample type, e.g. "Unknown" -> self._parse_unknown
    parse_sub = getattr(self, f"_parse_{self.sub['submission_type'].lower()}")
    parse_sub()
def _type_decider(self) -> str:
    """
    Decide the submission type by comparing the workbook's sheet names
    against the 'excel_map' of each configured submission type

    Returns:
        str: title-cased submission type name, or "Unknown" when nothing
            matches or the lookup fails
    """
    try:
        known_types = self._submission_types
        # first configured type whose excel_map equals the sheet names wins
        return next(
            (
                type_name.title()
                for type_name in known_types
                if self.xl.sheet_names == known_types[type_name]['excel_map']
            ),
            "Unknown",
        )
    except Exception as e:
        logger.warning(f"We were unable to parse the submission type due to: {e}")
        return "Unknown"
def _parse_unknown(self):
def _parse_unknown(self) -> None:
"""
Dummy function to handle unknown excel structures
"""
self.sub = None
def _parse_generic(self, sheet_name:str):
def _parse_generic(self, sheet_name:str) -> pd.DataFrame:
"""
Pulls information common to all submission types and passes on dataframe
Args:
sheet_name (str): name of excel worksheet to pull from
Returns:
pd.DataFrame: relevant dataframe from excel sheet
"""
submission_info = self.xl.parse(sheet_name=sheet_name, dtype=object)
self.sub['submitter_plate_num'] = submission_info.iloc[0][1] #if pd.isnull(submission_info.iloc[0][1]) else string_formatter(submission_info.iloc[0][1])
@@ -57,7 +85,10 @@ class SheetParser(object):
return submission_info
def _parse_bacterial_culture(self):
def _parse_bacterial_culture(self) -> None:
"""
pulls info specific to bacterial culture sample type
"""
submission_info = self._parse_generic("Sample List")
# iloc is [row][column] and the first row is set as header row so -2
tech = str(submission_info.iloc[11][1])
@@ -68,7 +99,7 @@ class SheetParser(object):
tech = ", ".join(tech_reg.findall(tech))
self.sub['technician'] = tech
# reagents
# must be prefixed with 'lot_' to be recognized by gui
self.sub['lot_wash_1'] = submission_info.iloc[1][6] #if pd.isnull(submission_info.iloc[1][6]) else string_formatter(submission_info.iloc[1][6])
self.sub['lot_wash_2'] = submission_info.iloc[2][6] #if pd.isnull(submission_info.iloc[2][6]) else string_formatter(submission_info.iloc[2][6])
self.sub['lot_binding_buffer'] = submission_info.iloc[3][6] #if pd.isnull(submission_info.iloc[3][6]) else string_formatter(submission_info.iloc[3][6])
@@ -79,13 +110,17 @@ class SheetParser(object):
self.sub['lot_ethanol'] = submission_info.iloc[10][6] #if pd.isnull(submission_info.iloc[10][6]) else string_formatter(submission_info.iloc[10][6])
self.sub['lot_positive_control'] = submission_info.iloc[103][1] #if pd.isnull(submission_info.iloc[103][1]) else string_formatter(submission_info.iloc[103][1])
self.sub['lot_plate'] = submission_info.iloc[12][6] #if pd.isnull(submission_info.iloc[12][6]) else string_formatter(submission_info.iloc[12][6])
# get individual sample info
sample_parser = SampleParser(submission_info.iloc[15:111])
sample_parse = getattr(sample_parser, f"parse_{self.sub['submission_type'].lower()}_samples")
logger.debug(f"Parser result: {self.sub}")
self.sub['samples'] = sample_parse()
def _parse_wastewater(self):
def _parse_wastewater(self) -> None:
"""
pulls info specific to wastewater sample type
"""
# submission_info = self.xl.parse("WW Submissions (ENTER HERE)")
submission_info = self._parse_generic("WW Submissions (ENTER HERE)")
enrichment_info = self.xl.parse("Enrichment Worksheet", dtype=object)
@@ -108,19 +143,28 @@ class SheetParser(object):
self.sub['lot_pre_mix_2'] = qprc_info.iloc[2][14] #if pd.isnull(qprc_info.iloc[2][14]) else string_formatter(qprc_info.iloc[2][14])
self.sub['lot_positive_control'] = qprc_info.iloc[3][14] #if pd.isnull(qprc_info.iloc[3][14]) else string_formatter(qprc_info.iloc[3][14])
self.sub['lot_ddh2o'] = qprc_info.iloc[4][14] #if pd.isnull(qprc_info.iloc[4][14]) else string_formatter(qprc_info.iloc[4][14])
# get individual sample info
sample_parser = SampleParser(submission_info.iloc[16:40])
sample_parse = getattr(sample_parser, f"parse_{self.sub['submission_type'].lower()}_samples")
self.sub['samples'] = sample_parse()
class SampleParser(object):
"""
object to pull data for samples in excel sheet and construct individual sample objects
"""
# Store the sample rows as a list of plain dicts (one dict per DataFrame row,
# keyed by column label); the parse_*_samples methods iterate over this list.
def __init__(self, df:pd.DataFrame) -> None:
self.samples = df.to_dict("records")
def parse_bacterial_culture_samples(self) -> list[BCSample]:
"""
construct bacterial culture specific sample objects
Returns:
list[BCSample]: list of sample objects
"""
new_list = []
for sample in self.samples:
new = BCSample()
@@ -130,6 +174,7 @@ class SampleParser(object):
new.concentration = sample['Unnamed: 3']
# logger.debug(f"Sample object: {new.sample_id} = {type(new.sample_id)}")
logger.debug(f"Got sample_id: {new.sample_id}")
# need to exclude empties and blanks
try:
not_a_nan = not np.isnan(new.sample_id) and str(new.sample_id).lower() != 'blank'
except TypeError:
@@ -140,10 +185,17 @@ class SampleParser(object):
def parse_wastewater_samples(self) -> list[WWSample]:
"""
construct wastewater specific sample objects
Returns:
list[WWSample]: list of sample objects
"""
new_list = []
for sample in self.samples:
new = WWSample()
new.ww_processing_num = sample['Unnamed: 2']
# need to ensure we have a sample id for database integrity
try:
not_a_nan = not np.isnan(sample['Unnamed: 3'])
except TypeError:
@@ -153,6 +205,7 @@ class SampleParser(object):
else:
new.ww_sample_full_id = uuid.uuid4().hex.upper()
new.rsl_number = sample['Unnamed: 9']
# need to ensure we get a collection date
try:
not_a_nan = not np.isnan(sample['Unnamed: 5'])
except TypeError:
@@ -169,11 +222,11 @@ class SampleParser(object):
return new_list
def string_formatter(input):
logger.debug(f"{input} : {type(input)}")
match input:
case int() | float() | np.float64:
return "{:0.0f}".format(input)
case _:
return input
# def string_formatter(input):
# logger.debug(f"{input} : {type(input)}")
# match input:
# case int() | float() | np.float64:
# return "{:0.0f}".format(input)
# case _:
# return input

View File

@@ -8,13 +8,22 @@ import logging
logger = logging.getLogger(f"submissions.{__name__}")


def make_report_xlsx(records:list[dict]) -> DataFrame:
    """
    create the dataframe for a report

    Args:
        records (list[dict]): list of dictionaries created from submissions;
            each needs the keys "Submitting Lab", "Extraction Kit", "Cost"
            and "Sample Count"

    Returns:
        DataFrame: one row per (Submitting Lab, Extraction Kit) pair with
            two-level columns ("Cost", "sum"), ("Cost", "count") and
            ("Sample Count", "sum"); the cost total is a "$1,234.56" string
    """
    df = DataFrame.from_records(records)
    # put submissions with the same lab together
    df = df.sort_values("Submitting Lab")
    # aggregate cost and sample count columns
    df2 = df.groupby(["Submitting Lab", "Extraction Kit"]).agg({'Cost': ['sum', 'count'], 'Sample Count': ['sum']})
    logger.debug(df2.columns)
    # apply formatting to the cost total. The column is replaced outright
    # instead of writing strings into the float column through a boolean
    # iloc mask, which is an incompatible-dtype assignment.
    cost_total = ('Cost', 'sum')
    df2[cost_total] = df2[cost_total].map('${:,.2f}'.format)
    return df2
@@ -65,7 +74,18 @@ def make_report_xlsx(records:list[dict]) -> DataFrame:
# dfs['name'] = df
# return dfs
def convert_control_by_mode(ctx:dict, control:models.Control, mode:str):
def convert_control_by_mode(ctx:dict, control:models.Control, mode:str) -> list[dict]:
"""
split control object into analysis types
Args:
ctx (dict): settings passed from gui
control (models.Control): control to be parsed into list
mode (str): analysis type
Returns:
list[dict]: list of records
"""
output = []
data = json.loads(getattr(control, mode))
for genus in data:
@@ -82,6 +102,17 @@ def convert_control_by_mode(ctx:dict, control:models.Control, mode:str):
def convert_data_list_to_df(ctx:dict, input:list[dict], subtype:str|None=None) -> DataFrame:
"""
Convert list of control records to dataframe
Args:
ctx (dict): settings passed from gui
input (list[dict]): list of dictionaries containing records
subtype (str | None, optional): _description_. Defaults to None.
Returns:
DataFrame: _description_
"""
df = DataFrame.from_records(input)
safe = ['name', 'submitted_date', 'genus', 'target']
logger.debug(df)