Large scale refactor to improve db efficiency

This commit is contained in:
Landon Wark
2023-09-27 14:16:28 -05:00
parent 82ab06efad
commit e484eabb22
37 changed files with 1782 additions and 1697 deletions

View File

@@ -1,12 +1,46 @@
'''
Contains all models for sqlalchemy
'''
from sqlalchemy.ext.declarative import declarative_base
from typing import Any
from sqlalchemy.orm import declarative_base
import logging
from pprint import pformat
Base = declarative_base()
metadata = Base.metadata
logger = logging.getLogger(f"submissions.{__name__}")


def find_subclasses(parent:Any, attrs:dict) -> Any:
    """
    Selects the model class that has every attribute named in attrs.

    The parent itself is preferred. Only when the parent is missing at least
    one attribute are its direct subclasses (parent.__subclasses__()) checked,
    and the first one that has all of the attributes is used.

    Args:
        parent (Any): Parent class.
        attrs (dict): Key:Value dictionary of attributes. Only the keys are checked (via hasattr). Empty or None returns the parent unchanged.

    Raises:
        AttributeError: Raised if neither the parent nor any direct subclass has all attributes.

    Returns:
        Any: Parent or the first matching direct subclass.
    """
    # Nothing to match against (empty dict or None): the parent is always acceptable.
    if not attrs:
        return parent
    if all(hasattr(parent, attr) for attr in attrs):
        model = parent
    else:
        # looks for first model that has all included kwargs
        candidates = (
            subclass for subclass in parent.__subclasses__()
            if all(hasattr(subclass, attr) for attr in attrs)
        )
        # next() with a default stops at the first hit and avoids using IndexError for control flow.
        model = next(candidates, None)
        if model is None:
            raise AttributeError(f"Couldn't find existing class/subclass of {parent} with all attributes:\n{pformat(attrs)}")
    logger.debug(f"Using model: {model}")
    return model
from .controls import Control, ControlType
from .kits import KitType, ReagentType, Reagent, Discount, KitTypeReagentTypeAssociation, SubmissionType, SubmissionTypeKitTypeAssociation
from .organizations import Organization, Contact
from .submissions import BasicSubmission, BacterialCulture, Wastewater, WastewaterArtic, WastewaterSample, BacterialCultureSample, BasicSample, SubmissionSampleAssociation, WastewaterAssociation

View File

@@ -19,9 +19,7 @@ class ControlType(Base):
id = Column(INTEGER, primary_key=True) #: primary key
name = Column(String(255), unique=True) #: controltype name (e.g. MCS)
targets = Column(JSON) #: organisms checked for
# instances_id = Column(INTEGER, ForeignKey("_control_samples.id", ondelete="SET NULL", name="fk_ctype_instances_id"))
instances = relationship("Control", back_populates="controltype") #: control samples created of this type.
# UniqueConstraint('name', name='uq_controltype_name')
class Control(Base):
@@ -39,13 +37,14 @@ class Control(Base):
contains = Column(JSON) #: unstructured hashes in contains.tsv for each organism
matches = Column(JSON) #: unstructured hashes in matches.tsv for each organism
kraken = Column(JSON) #: unstructured output from kraken_report
# UniqueConstraint('name', name='uq_control_name')
submission_id = Column(INTEGER, ForeignKey("_submissions.id")) #: parent submission id
submission = relationship("BacterialCulture", back_populates="controls", foreign_keys=[submission_id]) #: parent submission
refseq_version = Column(String(16)) #: version of refseq used in fastq parsing
kraken2_version = Column(String(16)) #: version of kraken2 used in fastq parsing
kraken2_db_version = Column(String(32)) #: folder name of kraken2 db
# Debug representation: this object's name wrapped as "<Control(...)>".
def __repr__(self) -> str:
return f"<Control({self.name})>"
def to_sub_dict(self) -> dict:
"""

View File

@@ -5,13 +5,11 @@ from . import Base
from sqlalchemy import Column, String, TIMESTAMP, JSON, INTEGER, ForeignKey, Interval, Table, FLOAT
from sqlalchemy.orm import relationship, validates
from sqlalchemy.ext.associationproxy import association_proxy
from datetime import date
import logging
logger = logging.getLogger(f'submissions.{__name__}')
reagenttypes_reagents = Table("_reagenttypes_reagents", Base.metadata, Column("reagent_id", INTEGER, ForeignKey("_reagents.id")), Column("reagenttype_id", INTEGER, ForeignKey("_reagent_types.id")))
@@ -55,22 +53,26 @@ class KitType(Base):
"""
return self.name
def get_reagents(self, required:bool=False) -> list:
def get_reagents(self, required:bool=False, submission_type:str|None=None) -> list:
"""
Return ReagentTypes linked to kit through KitTypeReagentTypeAssociation.
Args:
required (bool, optional): If true only return required types. Defaults to False.
submission_type (str | None, optional): Submission type to narrow results. Defaults to None.
Returns:
list: List of ReagentTypes
list: List of reagent types
"""
if required:
return [item.reagent_type for item in self.kit_reagenttype_associations if item.required == 1]
if submission_type != None:
relevant_associations = [item for item in self.kit_reagenttype_associations if submission_type in item.uses.keys()]
else:
return [item.reagent_type for item in self.kit_reagenttype_associations]
relevant_associations = [item for item in self.kit_reagenttype_associations]
if required:
return [item.reagent_type for item in relevant_associations if item.required == 1]
else:
return [item.reagent_type for item in relevant_associations]
def construct_xl_map_for_use(self, use:str) -> dict:
"""
Creates map of locations in excel workbook for a SubmissionType
@@ -96,8 +98,6 @@ class KitType(Base):
except IndexError as e:
map['info'] = {}
return map
class ReagentType(Base):
"""
@@ -118,13 +118,7 @@ class ReagentType(Base):
# association proxy exposing the "kit_type" attribute
# of each association object in the collection
kit_types = association_proxy("kit_reagenttype_associations", "kit_type")
@validates('required')
def validate_age(self, key, value):
    """
    Validates the 'required' flag (registered through the validates('required') decorator).

    Args:
        key (str): Name of the attribute being set.
        value (int): Incoming value; must equal 0 or 1.

    Raises:
        ValueError: Raised if value is anything other than 0 or 1.

    Returns:
        int: The unchanged value once accepted.
    """
    # Membership test rather than a range check: `0 <= value < 2` let fractional
    # values such as 0.5 through even though the message promises 0 or 1.
    if value not in (0, 1):
        raise ValueError(f'Invalid required value {value}. Must be 0 or 1.')
    return value
kit_types = association_proxy("reagenttype_kit_associations", "kit_type")
def __str__(self) -> str:
"""
@@ -205,13 +199,17 @@ class Reagent(Base):
str: string representing this object's type and lot number
"""
return str(self.lot)
def to_sub_dict(self, extraction_kit:KitType=None) -> dict:
"""
dictionary containing values necessary for gui
Args:
extraction_kit (KitType, optional): KitType to use to get reagent type. Defaults to None.
Returns:
dict: gui friendly dictionary
dict: _description_
"""
if extraction_kit != None:
# Get the intersection of this reagent's ReagentType and all ReagentTypes in KitType
@@ -245,6 +243,9 @@ class Reagent(Base):
"""
Returns basic reagent dictionary.
Args:
extraction_kit (KitType, optional): KitType to use to get reagent type. Defaults to None.
Returns:
dict: Basic reagent dictionary of 'type', 'lot', 'expiry'
"""
@@ -268,7 +269,6 @@ class Reagent(Base):
"expiry": self.expiry.strftime("%Y-%m-%d")
}
class Discount(Base):
"""
Relationship table for client labs for certain kits.
@@ -303,7 +303,7 @@ class SubmissionType(Base):
cascade="all, delete-orphan",
)
kit_types = association_proxy("kit_submissiontype_associations", "kit_type")
kit_types = association_proxy("submissiontype_kit_associations", "kit_type")
# Debug representation: this object's name wrapped as "<SubmissionType(...)>".
def __repr__(self) -> str:
return f"<SubmissionType({self.name})>"
@@ -321,7 +321,7 @@ class SubmissionTypeKitTypeAssociation(Base):
kit_type = relationship(KitType, back_populates="kit_submissiontype_associations")
# reference to the "ReagentType" object
# reference to the "SubmissionType" object
submission_type = relationship(SubmissionType, back_populates="submissiontype_kit_associations")
def __init__(self, kit_type=None, submission_type=None):

View File

@@ -5,7 +5,6 @@ from . import Base
from sqlalchemy import Column, String, INTEGER, ForeignKey, Table
from sqlalchemy.orm import relationship
# table containing organization/contact relationship
orgs_contacts = Table("_orgs_contacts", Base.metadata, Column("org_id", INTEGER, ForeignKey("_organizations.id")), Column("contact_id", INTEGER, ForeignKey("_contacts.id")))

View File

@@ -74,10 +74,13 @@ class BasicSubmission(Base):
def to_dict(self, full_data:bool=False) -> dict:
"""
dictionary used in submissions summary
Constructs dictionary used in submissions summary
Args:
full_data (bool, optional): indicates if sample dicts to be constructed. Defaults to False.
Returns:
dict: dictionary used in submissions summary
dict: dictionary used in submissions summary and details
"""
# get lab from nested organization object
logger.debug(f"Converting {self.rsl_plate_num} to dict...")
@@ -113,10 +116,6 @@ class BasicSubmission(Base):
else:
reagents = None
samples = None
# Updated 2023-09 to get sample association with plate number
# for item in self.submission_sample_associations:
# sample = item.sample.to_sub_dict(submission_rsl=self.rsl_plate_num)
# samples.append(sample)
try:
comments = self.comment
except:
@@ -383,7 +382,6 @@ class BasicSample(Base):
Returns:
dict: dictionary of sample id, row and column in elution plate
"""
# self.assoc = [item for item in self.sample_submission_associations if item.submission.rsl_plate_num==submission_rsl][0]
# Since there is no PCR, negligible result is necessary.
return dict(name=self.submitter_id, positive=False)