Improvements to submission querying.

This commit is contained in:
lwark
2024-04-26 15:25:24 -05:00
parent b619d751b8
commit 5378c79933
7 changed files with 164 additions and 136 deletions

View File

@@ -1,3 +1,7 @@
- [ ] Make reporting better.
- [ ] Build master query method?
- Obviously there will need to be extensions, but I feel the attr method I have in Submissions could work.
- [x] Fix Artic RSLNamer
- [x] Put "Not applicable" reagents in to_dict() method.
- Currently in to_pydantic().
- [x] Critical: Convert JSON lists to dicts so I can have them update properly without using crashy Sqlalchemy-json

View File

@@ -2,8 +2,10 @@
Contains all models for sqlalchemy
'''
import sys
from sqlalchemy.orm import DeclarativeMeta, declarative_base, Query
from sqlalchemy.orm import DeclarativeMeta, declarative_base, Query, Session
from sqlalchemy.ext.declarative import declared_attr
from typing import Any, List
from pathlib import Path
# Load testing environment
if 'pytest' in sys.modules:
from pathlib import Path
@@ -11,28 +13,32 @@ if 'pytest' in sys.modules:
Base: DeclarativeMeta = declarative_base()
class BaseClass(Base):
"""
Abstract class to pass ctx values to all SQLAlchemy objects.
Args:
Base (DeclarativeMeta): Declarative base for metadata.
"""
__abstract__ = True
__abstract__ = True #: Will not be added to DB
__table_args__ = {'extend_existing': True}
__table_args__ = {'extend_existing': True} #: Will only add new columns
@declared_attr
def __tablename__(cls):
def __tablename__(cls) -> str:
"""
Set tablename to lowercase class name
Sets table name to lower case class name.
Returns:
str: lower case class name
"""
return f"_{cls.__name__.lower()}"
@declared_attr
def __database_session__(cls):
def __database_session__(cls) -> Session:
"""
Pull db session from ctx
Pull db session from ctx to be used in operations
Returns:
Session: DB session from ctx settings.
"""
if not 'pytest' in sys.modules:
from tools import ctx
@@ -41,9 +47,12 @@ class BaseClass(Base):
return ctx.database_session
@declared_attr
def __directory_path__(cls):
def __directory_path__(cls) -> Path:
"""
Pull submission directory from ctx
Pull directory path from ctx to be used in operations.
Returns:
Path: Location of the Submissions directory in Settings object
"""
if not 'pytest' in sys.modules:
from tools import ctx
@@ -52,9 +61,12 @@ class BaseClass(Base):
return ctx.directory_path
@declared_attr
def __backup_path__(cls):
def __backup_path__(cls) -> Path:
"""
Pull backup directory from ctx
Pull backup directory path from ctx to be used in operations.
Returns:
Path: Location of the Submissions backup directory in Settings object
"""
if not 'pytest' in sys.modules:
from tools import ctx
@@ -62,16 +74,17 @@ class BaseClass(Base):
from test_settings import ctx
return ctx.backup_path
def query_return(query:Query, limit:int=0):
@classmethod
def execute_query(cls, query: Query, limit: int = 0) -> Any | List[Any]:
"""
Execute sqlalchemy query.
Args:
query (Query): Query object
limit (int, optional): Maximum number of results to return (0 = all). Defaults to 0.
query (Query): input query object
limit (int): Maximum number of results. (0 = all)
Returns:
_type_: Query result.
Any | List[Any]: Single result if limit = 1 or List if other.
"""
with query.session.no_autoflush:
match limit:
@@ -94,6 +107,7 @@ class BaseClass(Base):
logger.critical(f"Problem saving object: {e}")
self.__database_session__.rollback()
from .controls import *
# import order must go: orgs, kit, subs due to circular import issues
from .organizations import *

View File

@@ -1,29 +1,29 @@
'''
"""
All control related models.
'''
"""
from __future__ import annotations
from sqlalchemy import Column, String, TIMESTAMP, JSON, INTEGER, ForeignKey
from sqlalchemy.orm import relationship, Query
import logging, re, sys
import logging, re
from operator import itemgetter
from . import BaseClass
from tools import setup_lookup
from datetime import date, datetime
from typing import List
from dateutil.parser import parse
from re import Pattern
logger = logging.getLogger(f"submissions.{__name__}")
class ControlType(BaseClass):
"""
Base class of a control archetype.
"""
id = Column(INTEGER, primary_key=True) #: primary key
name = Column(String(255), unique=True) #: controltype name (e.g. MCS)
targets = Column(JSON) #: organisms checked for
instances = relationship("Control", back_populates="controltype") #: control samples created of this type.
id = Column(INTEGER, primary_key=True) #: primary key
name = Column(String(255), unique=True) #: controltype name (e.g. MCS)
targets = Column(JSON) #: organisms checked for
instances = relationship("Control", back_populates="controltype") #: control samples created of this type.
def __repr__(self) -> str:
return f"<ControlType({self.name})>"
@@ -31,29 +31,29 @@ class ControlType(BaseClass):
@classmethod
@setup_lookup
def query(cls,
name:str=None,
limit:int=0
) -> ControlType|List[ControlType]:
name: str = None,
limit: int = 0
) -> ControlType | List[ControlType]:
"""
Lookup control archetypes in the database
Args:
name (str, optional): Control type name (limits results to 1). Defaults to None.
limit (int, optional): Maximum number of results to return. Defaults to 0.
name (str, optional): Name of the desired controltype. Defaults to None.
limit (int, optional): Maximum number of results to return (0 = all). Defaults to 0.
Returns:
models.ControlType|List[models.ControlType]: ControlType(s) of interest.
ControlType | List[ControlType]: Single result if the limit = 1, else a list.
"""
query = cls.__database_session__.query(cls)
match name:
case str():
query = query.filter(cls.name==name)
query = query.filter(cls.name == name)
limit = 1
case _:
pass
return cls.query_return(query=query, limit=limit)
return cls.execute_query(query=query, limit=limit)
def get_subtypes(self, mode:str) -> List[str]:
def get_subtypes(self, mode: str) -> List[str]:
"""
Get subtypes associated with this controltype
@@ -64,54 +64,66 @@ class ControlType(BaseClass):
List[str]: list of subtypes available
"""
# Get first instance since all should have same subtypes
# outs = self.instances[0]
# Get mode of instance
# jsoner = json.loads(getattr(outs, mode))
jsoner = getattr(self.instances[0], mode)
logger.debug(f"JSON out: {jsoner.keys()}")
# logger.debug(f"JSON out: {jsoner.keys()}")
try:
# Pick genera (all should have same subtypes)
genera = list(jsoner.keys())[0]
except IndexError:
return []
# remove items that don't have relevant data
subtypes = [item for item in jsoner[genera] if "_hashes" not in item and "_ratio" not in item]
return subtypes
@classmethod
def get_positive_control_types(cls):
def get_positive_control_types(cls) -> List[ControlType]:
"""
Gets list of Control types if they have targets
Returns:
List[ControlType]: Control types that have targets
"""
return [item for item in cls.query() if item.targets != []]
@classmethod
def build_positive_regex(cls):
def build_positive_regex(cls) -> Pattern:
"""
Creates a re.Pattern that will look for positive control types
Returns:
Pattern: Constructed pattern
"""
strings = list(set([item.name.split("-")[0] for item in cls.get_positive_control_types()]))
return re.compile(rf"(^{'|^'.join(strings)})-.*", flags=re.IGNORECASE)
class Control(BaseClass):
"""
Base class of a control sample.
"""
id = Column(INTEGER, primary_key=True) #: primary key
parent_id = Column(String, ForeignKey("_controltype.id", name="fk_control_parent_id")) #: primary key of control type
controltype = relationship("ControlType", back_populates="instances", foreign_keys=[parent_id]) #: reference to parent control type
name = Column(String(255), unique=True) #: Sample ID
submitted_date = Column(TIMESTAMP) #: Date submitted to Robotics
contains = Column(JSON) #: unstructured hashes in contains.tsv for each organism
matches = Column(JSON) #: unstructured hashes in matches.tsv for each organism
kraken = Column(JSON) #: unstructured output from kraken_report
submission_id = Column(INTEGER, ForeignKey("_basicsubmission.id")) #: parent submission id
submission = relationship("BacterialCulture", back_populates="controls", foreign_keys=[submission_id]) #: parent submission
refseq_version = Column(String(16)) #: version of refseq used in fastq parsing
kraken2_version = Column(String(16)) #: version of kraken2 used in fastq parsing
kraken2_db_version = Column(String(32)) #: folder name of kraken2 db
sample = relationship("BacterialCultureSample", back_populates="control") #: This control's submission sample
sample_id = Column(INTEGER, ForeignKey("_basicsample.id", ondelete="SET NULL", name="cont_BCS_id")) #: sample id key
id = Column(INTEGER, primary_key=True) #: primary key
parent_id = Column(String,
ForeignKey("_controltype.id", name="fk_control_parent_id")) #: primary key of control type
controltype = relationship("ControlType", back_populates="instances",
foreign_keys=[parent_id]) #: reference to parent control type
name = Column(String(255), unique=True) #: Sample ID
submitted_date = Column(TIMESTAMP) #: Date submitted to Robotics
contains = Column(JSON) #: unstructured hashes in contains.tsv for each organism
matches = Column(JSON) #: unstructured hashes in matches.tsv for each organism
kraken = Column(JSON) #: unstructured output from kraken_report
submission_id = Column(INTEGER, ForeignKey("_basicsubmission.id")) #: parent submission id
submission = relationship("BacterialCulture", back_populates="controls",
foreign_keys=[submission_id]) #: parent submission
refseq_version = Column(String(16)) #: version of refseq used in fastq parsing
kraken2_version = Column(String(16)) #: version of kraken2 used in fastq parsing
kraken2_db_version = Column(String(32)) #: folder name of kraken2 db
sample = relationship("BacterialCultureSample", back_populates="control") #: This control's submission sample
sample_id = Column(INTEGER,
ForeignKey("_basicsample.id", ondelete="SET NULL", name="cont_BCS_id")) #: sample id key
def __repr__(self) -> str:
"""
Returns:
str: Representation of self
"""
return f"<Control({self.name})>"
def to_sub_dict(self) -> dict:
@@ -133,7 +145,8 @@ class Control(BaseClass):
for item in kraken:
# logger.debug("calculating kraken percent (overwrites what's already been scraped)")
kraken_percent = kraken[item]['kraken_count'] / kraken_cnt_total
new_kraken.append({'name': item, 'kraken_count':kraken[item]['kraken_count'], 'kraken_percent':"{0:.0%}".format(kraken_percent)})
new_kraken.append({'name': item, 'kraken_count': kraken[item]['kraken_count'],
'kraken_percent': "{0:.0%}".format(kraken_percent)})
new_kraken = sorted(new_kraken, key=itemgetter('kraken_count'), reverse=True)
# logger.debug("setting targets")
if self.controltype.targets == []:
@@ -142,14 +155,14 @@ class Control(BaseClass):
targets = self.controltype.targets
# logger.debug("constructing output dictionary")
output = {
"name" : self.name,
"type" : self.controltype.name,
"targets" : ", ".join(targets),
"kraken" : new_kraken[0:5]
"name": self.name,
"type": self.controltype.name,
"targets": ", ".join(targets),
"kraken": new_kraken[0:5]
}
return output
def convert_by_mode(self, mode:str) -> list[dict]:
def convert_by_mode(self, mode: str) -> list[dict]:
"""
split this instance into analysis types for controls graphs
@@ -203,12 +216,12 @@ class Control(BaseClass):
@classmethod
@setup_lookup
def query(cls,
control_type:ControlType|str|None=None,
start_date:date|str|int|None=None,
end_date:date|str|int|None=None,
control_name:str|None=None,
limit:int=0
) -> Control|List[Control]:
control_type: ControlType | str | None = None,
start_date: date | str | int | None = None,
end_date: date | str | int | None = None,
control_name: str | None = None,
limit: int = 0
) -> Control | List[Control]:
"""
Lookup control objects in the database based on a number of parameters.
@@ -227,10 +240,10 @@ class Control(BaseClass):
match control_type:
case ControlType():
# logger.debug(f"Looking up control by control type: {control_type}")
query = query.filter(cls.controltype==control_type)
query = query.filter(cls.controltype == control_type)
case str():
# logger.debug(f"Looking up control by control type: {control_type}")
query = query.join(ControlType).filter(ControlType.name==control_type)
query = query.join(ControlType).filter(ControlType.name == control_type)
case _:
pass
# by date range
@@ -247,7 +260,8 @@ class Control(BaseClass):
start_date = start_date.strftime("%Y-%m-%d")
case int():
# logger.debug(f"Lookup control by ordinal start date {start_date}")
start_date = datetime.fromordinal(datetime(1900, 1, 1).toordinal() + start_date - 2).date().strftime("%Y-%m-%d")
start_date = datetime.fromordinal(
datetime(1900, 1, 1).toordinal() + start_date - 2).date().strftime("%Y-%m-%d")
case _:
# logger.debug(f"Lookup control with parsed start date {start_date}")
start_date = parse(start_date).strftime("%Y-%m-%d")
@@ -257,7 +271,8 @@ class Control(BaseClass):
end_date = end_date.strftime("%Y-%m-%d")
case int():
# logger.debug(f"Lookup control by ordinal end date {end_date}")
end_date = datetime.fromordinal(datetime(1900, 1, 1).toordinal() + end_date - 2).date().strftime("%Y-%m-%d")
end_date = datetime.fromordinal(datetime(1900, 1, 1).toordinal() + end_date - 2).date().strftime(
"%Y-%m-%d")
case _:
# logger.debug(f"Lookup control with parsed end date {end_date}")
end_date = parse(end_date).strftime("%Y-%m-%d")
@@ -270,5 +285,4 @@ class Control(BaseClass):
limit = 1
case _:
pass
return cls.query_return(query=query, limit=limit)
return cls.execute_query(query=query, limit=limit)

View File

@@ -214,7 +214,7 @@ class KitType(BaseClass):
limit = 1
case _:
pass
return cls.query_return(query=query, limit=limit)
return cls.execute_query(query=query, limit=limit)
@check_authorization
def save(self):
@@ -303,7 +303,7 @@ class ReagentType(BaseClass):
limit = 1
case _:
pass
return cls.query_return(query=query, limit=limit)
return cls.execute_query(query=query, limit=limit)
def to_pydantic(self) -> "PydReagent":
"""
@@ -464,7 +464,7 @@ class Reagent(BaseClass):
limit = 1
case _:
pass
return cls.query_return(query=query, limit=limit)
return cls.execute_query(query=query, limit=limit)
class Discount(BaseClass):
"""
@@ -533,7 +533,7 @@ class Discount(BaseClass):
case _:
# raise ValueError(f"Invalid value for kit type: {kit_type}")
pass
return cls.query_return(query=query)
return cls.execute_query(query=query)
@check_authorization
def save(self):
@@ -702,7 +702,7 @@ class SubmissionType(BaseClass):
query = query.filter(cls.info_map.op('->')(key)!=None)
case _:
pass
return cls.query_return(query=query, limit=limit)
return cls.execute_query(query=query, limit=limit)
@check_authorization
def save(self):
@@ -781,7 +781,7 @@ class SubmissionTypeKitTypeAssociation(BaseClass):
# logger.debug(f"Looking up {cls.__name__} by id {kit_type}")
query = query.join(KitType).filter(KitType.id==kit_type)
limit = query.count()
return cls.query_return(query=query, limit=limit)
return cls.execute_query(query=query, limit=limit)
class KitTypeReagentTypeAssociation(BaseClass):
"""
@@ -889,7 +889,7 @@ class KitTypeReagentTypeAssociation(BaseClass):
pass
if kit_type != None and reagent_type != None:
limit = 1
return cls.query_return(query=query, limit=limit)
return cls.execute_query(query=query, limit=limit)
class SubmissionReagentAssociation(BaseClass):
"""
@@ -956,7 +956,7 @@ class SubmissionReagentAssociation(BaseClass):
query = query.join(BasicSubmission).filter(BasicSubmission.id==submission)
case _:
pass
return cls.query_return(query=query, limit=limit)
return cls.execute_query(query=query, limit=limit)
def to_sub_dict(self, extraction_kit) -> dict:
"""
@@ -1083,7 +1083,7 @@ class Equipment(BaseClass):
limit = 1
case _:
pass
return cls.query_return(query=query, limit=limit)
return cls.execute_query(query=query, limit=limit)
def to_pydantic(self, submission_type:SubmissionType, extraction_kit:str|KitType|None=None) -> "PydEquipment":
"""
@@ -1206,7 +1206,7 @@ class EquipmentRole(BaseClass):
limit = 1
case _:
pass
return cls.query_return(query=query, limit=limit)
return cls.execute_query(query=query, limit=limit)
def get_processes(self, submission_type:str|SubmissionType|None, extraction_kit:str|KitType|None=None) -> List[Process]:
"""
@@ -1382,5 +1382,5 @@ class Process(BaseClass):
limit = 1
case _:
pass
return cls.query_return(query=query, limit=limit)
return cls.execute_query(query=query, limit=limit)

View File

@@ -33,10 +33,6 @@ class Organization(BaseClass):
contacts = relationship("Contact", back_populates="organization", secondary=orgs_contacts) #: contacts involved with this org
def __repr__(self) -> str:
"""
Returns:
str: Representation of this Organization
"""
return f"<Organization({self.name})>"
@classmethod
@@ -70,7 +66,7 @@ class Organization(BaseClass):
limit = 1
case _:
pass
return cls.query_return(query=query, limit=limit)
return cls.execute_query(query=query, limit=limit)
@check_authorization
def save(self):
@@ -137,5 +133,5 @@ class Contact(BaseClass):
limit = 1
case _:
pass
return cls.query_return(query=query, limit=limit)
return cls.execute_query(query=query, limit=limit)

View File

@@ -47,7 +47,6 @@ class BasicSubmission(BaseClass):
submission_type_name = Column(String, ForeignKey("_submissiontype.name", ondelete="SET NULL", name="fk_BS_subtype_name")) #: name of joined submission type
technician = Column(String(64)) #: initials of processing tech(s)
# Move this into custom types?
# reagents = relationship("Reagent", back_populates="submissions", secondary=reagents_submissions) #: relationship to reagents
reagents_id = Column(String, ForeignKey("_reagent.id", ondelete="SET NULL", name="fk_BS_reagents_id")) #: id of used reagents
extraction_info = Column(JSON) #: unstructured output from the extraction table logger.
run_cost = Column(FLOAT(2)) #: total cost of running the plate. Set from constant and mutable kit costs at time of creation.
@@ -127,13 +126,13 @@ class BasicSubmission(BaseClass):
output = {}
for k,v in dicto.items():
if len(args) > 0 and k not in args:
logger.debug(f"Don't want {k}")
# logger.debug(f"Don't want {k}")
continue
else:
output[k] = v
for k,v in st.defaults.items():
if len(args) > 0 and k not in args:
logger.debug(f"Don't want {k}")
# logger.debug(f"Don't want {k}")
continue
else:
match v:
@@ -410,7 +409,7 @@ class BasicSubmission(BaseClass):
case item if item in self.jsons():
logger.debug(f"Setting JSON attribute.")
existing = self.__getattribute__(key)
if value == "" or value is None or value == 'null':
if value is None or value in ['', 'null']:
logger.error(f"No value given, not setting.")
return
if existing is None:
@@ -422,7 +421,8 @@ class BasicSubmission(BaseClass):
if isinstance(value, list):
existing += value
else:
existing.append(value)
if value is not None:
existing.append(value)
self.__setattr__(key, existing)
flag_modified(self, key)
return
@@ -890,7 +890,7 @@ class BasicSubmission(BaseClass):
# limit = 1
if chronologic:
query.order_by(cls.submitted_date)
return cls.query_return(query=query, limit=limit)
return cls.execute_query(query=query, limit=limit)
@classmethod
def query_or_create(cls, submission_type:str|SubmissionType|None=None, **kwargs) -> BasicSubmission:
@@ -1421,7 +1421,7 @@ class WastewaterArtic(BasicSubmission):
return input_dict
@classmethod
def enforce_name(cls, instr:str, data:dict|None={}) -> str:
def enforce_name(cls, instr:str, data:dict={}) -> str:
"""
Extends parent
"""
@@ -1430,16 +1430,12 @@ class WastewaterArtic(BasicSubmission):
instr = re.sub(r"Artic", "", instr, flags=re.IGNORECASE)
except (AttributeError, TypeError) as e:
logger.error(f"Problem using regex: {e}")
# try:
# check = instr.startswith("RSL")
# except AttributeError:
# check = False
# if not check:
# try:
# instr = "RSL" + instr
# except TypeError:
# instr = "RSL"
# logger.debug(f"Before RSL addition: {instr}")
instr = instr.replace("-", "")
instr = re.sub(r"^(\d{6})", f"RSL-AR-\\1", instr)
# logger.debug(f"name coming out of Artic namer: {instr}")
outstr = super().enforce_name(instr=instr, data=data)
return outstr
@classmethod
@@ -1922,7 +1918,7 @@ class BasicSample(BaseClass):
query = query.filter(attr==v)
if len(kwargs) > 0:
limit = 1
return cls.query_return(query=query, limit=limit)
return cls.execute_query(query=query, limit=limit)
@classmethod
def query_or_create(cls, sample_type:str|None=None, **kwargs) -> BasicSample:
@@ -2259,7 +2255,7 @@ class SubmissionSampleAssociation(BaseClass):
query = query.order_by(BasicSubmission.submitted_date.desc())
else:
query = query.order_by(BasicSubmission.submitted_date)
return cls.query_return(query=query, limit=limit)
return cls.execute_query(query=query, limit=limit)
@classmethod
def query_or_create(cls,

View File

@@ -482,12 +482,16 @@ def setup_lookup(func):
func (_type_): _description_
"""
def wrapper(*args, **kwargs):
for k, v in locals().items():
if k == "kwargs":
continue
sanitized_kwargs = {}
for k, v in locals()['kwargs'].items():
if isinstance(v, dict):
raise ValueError("Cannot use dictionary in query. Make sure you parse it first.")
return func(*args, **kwargs)
try:
sanitized_kwargs[k] = v['value']
except KeyError:
raise ValueError("Could not sanitize dictionary in query. Make sure you parse it first.")
elif v is not None:
sanitized_kwargs[k] = v
return func(*args, **sanitized_kwargs)
return wrapper
class Result(BaseModel):