Improvements to submission querying.
This commit is contained in:
@@ -1,59 +1,59 @@
|
||||
'''
|
||||
"""
|
||||
All control related models.
|
||||
'''
|
||||
"""
|
||||
from __future__ import annotations
|
||||
from sqlalchemy import Column, String, TIMESTAMP, JSON, INTEGER, ForeignKey
|
||||
from sqlalchemy.orm import relationship, Query
|
||||
import logging, re, sys
|
||||
import logging, re
|
||||
from operator import itemgetter
|
||||
from . import BaseClass
|
||||
from tools import setup_lookup
|
||||
from datetime import date, datetime
|
||||
from typing import List
|
||||
from dateutil.parser import parse
|
||||
|
||||
from re import Pattern
|
||||
|
||||
logger = logging.getLogger(f"submissions.{__name__}")
|
||||
|
||||
|
||||
class ControlType(BaseClass):
|
||||
"""
|
||||
Base class of a control archetype.
|
||||
"""
|
||||
|
||||
id = Column(INTEGER, primary_key=True) #: primary key
|
||||
name = Column(String(255), unique=True) #: controltype name (e.g. MCS)
|
||||
targets = Column(JSON) #: organisms checked for
|
||||
instances = relationship("Control", back_populates="controltype") #: control samples created of this type.
|
||||
"""
|
||||
id = Column(INTEGER, primary_key=True) #: primary key
|
||||
name = Column(String(255), unique=True) #: controltype name (e.g. MCS)
|
||||
targets = Column(JSON) #: organisms checked for
|
||||
instances = relationship("Control", back_populates="controltype") #: control samples created of this type.
|
||||
|
||||
def __repr__(self) -> str:
    """
    Returns:
        str: Representation of self, in the form "<ControlType(name)>"
    """
    return f"<ControlType({self.name})>"
|
||||
|
||||
@classmethod
|
||||
@setup_lookup
|
||||
def query(cls,
|
||||
name:str=None,
|
||||
limit:int=0
|
||||
) -> ControlType|List[ControlType]:
|
||||
def query(cls,
|
||||
name: str = None,
|
||||
limit: int = 0
|
||||
) -> ControlType | List[ControlType]:
|
||||
"""
|
||||
Lookup control archetypes in the database
|
||||
|
||||
Args:
|
||||
name (str, optional): Control type name (limits results to 1). Defaults to None.
|
||||
limit (int, optional): Maximum number of results to return. Defaults to 0.
|
||||
name (str, optional): Name of the desired controltype. Defaults to None.
|
||||
limit (int, optional): Maximum number of results to return (0 = all). Defaults to 0.
|
||||
|
||||
Returns:
|
||||
models.ControlType|List[models.ControlType]: ControlType(s) of interest.
|
||||
"""
|
||||
ControlType | List[ControlType]: Single result if the limit = 1, else a list.
|
||||
"""
|
||||
query = cls.__database_session__.query(cls)
|
||||
match name:
|
||||
case str():
|
||||
query = query.filter(cls.name==name)
|
||||
query = query.filter(cls.name == name)
|
||||
limit = 1
|
||||
case _:
|
||||
pass
|
||||
return cls.query_return(query=query, limit=limit)
|
||||
|
||||
def get_subtypes(self, mode:str) -> List[str]:
|
||||
return cls.execute_query(query=query, limit=limit)
|
||||
|
||||
def get_subtypes(self, mode: str) -> List[str]:
|
||||
"""
|
||||
Get subtypes associated with this controltype
|
||||
|
||||
@@ -62,56 +62,68 @@ class ControlType(BaseClass):
|
||||
|
||||
Returns:
|
||||
List[str]: list of subtypes available
|
||||
"""
|
||||
"""
|
||||
# Get first instance since all should have same subtypes
|
||||
# outs = self.instances[0]
|
||||
# Get mode of instance
|
||||
# jsoner = json.loads(getattr(outs, mode))
|
||||
jsoner = getattr(self.instances[0], mode)
|
||||
logger.debug(f"JSON out: {jsoner.keys()}")
|
||||
# logger.debug(f"JSON out: {jsoner.keys()}")
|
||||
try:
|
||||
# Pick genera (all should have same subtypes)
|
||||
genera = list(jsoner.keys())[0]
|
||||
except IndexError:
|
||||
return []
|
||||
# remove items that don't have relevant data
|
||||
subtypes = [item for item in jsoner[genera] if "_hashes" not in item and "_ratio" not in item]
|
||||
return subtypes
|
||||
|
||||
|
||||
@classmethod
def get_positive_control_types(cls) -> List[ControlType]:
    """
    Gets list of Control types if they have targets

    Returns:
        List[ControlType]: Control types that have targets
    """
    # Truthiness (rather than `!= []`) also rejects a targets value of None,
    # which would otherwise be counted as "having targets".
    return [item for item in cls.query() if item.targets]
|
||||
|
||||
|
||||
@classmethod
def build_positive_regex(cls) -> Pattern:
    """
    Creates a re.Pattern that will look for positive control types

    The pattern matches (case-insensitively) any string that starts with the
    part of a positive control type's name before its first "-", followed by
    a "-" and anything else.

    Returns:
        Pattern: Constructed pattern
    """
    # Unique name prefixes; sorted so the generated pattern text is stable
    # between runs (set iteration order is not). Types without a name are skipped.
    prefixes = sorted({item.name.split("-")[0]
                       for item in cls.get_positive_control_types()
                       if item.name})
    if not prefixes:
        # With nothing to join the template would collapse to "(^)-.*", which
        # matches any string starting with "-". Return a pattern that can
        # never match instead.
        return re.compile(r"(?!)")
    # Escape each prefix so regex metacharacters in a name are taken literally.
    alternatives = "|^".join(re.escape(prefix) for prefix in prefixes)
    return re.compile(rf"(^{alternatives})-.*", flags=re.IGNORECASE)
|
||||
|
||||
|
||||
class Control(BaseClass):
|
||||
"""
|
||||
Base class of a control sample.
|
||||
"""
|
||||
|
||||
id = Column(INTEGER, primary_key=True) #: primary key
|
||||
parent_id = Column(String, ForeignKey("_controltype.id", name="fk_control_parent_id")) #: primary key of control type
|
||||
controltype = relationship("ControlType", back_populates="instances", foreign_keys=[parent_id]) #: reference to parent control type
|
||||
name = Column(String(255), unique=True) #: Sample ID
|
||||
submitted_date = Column(TIMESTAMP) #: Date submitted to Robotics
|
||||
contains = Column(JSON) #: unstructured hashes in contains.tsv for each organism
|
||||
matches = Column(JSON) #: unstructured hashes in matches.tsv for each organism
|
||||
kraken = Column(JSON) #: unstructured output from kraken_report
|
||||
submission_id = Column(INTEGER, ForeignKey("_basicsubmission.id")) #: parent submission id
|
||||
submission = relationship("BacterialCulture", back_populates="controls", foreign_keys=[submission_id]) #: parent submission
|
||||
refseq_version = Column(String(16)) #: version of refseq used in fastq parsing
|
||||
kraken2_version = Column(String(16)) #: version of kraken2 used in fastq parsing
|
||||
kraken2_db_version = Column(String(32)) #: folder name of kraken2 db
|
||||
sample = relationship("BacterialCultureSample", back_populates="control") #: This control's submission sample
|
||||
sample_id = Column(INTEGER, ForeignKey("_basicsample.id", ondelete="SET NULL", name="cont_BCS_id")) #: sample id key
|
||||
"""
|
||||
|
||||
id = Column(INTEGER, primary_key=True) #: primary key
|
||||
parent_id = Column(String,
|
||||
ForeignKey("_controltype.id", name="fk_control_parent_id")) #: primary key of control type
|
||||
controltype = relationship("ControlType", back_populates="instances",
|
||||
foreign_keys=[parent_id]) #: reference to parent control type
|
||||
name = Column(String(255), unique=True) #: Sample ID
|
||||
submitted_date = Column(TIMESTAMP) #: Date submitted to Robotics
|
||||
contains = Column(JSON) #: unstructured hashes in contains.tsv for each organism
|
||||
matches = Column(JSON) #: unstructured hashes in matches.tsv for each organism
|
||||
kraken = Column(JSON) #: unstructured output from kraken_report
|
||||
submission_id = Column(INTEGER, ForeignKey("_basicsubmission.id")) #: parent submission id
|
||||
submission = relationship("BacterialCulture", back_populates="controls",
|
||||
foreign_keys=[submission_id]) #: parent submission
|
||||
refseq_version = Column(String(16)) #: version of refseq used in fastq parsing
|
||||
kraken2_version = Column(String(16)) #: version of kraken2 used in fastq parsing
|
||||
kraken2_db_version = Column(String(32)) #: folder name of kraken2 db
|
||||
sample = relationship("BacterialCultureSample", back_populates="control") #: This control's submission sample
|
||||
sample_id = Column(INTEGER,
|
||||
ForeignKey("_basicsample.id", ondelete="SET NULL", name="cont_BCS_id")) #: sample id key
|
||||
|
||||
def __repr__(self) -> str:
    """
    Returns:
        str: Representation of self, in the form "<Control(name)>"
    """
    return f"<Control({self.name})>"
|
||||
|
||||
def to_sub_dict(self) -> dict:
|
||||
@@ -120,7 +132,7 @@ class Control(BaseClass):
|
||||
|
||||
Returns:
|
||||
dict: output dictionary containing: Name, Type, Targets, Top Kraken results
|
||||
"""
|
||||
"""
|
||||
# logger.debug("loading json string into dict")
|
||||
try:
|
||||
# kraken = json.loads(self.kraken)
|
||||
@@ -133,7 +145,8 @@ class Control(BaseClass):
|
||||
for item in kraken:
|
||||
# logger.debug("calculating kraken percent (overwrites what's already been scraped)")
|
||||
kraken_percent = kraken[item]['kraken_count'] / kraken_cnt_total
|
||||
new_kraken.append({'name': item, 'kraken_count':kraken[item]['kraken_count'], 'kraken_percent':"{0:.0%}".format(kraken_percent)})
|
||||
new_kraken.append({'name': item, 'kraken_count': kraken[item]['kraken_count'],
|
||||
'kraken_percent': "{0:.0%}".format(kraken_percent)})
|
||||
new_kraken = sorted(new_kraken, key=itemgetter('kraken_count'), reverse=True)
|
||||
# logger.debug("setting targets")
|
||||
if self.controltype.targets == []:
|
||||
@@ -142,14 +155,14 @@ class Control(BaseClass):
|
||||
targets = self.controltype.targets
|
||||
# logger.debug("constructing output dictionary")
|
||||
output = {
|
||||
"name" : self.name,
|
||||
"type" : self.controltype.name,
|
||||
"targets" : ", ".join(targets),
|
||||
"kraken" : new_kraken[0:5]
|
||||
"name": self.name,
|
||||
"type": self.controltype.name,
|
||||
"targets": ", ".join(targets),
|
||||
"kraken": new_kraken[0:5]
|
||||
}
|
||||
return output
|
||||
|
||||
def convert_by_mode(self, mode:str) -> list[dict]:
|
||||
def convert_by_mode(self, mode: str) -> list[dict]:
|
||||
"""
|
||||
split this instance into analysis types for controls graphs
|
||||
|
||||
@@ -158,7 +171,7 @@ class Control(BaseClass):
|
||||
|
||||
Returns:
|
||||
list[dict]: list of records
|
||||
"""
|
||||
"""
|
||||
output = []
|
||||
# logger.debug("load json string for mode (i.e. contains, matches, kraken2)")
|
||||
try:
|
||||
@@ -191,7 +204,7 @@ class Control(BaseClass):
|
||||
|
||||
Returns:
|
||||
List[str]: List of control mode names.
|
||||
"""
|
||||
"""
|
||||
try:
|
||||
# logger.debug("Creating a list of JSON columns in _controls table")
|
||||
cols = [item.name for item in list(cls.__table__.columns) if isinstance(item.type, JSON)]
|
||||
@@ -202,13 +215,13 @@ class Control(BaseClass):
|
||||
|
||||
@classmethod
|
||||
@setup_lookup
|
||||
def query(cls,
|
||||
control_type:ControlType|str|None=None,
|
||||
start_date:date|str|int|None=None,
|
||||
end_date:date|str|int|None=None,
|
||||
control_name:str|None=None,
|
||||
limit:int=0
|
||||
) -> Control|List[Control]:
|
||||
def query(cls,
|
||||
control_type: ControlType | str | None = None,
|
||||
start_date: date | str | int | None = None,
|
||||
end_date: date | str | int | None = None,
|
||||
control_name: str | None = None,
|
||||
limit: int = 0
|
||||
) -> Control | List[Control]:
|
||||
"""
|
||||
Lookup control objects in the database based on a number of parameters.
|
||||
|
||||
@@ -221,16 +234,16 @@ class Control(BaseClass):
|
||||
|
||||
Returns:
|
||||
models.Control|List[models.Control]: Control object of interest.
|
||||
"""
|
||||
"""
|
||||
query: Query = cls.__database_session__.query(cls)
|
||||
# by control type
|
||||
match control_type:
|
||||
case ControlType():
|
||||
# logger.debug(f"Looking up control by control type: {control_type}")
|
||||
query = query.filter(cls.controltype==control_type)
|
||||
query = query.filter(cls.controltype == control_type)
|
||||
case str():
|
||||
# logger.debug(f"Looking up control by control type: {control_type}")
|
||||
query = query.join(ControlType).filter(ControlType.name==control_type)
|
||||
query = query.join(ControlType).filter(ControlType.name == control_type)
|
||||
case _:
|
||||
pass
|
||||
# by date range
|
||||
@@ -247,7 +260,8 @@ class Control(BaseClass):
|
||||
start_date = start_date.strftime("%Y-%m-%d")
|
||||
case int():
|
||||
# logger.debug(f"Lookup control by ordinal start date {start_date}")
|
||||
start_date = datetime.fromordinal(datetime(1900, 1, 1).toordinal() + start_date - 2).date().strftime("%Y-%m-%d")
|
||||
start_date = datetime.fromordinal(
|
||||
datetime(1900, 1, 1).toordinal() + start_date - 2).date().strftime("%Y-%m-%d")
|
||||
case _:
|
||||
# logger.debug(f"Lookup control with parsed start date {start_date}")
|
||||
start_date = parse(start_date).strftime("%Y-%m-%d")
|
||||
@@ -257,7 +271,8 @@ class Control(BaseClass):
|
||||
end_date = end_date.strftime("%Y-%m-%d")
|
||||
case int():
|
||||
# logger.debug(f"Lookup control by ordinal end date {end_date}")
|
||||
end_date = datetime.fromordinal(datetime(1900, 1, 1).toordinal() + end_date - 2).date().strftime("%Y-%m-%d")
|
||||
end_date = datetime.fromordinal(datetime(1900, 1, 1).toordinal() + end_date - 2).date().strftime(
|
||||
"%Y-%m-%d")
|
||||
case _:
|
||||
# logger.debug(f"Lookup control with parsed end date {end_date}")
|
||||
end_date = parse(end_date).strftime("%Y-%m-%d")
|
||||
@@ -270,5 +285,4 @@ class Control(BaseClass):
|
||||
limit = 1
|
||||
case _:
|
||||
pass
|
||||
return cls.query_return(query=query, limit=limit)
|
||||
|
||||
return cls.execute_query(query=query, limit=limit)
|
||||
|
||||
Reference in New Issue
Block a user