Updated controls to both Irida and PCR.
This commit is contained in:
@@ -2,14 +2,19 @@
|
||||
All control related models.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
from sqlalchemy import Column, String, TIMESTAMP, JSON, INTEGER, ForeignKey
|
||||
from sqlalchemy.orm import relationship, Query
|
||||
from pprint import pformat
|
||||
|
||||
from PyQt6.QtWidgets import QWidget, QCheckBox, QLabel
|
||||
from pandas import DataFrame
|
||||
from sqlalchemy import Column, String, TIMESTAMP, JSON, INTEGER, ForeignKey, case, FLOAT
|
||||
from sqlalchemy.orm import relationship, Query, validates
|
||||
import logging, re
|
||||
from operator import itemgetter
|
||||
|
||||
from . import BaseClass
|
||||
from tools import setup_lookup
|
||||
from datetime import date, datetime
|
||||
from typing import List, Literal
|
||||
from tools import setup_lookup, report_result, Result, Report, Settings, get_unique_values_in_df_column
|
||||
from datetime import date, datetime, timedelta
|
||||
from typing import List, Literal, Tuple, Generator
|
||||
from dateutil.parser import parse
|
||||
from re import Pattern
|
||||
|
||||
@@ -21,7 +26,7 @@ class ControlType(BaseClass):
|
||||
Base class of a control archetype.
|
||||
"""
|
||||
id = Column(INTEGER, primary_key=True) #: primary key
|
||||
name = Column(String(255), unique=True) #: controltype name (e.g. MCS)
|
||||
name = Column(String(255), unique=True) #: controltype name (e.g. Irida Control)
|
||||
targets = Column(JSON) #: organisms checked for
|
||||
instances = relationship("Control", back_populates="controltype") #: control samples created of this type.
|
||||
|
||||
@@ -53,7 +58,7 @@ class ControlType(BaseClass):
|
||||
pass
|
||||
return cls.execute_query(query=query, limit=limit)
|
||||
|
||||
def get_subtypes(self, mode: Literal['kraken', 'matches', 'contains']) -> List[str]:
|
||||
def get_modes(self, mode: Literal['kraken', 'matches', 'contains']) -> List[str]:
|
||||
"""
|
||||
Get subtypes associated with this controltype (currently used only for Kraken)
|
||||
|
||||
@@ -65,8 +70,10 @@ class ControlType(BaseClass):
|
||||
"""
|
||||
# NOTE: Get first instance since all should have same subtypes
|
||||
# NOTE: Get mode of instance
|
||||
if not self.instances:
|
||||
return
|
||||
jsoner = getattr(self.instances[0], mode)
|
||||
# logger.debug(f"JSON out: {jsoner.keys()}")
|
||||
# logger.debug(f"JSON retrieved: {jsoner.keys()}")
|
||||
try:
|
||||
# NOTE: Pick genera (all should have same subtypes)
|
||||
genera = list(jsoner.keys())[0]
|
||||
@@ -74,10 +81,14 @@ class ControlType(BaseClass):
|
||||
return []
|
||||
# NOTE: remove items that don't have relevant data
|
||||
subtypes = [item for item in jsoner[genera] if "_hashes" not in item and "_ratio" not in item]
|
||||
logger.debug(f"subtypes out: {pformat(subtypes)}")
|
||||
return subtypes
|
||||
|
||||
def get_instance_class(self):
    """
    Look up the Control class registered under this control type's name.

    Returns:
        Whatever Control.find_polymorphic_subclass returns for self.name.
    """
    identity = self.name
    return Control.find_polymorphic_subclass(polymorphic_identity=identity)
|
||||
|
||||
@classmethod
|
||||
def get_positive_control_types(cls) -> List[ControlType]:
|
||||
def get_positive_control_types(cls) -> Generator[ControlType, None, None]:
|
||||
"""
|
||||
Gets list of Control types if they have targets
|
||||
|
||||
@@ -104,35 +115,234 @@ class Control(BaseClass):
|
||||
"""
|
||||
|
||||
id = Column(INTEGER, primary_key=True) #: primary key
|
||||
parent_id = Column(String,
|
||||
ForeignKey("_controltype.id", name="fk_control_parent_id")) #: primary key of control type
|
||||
controltype_name = Column(String, ForeignKey("_controltype.name", ondelete="SET NULL",
|
||||
name="fk_BC_subtype_name")) #: name of joined submission type
|
||||
controltype = relationship("ControlType", back_populates="instances",
|
||||
foreign_keys=[parent_id]) #: reference to parent control type
|
||||
foreign_keys=[controltype_name]) #: reference to parent control type
|
||||
name = Column(String(255), unique=True) #: Sample ID
|
||||
submitted_date = Column(TIMESTAMP) #: Date submitted to Robotics
|
||||
submission_id = Column(INTEGER, ForeignKey("_basicsubmission.id")) #: parent submission id
|
||||
submission = relationship("BasicSubmission", back_populates="controls",
|
||||
foreign_keys=[submission_id]) #: parent submission
|
||||
|
||||
__mapper_args__ = {
|
||||
"polymorphic_identity": "Basic Control",
|
||||
"polymorphic_on": case(
|
||||
|
||||
(controltype_name == "PCR Control", "PCR Control"),
|
||||
(controltype_name == "Irida Control", "Irida Control"),
|
||||
|
||||
else_="Basic Control"
|
||||
),
|
||||
"with_polymorphic": "*",
|
||||
}
|
||||
|
||||
def __repr__(self) -> str:
    """Render the control as ``<controltype_name(name)>``."""
    return "<{}({})>".format(self.controltype_name, self.name)
|
||||
|
||||
@classmethod
|
||||
def find_polymorphic_subclass(cls, polymorphic_identity: str | ControlType | None = None,
                              attrs: dict | None = None):
    """
    Find subclass based on polymorphic identity or relevant attributes.

    Args:
        polymorphic_identity (str | ControlType | None, optional): Polymorphic identity to look up in the
            mapper's polymorphic map. A ControlType is reduced to its name and a dict is reduced to its
            'value' entry before the lookup. Defaults to None.
        attrs (dict | None, optional): Attributes the returned class must have. Only consulted when at
            least one of its keys is missing from cls. Defaults to None.

    Returns:
        type: The matching subclass, or cls itself when nothing more specific is found.

    Raises:
        AttributeError: If attrs is given and no direct subclass of cls has every listed attribute.
    """
    if isinstance(polymorphic_identity, dict):
        polymorphic_identity = polymorphic_identity['value']
    if isinstance(polymorphic_identity, ControlType):
        polymorphic_identity = polymorphic_identity.name
    model = cls
    if isinstance(polymorphic_identity, str):
        try:
            model = cls.__mapper__.polymorphic_map[polymorphic_identity].class_
        except Exception as e:
            # NOTE: Deliberately best-effort; any lookup failure keeps the base class as the model.
            logger.error(
                f"Could not get polymorph {polymorphic_identity} of {cls} due to {e}, falling back to {cls}")
    if attrs and any(not hasattr(cls, attr) for attr in attrs):
        # NOTE: looks for first model that has all included kwargs
        try:
            model = next(subclass for subclass in cls.__subclasses__() if
                         all(hasattr(subclass, attr) for attr in attrs))
        except StopIteration as e:
            raise AttributeError(
                f"Couldn't find existing class/subclass of {cls} with all attributes:\n{pformat(attrs.keys())}"
            ) from e
    logger.info(f"Recruiting model: {model}")
    return model
|
||||
|
||||
@classmethod
|
||||
def make_parent_buttons(cls, parent: QWidget) -> None:
    """
    Placeholder for adding control-specific widgets to the chart holder; this implementation adds nothing.

    Args:
        parent (QWidget): chart holding widget to add buttons to.

    Returns:
        None
    """
    return None
|
||||
|
||||
@classmethod
|
||||
def make_chart(cls, parent, chart_settings: dict, ctx):
    """
    Placeholder chart builder; this implementation produces no chart.

    Args:
        parent: chart holding widget (unused here)
        chart_settings (dict): settings passed down from chart widget
        ctx (Settings): settings passed down from gui

    Returns:
        None
    """
    nothing = None
    return nothing
|
||||
|
||||
|
||||
class PCRControl(Control):
|
||||
id = Column(INTEGER, ForeignKey('_control.id'), primary_key=True)
|
||||
subtype = Column(String(16)) #: PC or NC
|
||||
target = Column(String(16)) #: N1, N2, etc.
|
||||
ct = Column(FLOAT)
|
||||
reagent_lot = Column(String(64), ForeignKey("_reagent.name", ondelete="SET NULL",
|
||||
name="fk_reagent_lot"))
|
||||
reagent = relationship("Reagent", foreign_keys=reagent_lot)
|
||||
|
||||
__mapper_args__ = dict(polymorphic_identity="PCR Control",
|
||||
polymorphic_load="inline",
|
||||
inherit_condition=(id == Control.id))
|
||||
|
||||
def to_sub_dict(self):
|
||||
return dict(name=self.name, ct=self.ct, subtype=self.subtype, target=self.target, reagent_lot=self.reagent_lot,
|
||||
submitted_date=self.submitted_date.date())
|
||||
|
||||
@classmethod
|
||||
@setup_lookup
|
||||
def query(cls,
|
||||
sub_type: str | None = None,
|
||||
start_date: date | str | int | None = None,
|
||||
end_date: date | str | int | None = None,
|
||||
control_name: str | None = None,
|
||||
limit: int = 0
|
||||
) -> Control | List[Control]:
|
||||
"""
|
||||
Lookup control objects in the database based on a number of parameters.
|
||||
|
||||
Args:
|
||||
sub_type (models.ControlType | str | None, optional): Control archetype. Defaults to None.
|
||||
start_date (date | str | int | None, optional): Beginning date to search by. Defaults to 2023-01-01 if end_date not None.
|
||||
end_date (date | str | int | None, optional): End date to search by. Defaults to today if start_date not None.
|
||||
control_name (str | None, optional): Name of control. Defaults to None.
|
||||
limit (int, optional): Maximum number of results to return (0 = all). Defaults to 0.
|
||||
|
||||
Returns:
|
||||
models.Control|List[models.Control]: Control object of interest.
|
||||
"""
|
||||
query: Query = cls.__database_session__.query(cls)
|
||||
# NOTE: by date range
|
||||
if start_date is not None and end_date is None:
|
||||
logger.warning(f"Start date with no end date, using today.")
|
||||
end_date = date.today()
|
||||
if end_date is not None and start_date is None:
|
||||
logger.warning(f"End date with no start date, using Jan 1, 2023")
|
||||
start_date = date(2023, 1, 1)
|
||||
if start_date is not None:
|
||||
match start_date:
|
||||
case date():
|
||||
# logger.debug(f"Lookup control by start date({start_date})")
|
||||
start_date = start_date.strftime("%Y-%m-%d")
|
||||
case int():
|
||||
# logger.debug(f"Lookup control by ordinal start date {start_date}")
|
||||
start_date = datetime.fromordinal(
|
||||
datetime(1900, 1, 1).toordinal() + start_date - 2).date().strftime("%Y-%m-%d")
|
||||
case _:
|
||||
# logger.debug(f"Lookup control with parsed start date {start_date}")
|
||||
start_date = parse(start_date).strftime("%Y-%m-%d")
|
||||
match end_date:
|
||||
case date():
|
||||
# logger.debug(f"Lookup control by end date({end_date})")
|
||||
end_date = end_date.strftime("%Y-%m-%d")
|
||||
case int():
|
||||
# logger.debug(f"Lookup control by ordinal end date {end_date}")
|
||||
end_date = datetime.fromordinal(datetime(1900, 1, 1).toordinal() + end_date - 2).date().strftime(
|
||||
"%Y-%m-%d")
|
||||
case _:
|
||||
# logger.debug(f"Lookup control with parsed end date {end_date}")
|
||||
end_date = parse(end_date).strftime("%Y-%m-%d")
|
||||
# logger.debug(f"Looking up BasicSubmissions from start date: {start_date} and end date: {end_date}")
|
||||
query = query.filter(cls.submitted_date.between(start_date, end_date))
|
||||
match sub_type:
|
||||
case str():
|
||||
from backend import BasicSubmission, SubmissionType
|
||||
query = query.join(BasicSubmission).join(SubmissionType).filter(SubmissionType.name == sub_type)
|
||||
case _:
|
||||
pass
|
||||
match control_name:
|
||||
case str():
|
||||
# logger.debug(f"Lookup control by name {control_name}")
|
||||
query = query.filter(cls.name.startswith(control_name))
|
||||
limit = 1
|
||||
case _:
|
||||
pass
|
||||
return cls.execute_query(query=query, limit=limit)
|
||||
|
||||
@classmethod
|
||||
def make_chart(cls, parent, chart_settings: dict, ctx):
|
||||
from frontend.visualizations.pcr_charts import PCRFigure
|
||||
parent.mode_typer.clear()
|
||||
parent.mode_typer.setEnabled(False)
|
||||
report = Report()
|
||||
controls = cls.query(sub_type=chart_settings['sub_type'], start_date=chart_settings['start_date'], end_date=chart_settings['end_date'])
|
||||
data = [control.to_sub_dict() for control in controls]
|
||||
df = DataFrame.from_records(data)
|
||||
try:
|
||||
df = df[df.ct > 0.0]
|
||||
except AttributeError:
|
||||
df = df
|
||||
fig = PCRFigure(df=df, modes=None)
|
||||
return report, fig
|
||||
|
||||
|
||||
class IridaControl(Control):
|
||||
id = Column(INTEGER, ForeignKey('_control.id'), primary_key=True)
|
||||
contains = Column(JSON) #: unstructured hashes in contains.tsv for each organism
|
||||
matches = Column(JSON) #: unstructured hashes in matches.tsv for each organism
|
||||
kraken = Column(JSON) #: unstructured output from kraken_report
|
||||
submission_id = Column(INTEGER, ForeignKey("_basicsubmission.id")) #: parent submission id
|
||||
submission = relationship("BacterialCulture", back_populates="controls",
|
||||
foreign_keys=[submission_id]) #: parent submission
|
||||
sub_type = Column(String(16), nullable=False) #: EN-NOS, MCS-NOS, etc
|
||||
refseq_version = Column(String(16)) #: version of refseq used in fastq parsing
|
||||
kraken2_version = Column(String(16)) #: version of kraken2 used in fastq parsing
|
||||
kraken2_db_version = Column(String(32)) #: folder name of kraken2 db
|
||||
sample = relationship("BacterialCultureSample", back_populates="control") #: This control's submission sample
|
||||
sample_id = Column(INTEGER,
|
||||
ForeignKey("_basicsample.id", ondelete="SET NULL", name="cont_BCS_id")) #: sample id key
|
||||
# submission_id = Column(INTEGER, ForeignKey("_basicsubmission.id")) #: parent submission id
|
||||
# submission = relationship("BacterialCulture", back_populates="controls",
|
||||
# foreign_keys=[submission_id]) #: parent submission
|
||||
|
||||
def __repr__(self) -> str:
    """Render the control as ``<Control(name)>``."""
    return "<Control({})>".format(self.name)
|
||||
__mapper_args__ = dict(polymorphic_identity="Irida Control",
|
||||
polymorphic_load="inline",
|
||||
inherit_condition=(id == Control.id))
|
||||
|
||||
@validates("sub_type")
|
||||
def enforce_subtype_literals(self, key: str, value: str):
    """
    Reject sub types whose upper-cased form is not one of the accepted literals.

    Args:
        key (str): name of the attribute being validated (unused).
        value (str): proposed sub type.

    Returns:
        str: value, unchanged.

    Raises:
        KeyError: if value.upper() is not in the accepted list.
    """
    acceptables = ['ATCC49226', 'ATCC49619', 'EN-NOS', "EN-SSTI", "MCS-NOS", "MCS-SSTI", "SN-NOS", "SN-SSTI"]
    if value.upper() in acceptables:
        return value
    raise KeyError(f"Sub-type must be in {acceptables}")
|
||||
|
||||
def to_sub_dict(self) -> dict:
|
||||
"""
|
||||
Converts object into convenient dictionary for use in submission summary
|
||||
Converts object into convenient dictionary for use in submission summary
|
||||
|
||||
Returns:
|
||||
dict: output dictionary containing: Name, Type, Targets, Top Kraken results
|
||||
"""
|
||||
Returns:
|
||||
dict: output dictionary containing: Name, Type, Targets, Top Kraken results
|
||||
"""
|
||||
# logger.debug("loading json string into dict")
|
||||
try:
|
||||
kraken = self.kraken
|
||||
@@ -153,25 +363,27 @@ class Control(BaseClass):
|
||||
else:
|
||||
targets = ["None"]
|
||||
# logger.debug("constructing output dictionary")
|
||||
output = {
|
||||
"name": self.name,
|
||||
"type": self.controltype.name,
|
||||
"targets": ", ".join(targets),
|
||||
"kraken": new_kraken[0:10]
|
||||
}
|
||||
output = dict(
|
||||
name=self.name,
|
||||
type=self.controltype.name,
|
||||
targets=", ".join(targets),
|
||||
kraken=new_kraken[0:10]
|
||||
)
|
||||
return output
|
||||
|
||||
def convert_by_mode(self, mode: Literal['kraken', 'matches', 'contains']) -> List[dict]:
|
||||
def convert_by_mode(self, control_sub_type: str, mode: Literal['kraken', 'matches', 'contains'],
|
||||
consolidate: bool = False) -> Generator[dict, None, None]:
|
||||
"""
|
||||
split this instance into analysis types for controls graphs
|
||||
|
||||
Args:
|
||||
mode (str): analysis type, 'contains', etc
|
||||
consolidate (bool): whether to merge all off-target genera. Defaults to False
|
||||
control_sub_type (str): control subtype, 'MCS-NOS', etc.
|
||||
mode (str): analysis type, 'contains', etc.
|
||||
|
||||
Returns:
|
||||
List[dict]: list of records
|
||||
"""
|
||||
output = []
|
||||
# logger.debug("load json string for mode (i.e. contains, matches, kraken2)")
|
||||
try:
|
||||
data = self.__getattribute__(mode)
|
||||
@@ -179,6 +391,18 @@ class Control(BaseClass):
|
||||
data = {}
|
||||
if data is None:
|
||||
data = {}
|
||||
# NOTE: Data truncation and consolidation.
|
||||
if "kraken" in mode:
|
||||
data = {k: v for k, v in sorted(data.items(), key=lambda d: d[1][f"{mode}_count"], reverse=True)[:50]}
|
||||
else:
|
||||
if consolidate:
|
||||
on_tar = {k: v for k, v in data.items() if k.strip("*") in self.controltype.targets[control_sub_type]}
|
||||
# logger.debug(f"Consolidating off-targets to: {self.controltype.targets[control_sub_type]}")
|
||||
off_tar = sum(v[f'{mode}_ratio'] for k, v in data.items() if
|
||||
k.strip("*") not in self.controltype.targets[control_sub_type])
|
||||
on_tar['Off-target'] = {f"{mode}_ratio": off_tar}
|
||||
data = on_tar
|
||||
# logger.debug(pformat(data))
|
||||
# logger.debug(f"Length of data: {len(data)}")
|
||||
# logger.debug("dict keys are genera of bacteria, e.g. 'Streptococcus'")
|
||||
for genus in data:
|
||||
@@ -186,17 +410,13 @@ class Control(BaseClass):
|
||||
name=self.name,
|
||||
submitted_date=self.submitted_date,
|
||||
genus=genus,
|
||||
target='Target' if genus.strip("*") in self.controltype.targets else "Off-target"
|
||||
target='Target' if genus.strip("*") in self.controltype.targets[control_sub_type] else "Off-target"
|
||||
)
|
||||
# logger.debug("get Target or Off-target of genus")
|
||||
# logger.debug("set 'contains_hashes', etc for genus")
|
||||
for key in data[genus]:
|
||||
_dict[key] = data[genus][key]
|
||||
output.append(_dict)
|
||||
# logger.debug("Have to triage kraken data to keep program from getting overwhelmed")
|
||||
if "kraken" in mode:
|
||||
output = sorted(output, key=lambda d: d[f"{mode}_count"], reverse=True)[:50]
|
||||
return output
|
||||
yield _dict
|
||||
|
||||
@classmethod
|
||||
def get_modes(cls) -> List[str]:
|
||||
@@ -217,7 +437,7 @@ class Control(BaseClass):
|
||||
@classmethod
|
||||
@setup_lookup
|
||||
def query(cls,
|
||||
control_type: ControlType | str | None = None,
|
||||
sub_type: str | None = None,
|
||||
start_date: date | str | int | None = None,
|
||||
end_date: date | str | int | None = None,
|
||||
control_name: str | None = None,
|
||||
@@ -227,7 +447,7 @@ class Control(BaseClass):
|
||||
Lookup control objects in the database based on a number of parameters.
|
||||
|
||||
Args:
|
||||
control_type (models.ControlType | str | None, optional): Control archetype. Defaults to None.
|
||||
sub_type (models.ControlType | str | None, optional): Control archetype. Defaults to None.
|
||||
start_date (date | str | int | None, optional): Beginning date to search by. Defaults to 2023-01-01 if end_date not None.
|
||||
end_date (date | str | int | None, optional): End date to search by. Defaults to today if start_date not None.
|
||||
control_name (str | None, optional): Name of control. Defaults to None.
|
||||
@@ -238,13 +458,14 @@ class Control(BaseClass):
|
||||
"""
|
||||
query: Query = cls.__database_session__.query(cls)
|
||||
# NOTE: by control type
|
||||
match control_type:
|
||||
case ControlType():
|
||||
# logger.debug(f"Looking up control by control type: {control_type}")
|
||||
query = query.filter(cls.controltype == control_type)
|
||||
match sub_type:
|
||||
# case ControlType():
|
||||
# # logger.debug(f"Looking up control by control type: {sub_type}")
|
||||
# query = query.filter(cls.controltype == sub_type)
|
||||
case str():
|
||||
# logger.debug(f"Looking up control by control type: {control_type}")
|
||||
query = query.join(ControlType).filter(ControlType.name == control_type)
|
||||
# logger.debug(f"Looking up control by control type: {sub_type}")
|
||||
# query = query.join(ControlType).filter(ControlType.name == sub_type)
|
||||
query = query.filter(cls.sub_type == sub_type)
|
||||
case _:
|
||||
pass
|
||||
# NOTE: by date range
|
||||
@@ -287,3 +508,241 @@ class Control(BaseClass):
|
||||
case _:
|
||||
pass
|
||||
return cls.execute_query(query=query, limit=limit)
|
||||
|
||||
@classmethod
|
||||
def make_parent_buttons(cls, parent: QWidget) -> None:
    """
    Add a "Consolidate Off-targets" checkbox, checked by default, to a new row of the parent's layout.

    Toggling the checkbox calls parent.controls_getter_function.

    Args:
        parent (QWidget): chart holding widget to add buttons to.

    Returns:
        None
    """
    super().make_parent_buttons(parent=parent)
    grid = parent.layout
    next_row = grid.rowCount()
    logger.debug(f"Parent rows: {next_row}")
    consolidate_box = QCheckBox(parent)
    consolidate_box.setChecked(True)
    consolidate_box.setObjectName("irida_check")
    consolidate_box.setToolTip("Pools off-target genera to save time.")
    caption = QLabel("Consolidate Off-targets")
    grid.addWidget(caption, next_row, 0, 1, 1)
    grid.addWidget(consolidate_box, next_row, 1, 1, 2)
    # NOTE: connect last so the initial setChecked above does not fire the slot.
    consolidate_box.checkStateChanged.connect(parent.controls_getter_function)
|
||||
|
||||
@classmethod
|
||||
@report_result
|
||||
def make_chart(cls, chart_settings: dict, parent, ctx) -> Tuple[Report, "IridaFigure" | None]:
    """
    Build the Irida controls chart for the requested sub type, mode and date range.

    Args:
        chart_settings (dict): settings passed down from chart widget; the keys 'sub_type', 'start_date',
            'end_date', 'mode', 'sub_mode' and 'months' are read here.
        parent: chart holding widget; searched for the "irida_check" checkbox and handed to the figure.
        ctx (Settings): settings passed down from gui

    Returns:
        Tuple[Report, IridaFigure | None]: report and figure; the figure is None (with a "Critical" result
        in the report) when no controls or no data are found.
    """
    from frontend.visualizations import IridaFigure
    try:
        checker = parent.findChild(QCheckBox, name="irida_check")
        if chart_settings['mode'] == "kraken":
            # NOTE: consolidation is switched off and locked for kraken mode.
            checker.setEnabled(False)
            checker.setChecked(False)
        else:
            checker.setEnabled(True)
        consolidate = checker.isChecked()
    except AttributeError:
        # NOTE: no usable checkbox (e.g. findChild returned None), so don't consolidate.
        consolidate = False
    report = Report()
    controls = cls.query(sub_type=chart_settings['sub_type'], start_date=chart_settings['start_date'],
                         end_date=chart_settings['end_date'])
    if not controls:
        report.add_result(Result(status="Critical", msg="No controls found in given date range."))
        return report, None
    # NOTE: change each control to records and flatten to a one dimensional list
    data = [record
            for control in controls
            for record in control.convert_by_mode(control_sub_type=chart_settings['sub_type'],
                                                  mode=chart_settings['mode'],
                                                  consolidate=consolidate)]
    if not data:
        report.add_result(Result(status="Critical", msg="No data found for controls in given date range."))
        return report, None
    df = cls.convert_data_list_to_df(input_df=data, sub_mode=chart_settings['sub_mode'])
    # NOTE: without a sub mode the title is the mode alone (previously this assigned the None sub_mode).
    if chart_settings['sub_mode'] is None:
        title = chart_settings['mode']
    else:
        title = f"{chart_settings['mode']} - {chart_settings['sub_mode']}"
    # NOTE: send dataframe to chart maker
    df, modes = cls.prep_df(ctx=ctx, df=df)
    fig = IridaFigure(df=df, ytitle=title, modes=modes, parent=parent,
                      months=chart_settings['months'])
    return report, fig
|
||||
|
||||
@classmethod
|
||||
def convert_data_list_to_df(cls, input_df: list[dict], sub_mode) -> DataFrame:
    """
    Convert list of control records to dataframe

    Args:
        input_df (list[dict]): list of dictionaries containing records
        sub_mode (str | None): the single data column to keep besides name, submitted_date, genus and
            target; None keeps every column.

    Returns:
        DataFrame: dataframe of controls with colliding dates displaced and columns renamed
    """
    frame = DataFrame.from_records(input_df)
    keepers = ['name', 'submitted_date', 'genus', 'target']
    for col in frame.columns:
        if col not in keepers:
            if not (sub_mode is None or col == sub_mode):
                continue
            keepers.append(col)
        if "percent" not in col:
            continue
        count_col = next((candidate for candidate in frame.columns if "count" in candidate), None)
        if count_col is None:
            continue
        # NOTE: The actual percentage from kraken was off due to exclusion of NaN, recalculating.
        totals = frame.groupby('name')[count_col].transform('sum')
        frame[col] = 100 * frame[count_col] / totals
    frame = frame[[col for col in frame.columns if col in keepers]]
    # NOTE: move date of sample submitted on same date as previous ahead one.
    frame = cls.displace_date(df=frame)
    # NOTE: ad hoc method to make data labels more accurate.
    return cls.df_column_renamer(df=frame)
|
||||
|
||||
@classmethod
|
||||
def df_column_renamer(cls, df: DataFrame) -> DataFrame:
    """
    Drop the '_hashes' columns and give the remaining data columns more descriptive names.

    Args:
        df (DataFrame): input dataframe

    Returns:
        DataFrame: dataframe with 'clarified' column names
    """
    hash_columns = list(df.filter(regex='_hashes'))
    kept_columns = df.columns.drop(hash_columns)
    clearer_names = {
        "contains_ratio": "contains_shared_hashes_ratio",
        "matches_ratio": "matches_shared_hashes_ratio",
        "kraken_count": "kraken2_read_count_(top_50)",
        "kraken_percent": "kraken2_read_percent_(top_50)"
    }
    trimmed = df[kept_columns]
    return trimmed.rename(columns=clearer_names)
|
||||
|
||||
@classmethod
|
||||
def displace_date(cls, df: DataFrame) -> DataFrame:
    """
    Separate controls that share a submitted date by pushing later ones forward.

    Controls are visited in sorted name order; each is handed to check_date together with the set of
    dates already taken.

    Args:
        df (DataFrame): input dataframe composed of control records

    Returns:
        DataFrame: output dataframe with dates incremented.
    """
    # NOTE: get submitted dates for each control (taken from the first row of each name)
    records = []
    for control_name in sorted(df['name'].unique()):
        first_row = df[df['name'] == control_name].iloc[0]
        records.append({"name": control_name, "date": first_row['submitted_date']})
    seen_dates = set()
    for record in records:
        df, seen_dates = cls.check_date(df=df, item=record, previous_dates=seen_dates)
    return df
|
||||
|
||||
@classmethod
|
||||
def check_date(cls, df: DataFrame, item: dict, previous_dates: set) -> Tuple[DataFrame, set]:
    """
    Checks if an item's date is already present in previous_dates and adjusts df accordingly.

    While the item's date collides with an already recorded date, every row of df with the item's name is
    moved forward one day and the check is repeated with the new date.

    Args:
        df (DataFrame): input dataframe; 'submitted_date' is modified in place on a collision
        item (dict): control for checking, with 'name' and 'date' keys; 'date' is updated on a collision
        previous_dates (set): dates found in previous controls; the item's final date is added to it

    Returns:
        Tuple[DataFrame, set]: Output dataframe and updated set of previous dates
    """
    one_day = timedelta(days=1)
    # NOTE: loop instead of recursing so a long run of occupied dates cannot hit the recursion limit.
    while item['date'] in previous_dates:
        # NOTE: get df locations where name == item name
        mask = df['name'] == item['name']
        # NOTE: increment date in dataframe
        df.loc[mask, 'submitted_date'] = df.loc[mask, 'submitted_date'].apply(lambda x: x + one_day)
        item['date'] += one_day
        logger.warning(f"Date check failed, running recursion")
    previous_dates.add(item['date'])
    return df, previous_dates
|
||||
|
||||
@classmethod
|
||||
def prep_df(cls, ctx: Settings, df: DataFrame) -> Tuple[DataFrame | None, list]:
    """
    Cleans and sorts the controls dataframe and works out which columns are chartable modes.

    Args:
        ctx (Settings): settings passed down from gui
        df (pd.DataFrame): input dataframe; 'genus' is rewritten and a 'genera' column is added in place

    Returns:
        Tuple[DataFrame | None, list]: sorted dataframe and the list of mode columns, or (None, []) when
        the input dataframe is empty.
    """
    if df.empty:
        return None, []
    # NOTE: record which genera are starred BEFORE the stars are stripped, otherwise every flag is empty.
    df['genera'] = [item[-1] if item and item[-1] == "*" else "" for item in df['genus'].to_list()]
    # NOTE: converts starred genera to normal (raw string so the regex escape is not a string escape)
    df['genus'] = df['genus'].replace({r'\*': ''}, regex=True).replace({"NaN": "Unknown"})
    # NOTE: remove original runs, using reruns if applicable
    df = cls.drop_reruns_from_df(ctx=ctx, df=df)
    # NOTE: sort by and exclude from
    sorts = ['submitted_date', "target", "genus"]
    exclude = ['name', 'genera']
    modes = [item for item in df.columns if item not in sorts and item not in exclude]
    # NOTE: 'target' sorts descending, everything else ascending.
    ascending = [item != "target" for item in sorts]
    df = df.sort_values(by=sorts, ascending=ascending)
    # NOTE: actual chart construction is done by the caller
    return df, modes
|
||||
|
||||
@classmethod
|
||||
def drop_reruns_from_df(cls, ctx: Settings, df: DataFrame) -> DataFrame:
    """
    Removes semi-duplicates from dataframe after finding sequencing repeats.

    A sample name matching ctx.rerun_regex is treated as a rerun; the name with the match removed is
    taken to be the original run, and rows with that name are dropped.

    Args:
        ctx (Settings): settings passed from gui; nothing is dropped unless it contains 'rerun_regex'
        df (DataFrame): initial dataframe

    Returns:
        DataFrame: dataframe with originals removed in favour of repeats.
    """
    if 'rerun_regex' in ctx:
        sample_names = get_unique_values_in_df_column(df, column_name="name")
        rerun_regex = re.compile(fr"{ctx.rerun_regex}")
        exclude = [rerun_regex.sub("", sample) for sample in sample_names if rerun_regex.search(sample)]
        # NOTE: row-wise membership mask; `df.name not in exclude` is a single bool and cannot filter rows.
        df = df[~df['name'].isin(exclude)]
    return df
|
||||
|
||||
@@ -13,7 +13,7 @@ from tempfile import TemporaryDirectory, TemporaryFile
|
||||
from operator import itemgetter
|
||||
from pprint import pformat
|
||||
from . import BaseClass, Reagent, SubmissionType, KitType, Organization, Contact
|
||||
from sqlalchemy import Column, String, TIMESTAMP, INTEGER, ForeignKey, JSON, FLOAT, case, desc
|
||||
from sqlalchemy import Column, String, TIMESTAMP, INTEGER, ForeignKey, JSON, FLOAT, case
|
||||
from sqlalchemy.orm import relationship, validates, Query
|
||||
from sqlalchemy.orm.attributes import flag_modified
|
||||
from sqlalchemy.ext.associationproxy import association_proxy
|
||||
@@ -22,7 +22,6 @@ from sqlalchemy.exc import OperationalError as AlcOperationalError, IntegrityErr
|
||||
from sqlite3 import OperationalError as SQLOperationalError, IntegrityError as SQLIntegrityError
|
||||
import pandas as pd
|
||||
from openpyxl import Workbook
|
||||
from openpyxl.worksheet.worksheet import Worksheet
|
||||
from openpyxl.drawing.image import Image as OpenpyxlImage
|
||||
from tools import row_map, setup_lookup, jinja_template_loading, rreplace, row_keys, check_key_or_attr, Result, Report, \
|
||||
report_result
|
||||
@@ -32,8 +31,6 @@ from dateutil.parser import parse
|
||||
from pathlib import Path
|
||||
from jinja2.exceptions import TemplateNotFound
|
||||
from jinja2 import Template
|
||||
from docxtpl import InlineImage
|
||||
from docx.shared import Inches
|
||||
from PIL import Image
|
||||
|
||||
logger = logging.getLogger(f"submissions.{__name__}")
|
||||
@@ -74,6 +71,8 @@ class BasicSubmission(BaseClass):
|
||||
contact_id = Column(INTEGER, ForeignKey("_contact.id", ondelete="SET NULL",
|
||||
name="fk_BS_contact_id")) #: client lab id from _organizations
|
||||
custom = Column(JSON)
|
||||
controls = relationship("Control", back_populates="submission",
|
||||
uselist=True) #: A control sample added to submission
|
||||
|
||||
submission_sample_associations = relationship(
|
||||
"SubmissionSampleAssociation",
|
||||
@@ -114,7 +113,6 @@ class BasicSubmission(BaseClass):
|
||||
# NOTE: Allows for subclassing into ex. BacterialCulture, Wastewater, etc.
|
||||
__mapper_args__ = {
|
||||
"polymorphic_identity": "Basic Submission",
|
||||
# "polymorphic_on": submission_type_name,
|
||||
"polymorphic_on": case(
|
||||
|
||||
(submission_type_name == "Wastewater", "Wastewater"),
|
||||
@@ -190,7 +188,7 @@ class BasicSubmission(BaseClass):
|
||||
# NOTE: Singles tells the query which fields to set limit to 1
|
||||
dicto['singles'] = parent_defs['singles']
|
||||
# logger.debug(dicto['singles'])
|
||||
# NOTE: Grab subtype specific info.
|
||||
# NOTE: Grab mode_sub_type specific info.
|
||||
output = {}
|
||||
for k, v in dicto.items():
|
||||
if len(args) > 0 and k not in args:
|
||||
@@ -960,7 +958,6 @@ class BasicSubmission(BaseClass):
|
||||
pcr_sample_map = cls.get_submission_type().sample_map['pcr_samples']
|
||||
# logger.debug(f'sample map: {pcr_sample_map}')
|
||||
main_sheet = xl[pcr_sample_map['main_sheet']]
|
||||
# samples = []
|
||||
fields = {k: v for k, v in pcr_sample_map.items() if k not in ['main_sheet', 'start_row']}
|
||||
for row in main_sheet.iter_rows(min_row=pcr_sample_map['start_row']):
|
||||
idx = row[0].row
|
||||
@@ -969,12 +966,11 @@ class BasicSubmission(BaseClass):
|
||||
sheet = xl[v['sheet']]
|
||||
sample[k] = sheet.cell(row=idx, column=v['column']).value
|
||||
yield sample
|
||||
# samples.append(sample)
|
||||
# return samples
|
||||
|
||||
@classmethod
|
||||
def parse_pcr_controls(cls, xl: Workbook) -> list:
|
||||
def parse_pcr_controls(cls, xl: Workbook, rsl_plate_num: str) -> list:
|
||||
location_map = cls.get_submission_type().sample_map['pcr_controls']
|
||||
submission = cls.query(rsl_plate_num=rsl_plate_num)
|
||||
name_column = 1
|
||||
for item in location_map:
|
||||
logger.debug(f"Looking for {item['name']}")
|
||||
@@ -983,7 +979,29 @@ class BasicSubmission(BaseClass):
|
||||
for cell in row:
|
||||
if cell.value == item['name']:
|
||||
logger.debug(f"Pulling from row {iii}, column {item['ct_column']}")
|
||||
yield dict(name=item['name'], ct=worksheet.cell(row=iii, column=item['ct_column']).value)
|
||||
subtype, target = item['name'].split("-")
|
||||
ct = worksheet.cell(row=iii, column=item['ct_column']).value
|
||||
if subtype == "PC":
|
||||
ctrl = next((assoc.reagent for assoc in submission.submission_reagent_associations
|
||||
if any(["positive control" in item.name.lower() for item in assoc.reagent.role])), None)
|
||||
elif subtype == "NC":
|
||||
ctrl = next((assoc.reagent for assoc in submission.submission_reagent_associations
|
||||
if any(["molecular grade water" in item.name.lower() for item in assoc.reagent.role])), None)
|
||||
try:
|
||||
ct = float(ct)
|
||||
except ValueError:
|
||||
ct = 0.0
|
||||
if ctrl:
|
||||
ctrl = ctrl.lot
|
||||
else:
|
||||
ctrl = None
|
||||
yield dict(
|
||||
name=f"{rsl_plate_num}<{item['name']}>",
|
||||
ct=ct,
|
||||
subtype=subtype,
|
||||
target=target,
|
||||
reagent_lot=ctrl
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def filename_template(cls) -> str:
|
||||
@@ -996,21 +1014,6 @@ class BasicSubmission(BaseClass):
|
||||
"""
|
||||
return "{{ rsl_plate_num }}"
|
||||
|
||||
# @classmethod
|
||||
# def custom_sample_autofill_row(cls, sample, worksheet: Worksheet) -> int:
|
||||
# """
|
||||
# Updates row information
|
||||
#
|
||||
# Args:
|
||||
# sample (_type_): _description_
|
||||
# worksheet (Workbook): _description_
|
||||
#
|
||||
# Returns:
|
||||
# int: New row number
|
||||
# """
|
||||
# logger.debug(f"Sample from args: {sample}")
|
||||
# return None
|
||||
|
||||
@classmethod
|
||||
def adjust_autofill_samples(cls, samples: List[Any]) -> List[Any]:
|
||||
"""
|
||||
@@ -1025,19 +1028,6 @@ class BasicSubmission(BaseClass):
|
||||
logger.info(f"Hello from {cls.__mapper_args__['polymorphic_identity']} sampler")
|
||||
return samples
|
||||
|
||||
# def adjust_to_dict_samples(self, backup: bool = False) -> List[dict]:
|
||||
# """
|
||||
# Updates sample dictionaries with custom values
|
||||
#
|
||||
# Args:
|
||||
# backup (bool, optional): Whether to perform backup. Defaults to False.
|
||||
#
|
||||
# Returns:
|
||||
# List[dict]: Updated dictionaries
|
||||
# """
|
||||
# # logger.debug(f"Hello from {self.__class__.__name__} dictionary sample adjuster.")
|
||||
# return [item.to_sub_dict() for item in self.submission_sample_associations]
|
||||
|
||||
@classmethod
|
||||
def get_details_template(cls, base_dict: dict) -> Template:
|
||||
"""
|
||||
@@ -1380,8 +1370,7 @@ class BacterialCulture(BasicSubmission):
|
||||
derivative submission type from BasicSubmission
|
||||
"""
|
||||
id = Column(INTEGER, ForeignKey('_basicsubmission.id'), primary_key=True)
|
||||
controls = relationship("Control", back_populates="submission",
|
||||
uselist=True) #: A control sample added to submission
|
||||
|
||||
__mapper_args__ = dict(polymorphic_identity="Bacterial Culture",
|
||||
polymorphic_load="inline",
|
||||
inherit_condition=(id == BasicSubmission.id))
|
||||
@@ -1442,25 +1431,6 @@ class BacterialCulture(BasicSubmission):
|
||||
pos_control_reg.missing = False
|
||||
return pyd
|
||||
|
||||
# @classmethod
|
||||
# def custom_sample_autofill_row(cls, sample, worksheet: Worksheet) -> int:
|
||||
# """
|
||||
# Extends parent
|
||||
# """
|
||||
# # logger.debug(f"Checking {sample.well}")
|
||||
# # logger.debug(f"here's the worksheet: {worksheet}")
|
||||
# row = super().custom_sample_autofill_row(sample, worksheet)
|
||||
# df = pd.DataFrame(list(worksheet.values))
|
||||
# # logger.debug(f"Here's the dataframe: {df}")
|
||||
# idx = df[df[0] == sample.well]
|
||||
# if idx.empty:
|
||||
# new = f"{sample.well[0]}{sample.well[1:].zfill(2)}"
|
||||
# # logger.debug(f"Checking: {new}")
|
||||
# idx = df[df[0] == new]
|
||||
# # logger.debug(f"Here is the row: {idx}")
|
||||
# row = idx.index.to_list()[0]
|
||||
# return row + 1
|
||||
|
||||
@classmethod
|
||||
def custom_info_parser(cls, input_dict: dict, xl: Workbook | None = None, custom_fields: dict = {}) -> dict:
|
||||
input_dict = super().custom_info_parser(input_dict=input_dict, xl=xl, custom_fields=custom_fields)
|
||||
@@ -1548,7 +1518,7 @@ class Wastewater(BasicSubmission):
|
||||
for sample in samples:
|
||||
# NOTE: remove '-{target}' from controls
|
||||
sample['sample'] = re.sub('-N\\d$', '', sample['sample'])
|
||||
# # NOTE: if sample is already in output skip
|
||||
# NOTE: if sample is already in output skip
|
||||
if sample['sample'] in [item['sample'] for item in output]:
|
||||
logger.warning(f"Already have {sample['sample']}")
|
||||
continue
|
||||
@@ -1577,8 +1547,6 @@ class Wastewater(BasicSubmission):
|
||||
# @classmethod
|
||||
# def parse_pcr_controls(cls, xl: Workbook, location_map: list) -> list:
|
||||
|
||||
|
||||
|
||||
@classmethod
|
||||
def enforce_name(cls, instr: str, data: dict | None = {}) -> str:
|
||||
"""
|
||||
@@ -1681,15 +1649,17 @@ class Wastewater(BasicSubmission):
|
||||
obj (_type_): Parent widget
|
||||
"""
|
||||
from backend.excel import PCRParser
|
||||
from backend.db import PCRControl, ControlType
|
||||
from frontend.widgets import select_open_file
|
||||
report = Report()
|
||||
fname = select_open_file(obj=obj, file_extension="xlsx")
|
||||
if not fname:
|
||||
report.add_result(Result(msg="No file selected, cancelling.", status="Warning"))
|
||||
return report
|
||||
parser = PCRParser(filepath=fname)
|
||||
parser = PCRParser(filepath=fname, submission=self)
|
||||
self.set_attribute("pcr_info", parser.pcr)
|
||||
pcr_samples = [sample for sample in parser.samples]
|
||||
pcr_controls = [control for control in parser.controls]
|
||||
self.save(original=False)
|
||||
# logger.debug(f"Got {len(parser.samples)} samples to update!")
|
||||
# logger.debug(f"Parser samples: {parser.samples}")
|
||||
@@ -1700,6 +1670,16 @@ class Wastewater(BasicSubmission):
|
||||
except StopIteration:
|
||||
continue
|
||||
self.update_subsampassoc(sample=sample, input_dict=sample_dict)
|
||||
controltype = ControlType.query(name="PCR Control")
|
||||
logger.debug(parser.pcr)
|
||||
submitted_date = datetime.strptime(" ".join(parser.pcr['run_start_date/time'].split(" ")[:-1]),
|
||||
"%Y-%m-%d %I:%M:%S %p")
|
||||
for control in pcr_controls:
|
||||
new_control = PCRControl(**control)
|
||||
new_control.submitted_date = submitted_date
|
||||
new_control.controltype = controltype
|
||||
new_control.submission = self
|
||||
new_control.save()
|
||||
|
||||
|
||||
class WastewaterArtic(BasicSubmission):
|
||||
@@ -2207,7 +2187,7 @@ class BasicSample(BaseClass):
|
||||
|
||||
id = Column(INTEGER, primary_key=True) #: primary key
|
||||
submitter_id = Column(String(64), nullable=False, unique=True) #: identification from submitter
|
||||
sample_type = Column(String(32)) #: subtype of sample
|
||||
sample_type = Column(String(32)) #: mode_sub_type of sample
|
||||
|
||||
sample_submission_associations = relationship(
|
||||
"SubmissionSampleAssociation",
|
||||
@@ -2632,7 +2612,7 @@ class BacterialCultureSample(BasicSample):
|
||||
id = Column(INTEGER, ForeignKey('_basicsample.id'), primary_key=True)
|
||||
organism = Column(String(64)) #: bacterial specimen
|
||||
concentration = Column(String(16)) #: sample concentration
|
||||
control = relationship("Control", back_populates="sample", uselist=False)
|
||||
control = relationship("IridaControl", back_populates="sample", uselist=False)
|
||||
__mapper_args__ = dict(polymorphic_identity="Bacterial Culture Sample",
|
||||
polymorphic_load="inline",
|
||||
inherit_condition=(id == BasicSample.id))
|
||||
@@ -2677,7 +2657,7 @@ class SubmissionSampleAssociation(BaseClass):
|
||||
# reference to the Sample object
|
||||
sample = relationship(BasicSample, back_populates="sample_submission_associations") #: associated sample
|
||||
|
||||
base_sub_type = Column(String) #: string of subtype name
|
||||
base_sub_type = Column(String) #: string of mode_sub_type name
|
||||
|
||||
# Refers to the type of parent.
|
||||
# Hooooooo boy, polymorphic association type, now we're getting into the weeds!
|
||||
|
||||
@@ -675,7 +675,7 @@ class PCRParser(object):
|
||||
rsl_plate_num = self.submission_obj.rsl_plate_num
|
||||
self.pcr = self.parse_general()
|
||||
self.samples = self.submission_obj.parse_pcr(xl=self.xl, rsl_plate_num=rsl_plate_num)
|
||||
self.controls = self.submission_obj.parse_pcr_controls(xl=self.xl)
|
||||
self.controls = self.submission_obj.parse_pcr_controls(xl=self.xl, rsl_plate_num=rsl_plate_num)
|
||||
|
||||
def parse_general(self):
|
||||
"""
|
||||
|
||||
Reference in New Issue
Block a user