Updated controls for both Irida and PCR.

This commit is contained in:
lwark
2024-10-16 15:07:43 -05:00
parent 066d1af0f2
commit c3a4aac68b
11 changed files with 750 additions and 314 deletions

View File

@@ -2,14 +2,19 @@
All control related models.
"""
from __future__ import annotations
from sqlalchemy import Column, String, TIMESTAMP, JSON, INTEGER, ForeignKey
from sqlalchemy.orm import relationship, Query
from pprint import pformat
from PyQt6.QtWidgets import QWidget, QCheckBox, QLabel
from pandas import DataFrame
from sqlalchemy import Column, String, TIMESTAMP, JSON, INTEGER, ForeignKey, case, FLOAT
from sqlalchemy.orm import relationship, Query, validates
import logging, re
from operator import itemgetter
from . import BaseClass
from tools import setup_lookup
from datetime import date, datetime
from typing import List, Literal
from tools import setup_lookup, report_result, Result, Report, Settings, get_unique_values_in_df_column
from datetime import date, datetime, timedelta
from typing import List, Literal, Tuple, Generator
from dateutil.parser import parse
from re import Pattern
@@ -21,7 +26,7 @@ class ControlType(BaseClass):
Base class of a control archetype.
"""
id = Column(INTEGER, primary_key=True) #: primary key
name = Column(String(255), unique=True) #: controltype name (e.g. MCS)
name = Column(String(255), unique=True) #: controltype name (e.g. Irida Control)
targets = Column(JSON) #: organisms checked for
instances = relationship("Control", back_populates="controltype") #: control samples created of this type.
@@ -53,7 +58,7 @@ class ControlType(BaseClass):
pass
return cls.execute_query(query=query, limit=limit)
def get_subtypes(self, mode: Literal['kraken', 'matches', 'contains']) -> List[str]:
def get_modes(self, mode: Literal['kraken', 'matches', 'contains']) -> List[str]:
"""
Get subtypes associated with this controltype (currently used only for Kraken)
@@ -65,8 +70,10 @@ class ControlType(BaseClass):
"""
# NOTE: Get first instance since all should have same subtypes
# NOTE: Get mode of instance
if not self.instances:
return
jsoner = getattr(self.instances[0], mode)
# logger.debug(f"JSON out: {jsoner.keys()}")
# logger.debug(f"JSON retrieved: {jsoner.keys()}")
try:
# NOTE: Pick genera (all should have same subtypes)
genera = list(jsoner.keys())[0]
@@ -74,10 +81,14 @@ class ControlType(BaseClass):
return []
# NOTE: remove items that don't have relevant data
subtypes = [item for item in jsoner[genera] if "_hashes" not in item and "_ratio" not in item]
logger.debug(f"subtypes out: {pformat(subtypes)}")
return subtypes
def get_instance_class(self):
    """
    Returns the Control subclass whose polymorphic identity matches this control type's name.
    """
    return Control.find_polymorphic_subclass(polymorphic_identity=self.name)
@classmethod
def get_positive_control_types(cls) -> List[ControlType]:
def get_positive_control_types(cls) -> Generator[ControlType, None, None]:
"""
Gets list of Control types if they have targets
@@ -104,35 +115,234 @@ class Control(BaseClass):
"""
id = Column(INTEGER, primary_key=True) #: primary key
parent_id = Column(String,
ForeignKey("_controltype.id", name="fk_control_parent_id")) #: primary key of control type
controltype_name = Column(String, ForeignKey("_controltype.name", ondelete="SET NULL",
name="fk_BC_subtype_name")) #: name of joined submission type
controltype = relationship("ControlType", back_populates="instances",
foreign_keys=[parent_id]) #: reference to parent control type
foreign_keys=[controltype_name]) #: reference to parent control type
name = Column(String(255), unique=True) #: Sample ID
submitted_date = Column(TIMESTAMP) #: Date submitted to Robotics
submission_id = Column(INTEGER, ForeignKey("_basicsubmission.id")) #: parent submission id
submission = relationship("BasicSubmission", back_populates="controls",
foreign_keys=[submission_id]) #: parent submission
__mapper_args__ = {
"polymorphic_identity": "Basic Control",
"polymorphic_on": case(
(controltype_name == "PCR Control", "PCR Control"),
(controltype_name == "Irida Control", "Irida Control"),
else_="Basic Control"
),
"with_polymorphic": "*",
}
def __repr__(self) -> str:
return f"<{self.controltype_name}({self.name})>"
@classmethod
def find_polymorphic_subclass(cls, polymorphic_identity: str | ControlType | None = None,
attrs: dict | None = None):
"""
Find subclass based on polymorphic identity or relevant attributes.
Args:
polymorphic_identity (str | None, optional): String representing polymorphic identity. Defaults to None.
attrs (str | SubmissionType | None, optional): Attributes of the relevant class. Defaults to None.
Returns:
_type_: Subclass of interest.
"""
if isinstance(polymorphic_identity, dict):
# logger.debug(f"Controlling for dict value")
polymorphic_identity = polymorphic_identity['value']
if isinstance(polymorphic_identity, ControlType):
polymorphic_identity = polymorphic_identity.name
model = cls
match polymorphic_identity:
case str():
try:
model = cls.__mapper__.polymorphic_map[polymorphic_identity].class_
except Exception as e:
logger.error(
f"Could not get polymorph {polymorphic_identity} of {cls} due to {e}, falling back to BasicSubmission")
case _:
pass
if attrs and any([not hasattr(cls, attr) for attr in attrs.keys()]):
# NOTE: looks for first model that has all included kwargs
try:
model = next(subclass for subclass in cls.__subclasses__() if
all([hasattr(subclass, attr) for attr in attrs.keys()]))
except StopIteration as e:
raise AttributeError(
f"Couldn't find existing class/subclass of {cls} with all attributes:\n{pformat(attrs.keys())}")
logger.info(f"Recruiting model: {model}")
return model
@classmethod
def make_parent_buttons(cls, parent: QWidget) -> None:
    """
    Hook for adding control-type-specific widgets to the chart holder.
    Base implementation is a no-op; subclasses (e.g. IridaControl) override it.

    Args:
        parent (QWidget): chart holding widget to add buttons to.

    Returns:
        None
    """
    pass
@classmethod
def make_chart(cls, parent, chart_settings: dict, ctx):
    """
    Hook for building a figure for this control type. Base implementation returns None.

    Args:
        parent: chart holding widget (subclasses read checkboxes etc. from it).
        chart_settings (dict): settings passed down from chart widget
        ctx (Settings): settings passed down from gui

    Returns:
        None in the base class. NOTE(review): subclasses return (Report, figure)
        tuples instead — callers should handle both shapes; confirm intended.
    """
    return None
class PCRControl(Control):
id = Column(INTEGER, ForeignKey('_control.id'), primary_key=True)
subtype = Column(String(16)) #: PC or NC
target = Column(String(16)) #: N1, N2, etc.
ct = Column(FLOAT)
reagent_lot = Column(String(64), ForeignKey("_reagent.name", ondelete="SET NULL",
name="fk_reagent_lot"))
reagent = relationship("Reagent", foreign_keys=reagent_lot)
__mapper_args__ = dict(polymorphic_identity="PCR Control",
polymorphic_load="inline",
inherit_condition=(id == Control.id))
def to_sub_dict(self):
    """
    Flattens this PCR control into a plain record for dataframe/report use.

    Returns:
        dict: name, ct, subtype, target, reagent_lot and the submitted date (date only).
    """
    record = {
        "name": self.name,
        "ct": self.ct,
        "subtype": self.subtype,
        "target": self.target,
        "reagent_lot": self.reagent_lot,
        "submitted_date": self.submitted_date.date(),
    }
    return record
@classmethod
@setup_lookup
def query(cls,
          sub_type: str | None = None,
          start_date: date | str | int | None = None,
          end_date: date | str | int | None = None,
          control_name: str | None = None,
          limit: int = 0
          ) -> Control | List[Control]:
    """
    Lookup PCR control objects in the database based on a number of parameters.

    Args:
        sub_type (str | None, optional): Submission type name to filter by, joined
            through the parent submission. Defaults to None.
        start_date (date | str | int | None, optional): Beginning date to search by. Defaults to 2023-01-01 if end_date not None.
        end_date (date | str | int | None, optional): End date to search by. Defaults to today if start_date not None.
        control_name (str | None, optional): Name (prefix) of control; forces limit=1. Defaults to None.
        limit (int, optional): Maximum number of results to return (0 = all). Defaults to 0.

    Returns:
        models.Control|List[models.Control]: Control object(s) of interest.
    """
    query: Query = cls.__database_session__.query(cls)
    # NOTE: fill in the missing end of a half-open date range.
    if start_date is not None and end_date is None:
        logger.warning(f"Start date with no end date, using today.")
        end_date = date.today()
    if end_date is not None and start_date is None:
        logger.warning(f"End date with no start date, using Jan 1, 2023")
        start_date = date(2023, 1, 1)
    if start_date is not None:
        # NOTE: coerce start_date to a YYYY-MM-DD string regardless of input type.
        match start_date:
            case date():
                start_date = start_date.strftime("%Y-%m-%d")
            case int():
                # NOTE(review): int is treated as an Excel-style serial date
                # (hence the -2 offset) — confirm against callers.
                start_date = datetime.fromordinal(
                    datetime(1900, 1, 1).toordinal() + start_date - 2).date().strftime("%Y-%m-%d")
            case _:
                start_date = parse(start_date).strftime("%Y-%m-%d")
        # NOTE: same coercion for end_date.
        match end_date:
            case date():
                end_date = end_date.strftime("%Y-%m-%d")
            case int():
                end_date = datetime.fromordinal(datetime(1900, 1, 1).toordinal() + end_date - 2).date().strftime(
                    "%Y-%m-%d")
            case _:
                end_date = parse(end_date).strftime("%Y-%m-%d")
        query = query.filter(cls.submitted_date.between(start_date, end_date))
    match sub_type:
        case str():
            # NOTE: filter by submission type of the parent submission.
            from backend import BasicSubmission, SubmissionType
            query = query.join(BasicSubmission).join(SubmissionType).filter(SubmissionType.name == sub_type)
        case _:
            pass
    match control_name:
        case str():
            # NOTE: prefix match on name; a named lookup returns a single object.
            query = query.filter(cls.name.startswith(control_name))
            limit = 1
        case _:
            pass
    return cls.execute_query(query=query, limit=limit)
@classmethod
def make_chart(cls, parent, chart_settings: dict, ctx):
    """
    Builds the PCR control figure for the chart widget.

    Args:
        parent: chart holding widget; its mode selector is cleared/disabled since PCR controls have no modes.
        chart_settings (dict): settings passed down from chart widget ('sub_type', 'start_date', 'end_date').
        ctx (Settings): settings passed down from gui (unused here; kept for interface parity).

    Returns:
        Tuple[Report, PCRFigure]: empty report and the constructed figure.
    """
    from frontend.visualizations.pcr_charts import PCRFigure
    # NOTE: PCR controls have no analysis modes, so blank out the mode selector.
    parent.mode_typer.clear()
    parent.mode_typer.setEnabled(False)
    report = Report()
    controls = cls.query(sub_type=chart_settings['sub_type'], start_date=chart_settings['start_date'],
                         end_date=chart_settings['end_date'])
    data = [control.to_sub_dict() for control in controls]
    df = DataFrame.from_records(data)
    # BUGFIX: guard explicitly for a missing 'ct' column (empty result set) instead of
    # catching AttributeError around the no-op reassignment `df = df`.
    if "ct" in df.columns:
        # NOTE: drop rows with no usable Ct value.
        df = df[df.ct > 0.0]
    fig = PCRFigure(df=df, modes=None)
    return report, fig
class IridaControl(Control):
id = Column(INTEGER, ForeignKey('_control.id'), primary_key=True)
contains = Column(JSON) #: unstructured hashes in contains.tsv for each organism
matches = Column(JSON) #: unstructured hashes in matches.tsv for each organism
kraken = Column(JSON) #: unstructured output from kraken_report
submission_id = Column(INTEGER, ForeignKey("_basicsubmission.id")) #: parent submission id
submission = relationship("BacterialCulture", back_populates="controls",
foreign_keys=[submission_id]) #: parent submission
sub_type = Column(String(16), nullable=False) #: EN-NOS, MCS-NOS, etc
refseq_version = Column(String(16)) #: version of refseq used in fastq parsing
kraken2_version = Column(String(16)) #: version of kraken2 used in fastq parsing
kraken2_db_version = Column(String(32)) #: folder name of kraken2 db
sample = relationship("BacterialCultureSample", back_populates="control") #: This control's submission sample
sample_id = Column(INTEGER,
ForeignKey("_basicsample.id", ondelete="SET NULL", name="cont_BCS_id")) #: sample id key
# submission_id = Column(INTEGER, ForeignKey("_basicsubmission.id")) #: parent submission id
# submission = relationship("BacterialCulture", back_populates="controls",
# foreign_keys=[submission_id]) #: parent submission
def __repr__(self) -> str:
return f"<Control({self.name})>"
__mapper_args__ = dict(polymorphic_identity="Irida Control",
polymorphic_load="inline",
inherit_condition=(id == Control.id))
@validates("sub_type")
def enforce_subtype_literals(self, key: str, value: str):
    """
    SQLAlchemy validator restricting sub_type to the known control subtypes.

    Args:
        key (str): column name being validated (always 'sub_type').
        value (str): proposed sub_type value.

    Returns:
        str: the original value, unchanged.

    Raises:
        KeyError: if value (compared case-insensitively) is not an accepted subtype.
    """
    acceptables = ['ATCC49226', 'ATCC49619', 'EN-NOS', "EN-SSTI", "MCS-NOS", "MCS-SSTI", "SN-NOS", "SN-SSTI"]
    # NOTE(review): the check is case-insensitive but the value is stored as-is, so
    # lower-case variants pass validation yet persist in their original casing — confirm intended.
    if value.upper() not in acceptables:
        raise KeyError(f"Sub-type must be in {acceptables}")
    return value
def to_sub_dict(self) -> dict:
"""
Converts object into convenient dictionary for use in submission summary
Converts object into convenient dictionary for use in submission summary
Returns:
dict: output dictionary containing: Name, Type, Targets, Top Kraken results
"""
Returns:
dict: output dictionary containing: Name, Type, Targets, Top Kraken results
"""
# logger.debug("loading json string into dict")
try:
kraken = self.kraken
@@ -153,25 +363,27 @@ class Control(BaseClass):
else:
targets = ["None"]
# logger.debug("constructing output dictionary")
output = {
"name": self.name,
"type": self.controltype.name,
"targets": ", ".join(targets),
"kraken": new_kraken[0:10]
}
output = dict(
name=self.name,
type=self.controltype.name,
targets=", ".join(targets),
kraken=new_kraken[0:10]
)
return output
def convert_by_mode(self, mode: Literal['kraken', 'matches', 'contains']) -> List[dict]:
def convert_by_mode(self, control_sub_type: str, mode: Literal['kraken', 'matches', 'contains'],
consolidate: bool = False) -> Generator[dict, None, None]:
"""
split this instance into analysis types for controls graphs
Args:
mode (str): analysis type, 'contains', etc
consolidate (bool): whether to merge all off-target genera. Defaults to False
control_sub_type (str): control subtype, 'MCS-NOS', etc.
mode (str): analysis type, 'contains', etc.
Returns:
List[dict]: list of records
"""
output = []
# logger.debug("load json string for mode (i.e. contains, matches, kraken2)")
try:
data = self.__getattribute__(mode)
@@ -179,6 +391,18 @@ class Control(BaseClass):
data = {}
if data is None:
data = {}
# NOTE: Data truncation and consolidation.
if "kraken" in mode:
data = {k: v for k, v in sorted(data.items(), key=lambda d: d[1][f"{mode}_count"], reverse=True)[:50]}
else:
if consolidate:
on_tar = {k: v for k, v in data.items() if k.strip("*") in self.controltype.targets[control_sub_type]}
# logger.debug(f"Consolidating off-targets to: {self.controltype.targets[control_sub_type]}")
off_tar = sum(v[f'{mode}_ratio'] for k, v in data.items() if
k.strip("*") not in self.controltype.targets[control_sub_type])
on_tar['Off-target'] = {f"{mode}_ratio": off_tar}
data = on_tar
# logger.debug(pformat(data))
# logger.debug(f"Length of data: {len(data)}")
# logger.debug("dict keys are genera of bacteria, e.g. 'Streptococcus'")
for genus in data:
@@ -186,17 +410,13 @@ class Control(BaseClass):
name=self.name,
submitted_date=self.submitted_date,
genus=genus,
target='Target' if genus.strip("*") in self.controltype.targets else "Off-target"
target='Target' if genus.strip("*") in self.controltype.targets[control_sub_type] else "Off-target"
)
# logger.debug("get Target or Off-target of genus")
# logger.debug("set 'contains_hashes', etc for genus")
for key in data[genus]:
_dict[key] = data[genus][key]
output.append(_dict)
# logger.debug("Have to triage kraken data to keep program from getting overwhelmed")
if "kraken" in mode:
output = sorted(output, key=lambda d: d[f"{mode}_count"], reverse=True)[:50]
return output
yield _dict
@classmethod
def get_modes(cls) -> List[str]:
@@ -217,7 +437,7 @@ class Control(BaseClass):
@classmethod
@setup_lookup
def query(cls,
control_type: ControlType | str | None = None,
sub_type: str | None = None,
start_date: date | str | int | None = None,
end_date: date | str | int | None = None,
control_name: str | None = None,
@@ -227,7 +447,7 @@ class Control(BaseClass):
Lookup control objects in the database based on a number of parameters.
Args:
control_type (models.ControlType | str | None, optional): Control archetype. Defaults to None.
sub_type (models.ControlType | str | None, optional): Control archetype. Defaults to None.
start_date (date | str | int | None, optional): Beginning date to search by. Defaults to 2023-01-01 if end_date not None.
end_date (date | str | int | None, optional): End date to search by. Defaults to today if start_date not None.
control_name (str | None, optional): Name of control. Defaults to None.
@@ -238,13 +458,14 @@ class Control(BaseClass):
"""
query: Query = cls.__database_session__.query(cls)
# NOTE: by control type
match control_type:
case ControlType():
# logger.debug(f"Looking up control by control type: {control_type}")
query = query.filter(cls.controltype == control_type)
match sub_type:
# case ControlType():
# # logger.debug(f"Looking up control by control type: {sub_type}")
# query = query.filter(cls.controltype == sub_type)
case str():
# logger.debug(f"Looking up control by control type: {control_type}")
query = query.join(ControlType).filter(ControlType.name == control_type)
# logger.debug(f"Looking up control by control type: {sub_type}")
# query = query.join(ControlType).filter(ControlType.name == sub_type)
query = query.filter(cls.sub_type == sub_type)
case _:
pass
# NOTE: by date range
@@ -287,3 +508,241 @@ class Control(BaseClass):
case _:
pass
return cls.execute_query(query=query, limit=limit)
@classmethod
def make_parent_buttons(cls, parent: QWidget) -> None:
    """
    Adds the Irida-specific 'Consolidate Off-targets' checkbox below the parent chart.

    Args:
        parent (QWidget): chart holding widget to add buttons to.

    Returns:
        None
    """
    super().make_parent_buttons(parent=parent)
    # NOTE: append the new widgets on the next free grid row.
    rows = parent.layout.rowCount()
    logger.debug(f"Parent rows: {rows}")
    consolidate_box = QCheckBox(parent)
    consolidate_box.setObjectName("irida_check")
    consolidate_box.setChecked(True)
    consolidate_box.setToolTip("Pools off-target genera to save time.")
    label = QLabel("Consolidate Off-targets")
    parent.layout.addWidget(label, rows, 0, 1, 1)
    parent.layout.addWidget(consolidate_box, rows, 1, 1, 2)
    # NOTE: re-run the controls getter whenever the box is toggled.
    consolidate_box.checkStateChanged.connect(parent.controls_getter_function)
@classmethod
@report_result
def make_chart(cls, chart_settings: dict, parent, ctx) -> Tuple[Report, "IridaFigure" | None]:
    """
    Builds the Irida control figure for the chart widget.

    Args:
        chart_settings (dict): settings passed down from chart widget
            ('sub_type', 'mode', 'sub_mode', 'start_date', 'end_date', 'months').
        parent: chart holding widget (queried for the 'irida_check' checkbox).
        ctx (Settings): settings passed down from gui.

    Returns:
        Tuple[Report, IridaFigure | None]: report of problems and the figure (None if no data).
    """
    from frontend.visualizations import IridaFigure
    # NOTE: kraken mode cannot consolidate off-targets, so disable the checkbox there.
    try:
        checker = parent.findChild(QCheckBox, name="irida_check")
        if chart_settings['mode'] == "kraken":
            checker.setEnabled(False)
            checker.setChecked(False)
        else:
            checker.setEnabled(True)
        consolidate = checker.isChecked()
    except AttributeError:
        consolidate = False
    report = Report()
    controls = cls.query(sub_type=chart_settings['sub_type'], start_date=chart_settings['start_date'],
                         end_date=chart_settings['end_date'])
    if not controls:
        report.add_result(Result(status="Critical", msg="No controls found in given date range."))
        return report, None
    # NOTE: change each control to list of dictionaries
    data = [control.convert_by_mode(control_sub_type=chart_settings['sub_type'], mode=chart_settings['mode'],
                                    consolidate=consolidate) for
            control in controls]
    # NOTE: flatten data to one dimensional list
    data = [item for sublist in data for item in sublist]
    if not data:
        report.add_result(Result(status="Critical", msg="No data found for controls in given date range."))
        return report, None
    df = cls.convert_data_list_to_df(input_df=data, sub_mode=chart_settings['sub_mode'])
    # BUGFIX: the original set title = sub_mode when sub_mode was None, yielding a None
    # y-axis title; fall back to the mode name instead.
    if chart_settings['sub_mode'] is None:
        title = chart_settings['mode']
    else:
        title = f"{chart_settings['mode']} - {chart_settings['sub_mode']}"
    # NOTE: send dataframe to chart maker
    df, modes = cls.prep_df(ctx=ctx, df=df)
    fig = IridaFigure(df=df, ytitle=title, modes=modes, parent=parent,
                      months=chart_settings['months'])
    return report, fig
@classmethod
def convert_data_list_to_df(cls, input_df: list[dict], sub_mode) -> DataFrame:
    """
    Convert a list of control records to a dataframe, keeping only the identifying
    columns plus the selected analysis column(s).

    Args:
        input_df (list[dict]): list of dictionaries containing records
        sub_mode (str | None): single analysis column to keep; None keeps all analysis columns.

    Returns:
        DataFrame: dataframe of controls
    """
    df = DataFrame.from_records(input_df)
    # NOTE: columns always retained regardless of sub_mode.
    safe = ['name', 'submitted_date', 'genus', 'target']
    for column in df.columns:
        if column not in safe:
            # NOTE: when a sub_mode is selected, drop every other analysis column.
            if sub_mode is not None and column != sub_mode:
                continue
            else:
                safe.append(column)
            if "percent" in column:
                try:
                    count_col = next(item for item in df.columns if "count" in item)
                except StopIteration:
                    continue
                # NOTE: The actual percentage from kraken was off due to exclusion of NaN, recalculating.
                df[column] = 100 * df[count_col] / df.groupby('name')[count_col].transform('sum')
    df = df[[c for c in df.columns if c in safe]]
    # NOTE: move date of sample submitted on same date as previous ahead one.
    df = cls.displace_date(df=df)
    # NOTE: ad hoc method to make data labels more accurate.
    df = cls.df_column_renamer(df=df)
    return df
@classmethod
def df_column_renamer(cls, df: DataFrame) -> DataFrame:
    """
    Ad hoc clarification of analysis column names for display.

    Args:
        df (DataFrame): input dataframe

    Returns:
        DataFrame: dataframe with 'clarified' column names
    """
    # NOTE: hash columns are internal and never shown.
    hash_columns = list(df.filter(regex='_hashes'))
    df = df[df.columns.drop(hash_columns)]
    renames = {
        "contains_ratio": "contains_shared_hashes_ratio",
        "matches_ratio": "matches_shared_hashes_ratio",
        "kraken_count": "kraken2_read_count_(top_50)",
        "kraken_percent": "kraken2_read_percent_(top_50)",
    }
    return df.rename(columns=renames)
@classmethod
def displace_date(cls, df: DataFrame) -> DataFrame:
    """
    This function serves to split samples that were submitted on the same date by incrementing dates.
    It will shift the date forward by one day if it is the same day as an existing date in a list.

    Args:
        df (DataFrame): input dataframe composed of control records

    Returns:
        DataFrame: output dataframe with dates incremented.
    """
    # NOTE: one (name, submitted_date) pair per control, processed in name order
    # so displacement is deterministic.
    dict_list = [dict(name=item, date=df[df.name == item].iloc[0]['submitted_date']) for item in
                 sorted(df['name'].unique())]
    previous_dates = set()
    for item in dict_list:
        # NOTE: check_date mutates df rows and grows previous_dates as it claims dates.
        df, previous_dates = cls.check_date(df=df, item=item, previous_dates=previous_dates)
    return df
@classmethod
def check_date(cls, df: DataFrame, item: dict, previous_dates: set) -> Tuple[DataFrame, set]:
    """
    Checks if an item's date is already claimed and, if so, pushes that control's rows
    (and its own date) forward one day, recursing until the date is unique.

    Args:
        df (DataFrame): input dataframe
        item (dict): control record ('name', 'date') for checking
        previous_dates (set): dates claimed by previously processed controls

    Returns:
        Tuple[DataFrame, set]: output dataframe and updated set of previous dates
    """
    # BUGFIX: the membership test cannot raise IndexError; the dead try/except was removed.
    # Annotation also corrected: previous_dates is a set, not a list.
    check = item['date'] in previous_dates
    previous_dates.add(item['date'])
    if not check:
        # NOTE: date is unique; nothing to adjust.
        return df, previous_dates
    # NOTE: collision — increment this control's rows in the dataframe and its own date.
    mask = df['name'] == item['name']
    df.loc[mask, 'submitted_date'] = df.loc[mask, 'submitted_date'].apply(lambda x: x + timedelta(days=1))
    item['date'] += timedelta(days=1)
    # NOTE: re-check the incremented date until it no longer collides.
    logger.warning(f"Date check failed, running recursion")
    df, previous_dates = cls.check_date(df, item, previous_dates)
    return df, previous_dates
@classmethod
def prep_df(cls, ctx: Settings, df: DataFrame) -> Tuple[DataFrame | None, list]:
    """
    Prepares the parsed dataframe for figure construction: splits off the starred-genus
    markers, drops superseded runs, and sorts for plotting.

    Args:
        ctx (Settings): settings passed down from gui
        df (DataFrame): input dataframe

    Returns:
        Tuple[DataFrame | None, list]: sorted dataframe (None if input empty) and the
        list of plottable mode columns.
    """
    if df.empty:
        return None, []
    # BUGFIX: capture the trailing-star marker BEFORE stripping '*' from genus;
    # previously the strip ran first, so 'genera' was always the empty string.
    df['genera'] = [item[-1] if item and item[-1] == "*" else "" for item in df['genus'].to_list()]
    # NOTE: converts starred genera to normal
    df['genus'] = df['genus'].replace({'\*': ''}, regex=True).replace({"NaN": "Unknown"})
    # NOTE: remove original runs, using reruns if applicable
    df = cls.drop_reruns_from_df(ctx=ctx, df=df)
    # NOTE: sort by and exclude from
    sorts = ['submitted_date', "target", "genus"]
    exclude = ['name', 'genera']
    modes = [item for item in df.columns if item not in sorts and item not in exclude]
    # NOTE: 'target' sorts descending so 'Target' rows come before 'Off-target'.
    ascending = [False if item == "target" else True for item in sorts]
    df = df.sort_values(by=sorts, ascending=ascending)
    return df, modes
@classmethod
def drop_reruns_from_df(cls, ctx: Settings, df: DataFrame) -> DataFrame:
    """
    Removes semi-duplicates from dataframe after finding sequencing repeats.

    Args:
        ctx (Settings): settings passed from gui (may define 'rerun_regex')
        df (DataFrame): initial dataframe

    Returns:
        DataFrame: dataframe with originals removed in favour of repeats.
    """
    if 'rerun_regex' in ctx:
        sample_names = get_unique_values_in_df_column(df, column_name="name")
        rerun_regex = re.compile(fr"{ctx.rerun_regex}")
        # NOTE: an original's name is a rerun's name with the rerun marker removed.
        exclude = [re.sub(rerun_regex, "", sample) for sample in sample_names if rerun_regex.search(sample)]
        # BUGFIX: `df.name not in exclude` tested the Series *object* against the list
        # (always True/False as a scalar, breaking boolean indexing); use element-wise isin.
        df = df[~df['name'].isin(exclude)]
    return df

View File

@@ -13,7 +13,7 @@ from tempfile import TemporaryDirectory, TemporaryFile
from operator import itemgetter
from pprint import pformat
from . import BaseClass, Reagent, SubmissionType, KitType, Organization, Contact
from sqlalchemy import Column, String, TIMESTAMP, INTEGER, ForeignKey, JSON, FLOAT, case, desc
from sqlalchemy import Column, String, TIMESTAMP, INTEGER, ForeignKey, JSON, FLOAT, case
from sqlalchemy.orm import relationship, validates, Query
from sqlalchemy.orm.attributes import flag_modified
from sqlalchemy.ext.associationproxy import association_proxy
@@ -22,7 +22,6 @@ from sqlalchemy.exc import OperationalError as AlcOperationalError, IntegrityErr
from sqlite3 import OperationalError as SQLOperationalError, IntegrityError as SQLIntegrityError
import pandas as pd
from openpyxl import Workbook
from openpyxl.worksheet.worksheet import Worksheet
from openpyxl.drawing.image import Image as OpenpyxlImage
from tools import row_map, setup_lookup, jinja_template_loading, rreplace, row_keys, check_key_or_attr, Result, Report, \
report_result
@@ -32,8 +31,6 @@ from dateutil.parser import parse
from pathlib import Path
from jinja2.exceptions import TemplateNotFound
from jinja2 import Template
from docxtpl import InlineImage
from docx.shared import Inches
from PIL import Image
logger = logging.getLogger(f"submissions.{__name__}")
@@ -74,6 +71,8 @@ class BasicSubmission(BaseClass):
contact_id = Column(INTEGER, ForeignKey("_contact.id", ondelete="SET NULL",
name="fk_BS_contact_id")) #: client lab id from _organizations
custom = Column(JSON)
controls = relationship("Control", back_populates="submission",
uselist=True) #: A control sample added to submission
submission_sample_associations = relationship(
"SubmissionSampleAssociation",
@@ -114,7 +113,6 @@ class BasicSubmission(BaseClass):
# NOTE: Allows for subclassing into ex. BacterialCulture, Wastewater, etc.
__mapper_args__ = {
"polymorphic_identity": "Basic Submission",
# "polymorphic_on": submission_type_name,
"polymorphic_on": case(
(submission_type_name == "Wastewater", "Wastewater"),
@@ -190,7 +188,7 @@ class BasicSubmission(BaseClass):
# NOTE: Singles tells the query which fields to set limit to 1
dicto['singles'] = parent_defs['singles']
# logger.debug(dicto['singles'])
# NOTE: Grab subtype specific info.
# NOTE: Grab mode_sub_type specific info.
output = {}
for k, v in dicto.items():
if len(args) > 0 and k not in args:
@@ -960,7 +958,6 @@ class BasicSubmission(BaseClass):
pcr_sample_map = cls.get_submission_type().sample_map['pcr_samples']
# logger.debug(f'sample map: {pcr_sample_map}')
main_sheet = xl[pcr_sample_map['main_sheet']]
# samples = []
fields = {k: v for k, v in pcr_sample_map.items() if k not in ['main_sheet', 'start_row']}
for row in main_sheet.iter_rows(min_row=pcr_sample_map['start_row']):
idx = row[0].row
@@ -969,12 +966,11 @@ class BasicSubmission(BaseClass):
sheet = xl[v['sheet']]
sample[k] = sheet.cell(row=idx, column=v['column']).value
yield sample
# samples.append(sample)
# return samples
@classmethod
def parse_pcr_controls(cls, xl: Workbook) -> list:
def parse_pcr_controls(cls, xl: Workbook, rsl_plate_num: str) -> list:
location_map = cls.get_submission_type().sample_map['pcr_controls']
submission = cls.query(rsl_plate_num=rsl_plate_num)
name_column = 1
for item in location_map:
logger.debug(f"Looking for {item['name']}")
@@ -983,7 +979,29 @@ class BasicSubmission(BaseClass):
for cell in row:
if cell.value == item['name']:
logger.debug(f"Pulling from row {iii}, column {item['ct_column']}")
yield dict(name=item['name'], ct=worksheet.cell(row=iii, column=item['ct_column']).value)
subtype, target = item['name'].split("-")
ct = worksheet.cell(row=iii, column=item['ct_column']).value
if subtype == "PC":
ctrl = next((assoc.reagent for assoc in submission.submission_reagent_associations
if any(["positive control" in item.name.lower() for item in assoc.reagent.role])), None)
elif subtype == "NC":
ctrl = next((assoc.reagent for assoc in submission.submission_reagent_associations
if any(["molecular grade water" in item.name.lower() for item in assoc.reagent.role])), None)
try:
ct = float(ct)
except ValueError:
ct = 0.0
if ctrl:
ctrl = ctrl.lot
else:
ctrl = None
yield dict(
name=f"{rsl_plate_num}<{item['name']}>",
ct=ct,
subtype=subtype,
target=target,
reagent_lot=ctrl
)
@classmethod
def filename_template(cls) -> str:
@@ -996,21 +1014,6 @@ class BasicSubmission(BaseClass):
"""
return "{{ rsl_plate_num }}"
# @classmethod
# def custom_sample_autofill_row(cls, sample, worksheet: Worksheet) -> int:
# """
# Updates row information
#
# Args:
# sample (_type_): _description_
# worksheet (Workbook): _description_
#
# Returns:
# int: New row number
# """
# logger.debug(f"Sample from args: {sample}")
# return None
@classmethod
def adjust_autofill_samples(cls, samples: List[Any]) -> List[Any]:
"""
@@ -1025,19 +1028,6 @@ class BasicSubmission(BaseClass):
logger.info(f"Hello from {cls.__mapper_args__['polymorphic_identity']} sampler")
return samples
# def adjust_to_dict_samples(self, backup: bool = False) -> List[dict]:
# """
# Updates sample dictionaries with custom values
#
# Args:
# backup (bool, optional): Whether to perform backup. Defaults to False.
#
# Returns:
# List[dict]: Updated dictionaries
# """
# # logger.debug(f"Hello from {self.__class__.__name__} dictionary sample adjuster.")
# return [item.to_sub_dict() for item in self.submission_sample_associations]
@classmethod
def get_details_template(cls, base_dict: dict) -> Template:
"""
@@ -1380,8 +1370,7 @@ class BacterialCulture(BasicSubmission):
derivative submission type from BasicSubmission
"""
id = Column(INTEGER, ForeignKey('_basicsubmission.id'), primary_key=True)
controls = relationship("Control", back_populates="submission",
uselist=True) #: A control sample added to submission
__mapper_args__ = dict(polymorphic_identity="Bacterial Culture",
polymorphic_load="inline",
inherit_condition=(id == BasicSubmission.id))
@@ -1442,25 +1431,6 @@ class BacterialCulture(BasicSubmission):
pos_control_reg.missing = False
return pyd
# @classmethod
# def custom_sample_autofill_row(cls, sample, worksheet: Worksheet) -> int:
# """
# Extends parent
# """
# # logger.debug(f"Checking {sample.well}")
# # logger.debug(f"here's the worksheet: {worksheet}")
# row = super().custom_sample_autofill_row(sample, worksheet)
# df = pd.DataFrame(list(worksheet.values))
# # logger.debug(f"Here's the dataframe: {df}")
# idx = df[df[0] == sample.well]
# if idx.empty:
# new = f"{sample.well[0]}{sample.well[1:].zfill(2)}"
# # logger.debug(f"Checking: {new}")
# idx = df[df[0] == new]
# # logger.debug(f"Here is the row: {idx}")
# row = idx.index.to_list()[0]
# return row + 1
@classmethod
def custom_info_parser(cls, input_dict: dict, xl: Workbook | None = None, custom_fields: dict = {}) -> dict:
input_dict = super().custom_info_parser(input_dict=input_dict, xl=xl, custom_fields=custom_fields)
@@ -1548,7 +1518,7 @@ class Wastewater(BasicSubmission):
for sample in samples:
# NOTE: remove '-{target}' from controls
sample['sample'] = re.sub('-N\\d$', '', sample['sample'])
# # NOTE: if sample is already in output skip
# NOTE: if sample is already in output skip
if sample['sample'] in [item['sample'] for item in output]:
logger.warning(f"Already have {sample['sample']}")
continue
@@ -1577,8 +1547,6 @@ class Wastewater(BasicSubmission):
# @classmethod
# def parse_pcr_controls(cls, xl: Workbook, location_map: list) -> list:
@classmethod
def enforce_name(cls, instr: str, data: dict | None = {}) -> str:
"""
@@ -1681,15 +1649,17 @@ class Wastewater(BasicSubmission):
obj (_type_): Parent widget
"""
from backend.excel import PCRParser
from backend.db import PCRControl, ControlType
from frontend.widgets import select_open_file
report = Report()
fname = select_open_file(obj=obj, file_extension="xlsx")
if not fname:
report.add_result(Result(msg="No file selected, cancelling.", status="Warning"))
return report
parser = PCRParser(filepath=fname)
parser = PCRParser(filepath=fname, submission=self)
self.set_attribute("pcr_info", parser.pcr)
pcr_samples = [sample for sample in parser.samples]
pcr_controls = [control for control in parser.controls]
self.save(original=False)
# logger.debug(f"Got {len(parser.samples)} samples to update!")
# logger.debug(f"Parser samples: {parser.samples}")
@@ -1700,6 +1670,16 @@ class Wastewater(BasicSubmission):
except StopIteration:
continue
self.update_subsampassoc(sample=sample, input_dict=sample_dict)
controltype = ControlType.query(name="PCR Control")
logger.debug(parser.pcr)
submitted_date = datetime.strptime(" ".join(parser.pcr['run_start_date/time'].split(" ")[:-1]),
"%Y-%m-%d %I:%M:%S %p")
for control in pcr_controls:
new_control = PCRControl(**control)
new_control.submitted_date = submitted_date
new_control.controltype = controltype
new_control.submission = self
new_control.save()
class WastewaterArtic(BasicSubmission):
@@ -2207,7 +2187,7 @@ class BasicSample(BaseClass):
id = Column(INTEGER, primary_key=True) #: primary key
submitter_id = Column(String(64), nullable=False, unique=True) #: identification from submitter
sample_type = Column(String(32)) #: subtype of sample
sample_type = Column(String(32)) #: mode_sub_type of sample
sample_submission_associations = relationship(
"SubmissionSampleAssociation",
@@ -2632,7 +2612,7 @@ class BacterialCultureSample(BasicSample):
id = Column(INTEGER, ForeignKey('_basicsample.id'), primary_key=True)
organism = Column(String(64)) #: bacterial specimen
concentration = Column(String(16)) #: sample concentration
control = relationship("Control", back_populates="sample", uselist=False)
control = relationship("IridaControl", back_populates="sample", uselist=False)
__mapper_args__ = dict(polymorphic_identity="Bacterial Culture Sample",
polymorphic_load="inline",
inherit_condition=(id == BasicSample.id))
@@ -2677,7 +2657,7 @@ class SubmissionSampleAssociation(BaseClass):
# reference to the Sample object
sample = relationship(BasicSample, back_populates="sample_submission_associations") #: associated sample
base_sub_type = Column(String) #: string of subtype name
base_sub_type = Column(String) #: string of mode_sub_type name
# Refers to the type of parent.
# Hooooooo boy, polymorphic association type, now we're getting into the weeds!

View File

@@ -675,7 +675,7 @@ class PCRParser(object):
rsl_plate_num = self.submission_obj.rsl_plate_num
self.pcr = self.parse_general()
self.samples = self.submission_obj.parse_pcr(xl=self.xl, rsl_plate_num=rsl_plate_num)
self.controls = self.submission_obj.parse_pcr_controls(xl=self.xl)
self.controls = self.submission_obj.parse_pcr_controls(xl=self.xl, rsl_plate_num=rsl_plate_num)
def parse_general(self):
"""