Updated controls for both Irida and PCR.

This commit is contained in:
lwark
2024-10-16 15:07:43 -05:00
parent 066d1af0f2
commit c3a4aac68b
11 changed files with 750 additions and 314 deletions

View File

@@ -2,14 +2,19 @@
All control related models.
"""
from __future__ import annotations
from sqlalchemy import Column, String, TIMESTAMP, JSON, INTEGER, ForeignKey
from sqlalchemy.orm import relationship, Query
from pprint import pformat
from PyQt6.QtWidgets import QWidget, QCheckBox, QLabel
from pandas import DataFrame
from sqlalchemy import Column, String, TIMESTAMP, JSON, INTEGER, ForeignKey, case, FLOAT
from sqlalchemy.orm import relationship, Query, validates
import logging, re
from operator import itemgetter
from . import BaseClass
from tools import setup_lookup
from datetime import date, datetime
from typing import List, Literal
from tools import setup_lookup, report_result, Result, Report, Settings, get_unique_values_in_df_column
from datetime import date, datetime, timedelta
from typing import List, Literal, Tuple, Generator
from dateutil.parser import parse
from re import Pattern
@@ -21,7 +26,7 @@ class ControlType(BaseClass):
Base class of a control archetype.
"""
id = Column(INTEGER, primary_key=True) #: primary key
name = Column(String(255), unique=True) #: controltype name (e.g. MCS)
name = Column(String(255), unique=True) #: controltype name (e.g. Irida Control)
targets = Column(JSON) #: organisms checked for
instances = relationship("Control", back_populates="controltype") #: control samples created of this type.
@@ -53,7 +58,7 @@ class ControlType(BaseClass):
pass
return cls.execute_query(query=query, limit=limit)
def get_subtypes(self, mode: Literal['kraken', 'matches', 'contains']) -> List[str]:
def get_modes(self, mode: Literal['kraken', 'matches', 'contains']) -> List[str]:
"""
Get subtypes associated with this controltype (currently used only for Kraken)
@@ -65,8 +70,10 @@ class ControlType(BaseClass):
"""
# NOTE: Get first instance since all should have same subtypes
# NOTE: Get mode of instance
if not self.instances:
return
jsoner = getattr(self.instances[0], mode)
# logger.debug(f"JSON out: {jsoner.keys()}")
# logger.debug(f"JSON retrieved: {jsoner.keys()}")
try:
# NOTE: Pick genera (all should have same subtypes)
genera = list(jsoner.keys())[0]
@@ -74,10 +81,14 @@ class ControlType(BaseClass):
return []
# NOTE: remove items that don't have relevant data
subtypes = [item for item in jsoner[genera] if "_hashes" not in item and "_ratio" not in item]
logger.debug(f"subtypes out: {pformat(subtypes)}")
return subtypes
def get_instance_class(self):
    """
    Returns the Control subclass whose polymorphic identity matches this control type's name.
    """
    return Control.find_polymorphic_subclass(polymorphic_identity=self.name)
@classmethod
def get_positive_control_types(cls) -> List[ControlType]:
def get_positive_control_types(cls) -> Generator[ControlType, None, None]:
"""
Gets list of Control types if they have targets
@@ -104,35 +115,234 @@ class Control(BaseClass):
"""
id = Column(INTEGER, primary_key=True) #: primary key
parent_id = Column(String,
ForeignKey("_controltype.id", name="fk_control_parent_id")) #: primary key of control type
controltype_name = Column(String, ForeignKey("_controltype.name", ondelete="SET NULL",
name="fk_BC_subtype_name")) #: name of joined submission type
controltype = relationship("ControlType", back_populates="instances",
foreign_keys=[parent_id]) #: reference to parent control type
foreign_keys=[controltype_name]) #: reference to parent control type
name = Column(String(255), unique=True) #: Sample ID
submitted_date = Column(TIMESTAMP) #: Date submitted to Robotics
submission_id = Column(INTEGER, ForeignKey("_basicsubmission.id")) #: parent submission id
submission = relationship("BasicSubmission", back_populates="controls",
foreign_keys=[submission_id]) #: parent submission
__mapper_args__ = {
"polymorphic_identity": "Basic Control",
"polymorphic_on": case(
(controltype_name == "PCR Control", "PCR Control"),
(controltype_name == "Irida Control", "Irida Control"),
else_="Basic Control"
),
"with_polymorphic": "*",
}
def __repr__(self) -> str:
return f"<{self.controltype_name}({self.name})>"
@classmethod
def find_polymorphic_subclass(cls, polymorphic_identity: str | ControlType | None = None,
attrs: dict | None = None):
"""
Find subclass based on polymorphic identity or relevant attributes.
Args:
polymorphic_identity (str | None, optional): String representing polymorphic identity. Defaults to None.
attrs (str | SubmissionType | None, optional): Attributes of the relevant class. Defaults to None.
Returns:
_type_: Subclass of interest.
"""
if isinstance(polymorphic_identity, dict):
# logger.debug(f"Controlling for dict value")
polymorphic_identity = polymorphic_identity['value']
if isinstance(polymorphic_identity, ControlType):
polymorphic_identity = polymorphic_identity.name
model = cls
match polymorphic_identity:
case str():
try:
model = cls.__mapper__.polymorphic_map[polymorphic_identity].class_
except Exception as e:
logger.error(
f"Could not get polymorph {polymorphic_identity} of {cls} due to {e}, falling back to BasicSubmission")
case _:
pass
if attrs and any([not hasattr(cls, attr) for attr in attrs.keys()]):
# NOTE: looks for first model that has all included kwargs
try:
model = next(subclass for subclass in cls.__subclasses__() if
all([hasattr(subclass, attr) for attr in attrs.keys()]))
except StopIteration as e:
raise AttributeError(
f"Couldn't find existing class/subclass of {cls} with all attributes:\n{pformat(attrs.keys())}")
logger.info(f"Recruiting model: {model}")
return model
@classmethod
def make_parent_buttons(cls, parent: QWidget) -> None:
    """
    Hook for adding control-type-specific widgets to the chart holder.
    Base implementation is a no-op; subclasses (e.g. IridaControl) override it.

    Args:
        parent (QWidget): chart holding widget to add buttons to.

    Returns:
        None
    """
    pass
@classmethod
def make_chart(cls, parent, chart_settings: dict, ctx):
    """
    Hook for building a figure for this control type. Base implementation returns None.

    Args:
        parent: chart holding widget (subclasses read checkboxes etc. from it).
        chart_settings (dict): settings passed down from chart widget
        ctx (Settings): settings passed down from gui

    Returns:
        None in the base class. NOTE(review): subclasses return (Report, figure)
        tuples instead — callers should handle both shapes; confirm intended.
    """
    return None
class PCRControl(Control):
id = Column(INTEGER, ForeignKey('_control.id'), primary_key=True)
subtype = Column(String(16)) #: PC or NC
target = Column(String(16)) #: N1, N2, etc.
ct = Column(FLOAT)
reagent_lot = Column(String(64), ForeignKey("_reagent.name", ondelete="SET NULL",
name="fk_reagent_lot"))
reagent = relationship("Reagent", foreign_keys=reagent_lot)
__mapper_args__ = dict(polymorphic_identity="PCR Control",
polymorphic_load="inline",
inherit_condition=(id == Control.id))
def to_sub_dict(self):
    """
    Flattens this PCR control into a plain record for dataframe/report use.

    Returns:
        dict: name, ct, subtype, target, reagent_lot and the submitted date (date only).
    """
    record = {
        "name": self.name,
        "ct": self.ct,
        "subtype": self.subtype,
        "target": self.target,
        "reagent_lot": self.reagent_lot,
        "submitted_date": self.submitted_date.date(),
    }
    return record
@classmethod
@setup_lookup
def query(cls,
          sub_type: str | None = None,
          start_date: date | str | int | None = None,
          end_date: date | str | int | None = None,
          control_name: str | None = None,
          limit: int = 0
          ) -> Control | List[Control]:
    """
    Lookup PCR control objects in the database based on a number of parameters.

    Args:
        sub_type (str | None, optional): Submission type name to filter by, joined
            through the parent submission. Defaults to None.
        start_date (date | str | int | None, optional): Beginning date to search by. Defaults to 2023-01-01 if end_date not None.
        end_date (date | str | int | None, optional): End date to search by. Defaults to today if start_date not None.
        control_name (str | None, optional): Name (prefix) of control; forces limit=1. Defaults to None.
        limit (int, optional): Maximum number of results to return (0 = all). Defaults to 0.

    Returns:
        models.Control|List[models.Control]: Control object(s) of interest.
    """
    query: Query = cls.__database_session__.query(cls)
    # NOTE: fill in the missing end of a half-open date range.
    if start_date is not None and end_date is None:
        logger.warning(f"Start date with no end date, using today.")
        end_date = date.today()
    if end_date is not None and start_date is None:
        logger.warning(f"End date with no start date, using Jan 1, 2023")
        start_date = date(2023, 1, 1)
    if start_date is not None:
        # NOTE: coerce start_date to a YYYY-MM-DD string regardless of input type.
        match start_date:
            case date():
                start_date = start_date.strftime("%Y-%m-%d")
            case int():
                # NOTE(review): int is treated as an Excel-style serial date
                # (hence the -2 offset) — confirm against callers.
                start_date = datetime.fromordinal(
                    datetime(1900, 1, 1).toordinal() + start_date - 2).date().strftime("%Y-%m-%d")
            case _:
                start_date = parse(start_date).strftime("%Y-%m-%d")
        # NOTE: same coercion for end_date.
        match end_date:
            case date():
                end_date = end_date.strftime("%Y-%m-%d")
            case int():
                end_date = datetime.fromordinal(datetime(1900, 1, 1).toordinal() + end_date - 2).date().strftime(
                    "%Y-%m-%d")
            case _:
                end_date = parse(end_date).strftime("%Y-%m-%d")
        query = query.filter(cls.submitted_date.between(start_date, end_date))
    match sub_type:
        case str():
            # NOTE: filter by submission type of the parent submission.
            from backend import BasicSubmission, SubmissionType
            query = query.join(BasicSubmission).join(SubmissionType).filter(SubmissionType.name == sub_type)
        case _:
            pass
    match control_name:
        case str():
            # NOTE: prefix match on name; a named lookup returns a single object.
            query = query.filter(cls.name.startswith(control_name))
            limit = 1
        case _:
            pass
    return cls.execute_query(query=query, limit=limit)
@classmethod
def make_chart(cls, parent, chart_settings: dict, ctx):
    """
    Builds the PCR control figure for the chart widget.

    Args:
        parent: chart holding widget; its mode selector is cleared/disabled since PCR controls have no modes.
        chart_settings (dict): settings passed down from chart widget ('sub_type', 'start_date', 'end_date').
        ctx (Settings): settings passed down from gui (unused here; kept for interface parity).

    Returns:
        Tuple[Report, PCRFigure]: empty report and the constructed figure.
    """
    from frontend.visualizations.pcr_charts import PCRFigure
    # NOTE: PCR controls have no analysis modes, so blank out the mode selector.
    parent.mode_typer.clear()
    parent.mode_typer.setEnabled(False)
    report = Report()
    controls = cls.query(sub_type=chart_settings['sub_type'], start_date=chart_settings['start_date'],
                         end_date=chart_settings['end_date'])
    data = [control.to_sub_dict() for control in controls]
    df = DataFrame.from_records(data)
    # BUGFIX: guard explicitly for a missing 'ct' column (empty result set) instead of
    # catching AttributeError around the no-op reassignment `df = df`.
    if "ct" in df.columns:
        # NOTE: drop rows with no usable Ct value.
        df = df[df.ct > 0.0]
    fig = PCRFigure(df=df, modes=None)
    return report, fig
class IridaControl(Control):
id = Column(INTEGER, ForeignKey('_control.id'), primary_key=True)
contains = Column(JSON) #: unstructured hashes in contains.tsv for each organism
matches = Column(JSON) #: unstructured hashes in matches.tsv for each organism
kraken = Column(JSON) #: unstructured output from kraken_report
submission_id = Column(INTEGER, ForeignKey("_basicsubmission.id")) #: parent submission id
submission = relationship("BacterialCulture", back_populates="controls",
foreign_keys=[submission_id]) #: parent submission
sub_type = Column(String(16), nullable=False) #: EN-NOS, MCS-NOS, etc
refseq_version = Column(String(16)) #: version of refseq used in fastq parsing
kraken2_version = Column(String(16)) #: version of kraken2 used in fastq parsing
kraken2_db_version = Column(String(32)) #: folder name of kraken2 db
sample = relationship("BacterialCultureSample", back_populates="control") #: This control's submission sample
sample_id = Column(INTEGER,
ForeignKey("_basicsample.id", ondelete="SET NULL", name="cont_BCS_id")) #: sample id key
# submission_id = Column(INTEGER, ForeignKey("_basicsubmission.id")) #: parent submission id
# submission = relationship("BacterialCulture", back_populates="controls",
# foreign_keys=[submission_id]) #: parent submission
def __repr__(self) -> str:
return f"<Control({self.name})>"
__mapper_args__ = dict(polymorphic_identity="Irida Control",
polymorphic_load="inline",
inherit_condition=(id == Control.id))
@validates("sub_type")
def enforce_subtype_literals(self, key: str, value: str):
    """
    SQLAlchemy validator restricting sub_type to the known control subtypes.

    Args:
        key (str): column name being validated (always 'sub_type').
        value (str): proposed sub_type value.

    Returns:
        str: the original value, unchanged.

    Raises:
        KeyError: if value (compared case-insensitively) is not an accepted subtype.
    """
    acceptables = ['ATCC49226', 'ATCC49619', 'EN-NOS', "EN-SSTI", "MCS-NOS", "MCS-SSTI", "SN-NOS", "SN-SSTI"]
    # NOTE(review): the check is case-insensitive but the value is stored as-is, so
    # lower-case variants pass validation yet persist in their original casing — confirm intended.
    if value.upper() not in acceptables:
        raise KeyError(f"Sub-type must be in {acceptables}")
    return value
def to_sub_dict(self) -> dict:
"""
Converts object into convenient dictionary for use in submission summary
Converts object into convenient dictionary for use in submission summary
Returns:
dict: output dictionary containing: Name, Type, Targets, Top Kraken results
"""
Returns:
dict: output dictionary containing: Name, Type, Targets, Top Kraken results
"""
# logger.debug("loading json string into dict")
try:
kraken = self.kraken
@@ -153,25 +363,27 @@ class Control(BaseClass):
else:
targets = ["None"]
# logger.debug("constructing output dictionary")
output = {
"name": self.name,
"type": self.controltype.name,
"targets": ", ".join(targets),
"kraken": new_kraken[0:10]
}
output = dict(
name=self.name,
type=self.controltype.name,
targets=", ".join(targets),
kraken=new_kraken[0:10]
)
return output
def convert_by_mode(self, mode: Literal['kraken', 'matches', 'contains']) -> List[dict]:
def convert_by_mode(self, control_sub_type: str, mode: Literal['kraken', 'matches', 'contains'],
consolidate: bool = False) -> Generator[dict, None, None]:
"""
split this instance into analysis types for controls graphs
Args:
mode (str): analysis type, 'contains', etc
consolidate (bool): whether to merge all off-target genera. Defaults to False
control_sub_type (str): control subtype, 'MCS-NOS', etc.
mode (str): analysis type, 'contains', etc.
Returns:
List[dict]: list of records
"""
output = []
# logger.debug("load json string for mode (i.e. contains, matches, kraken2)")
try:
data = self.__getattribute__(mode)
@@ -179,6 +391,18 @@ class Control(BaseClass):
data = {}
if data is None:
data = {}
# NOTE: Data truncation and consolidation.
if "kraken" in mode:
data = {k: v for k, v in sorted(data.items(), key=lambda d: d[1][f"{mode}_count"], reverse=True)[:50]}
else:
if consolidate:
on_tar = {k: v for k, v in data.items() if k.strip("*") in self.controltype.targets[control_sub_type]}
# logger.debug(f"Consolidating off-targets to: {self.controltype.targets[control_sub_type]}")
off_tar = sum(v[f'{mode}_ratio'] for k, v in data.items() if
k.strip("*") not in self.controltype.targets[control_sub_type])
on_tar['Off-target'] = {f"{mode}_ratio": off_tar}
data = on_tar
# logger.debug(pformat(data))
# logger.debug(f"Length of data: {len(data)}")
# logger.debug("dict keys are genera of bacteria, e.g. 'Streptococcus'")
for genus in data:
@@ -186,17 +410,13 @@ class Control(BaseClass):
name=self.name,
submitted_date=self.submitted_date,
genus=genus,
target='Target' if genus.strip("*") in self.controltype.targets else "Off-target"
target='Target' if genus.strip("*") in self.controltype.targets[control_sub_type] else "Off-target"
)
# logger.debug("get Target or Off-target of genus")
# logger.debug("set 'contains_hashes', etc for genus")
for key in data[genus]:
_dict[key] = data[genus][key]
output.append(_dict)
# logger.debug("Have to triage kraken data to keep program from getting overwhelmed")
if "kraken" in mode:
output = sorted(output, key=lambda d: d[f"{mode}_count"], reverse=True)[:50]
return output
yield _dict
@classmethod
def get_modes(cls) -> List[str]:
@@ -217,7 +437,7 @@ class Control(BaseClass):
@classmethod
@setup_lookup
def query(cls,
control_type: ControlType | str | None = None,
sub_type: str | None = None,
start_date: date | str | int | None = None,
end_date: date | str | int | None = None,
control_name: str | None = None,
@@ -227,7 +447,7 @@ class Control(BaseClass):
Lookup control objects in the database based on a number of parameters.
Args:
control_type (models.ControlType | str | None, optional): Control archetype. Defaults to None.
sub_type (models.ControlType | str | None, optional): Control archetype. Defaults to None.
start_date (date | str | int | None, optional): Beginning date to search by. Defaults to 2023-01-01 if end_date not None.
end_date (date | str | int | None, optional): End date to search by. Defaults to today if start_date not None.
control_name (str | None, optional): Name of control. Defaults to None.
@@ -238,13 +458,14 @@ class Control(BaseClass):
"""
query: Query = cls.__database_session__.query(cls)
# NOTE: by control type
match control_type:
case ControlType():
# logger.debug(f"Looking up control by control type: {control_type}")
query = query.filter(cls.controltype == control_type)
match sub_type:
# case ControlType():
# # logger.debug(f"Looking up control by control type: {sub_type}")
# query = query.filter(cls.controltype == sub_type)
case str():
# logger.debug(f"Looking up control by control type: {control_type}")
query = query.join(ControlType).filter(ControlType.name == control_type)
# logger.debug(f"Looking up control by control type: {sub_type}")
# query = query.join(ControlType).filter(ControlType.name == sub_type)
query = query.filter(cls.sub_type == sub_type)
case _:
pass
# NOTE: by date range
@@ -287,3 +508,241 @@ class Control(BaseClass):
case _:
pass
return cls.execute_query(query=query, limit=limit)
@classmethod
def make_parent_buttons(cls, parent: QWidget) -> None:
    """
    Adds the Irida-specific 'Consolidate Off-targets' checkbox below the parent chart.

    Args:
        parent (QWidget): chart holding widget to add buttons to.

    Returns:
        None
    """
    super().make_parent_buttons(parent=parent)
    # NOTE: append the new widgets on the next free grid row.
    rows = parent.layout.rowCount()
    logger.debug(f"Parent rows: {rows}")
    consolidate_box = QCheckBox(parent)
    consolidate_box.setObjectName("irida_check")
    consolidate_box.setChecked(True)
    consolidate_box.setToolTip("Pools off-target genera to save time.")
    label = QLabel("Consolidate Off-targets")
    parent.layout.addWidget(label, rows, 0, 1, 1)
    parent.layout.addWidget(consolidate_box, rows, 1, 1, 2)
    # NOTE: re-run the controls getter whenever the box is toggled.
    consolidate_box.checkStateChanged.connect(parent.controls_getter_function)
@classmethod
@report_result
def make_chart(cls, chart_settings: dict, parent, ctx) -> Tuple[Report, "IridaFigure" | None]:
    """
    Builds the Irida control figure for the chart widget.

    Args:
        chart_settings (dict): settings passed down from chart widget
            ('sub_type', 'mode', 'sub_mode', 'start_date', 'end_date', 'months').
        parent: chart holding widget (queried for the 'irida_check' checkbox).
        ctx (Settings): settings passed down from gui.

    Returns:
        Tuple[Report, IridaFigure | None]: report of problems and the figure (None if no data).
    """
    from frontend.visualizations import IridaFigure
    # NOTE: kraken mode cannot consolidate off-targets, so disable the checkbox there.
    try:
        checker = parent.findChild(QCheckBox, name="irida_check")
        if chart_settings['mode'] == "kraken":
            checker.setEnabled(False)
            checker.setChecked(False)
        else:
            checker.setEnabled(True)
        consolidate = checker.isChecked()
    except AttributeError:
        consolidate = False
    report = Report()
    controls = cls.query(sub_type=chart_settings['sub_type'], start_date=chart_settings['start_date'],
                         end_date=chart_settings['end_date'])
    if not controls:
        report.add_result(Result(status="Critical", msg="No controls found in given date range."))
        return report, None
    # NOTE: change each control to list of dictionaries
    data = [control.convert_by_mode(control_sub_type=chart_settings['sub_type'], mode=chart_settings['mode'],
                                    consolidate=consolidate) for
            control in controls]
    # NOTE: flatten data to one dimensional list
    data = [item for sublist in data for item in sublist]
    if not data:
        report.add_result(Result(status="Critical", msg="No data found for controls in given date range."))
        return report, None
    df = cls.convert_data_list_to_df(input_df=data, sub_mode=chart_settings['sub_mode'])
    # BUGFIX: the original set title = sub_mode when sub_mode was None, yielding a None
    # y-axis title; fall back to the mode name instead.
    if chart_settings['sub_mode'] is None:
        title = chart_settings['mode']
    else:
        title = f"{chart_settings['mode']} - {chart_settings['sub_mode']}"
    # NOTE: send dataframe to chart maker
    df, modes = cls.prep_df(ctx=ctx, df=df)
    fig = IridaFigure(df=df, ytitle=title, modes=modes, parent=parent,
                      months=chart_settings['months'])
    return report, fig
@classmethod
def convert_data_list_to_df(cls, input_df: list[dict], sub_mode) -> DataFrame:
    """
    Convert a list of control records to a dataframe, keeping only the identifying
    columns plus the selected analysis column(s).

    Args:
        input_df (list[dict]): list of dictionaries containing records
        sub_mode (str | None): single analysis column to keep; None keeps all analysis columns.

    Returns:
        DataFrame: dataframe of controls
    """
    df = DataFrame.from_records(input_df)
    # NOTE: columns always retained regardless of sub_mode.
    safe = ['name', 'submitted_date', 'genus', 'target']
    for column in df.columns:
        if column not in safe:
            # NOTE: when a sub_mode is selected, drop every other analysis column.
            if sub_mode is not None and column != sub_mode:
                continue
            else:
                safe.append(column)
            if "percent" in column:
                try:
                    count_col = next(item for item in df.columns if "count" in item)
                except StopIteration:
                    continue
                # NOTE: The actual percentage from kraken was off due to exclusion of NaN, recalculating.
                df[column] = 100 * df[count_col] / df.groupby('name')[count_col].transform('sum')
    df = df[[c for c in df.columns if c in safe]]
    # NOTE: move date of sample submitted on same date as previous ahead one.
    df = cls.displace_date(df=df)
    # NOTE: ad hoc method to make data labels more accurate.
    df = cls.df_column_renamer(df=df)
    return df
@classmethod
def df_column_renamer(cls, df: DataFrame) -> DataFrame:
    """
    Ad hoc clarification of analysis column names for display.

    Args:
        df (DataFrame): input dataframe

    Returns:
        DataFrame: dataframe with 'clarified' column names
    """
    # NOTE: hash columns are internal and never shown.
    hash_columns = list(df.filter(regex='_hashes'))
    df = df[df.columns.drop(hash_columns)]
    renames = {
        "contains_ratio": "contains_shared_hashes_ratio",
        "matches_ratio": "matches_shared_hashes_ratio",
        "kraken_count": "kraken2_read_count_(top_50)",
        "kraken_percent": "kraken2_read_percent_(top_50)",
    }
    return df.rename(columns=renames)
@classmethod
def displace_date(cls, df: DataFrame) -> DataFrame:
    """
    This function serves to split samples that were submitted on the same date by incrementing dates.
    It will shift the date forward by one day if it is the same day as an existing date in a list.

    Args:
        df (DataFrame): input dataframe composed of control records

    Returns:
        DataFrame: output dataframe with dates incremented.
    """
    # NOTE: one (name, submitted_date) pair per control, processed in name order
    # so displacement is deterministic.
    dict_list = [dict(name=item, date=df[df.name == item].iloc[0]['submitted_date']) for item in
                 sorted(df['name'].unique())]
    previous_dates = set()
    for item in dict_list:
        # NOTE: check_date mutates df rows and grows previous_dates as it claims dates.
        df, previous_dates = cls.check_date(df=df, item=item, previous_dates=previous_dates)
    return df
@classmethod
def check_date(cls, df: DataFrame, item: dict, previous_dates: set) -> Tuple[DataFrame, set]:
    """
    Checks if an item's date is already claimed and, if so, pushes that control's rows
    (and its own date) forward one day, recursing until the date is unique.

    Args:
        df (DataFrame): input dataframe
        item (dict): control record ('name', 'date') for checking
        previous_dates (set): dates claimed by previously processed controls

    Returns:
        Tuple[DataFrame, set]: output dataframe and updated set of previous dates
    """
    # BUGFIX: the membership test cannot raise IndexError; the dead try/except was removed.
    # Annotation also corrected: previous_dates is a set, not a list.
    check = item['date'] in previous_dates
    previous_dates.add(item['date'])
    if not check:
        # NOTE: date is unique; nothing to adjust.
        return df, previous_dates
    # NOTE: collision — increment this control's rows in the dataframe and its own date.
    mask = df['name'] == item['name']
    df.loc[mask, 'submitted_date'] = df.loc[mask, 'submitted_date'].apply(lambda x: x + timedelta(days=1))
    item['date'] += timedelta(days=1)
    # NOTE: re-check the incremented date until it no longer collides.
    logger.warning(f"Date check failed, running recursion")
    df, previous_dates = cls.check_date(df, item, previous_dates)
    return df, previous_dates
@classmethod
def prep_df(cls, ctx: Settings, df: DataFrame) -> Tuple[DataFrame | None, list]:
    """
    Prepares the parsed dataframe for figure construction: splits off the starred-genus
    markers, drops superseded runs, and sorts for plotting.

    Args:
        ctx (Settings): settings passed down from gui
        df (DataFrame): input dataframe

    Returns:
        Tuple[DataFrame | None, list]: sorted dataframe (None if input empty) and the
        list of plottable mode columns.
    """
    if df.empty:
        return None, []
    # BUGFIX: capture the trailing-star marker BEFORE stripping '*' from genus;
    # previously the strip ran first, so 'genera' was always the empty string.
    df['genera'] = [item[-1] if item and item[-1] == "*" else "" for item in df['genus'].to_list()]
    # NOTE: converts starred genera to normal
    df['genus'] = df['genus'].replace({'\*': ''}, regex=True).replace({"NaN": "Unknown"})
    # NOTE: remove original runs, using reruns if applicable
    df = cls.drop_reruns_from_df(ctx=ctx, df=df)
    # NOTE: sort by and exclude from
    sorts = ['submitted_date', "target", "genus"]
    exclude = ['name', 'genera']
    modes = [item for item in df.columns if item not in sorts and item not in exclude]
    # NOTE: 'target' sorts descending so 'Target' rows come before 'Off-target'.
    ascending = [False if item == "target" else True for item in sorts]
    df = df.sort_values(by=sorts, ascending=ascending)
    return df, modes
@classmethod
def drop_reruns_from_df(cls, ctx: Settings, df: DataFrame) -> DataFrame:
    """
    Removes semi-duplicates from dataframe after finding sequencing repeats.

    Args:
        ctx (Settings): settings passed from gui (may define 'rerun_regex')
        df (DataFrame): initial dataframe

    Returns:
        DataFrame: dataframe with originals removed in favour of repeats.
    """
    if 'rerun_regex' in ctx:
        sample_names = get_unique_values_in_df_column(df, column_name="name")
        rerun_regex = re.compile(fr"{ctx.rerun_regex}")
        # NOTE: an original's name is a rerun's name with the rerun marker removed.
        exclude = [re.sub(rerun_regex, "", sample) for sample in sample_names if rerun_regex.search(sample)]
        # BUGFIX: `df.name not in exclude` tested the Series *object* against the list
        # (always True/False as a scalar, breaking boolean indexing); use element-wise isin.
        df = df[~df['name'].isin(exclude)]
    return df

View File

@@ -13,7 +13,7 @@ from tempfile import TemporaryDirectory, TemporaryFile
from operator import itemgetter
from pprint import pformat
from . import BaseClass, Reagent, SubmissionType, KitType, Organization, Contact
from sqlalchemy import Column, String, TIMESTAMP, INTEGER, ForeignKey, JSON, FLOAT, case, desc
from sqlalchemy import Column, String, TIMESTAMP, INTEGER, ForeignKey, JSON, FLOAT, case
from sqlalchemy.orm import relationship, validates, Query
from sqlalchemy.orm.attributes import flag_modified
from sqlalchemy.ext.associationproxy import association_proxy
@@ -22,7 +22,6 @@ from sqlalchemy.exc import OperationalError as AlcOperationalError, IntegrityErr
from sqlite3 import OperationalError as SQLOperationalError, IntegrityError as SQLIntegrityError
import pandas as pd
from openpyxl import Workbook
from openpyxl.worksheet.worksheet import Worksheet
from openpyxl.drawing.image import Image as OpenpyxlImage
from tools import row_map, setup_lookup, jinja_template_loading, rreplace, row_keys, check_key_or_attr, Result, Report, \
report_result
@@ -32,8 +31,6 @@ from dateutil.parser import parse
from pathlib import Path
from jinja2.exceptions import TemplateNotFound
from jinja2 import Template
from docxtpl import InlineImage
from docx.shared import Inches
from PIL import Image
logger = logging.getLogger(f"submissions.{__name__}")
@@ -74,6 +71,8 @@ class BasicSubmission(BaseClass):
contact_id = Column(INTEGER, ForeignKey("_contact.id", ondelete="SET NULL",
name="fk_BS_contact_id")) #: client lab id from _organizations
custom = Column(JSON)
controls = relationship("Control", back_populates="submission",
uselist=True) #: A control sample added to submission
submission_sample_associations = relationship(
"SubmissionSampleAssociation",
@@ -114,7 +113,6 @@ class BasicSubmission(BaseClass):
# NOTE: Allows for subclassing into ex. BacterialCulture, Wastewater, etc.
__mapper_args__ = {
"polymorphic_identity": "Basic Submission",
# "polymorphic_on": submission_type_name,
"polymorphic_on": case(
(submission_type_name == "Wastewater", "Wastewater"),
@@ -190,7 +188,7 @@ class BasicSubmission(BaseClass):
# NOTE: Singles tells the query which fields to set limit to 1
dicto['singles'] = parent_defs['singles']
# logger.debug(dicto['singles'])
# NOTE: Grab subtype specific info.
# NOTE: Grab mode_sub_type specific info.
output = {}
for k, v in dicto.items():
if len(args) > 0 and k not in args:
@@ -960,7 +958,6 @@ class BasicSubmission(BaseClass):
pcr_sample_map = cls.get_submission_type().sample_map['pcr_samples']
# logger.debug(f'sample map: {pcr_sample_map}')
main_sheet = xl[pcr_sample_map['main_sheet']]
# samples = []
fields = {k: v for k, v in pcr_sample_map.items() if k not in ['main_sheet', 'start_row']}
for row in main_sheet.iter_rows(min_row=pcr_sample_map['start_row']):
idx = row[0].row
@@ -969,12 +966,11 @@ class BasicSubmission(BaseClass):
sheet = xl[v['sheet']]
sample[k] = sheet.cell(row=idx, column=v['column']).value
yield sample
# samples.append(sample)
# return samples
@classmethod
def parse_pcr_controls(cls, xl: Workbook) -> list:
def parse_pcr_controls(cls, xl: Workbook, rsl_plate_num: str) -> list:
location_map = cls.get_submission_type().sample_map['pcr_controls']
submission = cls.query(rsl_plate_num=rsl_plate_num)
name_column = 1
for item in location_map:
logger.debug(f"Looking for {item['name']}")
@@ -983,7 +979,29 @@ class BasicSubmission(BaseClass):
for cell in row:
if cell.value == item['name']:
logger.debug(f"Pulling from row {iii}, column {item['ct_column']}")
yield dict(name=item['name'], ct=worksheet.cell(row=iii, column=item['ct_column']).value)
subtype, target = item['name'].split("-")
ct = worksheet.cell(row=iii, column=item['ct_column']).value
if subtype == "PC":
ctrl = next((assoc.reagent for assoc in submission.submission_reagent_associations
if any(["positive control" in item.name.lower() for item in assoc.reagent.role])), None)
elif subtype == "NC":
ctrl = next((assoc.reagent for assoc in submission.submission_reagent_associations
if any(["molecular grade water" in item.name.lower() for item in assoc.reagent.role])), None)
try:
ct = float(ct)
except ValueError:
ct = 0.0
if ctrl:
ctrl = ctrl.lot
else:
ctrl = None
yield dict(
name=f"{rsl_plate_num}<{item['name']}>",
ct=ct,
subtype=subtype,
target=target,
reagent_lot=ctrl
)
@classmethod
def filename_template(cls) -> str:
@@ -996,21 +1014,6 @@ class BasicSubmission(BaseClass):
"""
return "{{ rsl_plate_num }}"
# @classmethod
# def custom_sample_autofill_row(cls, sample, worksheet: Worksheet) -> int:
# """
# Updates row information
#
# Args:
# sample (_type_): _description_
# worksheet (Workbook): _description_
#
# Returns:
# int: New row number
# """
# logger.debug(f"Sample from args: {sample}")
# return None
@classmethod
def adjust_autofill_samples(cls, samples: List[Any]) -> List[Any]:
"""
@@ -1025,19 +1028,6 @@ class BasicSubmission(BaseClass):
logger.info(f"Hello from {cls.__mapper_args__['polymorphic_identity']} sampler")
return samples
# def adjust_to_dict_samples(self, backup: bool = False) -> List[dict]:
# """
# Updates sample dictionaries with custom values
#
# Args:
# backup (bool, optional): Whether to perform backup. Defaults to False.
#
# Returns:
# List[dict]: Updated dictionaries
# """
# # logger.debug(f"Hello from {self.__class__.__name__} dictionary sample adjuster.")
# return [item.to_sub_dict() for item in self.submission_sample_associations]
@classmethod
def get_details_template(cls, base_dict: dict) -> Template:
"""
@@ -1380,8 +1370,7 @@ class BacterialCulture(BasicSubmission):
derivative submission type from BasicSubmission
"""
id = Column(INTEGER, ForeignKey('_basicsubmission.id'), primary_key=True)
controls = relationship("Control", back_populates="submission",
uselist=True) #: A control sample added to submission
__mapper_args__ = dict(polymorphic_identity="Bacterial Culture",
polymorphic_load="inline",
inherit_condition=(id == BasicSubmission.id))
@@ -1442,25 +1431,6 @@ class BacterialCulture(BasicSubmission):
pos_control_reg.missing = False
return pyd
# @classmethod
# def custom_sample_autofill_row(cls, sample, worksheet: Worksheet) -> int:
# """
# Extends parent
# """
# # logger.debug(f"Checking {sample.well}")
# # logger.debug(f"here's the worksheet: {worksheet}")
# row = super().custom_sample_autofill_row(sample, worksheet)
# df = pd.DataFrame(list(worksheet.values))
# # logger.debug(f"Here's the dataframe: {df}")
# idx = df[df[0] == sample.well]
# if idx.empty:
# new = f"{sample.well[0]}{sample.well[1:].zfill(2)}"
# # logger.debug(f"Checking: {new}")
# idx = df[df[0] == new]
# # logger.debug(f"Here is the row: {idx}")
# row = idx.index.to_list()[0]
# return row + 1
@classmethod
def custom_info_parser(cls, input_dict: dict, xl: Workbook | None = None, custom_fields: dict = {}) -> dict:
input_dict = super().custom_info_parser(input_dict=input_dict, xl=xl, custom_fields=custom_fields)
@@ -1548,7 +1518,7 @@ class Wastewater(BasicSubmission):
for sample in samples:
# NOTE: remove '-{target}' from controls
sample['sample'] = re.sub('-N\\d$', '', sample['sample'])
# # NOTE: if sample is already in output skip
# NOTE: if sample is already in output skip
if sample['sample'] in [item['sample'] for item in output]:
logger.warning(f"Already have {sample['sample']}")
continue
@@ -1577,8 +1547,6 @@ class Wastewater(BasicSubmission):
# @classmethod
# def parse_pcr_controls(cls, xl: Workbook, location_map: list) -> list:
@classmethod
def enforce_name(cls, instr: str, data: dict | None = {}) -> str:
"""
@@ -1681,15 +1649,17 @@ class Wastewater(BasicSubmission):
obj (_type_): Parent widget
"""
from backend.excel import PCRParser
from backend.db import PCRControl, ControlType
from frontend.widgets import select_open_file
report = Report()
fname = select_open_file(obj=obj, file_extension="xlsx")
if not fname:
report.add_result(Result(msg="No file selected, cancelling.", status="Warning"))
return report
parser = PCRParser(filepath=fname)
parser = PCRParser(filepath=fname, submission=self)
self.set_attribute("pcr_info", parser.pcr)
pcr_samples = [sample for sample in parser.samples]
pcr_controls = [control for control in parser.controls]
self.save(original=False)
# logger.debug(f"Got {len(parser.samples)} samples to update!")
# logger.debug(f"Parser samples: {parser.samples}")
@@ -1700,6 +1670,16 @@ class Wastewater(BasicSubmission):
except StopIteration:
continue
self.update_subsampassoc(sample=sample, input_dict=sample_dict)
controltype = ControlType.query(name="PCR Control")
logger.debug(parser.pcr)
submitted_date = datetime.strptime(" ".join(parser.pcr['run_start_date/time'].split(" ")[:-1]),
"%Y-%m-%d %I:%M:%S %p")
for control in pcr_controls:
new_control = PCRControl(**control)
new_control.submitted_date = submitted_date
new_control.controltype = controltype
new_control.submission = self
new_control.save()
class WastewaterArtic(BasicSubmission):
@@ -2207,7 +2187,7 @@ class BasicSample(BaseClass):
id = Column(INTEGER, primary_key=True) #: primary key
submitter_id = Column(String(64), nullable=False, unique=True) #: identification from submitter
sample_type = Column(String(32)) #: subtype of sample
sample_type = Column(String(32)) #: mode_sub_type of sample
sample_submission_associations = relationship(
"SubmissionSampleAssociation",
@@ -2632,7 +2612,7 @@ class BacterialCultureSample(BasicSample):
id = Column(INTEGER, ForeignKey('_basicsample.id'), primary_key=True)
organism = Column(String(64)) #: bacterial specimen
concentration = Column(String(16)) #: sample concentration
control = relationship("Control", back_populates="sample", uselist=False)
control = relationship("IridaControl", back_populates="sample", uselist=False)
__mapper_args__ = dict(polymorphic_identity="Bacterial Culture Sample",
polymorphic_load="inline",
inherit_condition=(id == BasicSample.id))
@@ -2677,7 +2657,7 @@ class SubmissionSampleAssociation(BaseClass):
# reference to the Sample object
sample = relationship(BasicSample, back_populates="sample_submission_associations") #: associated sample
base_sub_type = Column(String) #: string of subtype name
base_sub_type = Column(String) #: string of mode_sub_type name
# Refers to the type of parent.
# Hooooooo boy, polymorphic association type, now we're getting into the weeds!

View File

@@ -675,7 +675,7 @@ class PCRParser(object):
rsl_plate_num = self.submission_obj.rsl_plate_num
self.pcr = self.parse_general()
self.samples = self.submission_obj.parse_pcr(xl=self.xl, rsl_plate_num=rsl_plate_num)
self.controls = self.submission_obj.parse_pcr_controls(xl=self.xl)
self.controls = self.submission_obj.parse_pcr_controls(xl=self.xl, rsl_plate_num=rsl_plate_num)
def parse_general(self):
"""