Before control chart update.
@@ -3,6 +3,7 @@ All database related operations.
 """
 from sqlalchemy import event
 from sqlalchemy.engine import Engine
+from tools import ctx


 @event.listens_for(Engine, "connect")
@@ -17,7 +18,8 @@ def set_sqlite_pragma(dbapi_connection, connection_record):
        connection_record (_type_): _description_
    """
    cursor = dbapi_connection.cursor()
-    # cursor.execute("PRAGMA foreign_keys=ON")
+    if ctx.database_schema == "sqlite":
+        cursor.execute("PRAGMA foreign_keys=ON")
    cursor.close()

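For context, the listener above follows SQLAlchemy's documented recipe for enabling foreign-key enforcement on SQLite. A minimal self-contained sketch (the `ctx.database_schema` guard is this repo's own setting and is omitted here):

from sqlalchemy import create_engine, event
from sqlalchemy.engine import Engine

@event.listens_for(Engine, "connect")
def _set_sqlite_pragma(dbapi_connection, connection_record):
    # NOTE: fires for every new DBAPI connection on any Engine, which is
    # why the real code guards on the configured database schema.
    cursor = dbapi_connection.cursor()
    cursor.execute("PRAGMA foreign_keys=ON")
    cursor.close()

engine = create_engine("sqlite://")  # in-memory database for the demo
with engine.connect() as conn:
    pass  # the listener has already run for this connection
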
@@ -3,7 +3,7 @@ Contains all models for sqlalchemy
 """
 from __future__ import annotations
 import sys, logging
-from sqlalchemy import Column, INTEGER, String, JSON, inspect
+from sqlalchemy import Column, INTEGER, String, JSON
 from sqlalchemy.orm import DeclarativeMeta, declarative_base, Query, Session
 from sqlalchemy.ext.declarative import declared_attr
 from sqlalchemy.exc import ArgumentError
@@ -163,7 +163,6 @@ class BaseClass(Base):
        try:
            self.__database_session__.add(self)
            self.__database_session__.commit()
-            # self.__database_session__.merge(self)
        except Exception as e:
            logger.critical(f"Problem saving object: {e}")
            self.__database_session__.rollback()
@@ -203,4 +202,5 @@ from .organizations import *
 from .kits import *
 from .submissions import *

+# NOTE: Add a creator to the submission for reagent association.
 BasicSubmission.reagents.creator = lambda reg: SubmissionReagentAssociation(reagent=reg)

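The `creator` assignment above is SQLAlchemy's standard association-proxy hook: it tells the proxy how to wrap a bare Reagent in a SubmissionReagentAssociation when one is appended. A minimal sketch with hypothetical Parent/Child/Association models (the real classes live in this repo):

from sqlalchemy import Column, ForeignKey, Integer, create_engine
from sqlalchemy.ext.associationproxy import association_proxy
from sqlalchemy.orm import declarative_base, relationship, Session

Base = declarative_base()

class Association(Base):
    __tablename__ = "association"
    parent_id = Column(ForeignKey("parent.id"), primary_key=True)
    child_id = Column(ForeignKey("child.id"), primary_key=True)
    parent = relationship("Parent", back_populates="child_associations")
    child = relationship("Child")

class Parent(Base):
    __tablename__ = "parent"
    id = Column(Integer, primary_key=True)
    child_associations = relationship("Association", back_populates="parent")
    # NOTE: creator wraps a bare Child in an Association on append, just as
    # BasicSubmission.reagents.creator wraps a Reagent above.
    children = association_proxy("child_associations", "child",
                                 creator=lambda c: Association(child=c))

class Child(Base):
    __tablename__ = "child"
    id = Column(Integer, primary_key=True)

engine = create_engine("sqlite://")
Base.metadata.create_all(engine)
with Session(engine) as session:
    parent = Parent()
    parent.children.append(Child())  # creator builds the Association row
    session.add(parent)
    session.commit()
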
@@ -467,6 +467,7 @@ class Reagent(BaseClass):
        Lookup a list of reagents from the database.

        Args:
+            id (int | None, optional): reagent id number
            reagent_role (str | models.ReagentType | None, optional): Reagent type. Defaults to None.
            lot_number (str | None, optional): Reagent lot number. Defaults to None.
            name (str | None, optional): Reagent name. Defaults to None.
@@ -1468,7 +1469,7 @@ class SubmissionTypeEquipmentRoleAssociation(BaseClass):
            List[Process]: All associated processes
        """
        processes = [equipment.get_processes(self.submission_type) for equipment in self.equipment_role.instances]
-        # flatten list
+        # NOTE: flatten list
        processes = [item for items in processes for item in items if item is not None]
        match extraction_kit:
            case str():
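The comprehension above is the usual one-pass flatten-and-filter idiom; in isolation:

# NOTE: a list of per-equipment process lists becomes one flat list,
# with None entries dropped along the way.
nested = [["proc_a", "proc_b"], ["proc_c", None]]
flat = [item for items in nested for item in items if item is not None]
assert flat == ["proc_a", "proc_b", "proc_c"]
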
@@ -1523,6 +1524,7 @@ class Process(BaseClass):
        Lookup Processes

        Args:
            id (int | None, optional): Process id. Defaults to None.
            name (str | None, optional): Process name. Defaults to None.
+            limit (int, optional): Maximum number of results to return (0=all). Defaults to 0.

@@ -1666,7 +1668,6 @@ class SubmissionTipsAssociation(BaseClass):
                                back_populates="tips_submission_associations")  #: associated equipment
    role_name = Column(String(32), primary_key=True)  # , ForeignKey("_tiprole.name"))

-    # role = relationship(TipRole)

    def to_sub_dict(self) -> dict:
        """

@@ -27,7 +27,6 @@ from tools import row_map, setup_lookup, jinja_template_loading, rreplace, row_k
 from datetime import datetime, date
 from typing import List, Any, Tuple, Literal
 from dateutil.parser import parse
-# from dateutil.parser import ParserError
 from pathlib import Path
 from jinja2.exceptions import TemplateNotFound
 from jinja2 import Template
@@ -370,16 +369,16 @@ class BasicSubmission(BaseClass):
        """
        Calculates cost of the plate
        """
-        # Calculate number of columns based on largest column number
+        # NOTE: Calculate number of columns based on largest column number
        try:
            cols_count_96 = self.calculate_column_count()
        except Exception as e:
            logger.error(f"Column count error: {e}")
-        # Get kit associated with this submission
+        # NOTE: Get kit associated with this submission
        assoc = [item for item in self.extraction_kit.kit_submissiontype_associations if
                 item.submission_type == self.submission_type][0]
        # logger.debug(f"Came up with association: {assoc}")
-        # If every individual cost is 0 this is probably an old plate.
+        # NOTE: If every individual cost is 0 this is probably an old plate.
        if all(item == 0.0 for item in [assoc.constant_cost, assoc.mutable_cost_column, assoc.mutable_cost_sample]):
            try:
                self.run_cost = self.extraction_kit.cost_per_run
@@ -446,6 +445,7 @@ class BasicSubmission(BaseClass):
        Convert all submissions to dataframe

        Args:
            chronologic (bool, optional): Sort submissions in chronologic order. Defaults to True.
            submission_type (str | None, optional): Filter by SubmissionType. Defaults to None.
+            limit (int, optional): Maximum number of results to return. Defaults to 0.

@@ -1047,7 +1047,7 @@ class BasicSubmission(BaseClass):
            ValueError: Raised if disallowed key is passed.

        Returns:
-            cls: _description_
+            cls: A BasicSubmission subclass.
        """
        code = 0
        msg = ""
@@ -1056,10 +1056,6 @@ class BasicSubmission(BaseClass):
        if kwargs == {}:
            raise ValueError("Need to narrow down query or the first available instance will be returned.")
        sanitized_kwargs = {k: v for k, v in kwargs.items() if k not in disallowed}
-        # for key in kwargs.keys():
-        #     if key in disallowed:
-        #         raise ValueError(
-        #             f"{key} is not allowed as a query argument as it could lead to creation of duplicate objects. Use .query() instead.")
        instance = cls.query(submission_type=submission_type, limit=1, **sanitized_kwargs)
        # logger.debug(f"Retrieved instance: {instance}")
        if instance is None:
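The query-or-create path above now silently drops disallowed keys instead of raising on them; the filter in isolation (hypothetical key names, the real list is defined by the class):

disallowed = ["id"]  # hypothetical
kwargs = {"id": 5, "rsl_plate_num": "RSL-WW-20240101"}
sanitized_kwargs = {k: v for k, v in kwargs.items() if k not in disallowed}
assert sanitized_kwargs == {"rsl_plate_num": "RSL-WW-20240101"}
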
@@ -1102,7 +1098,7 @@ class BasicSubmission(BaseClass):
            obj (_type_, optional): Parent widget. Defaults to None.

        Raises:
-            e: _description_
+            e: SQLIntegrityError or SQLOperationalError if problem with commit.
        """
        from frontend.widgets.pop_ups import QuestionAsker
        # logger.debug("Hello from delete")
@@ -1123,7 +1119,7 @@ class BasicSubmission(BaseClass):
        Creates Widget for showing submission details.

        Args:
-            obj (_type_): Parent widget
+            obj (Widget): Parent widget
        """
        # logger.debug("Hello from details")
        from frontend.widgets.submission_details import SubmissionDetails
@@ -1139,9 +1135,9 @@ class BasicSubmission(BaseClass):
            obj (Widget): Parent widget
        """
        from frontend.widgets.submission_widget import SubmissionFormWidget
-        for widg in obj.app.table_widget.formwidget.findChildren(SubmissionFormWidget):
-            # logger.debug(widg)
-            widg.setParent(None)
+        for widget in obj.app.table_widget.formwidget.findChildren(SubmissionFormWidget):
+            # logger.debug(widget)
+            widget.setParent(None)
        pyd = self.to_pydantic(backup=True)
        form = pyd.to_form(parent=obj, disable=['rsl_plate_num'])
        obj.app.table_widget.formwidget.layout().addWidget(form)
@@ -1271,7 +1267,6 @@ class BacterialCulture(BasicSubmission):
            input_dict (dict): _description_
            xl (pd.ExcelFile | None, optional): _description_. Defaults to None.
            info_map (dict | None, optional): _description_. Defaults to None.
            plate_map (dict | None, optional): _description_. Defaults to None.

        Returns:
            dict: Updated dictionary.
@@ -1560,7 +1555,6 @@ class Wastewater(BasicSubmission):
        well_24 = []
        samples_copy = deepcopy(input_dict['samples'])
        for sample in sorted(samples_copy, key=itemgetter('column', 'row')):
-        # for sample in input_dict['samples']:
            try:
                row = sample['source_row']
            except KeyError:
@@ -1742,6 +1736,7 @@ class WastewaterArtic(BasicSubmission):
            processed = rreplace(processed, plate_num, "")
        except AttributeError:
            plate_num = "1"
+        # NOTE: plate_num not currently used, but will keep in case it is in the future
        plate_num = plate_num.strip("-")
        # logger.debug(f"Processed after plate-num: {processed}")
        day = re.search(r"\d{2}$", processed).group()
@@ -1827,7 +1822,6 @@ class WastewaterArtic(BasicSubmission):
        """
        input_dict = super().finalize_parse(input_dict, xl, info_map)
        # logger.debug(f"Incoming input_dict: {pformat(input_dict)}")
        # TODO: Move to validator?
        for sample in input_dict['samples']:
            # logger.debug(f"Sample: {sample}")
            if re.search(r"^NTC", sample['submitter_id']):
@@ -1978,7 +1972,7 @@ class WastewaterArtic(BasicSubmission):
            self.comment = [com]
        # logger.debug(pformat(self.gel_info))
        with ZipFile(self.__directory_path__.joinpath("submission_imgs.zip"), 'a') as zipf:
-            # Add a file located at the source_path to the destination within the zip
+            # NOTE: Add a file located at the source_path to the destination within the zip
            # file. It will overwrite existing files if the names collide, but it
            # will give a warning
            zipf.write(img_path, self.gel_image)
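The archive handling above (and its read-side counterpart in the next hunk) is plain standard-library zipfile; a self-contained sketch with hypothetical paths:

from pathlib import Path
from zipfile import ZipFile

img_path = Path("gel.jpg")                # hypothetical image to store
archive = Path("submission_imgs.zip")
img_path.write_bytes(b"fake image data")  # stand-in for a real gel image
with ZipFile(archive, "a") as zipf:       # NOTE: 'a' appends to the archive
    zipf.write(img_path, img_path.name)
with ZipFile(archive) as zipped:          # read it back, as custom_docx_writer does
    img = zipped.read(img_path.name)
assert img == b"fake image data"
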
@@ -1997,6 +1991,7 @@ class WastewaterArtic(BasicSubmission):
            dict: Dictionary with information added.
        """
        input_dict = super().custom_docx_writer(input_dict)
+        # NOTE: if there's a gel image, extract it.
        if check_key_or_attr(key='gel_image_path', interest=input_dict, check_none=True):
            with ZipFile(cls.__directory_path__.joinpath("submission_imgs.zip")) as zipped:
                img = zipped.read(input_dict['gel_image_path'])
@@ -2246,9 +2241,7 @@ class BasicSample(BaseClass):

    @classmethod
    def fuzzy_search(cls,
-                     # submitter_id: str | None = None,
                     sample_type: str | BasicSample | None = None,
-                     # limit: int = 0,
                     **kwargs
                     ) -> List[BasicSample]:
        """
@@ -2315,8 +2308,8 @@ class BasicSample(BaseClass):
                     'equipment', 'gel_info', 'gel_image', 'dna_core_submission_number', 'gel_controls']:
            try:
                df = df.drop(item, axis=1)
-            except:
-                logger.warning(f"Couldn't drop '{item}' column from submissionsheet df.")
+            except KeyError as e:
+                logger.warning(f"Couldn't drop '{item}' column from submissionsheet df due to {e}.")
        return df

    def show_details(self, obj):
@@ -2408,7 +2401,7 @@ class WastewaterSample(BasicSample):
        # logger.debug(f"Initial sample dict: {pformat(output_dict)}")
        disallowed = ["", None, "None"]
        try:
-            check = output_dict['rsl_number'] in [None, "None"]
+            check = output_dict['rsl_number'] in disallowed
        except KeyError:
            check = True
        if check:
@@ -2451,7 +2444,6 @@ class BacterialCultureSample(BasicSample):
        Returns:
            dict: well location and name (sample id, organism) NOTE: keys must sync with WWSample to_sub_dict above
        """
-        # start = time()
        sample = super().to_sub_dict(full_data=full_data)
        sample['name'] = self.submitter_id
        sample['organism'] = self.organism
@@ -2553,7 +2545,7 @@ class SubmissionSampleAssociation(BaseClass):
        Returns:
            dict: dictionary of sample id, row and column in elution plate
        """
-        # Since there is no PCR, negligible result is necessary.
+        # NOTE: Since there is no PCR, negligible result is necessary.
        sample = self.to_sub_dict()
        # logger.debug(f"Sample dict to hitpick: {sample}")
        env = jinja_template_loading()
@@ -2728,7 +2720,6 @@ class SubmissionSampleAssociation(BaseClass):
        except StatementError:
            instance = None
        if instance is None:
-            # sanitized_kwargs = {k:v for k,v in kwargs.items() if k not in ['id']}
            used_cls = cls.find_polymorphic_subclass(polymorphic_identity=association_type)
            instance = used_cls(submission=submission, sample=sample, id=id, **kwargs)
        return instance

@@ -6,7 +6,6 @@ from copy import copy
 from getpass import getuser
 from pprint import pformat
 from typing import List
 import pandas as pd
 from openpyxl import load_workbook, Workbook
 from pathlib import Path
 from backend.db.models import *
@@ -17,7 +16,6 @@ from datetime import date
 from dateutil.parser import parse, ParserError
 from tools import check_not_nan, convert_nans_to_nones, is_missing, remove_key_from_list_of_dicts, check_key_or_attr


 logger = logging.getLogger(f"submissions.{__name__}")

@@ -70,8 +68,8 @@ class SheetParser(object):
        self.info_map = parser.map
        for k, v in info.items():
            match k:
+                # NOTE: exclude samples.
                case "sample":
-                    # case item if
                    pass
                case _:
                    self.sub[k] = v
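A compact, runnable restatement of the routing above (hypothetical parsed info):

info = {"sample": ["s1", "s2"], "submitted_date": "2024-01-01"}  # hypothetical
sub = {}
for k, v in info.items():
    match k:
        case "sample":  # NOTE: samples are parsed separately, so skip them here
            pass
        case _:
            sub[k] = v
assert sub == {"submitted_date": "2024-01-01"}
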
@@ -150,7 +148,7 @@ class SheetParser(object):
        except TypeError:
            check = False
        if check:
            pyd_dict['equipment'] = [PydEquipment(**equipment) for equipment in self.sub['equipment']]
        else:
            pyd_dict['equipment'] = None
        try:
@@ -158,7 +156,7 @@ class SheetParser(object):
        except TypeError:
            check = False
        if check:
            pyd_dict['tips'] = [PydTips(**tips) for tips in self.sub['tips']]
        else:
            pyd_dict['tips'] = None
        psm = PydSubmission(filepath=self.filepath, **pyd_dict)
@@ -169,13 +167,14 @@ class InfoParser(object):
    """
    Object to parse generic info from excel sheet.
    """
-    def __init__(self, xl: Workbook, submission_type: str|SubmissionType, sub_object: BasicSubmission|None=None):
+
+    def __init__(self, xl: Workbook, submission_type: str | SubmissionType, sub_object: BasicSubmission | None = None):
        """
        Args:
            xl (Workbook): Openpyxl workbook from submitted excel file.
            submission_type (str | SubmissionType): Type of submission expected (Wastewater, Bacterial Culture, etc.)
            sub_object (BasicSubmission | None, optional): Submission object holding methods. Defaults to None.
        """
        logger.info(f"\n\nHello from InfoParser!\n\n")
        if isinstance(submission_type, str):
            submission_type = SubmissionType.query(name=submission_type)
@@ -191,9 +190,6 @@ class InfoParser(object):
        """
        Gets location of basic info from the submission_type object in the database.

        Args:
            submission_type (str|dict): name of the submission type or parsed object with value=submission_type

        Returns:
            dict: Location map of all info for this submission type
        """
@@ -266,23 +262,24 @@ class InfoParser(object):
                    dicto[item['name']] = dict(value=value, missing=missing)
                except (KeyError, IndexError):
                    continue
-        # Return after running the parser components held in submission object.
+        # NOTE: Return after running the parser components held in submission object.
        return self.sub_object.custom_info_parser(input_dict=dicto, xl=self.xl, custom_fields=self.map['custom'])


class ReagentParser(object):
    """
    Object to pull reagents from excel sheet.
    """

-    def __init__(self, xl: Workbook, submission_type: str, extraction_kit: str, sub_object:BasicSubmission|None=None):
+    def __init__(self, xl: Workbook, submission_type: str, extraction_kit: str,
+                 sub_object: BasicSubmission | None = None):
        """
        Args:
            xl (Workbook): Openpyxl workbook from submitted excel file.
            submission_type (str): Type of submission expected (Wastewater, Bacterial Culture, etc.)
            extraction_kit (str): Extraction kit used.
            sub_object (BasicSubmission | None, optional): Submission object holding methods. Defaults to None.
        """
        # logger.debug("\n\nHello from ReagentParser!\n\n")
        self.submission_type_obj = submission_type
        self.sub_object = sub_object
@@ -359,7 +356,7 @@ class ReagentParser(object):
            check = True
        if check:
            listo.append(dict(role=item.strip(), lot=lot, expiry=expiry, name=name, comment=comment,
                              missing=missing))
        return listo

@@ -368,14 +365,15 @@ class SampleParser(object):
    Object to pull data for samples in excel sheet and construct individual sample objects
    """

-    def __init__(self, xl: Workbook, submission_type: SubmissionType, sample_map: dict | None = None, sub_object:BasicSubmission|None=None) -> None:
+    def __init__(self, xl: Workbook, submission_type: SubmissionType, sample_map: dict | None = None,
+                 sub_object: BasicSubmission | None = None) -> None:
        """
        Args:
            xl (Workbook): Openpyxl workbook from submitted excel file.
            submission_type (SubmissionType): Type of submission expected (Wastewater, Bacterial Culture, etc.)
            sample_map (dict | None, optional): Locations in database where samples are found. Defaults to None.
            sub_object (BasicSubmission | None, optional): Submission object holding methods. Defaults to None.
        """
        # logger.debug("\n\nHello from SampleParser!\n\n")
        self.samples = []
        self.xl = xl
@@ -418,7 +416,7 @@ class SampleParser(object):

        Returns:
            List[dict]: List of sample ids and locations.
        """
        invalids = [0, "0", "EMPTY"]
        smap = self.sample_info_map['plate_map']
        ws = self.xl[smap['sheet']]
@@ -447,13 +445,13 @@ class SampleParser(object):

        Returns:
            List[dict]: List of basic sample info.
        """

        lmap = self.sample_info_map['lookup_table']
        ws = self.xl[lmap['sheet']]
        lookup_samples = []
-        for ii, row in enumerate(range(lmap['start_row'], lmap['end_row']+1), start=1):
-            row_dict = {k:ws.cell(row=row, column=v).value for k, v in lmap['sample_columns'].items()}
+        for ii, row in enumerate(range(lmap['start_row'], lmap['end_row'] + 1), start=1):
+            row_dict = {k: ws.cell(row=row, column=v).value for k, v in lmap['sample_columns'].items()}
            try:
                row_dict[lmap['merge_on_id']] = str(row_dict[lmap['merge_on_id']])
            except KeyError:
@@ -502,7 +500,7 @@ class SampleParser(object):

        Returns:
            List[dict]: Reconciled samples
        """
        # TODO: Move to pydantic validator?
        if self.plate_map_samples is None or self.lookup_samples is None:
            self.samples = self.lookup_samples or self.plate_map_samples
@@ -533,10 +531,6 @@ class SampleParser(object):
                    break
            else:
                new = psample
-            # try:
-            #     check = new['submitter_id'] is None
-            # except KeyError:
-            #     check = True
            if not check_key_or_attr(key='submitter_id', interest=new, check_none=True):
                new['submitter_id'] = psample['id']
            new = self.sub_object.parse_samples(new)
@@ -549,12 +543,13 @@ class EquipmentParser(object):
    """
    Object to pull data for equipment in excel sheet
    """
-    def __init__(self, xl: Workbook, submission_type: str|SubmissionType) -> None:
+
+    def __init__(self, xl: Workbook, submission_type: str | SubmissionType) -> None:
        """
        Args:
            xl (Workbook): Openpyxl workbook from submitted excel file.
            submission_type (str | SubmissionType): Type of submission expected (Wastewater, Bacterial Culture, etc.)
        """
        if isinstance(submission_type, str):
            submission_type = SubmissionType.query(name=submission_type)
        self.submission_type = submission_type
@@ -601,7 +596,7 @@ class EquipmentParser(object):
        for sheet in self.xl.sheetnames:
            ws = self.xl[sheet]
            try:
-                relevant = {k:v for k,v in self.map.items() if v['sheet'] == sheet}
+                relevant = {k: v for k, v in self.map.items() if v['sheet'] == sheet}
            except (TypeError, KeyError) as e:
                logger.error(f"Error creating relevant equipment list: {e}")
                continue
@@ -623,7 +618,7 @@ class EquipmentParser(object):
                try:
                    output.append(
                        dict(name=eq.name, processes=[process], role=k, asset_number=eq.asset_number,
                             nickname=eq.nickname))
                except AttributeError:
                    logger.error(f"Unable to add {eq} to list.")
        logger.debug(f"Here is the output so far: {pformat(output)}")
@@ -634,12 +629,13 @@ class TipParser(object):
    """
    Object to pull data for tips in excel sheet
    """
-    def __init__(self, xl: Workbook, submission_type: str|SubmissionType) -> None:
+
+    def __init__(self, xl: Workbook, submission_type: str | SubmissionType) -> None:
        """
        Args:
            xl (Workbook): Openpyxl workbook from submitted excel file.
            submission_type (str | SubmissionType): Type of submission expected (Wastewater, Bacterial Culture, etc.)
        """
        if isinstance(submission_type, str):
            submission_type = SubmissionType.query(name=submission_type)
        self.submission_type = submission_type
@@ -698,7 +694,7 @@ class TipParser(object):
class PCRParser(object):
    """Object to pull data from Design and Analysis PCR export file."""

-    def __init__(self, filepath: Path | None=None, submission: BasicSubmission | None=None) -> None:
+    def __init__(self, filepath: Path | None = None, submission: BasicSubmission | None = None) -> None:
        """
        Args:
            filepath (Path | None, optional): file to parse. Defaults to None.

@@ -7,7 +7,7 @@ from pathlib import Path
 from datetime import date, timedelta
 from typing import List, Tuple, Any
 from backend.db.models import BasicSubmission
-from tools import jinja_template_loading, Settings, get_unique_values_in_df_column, html_to_pdf, get_first_blank_df_row, \
+from tools import jinja_template_loading, html_to_pdf, get_first_blank_df_row, \
    row_map
 from PyQt6.QtWidgets import QWidget
 from openpyxl.worksheet.worksheet import Worksheet
@@ -71,20 +71,20 @@ class ReportMaker(object):
            # logger.debug(f"Name: {row[0][1]}")
            data = [item for item in row[1]]
            kit = dict(name=row[0][1], cost=data[1], run_count=int(data[0]), sample_count=int(data[2]))
-            # if this is the same lab as before add together
+            # NOTE: if this is the same lab as before add together
            if lab == old_lab:
                output[-1]['kits'].append(kit)
                output[-1]['total_cost'] += kit['cost']
                output[-1]['total_samples'] += kit['sample_count']
                output[-1]['total_runs'] += kit['run_count']
-            # if not the same lab, make a new one
+            # NOTE: if not the same lab, make a new one
            else:
                adder = dict(lab=lab, kits=[kit], total_cost=kit['cost'], total_samples=kit['sample_count'],
                             total_runs=kit['run_count'])
                output.append(adder)
            old_lab = lab
        # logger.debug(output)
-        dicto = {'start_date': start_date, 'end_date': end_date, 'labs': output}  # , "table":table}
+        dicto = {'start_date': start_date, 'end_date': end_date, 'labs': output}
        temp = env.get_template('summary_report.html')
        html = temp.render(input=dicto)
        return html
@@ -120,11 +120,11 @@ class ReportMaker(object):
        """
        # logger.debug(f"Updating worksheet")
        worksheet: Worksheet = self.writer.sheets['Report']
-        for idx, col in enumerate(self.summary_df, start=1):  # loop through all columns
+        for idx, col in enumerate(self.summary_df, start=1):  # NOTE: loop through all columns
            series = self.summary_df[col]
            max_len = max((
-                series.astype(str).map(len).max(),  # len of largest item
-                len(str(series.name))  # len of column name/header
+                series.astype(str).map(len).max(),  # NOTE: len of largest item
+                len(str(series.name))  # NOTE: len of column name/header
            )) + 20  # NOTE: adding a little extra space
            try:
                # NOTE: Convert idx to letter
@@ -142,224 +142,3 @@ class ReportMaker(object):
            cell.style = 'Currency'


-def make_report_xlsx(records: list[dict]) -> Tuple[DataFrame, DataFrame]:
-    """
-    create the dataframe for a report
-
-    Args:
-        records (list[dict]): list of dictionaries created from submissions
-
-    Returns:
-        Tuple[DataFrame, DataFrame]: detailed dataframe and grouped summary dataframe
-    """
-    df = DataFrame.from_records(records)
-    # NOTE: put submissions with the same lab together
-    df = df.sort_values("submitting_lab")
-    # NOTE: aggregate cost and sample count columns
-    df2 = df.groupby(["submitting_lab", "extraction_kit"]).agg(
-        {'extraction_kit': 'count', 'cost': 'sum', 'sample_count': 'sum'})
-    df2 = df2.rename(columns={"extraction_kit": 'run_count'})
-    # logger.debug(f"Output dataframe for xlsx: {df2.columns}")
-    df = df.drop('id', axis=1)
-    df = df.sort_values(['submitting_lab', "submitted_date"])
-    return df, df2

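This hunk deletes the module-level report helpers wholesale (224 lines down to 3). For reference, the groupby/agg step in make_report_xlsx yields one summary row per (lab, kit); a sketch with hypothetical records:

from pandas import DataFrame

records = [  # hypothetical submission records
    {"id": 1, "submitting_lab": "LabA", "extraction_kit": "KitX",
     "cost": 100.0, "sample_count": 24, "submitted_date": "2024-01-02"},
    {"id": 2, "submitting_lab": "LabA", "extraction_kit": "KitX",
     "cost": 110.0, "sample_count": 48, "submitted_date": "2024-01-09"},
]
df = DataFrame.from_records(records).sort_values("submitting_lab")
df2 = df.groupby(["submitting_lab", "extraction_kit"]).agg(
    {"extraction_kit": "count", "cost": "sum", "sample_count": "sum"})
df2 = df2.rename(columns={"extraction_kit": "run_count"})
# df2: one row indexed (LabA, KitX) with run_count=2, cost=210.0, sample_count=72
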
-def make_report_html(df: DataFrame, start_date: date, end_date: date) -> str:
-    """
-    generates html from the report dataframe
-
-    Args:
-        df (DataFrame): input dataframe generated from 'make_report_xlsx' above
-        start_date (date): starting date of the report period
-        end_date (date): ending date of the report period
-
-    Returns:
-        str: html string
-    """
-    old_lab = ""
-    output = []
-    # logger.debug(f"Report DataFrame: {df}")
-    for ii, row in enumerate(df.iterrows()):
-        # logger.debug(f"Row {ii}: {row}")
-        lab = row[0][0]
-        # logger.debug(type(row))
-        # logger.debug(f"Old lab: {old_lab}, Current lab: {lab}")
-        # logger.debug(f"Name: {row[0][1]}")
-        data = [item for item in row[1]]
-        kit = dict(name=row[0][1], cost=data[1], run_count=int(data[0]), sample_count=int(data[2]))
-        # if this is the same lab as before add together
-        if lab == old_lab:
-            output[-1]['kits'].append(kit)
-            output[-1]['total_cost'] += kit['cost']
-            output[-1]['total_samples'] += kit['sample_count']
-            output[-1]['total_runs'] += kit['run_count']
-        # if not the same lab, make a new one
-        else:
-            adder = dict(lab=lab, kits=[kit], total_cost=kit['cost'], total_samples=kit['sample_count'],
-                         total_runs=kit['run_count'])
-            output.append(adder)
-        old_lab = lab
-    # logger.debug(output)
-    dicto = {'start_date': start_date, 'end_date': end_date, 'labs': output}  # , "table":table}
-    temp = env.get_template('summary_report.html')
-    html = temp.render(input=dicto)
-    return html

-# TODO: move this into a classmethod of Controls?
-def convert_data_list_to_df(input: list[dict], subtype: str | None = None) -> DataFrame:
-    """
-    Convert list of control records to dataframe
-
-    Args:
-        input (list[dict]): list of dictionaries containing records
-        subtype (str | None, optional): name of submission type. Defaults to None.
-
-    Returns:
-        DataFrame: dataframe of controls
-    """
-    df = DataFrame.from_records(input)
-    safe = ['name', 'submitted_date', 'genus', 'target']
-    for column in df.columns:
-        if "percent" in column:
-            count_col = [item for item in df.columns if "count" in item][0]
-            # NOTE: The actual percentage from kraken was off due to exclusion of NaN, recalculating.
-            df[column] = 100 * df[count_col] / df.groupby('name')[count_col].transform('sum')
-        if column not in safe:
-            if subtype is not None and column != subtype:
-                del df[column]
-    # NOTE: move date of sample submitted on same date as previous ahead one.
-    df = displace_date(df)
-    # NOTE: ad hoc method to make data labels more accurate.
-    df = df_column_renamer(df=df)
-    return df

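The percent recalculation above leans on groupby().transform('sum'), which broadcasts each group's total back onto its rows; in isolation:

from pandas import DataFrame

df = DataFrame({"name": ["c1", "c1", "c2"], "kraken_count": [30, 70, 50]})
totals = df.groupby("name")["kraken_count"].transform("sum")  # 100, 100, 50
df["kraken_percent"] = 100 * df["kraken_count"] / totals      # 30.0, 70.0, 100.0
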
-def df_column_renamer(df: DataFrame) -> DataFrame:
-    """
-    Ad hoc function I created to clarify some fields
-
-    Args:
-        df (DataFrame): input dataframe
-
-    Returns:
-        DataFrame: dataframe with 'clarified' column names
-    """
-    df = df[df.columns.drop(list(df.filter(regex='_hashes')))]
-    return df.rename(columns={
-        "contains_ratio": "contains_shared_hashes_ratio",
-        "matches_ratio": "matches_shared_hashes_ratio",
-        "kraken_count": "kraken2_read_count_(top_50)",
-        "kraken_percent": "kraken2_read_percent_(top_50)"
-    })

-def displace_date(df: DataFrame) -> DataFrame:
-    """
-    This function serves to split samples that were submitted on the same date by incrementing dates.
-    It will shift the date forward by one day if it is the same day as an existing date in a list.
-
-    Args:
-        df (DataFrame): input dataframe composed of control records
-
-    Returns:
-        DataFrame: output dataframe with dates incremented.
-    """
-    # logger.debug(f"Unique items: {df['name'].unique()}")
-    # NOTE: get submitted dates for each control
-    dict_list = [dict(name=item, date=df[df.name == item].iloc[0]['submitted_date']) for item in
-                 sorted(df['name'].unique())]
-    previous_dates = []
-    for _, item in enumerate(dict_list):
-        df, previous_dates = check_date(df=df, item=item, previous_dates=previous_dates)
-    return df

-def check_date(df: DataFrame, item: dict, previous_dates: list) -> Tuple[DataFrame, list]:
-    """
-    Checks if an item's date is already present in df and adjusts df accordingly
-
-    Args:
-        df (DataFrame): input dataframe
-        item (dict): control for checking
-        previous_dates (list): list of dates found in previous controls
-
-    Returns:
-        Tuple[DataFrame, list]: Output dataframe and appended list of previous dates
-    """
-    try:
-        check = item['date'] in previous_dates
-    except IndexError:
-        check = False
-    previous_dates.append(item['date'])
-    if check:
-        # logger.debug(f"We found one! Increment date!\n\t{item['date']} to {item['date'] + timedelta(days=1)}")
-        # NOTE: get df locations where name == item name
-        mask = df['name'] == item['name']
-        # NOTE: increment date in dataframe
-        df.loc[mask, 'submitted_date'] = df.loc[mask, 'submitted_date'].apply(lambda x: x + timedelta(days=1))
-        item['date'] += timedelta(days=1)
-        passed = False
-    else:
-        passed = True
-    # logger.debug(f"\n\tCurrent date: {item['date']}\n\tPrevious dates:{previous_dates}")
-    # logger.debug(f"DF: {type(df)}, previous_dates: {type(previous_dates)}")
-    # if run didn't lead to changed date, return values
-    if passed:
-        # logger.debug(f"Date check passed, returning.")
-        return df, previous_dates
-    # NOTE: if date was changed, rerun with new date
-    else:
-        logger.warning(f"Date check failed, running recursion")
-        df, previous_dates = check_date(df, item, previous_dates)
-        return df, previous_dates

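A hypothetical walkthrough of displace_date/check_date: two controls share a date, so the later one is pushed forward until it no longer collides.

from datetime import date
from pandas import DataFrame

df = DataFrame([  # hypothetical controls submitted on the same day
    {"name": "EN-001", "submitted_date": date(2024, 1, 1)},
    {"name": "EN-002", "submitted_date": date(2024, 1, 1)},
])
df = displace_date(df)
# EN-001 keeps 2024-01-01; EN-002's date collides, so check_date recurses
# and lands it on 2024-01-02.
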
-# def get_unique_values_in_df_column(df: DataFrame, column_name: str) -> list:
-#     """
-#     get all unique values in a dataframe column by name
-#
-#     Args:
-#         df (DataFrame): input dataframe
-#         column_name (str): name of column of interest
-#
-#     Returns:
-#         list: sorted list of unique values
-#     """
-#     return sorted(df[column_name].unique())

-def drop_reruns_from_df(ctx: Settings, df: DataFrame) -> DataFrame:
-    """
-    Removes semi-duplicates from dataframe after finding sequencing repeats.
-
-    Args:
-        ctx (Settings): settings passed from gui
-        df (DataFrame): initial dataframe
-
-    Returns:
-        DataFrame: dataframe with originals removed in favour of repeats.
-    """
-    if 'rerun_regex' in ctx:
-        sample_names = get_unique_values_in_df_column(df, column_name="name")
-        rerun_regex = re.compile(fr"{ctx.rerun_regex}")
-        for sample in sample_names:
-            if rerun_regex.search(sample):
-                first_run = re.sub(rerun_regex, "", sample)
-                df = df.drop(df[df.name == first_run].index)
-    return df

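The rerun-collapsing logic in isolation, assuming a hypothetical rerun suffix pattern in settings:

import re
from pandas import DataFrame

df = DataFrame({"name": ["EN-001", "EN-001-R2", "EN-002"]})
rerun_regex = re.compile(r"-R\d$")  # hypothetical value of ctx.rerun_regex
for sample in df["name"].unique().tolist():
    if rerun_regex.search(sample):
        first_run = re.sub(rerun_regex, "", sample)   # name of the original run
        df = df.drop(df[df.name == first_run].index)  # keep only the rerun
# df now holds EN-001-R2 and EN-002; the superseded EN-001 is gone
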
-# def make_hitpicks(input:List[dict]) -> DataFrame:
-#     """
-#     Converts list of dictionaries constructed by hitpicking to dataframe
-#
-#     Args:
-#         input (List[dict]): list of hitpicked dictionaries
-#
-#     Returns:
-#         DataFrame: constructed dataframe.
-#     """
-#     return DataFrame.from_records(input)