Major refactor for improved documentation and readability

2023-03-17 15:23:22 -05:00
parent c645d3a9cf
commit 9c9c373830
29 changed files with 1828 additions and 1236 deletions
--- a/src/submissions/backend/init.py
+++ b/src/submissions/backend/init.py
@@ -0,0 +1,3 @@
+'''
+Contains database and excel operations.
+'''
--- a/src/submissions/backend/db/init.py
+++ b/src/submissions/backend/db/init.py
--- a/src/submissions/backend/db/functions.py
+++ b/src/submissions/backend/db/functions.py
@@ -0,0 +1,669 @@
+'''
+Convenience functions for interacting with the database.
+'''
+
+from . import models
+from .models.kits import reagenttypes_kittypes
+from .models.submissions import reagents_submissions
+import pandas as pd
+import sqlalchemy.exc
+import sqlite3
+import logging
+from datetime import date, datetime, timedelta
+from sqlalchemy import and_
+import uuid
+from sqlalchemy import JSON, event
+from sqlalchemy.engine import Engine
+import json
+from getpass import getuser
+import numpy as np
+
+import yaml
+from pathlib import Path
+
+logger = logging.getLogger(f"submissions.{__name__}")
+
+# The below _should_ allow automatic creation of foreign keys in the database
+@event.listens_for(Engine, "connect")
+def set_sqlite_pragma(dbapi_connection, connection_record):
+    cursor = dbapi_connection.cursor()
+    cursor.execute("PRAGMA foreign_keys=ON")
+    cursor.close()
+
+def store_submission(ctx:dict, base_submission:models.BasicSubmission) -> None|dict:
+    """
+    Upserts submissions into database
+
+    Args:
+        ctx (dict): settings passed down from gui
+        base_submission (models.BasicSubmission): submission to be add to db
+
+    Returns:
+        None|dict : object that indicates issue raised for reporting in gui
+    """    
+    logger.debug(f"Hello from store_submission")
+    # Add all samples to sample table
+    for sample in base_submission.samples:
+        sample.rsl_plate = base_submission
+        logger.debug(f"Attempting to add sample: {sample.to_string()}")
+        try:
+            ctx['database_session'].add(sample)
+        except (sqlite3.IntegrityError, sqlalchemy.exc.IntegrityError) as e:
+            logger.debug(f"Hit an integrity error : {e}")
+            continue
+    # Add submission to submission table
+    ctx['database_session'].add(base_submission)
+    logger.debug(f"Attempting to add submission: {base_submission.rsl_plate_num}")
+    try:
+        ctx['database_session'].commit()
+    except (sqlite3.IntegrityError, sqlalchemy.exc.IntegrityError) as e:
+        logger.debug(f"Hit an integrity error : {e}")
+        ctx['database_session'].rollback()
+        return {"message":"This plate number already exists, so we can't add it."}
+    except (sqlite3.OperationalError, sqlalchemy.exc.IntegrityError) as e:
+        logger.debug(f"Hit an operational error: {e}")
+        ctx['database_session'].rollback()
+        return {"message":"The database is locked for editing."}
+    return None
+
+
+def store_reagent(ctx:dict, reagent:models.Reagent) -> None|dict:
+    """
+    Inserts a reagent into the database.
+
+    Args:
+        ctx (dict): settings passed down from gui
+        reagent (models.Reagent): Reagent object to be added to db
+
+    Returns:
+        None|dict: object indicating issue to be reported in the gui
+    """    
+    logger.debug(f"Reagent dictionary: {reagent.__dict__}")
+    ctx['database_session'].add(reagent)
+    try:
+        ctx['database_session'].commit()
+    except (sqlite3.OperationalError, sqlalchemy.exc.OperationalError):
+        return {"message":"The database is locked for editing."}
+    return None
+
+
+def construct_submission_info(ctx:dict, info_dict:dict) -> models.BasicSubmission:
+    """
+    Construct submission object from dictionary
+
+    Args:
+        ctx (dict): settings passed down from gui
+        info_dict (dict): dictionary to be transformed
+
+    Returns:
+        models.BasicSubmission: Constructed submission object
+    """
+    from tools import check_not_nan
+    # convert submission type into model name
+    query = info_dict['submission_type'].replace(" ", "")
+    # Ensure an rsl plate number exists for the plate
+    if info_dict["rsl_plate_num"] == 'nan' or info_dict["rsl_plate_num"] == None or not check_not_nan(info_dict["rsl_plate_num"]):
+        instance = None
+        msg = "A proper RSL plate number is required."
+        return instance, {'code': 2, 'message': "A proper RSL plate number is required."}
+    # check database for existing object
+    instance = ctx['database_session'].query(models.BasicSubmission).filter(models.BasicSubmission.rsl_plate_num==info_dict['rsl_plate_num']).first()
+    # get model based on submission type converted above
+    logger.debug(f"Looking at models for submission type: {query}")
+    model = getattr(models, query)
+    logger.debug(f"We've got the model: {type(model)}")
+    info_dict['submission_type'] = info_dict['submission_type'].replace(" ", "_").lower()
+    # if query return nothing, ie doesn't already exist in db
+    if instance == None:
+        instance = model()
+        logger.debug(f"Submission doesn't exist yet, creating new instance: {instance}")
+        msg = None
+        code = 0
+    else:
+        code = 1
+        msg = "This submission already exists.\nWould you like to overwrite?"
+    for item in info_dict:
+        logger.debug(f"Setting {item} to {info_dict[item]}")
+        # set fields based on keys in dictionary
+        match item:
+            case "extraction_kit":
+                q_str = info_dict[item]
+                logger.debug(f"Looking up kit {q_str}")
+                try:
+                    field_value = lookup_kittype_by_name(ctx=ctx, name=q_str)
+                except (sqlite3.IntegrityError, sqlalchemy.exc.IntegrityError) as e:
+                    logger.error(f"Hit an integrity error: {e}")
+                logger.debug(f"Got {field_value} for kit {q_str}")
+            case "submitting_lab":
+                q_str = info_dict[item].replace(" ", "_").lower()
+                logger.debug(f"Looking up organization: {q_str}")
+                field_value = lookup_org_by_name(ctx=ctx, name=q_str)
+                logger.debug(f"Got {field_value} for organization {q_str}")
+            case "submitter_plate_num":
+                # Because of unique constraint, there will be problems with 
+                # multiple submissions named 'None', so...
+                logger.debug(f"Submitter plate id: {info_dict[item]}")
+                if info_dict[item] == None or info_dict[item] == "None":
+                    logger.debug(f"Got None as a submitter plate number, inserting random string to preserve database unique constraint.")
+                    info_dict[item] = uuid.uuid4().hex.upper()
+                field_value = info_dict[item]
+            case _:
+                field_value = info_dict[item]
+        # insert into field
+        try:
+            setattr(instance, item, field_value)
+        except AttributeError:
+            logger.debug(f"Could not set attribute: {item} to {info_dict[item]}")
+            continue
+    # calculate cost of the run: immutable cost + mutable times number of columns
+    # This is now attached to submission upon creation to preserve at-run costs incase of cost increase in the future.
+    try:
+        instance.run_cost = instance.extraction_kit.immutable_cost + (instance.extraction_kit.mutable_cost * ((instance.sample_count / 8)/12))
+    except (TypeError, AttributeError):
+        logger.debug(f"Looks like that kit doesn't have cost breakdown yet, using full plate cost.")
+        instance.run_cost = instance.extraction_kit.cost_per_run
+    # We need to make sure there's a proper rsl plate number
+    try:
+        logger.debug(f"Constructed instance: {instance.to_string()}")
+    except AttributeError as e:
+        logger.debug(f"Something went wrong constructing instance {info_dict['rsl_plate_num']}: {e}")
+    logger.debug(f"Constructed submissions message: {msg}")
+    return instance, {'code':code, 'message':msg}
+    
+
+def construct_reagent(ctx:dict, info_dict:dict) -> models.Reagent:
+    """
+    Construct reagent object from dictionary
+
+    Args:
+        ctx (dict): settings passed down from gui
+        info_dict (dict): dictionary to be converted
+
+    Returns:
+        models.Reagent: Constructed reagent object
+    """    
+    reagent = models.Reagent()
+    for item in info_dict:
+        logger.debug(f"Reagent info item: {item}")
+        # set fields based on keys in dictionary
+        match item:
+            case "lot":
+                reagent.lot = info_dict[item].upper()
+            case "expiry":
+                reagent.expiry = info_dict[item]
+            case "type":
+                reagent.type = lookup_reagenttype_by_name(ctx=ctx, rt_name=info_dict[item].replace(" ", "_").lower())
+    # add end-of-life extension from reagent type to expiry date
+    # NOTE: this will now be done only in the reporting phase to account for potential changes in end-of-life extensions
+    # try:
+    #     reagent.expiry = reagent.expiry + reagent.type.eol_ext
+    # except TypeError as e:
+    #     logger.debug(f"We got a type error: {e}.")
+    # except AttributeError:
+    #     pass
+    return reagent
+
+
+def lookup_reagent(ctx:dict, reagent_lot:str) -> models.Reagent:
+    """
+    Query db for reagent based on lot number
+
+    Args:
+        ctx (dict): settings passed down from gui
+        reagent_lot (str): lot number to query
+
+    Returns:
+        models.Reagent: looked up reagent
+    """    
+    lookedup = ctx['database_session'].query(models.Reagent).filter(models.Reagent.lot==reagent_lot).first()
+    return lookedup
+
+
+def get_all_reagenttype_names(ctx:dict) -> list[str]:
+    """
+    Lookup all reagent types and get names
+
+    Args:
+        ctx (dict): settings passed from gui
+
+    Returns:
+        list[str]: reagent type names
+    """    
+    lookedup = [item.__str__() for item in ctx['database_session'].query(models.ReagentType).all()]
+    return lookedup
+
+
+def lookup_reagenttype_by_name(ctx:dict, rt_name:str) -> models.ReagentType:
+    """
+    Lookup a single reagent type by name
+
+    Args:
+        ctx (dict): settings passed from gui
+        rt_name (str): reagent type name to look up
+
+    Returns:
+        models.ReagentType: looked up reagent type
+    """    
+    logger.debug(f"Looking up ReagentType by name: {rt_name}")
+    lookedup = ctx['database_session'].query(models.ReagentType).filter(models.ReagentType.name==rt_name).first()
+    logger.debug(f"Found ReagentType: {lookedup}")
+    return lookedup
+
+
+def lookup_kittype_by_use(ctx:dict, used_by:str) -> list[models.KitType]:
+    """
+    Lookup kits by a sample type its used for
+
+    Args:
+        ctx (dict): settings passed from gui
+        used_by (str): sample type (should be string in D3 of excel sheet)
+
+    Returns:
+        list[models.KitType]: list of kittypes that have that sample type in their uses
+    """    
+    return ctx['database_session'].query(models.KitType).filter(models.KitType.used_for.contains(used_by)).all()
+
+
+def lookup_kittype_by_name(ctx:dict, name:str) -> models.KitType:
+    """
+    Lookup a kit type by name
+
+    Args:
+        ctx (dict): settings passed from bui
+        name (str): name of kit to query
+
+    Returns:
+        models.KitType: retrieved kittype
+    """    
+    logger.debug(f"Querying kittype: {name}")
+    return ctx['database_session'].query(models.KitType).filter(models.KitType.name==name).first()
+    
+
+def lookup_regent_by_type_name(ctx:dict, type_name:str) -> list[models.Reagent]:
+    """
+    Lookup reagents by their type name
+
+    Args:
+        ctx (dict): settings passed from gui
+        type_name (str): reagent type name
+
+    Returns:
+        list[models.Reagent]: list of retrieved reagents
+    """    
+    return ctx['database_session'].query(models.Reagent).join(models.Reagent.type, aliased=True).filter(models.ReagentType.name==type_name).all()
+
+
+def lookup_regent_by_type_name_and_kit_name(ctx:dict, type_name:str, kit_name:str) -> list[models.Reagent]:
+    """
+    Lookup reagents by their type name and kits they belong to (Broken... maybe cursed, I'm not sure.)
+
+    Args:
+        ctx (dict): settings pass by gui
+        type_name (str): reagent type name
+        kit_name (str): kit name
+
+    Returns:
+        list[models.Reagent]: list of retrieved reagents
+    """    
+    # What I want to do is get the reagent type by name 
+    # Hang on, this is going to be a long one.
+    # by_type = ctx['database_session'].query(models.Reagent).join(models.Reagent.type, aliased=True).filter(models.ReagentType.name.endswith(type_name)).all()
+    rt_types = ctx['database_session'].query(models.ReagentType).filter(models.ReagentType.name.endswith(type_name))
+    # add filter for kit name... 
+    try:
+        check = not np.isnan(kit_name)
+    except TypeError:
+        check = True
+    if check:
+        kit_type = lookup_kittype_by_name(ctx=ctx, name=kit_name)
+        logger.debug(f"reagenttypes: {[item.name for item in rt_types.all()]}, kit: {kit_type.name}")
+        # add in lookup for related kit_id
+        rt_types = rt_types.join(reagenttypes_kittypes).filter(reagenttypes_kittypes.c.kits_id==kit_type.id).first()
+    else:
+        rt_types = rt_types.first()
+    output = rt_types.instances
+    return output
+
+
+def lookup_all_submissions_by_type(ctx:dict, sub_type:str|None=None) -> list[models.BasicSubmission]:
+    """
+    Get all submissions, filtering by type if given
+
+    Args:
+        ctx (dict): settings pass from gui
+        type (str | None, optional): submission type (should be string in D3 of excel sheet). Defaults to None.
+
+    Returns:
+        list[models.BasicSubmission]: list of retrieved submissions
+    """
+    if sub_type == None:
+        subs = ctx['database_session'].query(models.BasicSubmission).all()
+    else:
+        subs = ctx['database_session'].query(models.BasicSubmission).filter(models.BasicSubmission.submission_type==sub_type.lower().replace(" ", "_")).all()
+    return subs
+
+def lookup_all_orgs(ctx:dict) -> list[models.Organization]:
+    """
+    Lookup all organizations (labs)
+
+    Args:
+        ctx (dict): settings passed from gui
+
+    Returns:
+        list[models.Organization]: list of retrieved organizations
+    """    
+    return ctx['database_session'].query(models.Organization).all()
+
+def lookup_org_by_name(ctx:dict, name:str|None) -> models.Organization:
+    """
+    Lookup organization (lab) by (startswith) name.
+
+    Args:
+        ctx (dict): settings passed from gui
+        name (str | None): name of organization
+
+    Returns:
+        models.Organization: retrieved organization
+    """    
+    logger.debug(f"Querying organization: {name}")
+    return ctx['database_session'].query(models.Organization).filter(models.Organization.name.startswith(name)).first()
+
+def submissions_to_df(ctx:dict, sub_type:str|None=None) -> pd.DataFrame:
+    """
+    Convert submissions looked up by type to dataframe
+
+    Args:
+        ctx (dict): settings passed by gui
+        type (str | None, optional): submission type (should be string in D3 of excel sheet) Defaults to None.
+
+    Returns:
+        pd.DataFrame: dataframe constructed from retrieved submissions
+    """    
+    logger.debug(f"Type: {sub_type}")
+    # use lookup function to create list of dicts
+    subs = [item.to_dict() for item in lookup_all_submissions_by_type(ctx=ctx, sub_type=sub_type)]
+    # make df from dicts (records) in list
+    df = pd.DataFrame.from_records(subs)
+    # Exclude sub information
+    try:
+        df = df.drop("controls", axis=1)
+    except:
+        logger.warning(f"Couldn't drop 'controls' column from submissionsheet df.")
+    try:
+        df = df.drop("ext_info", axis=1)
+    except:
+        logger.warning(f"Couldn't drop 'controls' column from submissionsheet df.")
+    return df
+     
+    
+def lookup_submission_by_id(ctx:dict, id:int) -> models.BasicSubmission:
+    """
+    Lookup submission by id number
+
+    Args:
+        ctx (dict): settings passed from gui
+        id (int): submission id number
+
+    Returns:
+        models.BasicSubmission: retrieved submission
+    """    
+    return ctx['database_session'].query(models.BasicSubmission).filter(models.BasicSubmission.id==id).first()
+
+
+def lookup_submissions_by_date_range(ctx:dict, start_date:datetime.date, end_date:datetime.date) -> list[models.BasicSubmission]:
+    """
+    Lookup submissions greater than start_date and less than end_date
+
+    Args:
+        ctx (dict): settings passed from gui
+        start_date (datetime.date): date to start looking
+        end_date (datetime.date): date to end looking
+
+    Returns:
+        list[models.BasicSubmission]: list of retrieved submissions
+    """    
+    # return ctx['database_session'].query(models.BasicSubmission).filter(and_(models.BasicSubmission.submitted_date > start_date, models.BasicSubmission.submitted_date < end_date)).all()
+    start_date = start_date.strftime("%Y-%m-%d")
+    end_date = end_date.strftime("%Y-%m-%d")
+    return ctx['database_session'].query(models.BasicSubmission).filter(models.BasicSubmission.submitted_date.between(start_date, end_date)).all()
+
+
+def get_all_Control_Types_names(ctx:dict) -> list[str]:
+    """
+    Grabs all control type names from db.
+
+    Args:
+        settings (dict): settings passed down from gui.
+
+    Returns:
+        list: list of controltype names
+    """    
+    conTypes = ctx['database_session'].query(models.ControlType).all()
+    conTypes = [conType.name for conType in conTypes]
+    logger.debug(f"Control Types: {conTypes}")
+    return conTypes
+
+
+def create_kit_from_yaml(ctx:dict, exp:dict) -> dict:
+    """
+    Create and store a new kit in the database based on a .yml file
+    TODO: split into create and store functions
+
+    Args:
+        ctx (dict): Context dictionary passed down from frontend
+        exp (dict): Experiment dictionary created from yaml file
+
+    Returns:
+        dict: a dictionary containing results of db addition
+    """    
+    from tools import check_is_power_user
+    # Don't want just anyone adding kits
+    if not check_is_power_user(ctx=ctx):
+        logger.debug(f"{getuser()} does not have permission to add kits.")
+        return {'code':1, 'message':"This user does not have permission to add kits.", "status":"warning"}
+    # iterate through keys in dict
+    for type in exp:
+        if type == "password":
+            continue
+        # A submission type may use multiple kits.
+        for kt in exp[type]['kits']:
+            kit = models.KitType(name=kt, used_for=[type.replace("_", " ").title()], constant_cost=exp[type]["kits"][kt]["constant_cost"], mutable_cost=exp[type]["kits"][kt]["mutable_cost"])
+            # A kit contains multiple reagent types.
+            for r in exp[type]['kits'][kt]['reagenttypes']:
+                # check if reagent type already exists.
+                look_up = ctx['database_session'].query(models.ReagentType).filter(models.ReagentType.name==r).first()
+                if look_up == None:
+                    rt = models.ReagentType(name=r.replace(" ", "_").lower(), eol_ext=timedelta(30*exp[type]['kits'][kt]['reagenttypes'][r]['eol_ext']), kits=[kit])
+                else:
+                    rt = look_up
+                    rt.kits.append(kit)
+                    # add this because I think it's necessary to get proper back population
+                    kit.reagent_types_id.append(rt.id)
+                ctx['database_session'].add(rt)
+                logger.debug(f"Kit construction reagent type: {rt.__dict__}")
+            logger.debug(f"Kit construction kit: {kit.__dict__}")
+        ctx['database_session'].add(kit)
+    ctx['database_session'].commit()
+    return {'code':0, 'message':'Kit has been added', 'status': 'information'}
+
+
+def create_org_from_yaml(ctx:dict, org:dict) -> dict:
+    """
+    Create and store a new organization based on a .yml file
+
+    Args:
+        ctx (dict): Context dictionary passed down from frontend
+        org (dict): Dictionary containing organization info.
+
+    Returns:
+        dict: dictionary containing results of db addition
+    """    
+    from tools import check_is_power_user
+    # Don't want just anyone adding in clients
+    if not check_is_power_user(ctx=ctx):
+        logger.debug(f"{getuser()} does not have permission to add kits.")
+        return {'code':1, 'message':"This user does not have permission to add organizations."}
+    # the yml can contain multiple clients
+    for client in org:
+        cli_org = models.Organization(name=client.replace(" ", "_").lower(), cost_centre=org[client]['cost centre'])
+        # a client can contain multiple contacts
+        for contact in org[client]['contacts']:
+            cont_name = list(contact.keys())[0]
+            # check if contact already exists
+            look_up = ctx['database_session'].query(models.Contact).filter(models.Contact.name==cont_name).first()
+            if look_up == None:
+                cli_cont = models.Contact(name=cont_name, phone=contact[cont_name]['phone'], email=contact[cont_name]['email'], organization=[cli_org])
+            else:
+                cli_cont = look_up
+                cli_cont.organization.append(cli_org)
+            ctx['database_session'].add(cli_cont)
+            logger.debug(f"Client creation contact: {cli_cont.__dict__}")
+        logger.debug(f"Client creation client: {cli_org.__dict__}")
+        ctx['database_session'].add(cli_org)
+    ctx["database_session"].commit()
+    return {"code":0, "message":"Organization has been added."}
+
+
+def lookup_all_sample_types(ctx:dict) -> list[str]:
+    """
+    Lookup all sample types and get names
+
+    Args:
+        ctx (dict): settings pass from gui
+
+    Returns:
+        list[str]: list of sample type names
+    """    
+    uses = [item.used_for for item in ctx['database_session'].query(models.KitType).all()]
+    # flattened list of lists
+    uses = list(set([item for sublist in uses for item in sublist]))
+    return uses
+
+
+def get_all_available_modes(ctx:dict) -> list[str]:
+    """
+    Get types of analysis for controls
+
+    Args:
+        ctx (dict): settings passed from gui
+
+    Returns:
+        list[str]: list of analysis types
+    """    
+    # Only one control is necessary since they all share the same control types.
+    rel = ctx['database_session'].query(models.Control).first()
+    try:
+        cols = [item.name for item in list(rel.__table__.columns) if isinstance(item.type, JSON)]
+    except AttributeError as e:
+        logger.debug(f"Failed to get available modes from db: {e}")
+        cols = []
+    return cols
+
+
+def get_all_controls_by_type(ctx:dict, con_type:str, start_date:date|None=None, end_date:date|None=None) -> list[models.Control]:
+    """
+    Returns a list of control objects that are instances of the input controltype.
+    Between dates if supplied.
+
+    Args:
+        ctx (dict): Settings passed down from gui
+        con_type (str): Name of control type.
+        start_date (date | None, optional): Start date of query. Defaults to None.
+        end_date (date | None, optional): End date of query. Defaults to None.
+
+    Returns:
+        list[models.Control]: list of control samples.
+    """    
+    logger.debug(f"Using dates: {start_date} to {end_date}")
+    if start_date != None and end_date != None:
+        start_date = start_date.strftime("%Y-%m-%d")
+        end_date = end_date.strftime("%Y-%m-%d")
+        output = ctx['database_session'].query(models.Control).join(models.ControlType).filter_by(name=con_type).filter(models.Control.submitted_date.between(start_date, end_date)).all()
+    else:
+        output = ctx['database_session'].query(models.Control).join(models.ControlType).filter_by(name=con_type).all()
+    logger.debug(f"Returned controls between dates: {output}")
+    return output
+
+
+def get_control_subtypes(ctx:dict, type:str, mode:str) -> list[str]:
+    """
+    Get subtypes for a control analysis mode
+
+    Args:
+        ctx (dict): settings passed from gui
+        type (str): control type name
+        mode (str): analysis mode name
+
+    Returns:
+        list[str]: list of subtype names
+    """    
+    # Only the first control of type is necessary since they all share subtypes
+    try:
+        outs = get_all_controls_by_type(ctx=ctx, con_type=type)[0]
+    except TypeError:
+        return []
+    # Get analysis mode data as dict
+    jsoner = json.loads(getattr(outs, mode))
+    logger.debug(f"JSON out: {jsoner}")
+    try:
+        genera = list(jsoner.keys())[0]
+    except IndexError:
+        return []
+    subtypes = [item for item in jsoner[genera] if "_hashes" not in item and "_ratio" not in item]
+    return subtypes
+
+
+def get_all_controls(ctx:dict) -> list[models.Control]:
+    """
+    Retrieve a list of all controls from the database
+
+    Args:
+        ctx (dict): settings passed down from the gui.
+
+    Returns:
+        list[models.Control]: list of all control objects
+    """    
+    return ctx['database_session'].query(models.Control).all()
+
+
+def lookup_submission_by_rsl_num(ctx:dict, rsl_num:str) -> models.BasicSubmission:
+    """
+    Retrieve a submission from the database based on rsl plate number
+
+    Args:
+        ctx (dict): settings passed down from gui
+        rsl_num (str): rsl plate number
+
+    Returns:
+        models.BasicSubmission: Submissions object retrieved from database
+    """
+    return ctx['database_session'].query(models.BasicSubmission).filter(models.BasicSubmission.rsl_plate_num.startswith(rsl_num)).first()
+
+
+def lookup_submissions_using_reagent(ctx:dict, reagent:models.Reagent) -> list[models.BasicSubmission]:
+    return ctx['database_session'].query(models.BasicSubmission).join(reagents_submissions).filter(reagents_submissions.c.reagent_id==reagent.id).all()
+
+
+def delete_submission_by_id(ctx:dict, id:int) -> None:
+    """
+    Deletes a submission and its associated samples from the database.
+
+    Args:
+        ctx (dict): settings passed down from gui
+        id (int): id of submission to be deleted.
+    """    
+    # In order to properly do this Im' going to have to delete all of the secondary table stuff as well.
+    # Retrieve submission
+    sub = ctx['database_session'].query(models.BasicSubmission).filter(models.BasicSubmission.id==id).first()
+    # Convert to dict for storing backup as a yml
+    backup = sub.to_dict()
+    try:
+        with open(Path(ctx['backup_path']).joinpath(f"{sub.rsl_plate_num}-backup({date.today().strftime('%Y%m%d')}).yml"), "w") as f:
+            yaml.dump(backup, f)
+    except KeyError:
+        pass
+    sub.reagents = []
+    for sample in sub.samples:
+        ctx['database_session'].delete(sample)
+    ctx["database_session"].delete(sub)
+    ctx["database_session"].commit()
--- a/src/submissions/backend/db/functions/init.py
+++ b/src/submissions/backend/db/functions/init.py
@@ -1,21 +0,0 @@
-# from ..models import *
-# import logging
-
-# logger = logging.getLogger(f"submissions.{__name__}")
-
-# def check_kit_integrity(sub:BasicSubmission):
-#     ext_kit_rtypes = [reagenttype.name for reagenttype in sub.extraction_kit.reagent_types]
-#     logger.debug(f"Kit reagents: {ext_kit_rtypes}")
-#     reagenttypes = [reagent.type.name for reagent in sub.reagents]
-#     logger.debug(f"Submission reagents: {reagenttypes}")
-#     check = set(ext_kit_rtypes) == set(reagenttypes)
-#     logger.debug(f"Checking if reagents match kit contents: {check}")
-#     common = list(set(ext_kit_rtypes).intersection(reagenttypes))
-#     logger.debug(f"common reagents types: {common}")
-#     if check:
-#         result = None
-#     else:
-#         result = {'message' : f"Couldn't verify reagents match listed kit components.\n\nIt looks like you are missing: {[x.upper for x in ext_kit_rtypes if x not in common]}\n\nAlternatively, you may have set the wrong extraction kit."}
-#     return result
-    
-    
--- a/src/submissions/backend/db/models/init.py
+++ b/src/submissions/backend/db/models/init.py
@@ -1,5 +1,7 @@
+'''
+Contains all models for sqlalchemy
+'''
 from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy.orm import relationship

 Base = declarative_base()
 metadata = Base.metadata
--- a/src/submissions/backend/db/models/controls.py
+++ b/src/submissions/backend/db/models/controls.py
@@ -1,3 +1,6 @@
+'''
+All control related models.
+'''
 from . import Base
 from sqlalchemy import Column, String, TIMESTAMP, JSON, INTEGER, ForeignKey
 from sqlalchemy.orm import relationship
--- a/src/submissions/backend/db/models/kits.py
+++ b/src/submissions/backend/db/models/kits.py
@@ -1,4 +1,6 @@
-from copy import deepcopy
+'''
+All kit and reagent related models
+'''
 from . import Base
 from sqlalchemy import Column, String, TIMESTAMP, JSON, INTEGER, ForeignKey, Interval, Table, FLOAT
 from sqlalchemy.orm import relationship
--- a/src/submissions/backend/db/models/organizations.py
+++ b/src/submissions/backend/db/models/organizations.py
@@ -1,3 +1,6 @@
+'''
+All client organization related models.
+'''
 from . import Base
 from sqlalchemy import Column, String, INTEGER, ForeignKey, Table
 from sqlalchemy.orm import relationship
--- a/src/submissions/backend/db/models/samples.py
+++ b/src/submissions/backend/db/models/samples.py
@@ -1,3 +1,6 @@
+'''
+All models for individual samples.
+'''
 from . import Base
 from sqlalchemy import Column, String, TIMESTAMP, INTEGER, ForeignKey, FLOAT, BOOLEAN
 from sqlalchemy.orm import relationship
--- a/src/submissions/backend/db/models/submissions.py
+++ b/src/submissions/backend/db/models/submissions.py
@@ -1,3 +1,6 @@
+'''
+Models for the main submission types.
+'''
 from . import Base
 from sqlalchemy import Column, String, TIMESTAMP, INTEGER, ForeignKey, Table, JSON, FLOAT
 from sqlalchemy.orm import relationship
--- a/src/submissions/backend/excel/init.py
+++ b/src/submissions/backend/excel/init.py
@@ -1,43 +1,50 @@
-from pandas import DataFrame
-import re
+'''
+Contains pandas convenience functions for interacting with excel workbooks
+'''
+
+from .reports import *
+from .parser import *
+
+# from pandas import DataFrame
+# import re


-def get_unique_values_in_df_column(df: DataFrame, column_name: str) -> list:
-    """
-    get all unique values in a dataframe column by name
+# def get_unique_values_in_df_column(df: DataFrame, column_name: str) -> list:
+#     """
+#     get all unique values in a dataframe column by name

-    Args:
-        df (DataFrame): input dataframe
-        column_name (str): name of column of interest
+#     Args:
+#         df (DataFrame): input dataframe
+#         column_name (str): name of column of interest

-    Returns:
-        list: sorted list of unique values
-    """    
-    return sorted(df[column_name].unique())
+#     Returns:
+#         list: sorted list of unique values
+#     """    
+#     return sorted(df[column_name].unique())


-def drop_reruns_from_df(ctx:dict, df: DataFrame) -> DataFrame:
-    """
-    Removes semi-duplicates from dataframe after finding sequencing repeats.
+# def drop_reruns_from_df(ctx:dict, df: DataFrame) -> DataFrame:
+#     """
+#     Removes semi-duplicates from dataframe after finding sequencing repeats.

-    Args:
-        settings (dict): settings passed from gui
-        df (DataFrame): initial dataframe
+#     Args:
+#         settings (dict): settings passed from gui
+#         df (DataFrame): initial dataframe

-    Returns:
-        DataFrame: dataframe with originals removed in favour of repeats.
-    """    
-    sample_names = get_unique_values_in_df_column(df, column_name="name")
-    if 'rerun_regex' in ctx:
-        # logger.debug(f"Compiling regex from: {settings['rerun_regex']}")
-        rerun_regex = re.compile(fr"{ctx['rerun_regex']}")
-        for sample in sample_names:
-            # logger.debug(f'Running search on {sample}')
-            if rerun_regex.search(sample):
-                # logger.debug(f'Match on {sample}')
-                first_run = re.sub(rerun_regex, "", sample)
-                # logger.debug(f"First run: {first_run}")
-                df = df.drop(df[df.name == first_run].index)
-        return df
-    else:
-        return None
+#     Returns:
+#         DataFrame: dataframe with originals removed in favour of repeats.
+#     """    
+#     sample_names = get_unique_values_in_df_column(df, column_name="name")
+#     if 'rerun_regex' in ctx:
+#         # logger.debug(f"Compiling regex from: {settings['rerun_regex']}")
+#         rerun_regex = re.compile(fr"{ctx['rerun_regex']}")
+#         for sample in sample_names:
+#             # logger.debug(f'Running search on {sample}')
+#             if rerun_regex.search(sample):
+#                 # logger.debug(f'Match on {sample}')
+#                 first_run = re.sub(rerun_regex, "", sample)
+#                 # logger.debug(f"First run: {first_run}")
+#                 df = df.drop(df[df.name == first_run].index)
+#         return df
+#     else:
+#         return None
--- a/src/submissions/backend/excel/parser.py
+++ b/src/submissions/backend/excel/parser.py
@@ -1,3 +1,6 @@
+'''
+contains parser object for pulling values from client generated submission sheets.
+'''
 import pandas as pd
 from pathlib import Path
 from backend.db.models import WWSample, BCSample
--- a/src/submissions/backend/excel/reports.py
+++ b/src/submissions/backend/excel/reports.py
@@ -1,11 +1,13 @@
-
+'''
+Contains functions for generating summary reports
+'''
 from pandas import DataFrame
-# from backend.db import models
 import logging
 from jinja2 import Environment, FileSystemLoader
 from datetime import date, timedelta
 import sys
 from pathlib import Path
+import re

 logger = logging.getLogger(f"submissions.{__name__}")

@@ -93,30 +95,22 @@ def convert_data_list_to_df(ctx:dict, input:list[dict], subtype:str|None=None) -
    Returns:
        DataFrame: _description_
    """    
-    # copy = input
-    # for item in copy:
-    #     item['submitted_date'] = item['submitted_date'].strftime("%Y-%m-%d")
-    # with open("controls.json", "w") as f:
-    #     f.write(json.dumps(copy))
-    # for item in input:
-    #     logger.debug(item.keys())
+    
    df = DataFrame.from_records(input)
    df.to_excel("test.xlsx", engine="openpyxl")
    safe = ['name', 'submitted_date', 'genus', 'target']
-    # logger.debug(df)
    for column in df.columns:
        if "percent" in column:
            count_col = [item for item in df.columns if "count" in item][0]
            # The actual percentage from kraken was off due to exclusion of NaN, recalculating.
-            # df[column] = 100 * df[count_col] / df.groupby('submitted_date')[count_col].transform('sum')
            df[column] = 100 * df[count_col] / df.groupby('name')[count_col].transform('sum')
        if column not in safe:
            if subtype != None and column != subtype:
                del df[column]
-    # logger.debug(df)
-    # df.sort_values('submitted_date').to_excel("controls.xlsx", engine="openpyxl")
+    # move date of sample submitted on same date as previous ahead one.
    df = displace_date(df)
    df.sort_values('submitted_date').to_excel("controls.xlsx", engine="openpyxl")
+    # ad hoc method to make data labels more accurate.
    df = df_column_renamer(df=df)
    return df

@@ -150,24 +144,59 @@ def displace_date(df:DataFrame) -> DataFrame:
    Returns:
        DataFrame: output dataframe with dates incremented.
    """    
-    # dict_list = []
-    # for item in df['name'].unique():
-    #     dict_list.append(dict(name=item, date=df[df.name == item].iloc[0]['submitted_date']))
    logger.debug(f"Unique items: {df['name'].unique()}")
-    # logger.debug(df.to_string())
-    # the assumption is that closest names will have closest dates...
+    # get submitted dates for each control
    dict_list = [dict(name=item, date=df[df.name == item].iloc[0]['submitted_date']) for item in sorted(df['name'].unique())]
    for ii, item in enumerate(dict_list):
-        # if ii > 0:
        try:
            check = item['date'] == dict_list[ii-1]['date']
        except IndexError:
            check = False
        if check:
-            logger.debug(f"We found one! Increment date!\n{item['date'] - timedelta(days=1)}")
+            logger.debug(f"We found one! Increment date!\n\t{item['date'] - timedelta(days=1)}")
+            # get df locations where name == item name
            mask = df['name'] == item['name']
-            # logger.debug(f"We will increment dates in: {df.loc[mask, 'submitted_date']}")
+            # increment date in dataframe
            df.loc[mask, 'submitted_date'] = df.loc[mask, 'submitted_date'].apply(lambda x: x + timedelta(days=1))
-            # logger.debug(f"Do these look incremented: {df.loc[mask, 'submitted_date']}")
    return df
                
+
+def get_unique_values_in_df_column(df: DataFrame, column_name: str) -> list:
+    """
+    get all unique values in a dataframe column by name
+
+    Args:
+        df (DataFrame): input dataframe
+        column_name (str): name of column of interest
+
+    Returns:
+        list: sorted list of unique values
+    """    
+    return sorted(df[column_name].unique())
+
+
+def drop_reruns_from_df(ctx:dict, df: DataFrame) -> DataFrame:
+    """
+    Removes semi-duplicates from dataframe after finding sequencing repeats.
+
+    Args:
+        settings (dict): settings passed from gui
+        df (DataFrame): initial dataframe
+
+    Returns:
+        DataFrame: dataframe with originals removed in favour of repeats.
+    """    
+    sample_names = get_unique_values_in_df_column(df, column_name="name")
+    if 'rerun_regex' in ctx:
+        # logger.debug(f"Compiling regex from: {settings['rerun_regex']}")
+        rerun_regex = re.compile(fr"{ctx['rerun_regex']}")
+        for sample in sample_names:
+            # logger.debug(f'Running search on {sample}')
+            if rerun_regex.search(sample):
+                # logger.debug(f'Match on {sample}')
+                first_run = re.sub(rerun_regex, "", sample)
+                # logger.debug(f"First run: {first_run}")
+                df = df.drop(df[df.name == first_run].index)
+        return df
+    else:
+        return None