Updated parsers and chart constructors.

2023-04-27 12:51:53 -05:00
parent 8a0a9aa69c
commit dff5a5aa1e
9 changed files with 59 additions and 19 deletions
--- a/src/submissions/backend/db/functions.py
+++ b/src/submissions/backend/db/functions.py
@@ -20,6 +20,7 @@ from getpass import getuser
 import numpy as np
 import yaml
 from pathlib import Path
+from math import ceil

 logger = logging.getLogger(f"submissions.{__name__}")

@@ -161,9 +162,13 @@ def construct_submission_info(ctx:dict, info_dict:dict) -> models.BasicSubmissio
    # calculate cost of the run: immutable cost + mutable times number of columns
    # This is now attached to submission upon creation to preserve at-run costs incase of cost increase in the future.
    try:
-        instance.run_cost = instance.extraction_kit.immutable_cost + (instance.extraction_kit.mutable_cost * ((instance.sample_count / 8)/12))
-    except (TypeError, AttributeError):
-        logger.debug(f"Looks like that kit doesn't have cost breakdown yet, using full plate cost.")
+        # ceil(sample_count / 8) gives the number of columns used (8 samples per column)
+        # only the mutable cost is scaled by (columns used / 12); the constant cost is charged in full
+        logger.debug(f"Instance extraction kit details: {instance.extraction_kit.__dict__}")
+        cols_count = ceil(int(instance.sample_count) / 8)
+        instance.run_cost = instance.extraction_kit.constant_cost + (instance.extraction_kit.mutable_cost * (cols_count / 12))
+    except (TypeError, AttributeError) as e:
+        logger.debug(f"Looks like that kit doesn't have cost breakdown yet due to: {e}, using full plate cost.")
        instance.run_cost = instance.extraction_kit.cost_per_run
    # We need to make sure there's a proper rsl plate number
    try:
--- a/src/submissions/backend/db/models/controls.py
+++ b/src/submissions/backend/db/models/controls.py
@@ -105,6 +105,9 @@ class Control(Base):
            for key in data[genus]:
                _dict[key] = data[genus][key]
            output.append(_dict)
+        # Triage kraken data so the program isn't overwhelmed: keep only the 49 highest-count entries (NOTE(review): report labels say top_20 - confirm cutoff)
+        if "kraken" in mode:
+            output = sorted(output, key=lambda d: d[f"{mode}_count"], reverse=True)[:49]
        return output
    
    def create_dummy_data(self, mode:str) -> dict:
--- a/src/submissions/backend/db/models/kits.py
+++ b/src/submissions/backend/db/models/kits.py
@@ -25,8 +25,8 @@ class KitType(Base):
    submissions = relationship("BasicSubmission", back_populates="extraction_kit") #: submissions this kit was used for
    used_for = Column(JSON) #: list of names of sample types this kit can process
    cost_per_run = Column(FLOAT(2)) #: dollar amount for each full run of this kit NOTE: depreciated, use the constant and mutable costs instead
-    mutable_cost = Column(FLOAT(2)) #: dollar amount that can change with number of columns (reagents, tips, etc)
-    constant_cost = Column(FLOAT(2)) #: dollar amount that will remain constant (plates, man hours, etc)
+    mutable_cost = Column(FLOAT(2)) #: dollar amount per plate that can change with number of columns (reagents, tips, etc)
+    constant_cost = Column(FLOAT(2)) #: dollar amount per plate that will remain constant (plates, man hours, etc)
    reagent_types = relationship("ReagentType", back_populates="kits", uselist=True, secondary=reagenttypes_kittypes) #: reagent types this kit contains
    reagent_types_id = Column(INTEGER, ForeignKey("_reagent_types.id", ondelete='SET NULL', use_alter=True, name="fk_KT_reagentstype_id")) #: joined reagent type id
    
@@ -110,4 +110,16 @@ class Reagent(Base):
            "type": type,
            "lot": self.lot,
            "expiry": place_holder.strftime("%Y-%m-%d")
-        }
+        }
+    
+
+# class Discounts(Base):
+#     """
+#     Relationship table for client labs for certain kits.
+#     """
+#     __tablename__ = "_discounts"
+
+#     id = Column(INTEGER, primary_key=True) #: primary key
+#     kit = relationship("KitType") #: joined kit type
+#     kit_id = Column(INTEGER, ForeignKey("_kits.id", ondelete='SET NULL', name="fk_kit_type_id"))
+#     client = relationship("Organization")
--- a/src/submissions/backend/excel/parser.py
+++ b/src/submissions/backend/excel/parser.py
@@ -107,8 +107,8 @@ class SheetParser(object):
            """            
            for ii, row in df.iterrows():
                # skip positive control
-                if ii == 11:
-                    continue
+                # if ii == 12:
+                #     continue
                logger.debug(f"Running reagent parse for {row[1]} with type {type(row[1])} and value: {row[2]} with type {type(row[2])}")
                if not isinstance(row[2], float) and check_not_nan(row[1]):
                    # must be prefixed with 'lot_' to be recognized by gui
@@ -117,7 +117,10 @@ class SheetParser(object):
                    except AttributeError:
                        pass
                    if reagent_type == "//":
-                        reagent_type = row[0].replace(' ', '_').lower().strip()
+                        if check_not_nan(row[2]):
+                            reagent_type = row[0].replace(' ', '_').lower().strip()
+                        else:
+                            continue
                    try:
                        output_var = row[2].upper()
                    except AttributeError:
@@ -142,10 +145,11 @@ class SheetParser(object):
        # reagents
        # must be prefixed with 'lot_' to be recognized by gui
        # Todo: find a more adaptable way to read reagents.
-        reagent_range = submission_info.iloc[1:13, 4:8]
+        reagent_range = submission_info.iloc[1:14, 4:8]
+        logger.debug(reagent_range)
        parse_reagents(reagent_range)
        # get individual sample info
-        sample_parser = SampleParser(submission_info.iloc[15:111])
+        sample_parser = SampleParser(submission_info.iloc[16:112])
        sample_parse = getattr(sample_parser, f"parse_{self.sub['submission_type'].lower()}_samples")
        logger.debug(f"Parser result: {self.sub}")
        self.sub['samples'] = sample_parse()
--- a/src/submissions/backend/excel/reports.py
+++ b/src/submissions/backend/excel/reports.py
@@ -9,6 +9,7 @@ import sys
 from pathlib import Path
 import re
 from tools import check_if_app
+import asyncio

 logger = logging.getLogger(f"submissions.{__name__}")

@@ -109,9 +110,10 @@ def convert_data_list_to_df(ctx:dict, input:list[dict], subtype:str|None=None) -
        if column not in safe:
            if subtype != None and column != subtype:
                del df[column]
+    # logger.debug(df)
    # move date of sample submitted on same date as previous ahead one.
    df = displace_date(df)
-    df.sort_values('submitted_date').to_excel("controls.xlsx", engine="openpyxl")
+    # df.sort_values('submitted_date').to_excel("controls.xlsx", engine="openpyxl")
    # ad hoc method to make data labels more accurate.
    df = df_column_renamer(df=df)
    return df
@@ -131,8 +133,8 @@ def df_column_renamer(df:DataFrame) -> DataFrame:
    return df.rename(columns = {
        "contains_ratio":"contains_shared_hashes_ratio",
        "matches_ratio":"matches_shared_hashes_ratio",
-        "kraken_count":"kraken2_read_count",
-        "kraken_percent":"kraken2_read_percent"
+        "kraken_count":"kraken2_read_count_(top_20)",
+        "kraken_percent":"kraken2_read_percent_(top_20)"
    })