Updated parsers and chart constructors.

This commit is contained in:
Landon Wark
2023-04-27 12:51:53 -05:00
parent 8a0a9aa69c
commit dff5a5aa1e
9 changed files with 59 additions and 19 deletions

View File

@@ -20,6 +20,7 @@ from getpass import getuser
import numpy as np
import yaml
from pathlib import Path
from math import ceil
logger = logging.getLogger(f"submissions.{__name__}")
@@ -161,9 +162,13 @@ def construct_submission_info(ctx:dict, info_dict:dict) -> models.BasicSubmissio
# calculate cost of the run: immutable cost + mutable times number of columns
# This is now attached to the submission upon creation to preserve at-run costs in case of a cost increase in the future.
try:
instance.run_cost = instance.extraction_kit.immutable_cost + (instance.extraction_kit.mutable_cost * ((instance.sample_count / 8)/12))
except (TypeError, AttributeError):
logger.debug(f"Looks like that kit doesn't have cost breakdown yet, using full plate cost.")
# ceil(instance.sample_count / 8) gives the number of columns used (a partially filled column counts as a whole one)
# the mutable cost multiplied by (that number / 12) is x twelfths of the mutable cost; the constant cost is added in full
logger.debug(f"Instance extraction kit details: {instance.extraction_kit.__dict__}")
cols_count = ceil(int(instance.sample_count) / 8)
instance.run_cost = instance.extraction_kit.constant_cost + (instance.extraction_kit.mutable_cost * (cols_count / 12))
except (TypeError, AttributeError) as e:
logger.debug(f"Looks like that kit doesn't have cost breakdown yet due to: {e}, using full plate cost.")
instance.run_cost = instance.extraction_kit.cost_per_run
# We need to make sure there's a proper rsl plate number
try:

View File

@@ -105,6 +105,9 @@ class Control(Base):
for key in data[genus]:
_dict[key] = data[genus][key]
output.append(_dict)
# Have to triage kraken data to keep the program from getting overwhelmed: keep only the 49 entries with the highest count
if "kraken" in mode:
output = sorted(output, key=lambda d: d[f"{mode}_count"], reverse=True)[:49]
return output
def create_dummy_data(self, mode:str) -> dict:

View File

@@ -25,8 +25,8 @@ class KitType(Base):
submissions = relationship("BasicSubmission", back_populates="extraction_kit") #: submissions this kit was used for
used_for = Column(JSON) #: list of names of sample types this kit can process
cost_per_run = Column(FLOAT(2)) #: dollar amount for each full run of this kit NOTE: deprecated, use the constant and mutable costs instead
mutable_cost = Column(FLOAT(2)) #: dollar amount that can change with number of columns (reagents, tips, etc)
constant_cost = Column(FLOAT(2)) #: dollar amount that will remain constant (plates, man hours, etc)
mutable_cost = Column(FLOAT(2)) #: dollar amount per plate that can change with number of columns (reagents, tips, etc)
constant_cost = Column(FLOAT(2)) #: dollar amount per plate that will remain constant (plates, man hours, etc)
reagent_types = relationship("ReagentType", back_populates="kits", uselist=True, secondary=reagenttypes_kittypes) #: reagent types this kit contains
reagent_types_id = Column(INTEGER, ForeignKey("_reagent_types.id", ondelete='SET NULL', use_alter=True, name="fk_KT_reagentstype_id")) #: joined reagent type id
@@ -110,4 +110,16 @@ class Reagent(Base):
"type": type,
"lot": self.lot,
"expiry": place_holder.strftime("%Y-%m-%d")
}
}
# class Discounts(Base):
# """
# Relationship table for client labs for certain kits.
# """
# __tablename__ = "_discounts"
# id = Column(INTEGER, primary_key=True) #: primary key
# kit = relationship("KitType") #: joined parent reagent type
# kit_id = Column(INTEGER, ForeignKey("_kits.id", ondelete='SET NULL', name="fk_kit_type_id"))
# client = relationship("Organization")

View File

@@ -107,8 +107,8 @@ class SheetParser(object):
"""
for ii, row in df.iterrows():
# skip positive control
if ii == 11:
continue
# if ii == 12:
# continue
logger.debug(f"Running reagent parse for {row[1]} with type {type(row[1])} and value: {row[2]} with type {type(row[2])}")
if not isinstance(row[2], float) and check_not_nan(row[1]):
# must be prefixed with 'lot_' to be recognized by gui
@@ -117,7 +117,10 @@ class SheetParser(object):
except AttributeError:
pass
if reagent_type == "//":
reagent_type = row[0].replace(' ', '_').lower().strip()
if check_not_nan(row[2]):
reagent_type = row[0].replace(' ', '_').lower().strip()
else:
continue
try:
output_var = row[2].upper()
except AttributeError:
@@ -142,10 +145,11 @@ class SheetParser(object):
# reagents
# must be prefixed with 'lot_' to be recognized by gui
# Todo: find a more adaptable way to read reagents.
reagent_range = submission_info.iloc[1:13, 4:8]
reagent_range = submission_info.iloc[1:14, 4:8]
logger.debug(reagent_range)
parse_reagents(reagent_range)
# get individual sample info
sample_parser = SampleParser(submission_info.iloc[15:111])
sample_parser = SampleParser(submission_info.iloc[16:112])
sample_parse = getattr(sample_parser, f"parse_{self.sub['submission_type'].lower()}_samples")
logger.debug(f"Parser result: {self.sub}")
self.sub['samples'] = sample_parse()

View File

@@ -9,6 +9,7 @@ import sys
from pathlib import Path
import re
from tools import check_if_app
import asyncio
logger = logging.getLogger(f"submissions.{__name__}")
@@ -109,9 +110,10 @@ def convert_data_list_to_df(ctx:dict, input:list[dict], subtype:str|None=None) -
if column not in safe:
if subtype != None and column != subtype:
del df[column]
# logger.debug(df)
# if a sample was submitted on the same date as the previous one, move its date ahead by one.
df = displace_date(df)
df.sort_values('submitted_date').to_excel("controls.xlsx", engine="openpyxl")
# df.sort_values('submitted_date').to_excel("controls.xlsx", engine="openpyxl")
# ad hoc method to make data labels more accurate.
df = df_column_renamer(df=df)
return df
@@ -131,8 +133,8 @@ def df_column_renamer(df:DataFrame) -> DataFrame:
return df.rename(columns = {
"contains_ratio":"contains_shared_hashes_ratio",
"matches_ratio":"matches_shared_hashes_ratio",
"kraken_count":"kraken2_read_count",
"kraken_percent":"kraken2_read_percent"
"kraken_count":"kraken2_read_count_(top_20)",
"kraken_percent":"kraken2_read_percent_(top_20)"
})