Updated parsers and chart constructors.
This commit is contained in:
@@ -1,3 +1,8 @@
|
|||||||
|
## 202304.04
|
||||||
|
|
||||||
|
- Kraken controls graph now only pulls top 20 results to prevent crashing.
|
||||||
|
- Improved cost calculations per column in a 96-well plate.
|
||||||
|
|
||||||
## 202304.01
|
## 202304.01
|
||||||
|
|
||||||
- Improved function results output to ui.
|
- Improved function results output to ui.
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ from pathlib import Path
|
|||||||
|
|
||||||
# Version of the submissions package
|
# Version of the submissions package
|
||||||
__project__ = "submissions"
|
__project__ = "submissions"
|
||||||
__version__ = "202304.2b"
|
__version__ = "202304.4b"
|
||||||
__author__ = {"name":"Landon Wark", "email":"Landon.Wark@phac-aspc.gc.ca"}
|
__author__ = {"name":"Landon Wark", "email":"Landon.Wark@phac-aspc.gc.ca"}
|
||||||
__copyright__ = "2022-2023, Government of Canada"
|
__copyright__ = "2022-2023, Government of Canada"
|
||||||
|
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ from getpass import getuser
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
import yaml
|
import yaml
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from math import ceil
|
||||||
|
|
||||||
logger = logging.getLogger(f"submissions.{__name__}")
|
logger = logging.getLogger(f"submissions.{__name__}")
|
||||||
|
|
||||||
@@ -161,9 +162,13 @@ def construct_submission_info(ctx:dict, info_dict:dict) -> models.BasicSubmissio
|
|||||||
# calculate cost of the run: immutable cost + mutable times number of columns
|
# calculate cost of the run: immutable cost + mutable times number of columns
|
||||||
# This is now attached to submission upon creation to preserve at-run costs in case of cost increase in the future.
|
# This is now attached to submission upon creation to preserve at-run costs in case of cost increase in the future.
|
||||||
try:
|
try:
|
||||||
instance.run_cost = instance.extraction_kit.immutable_cost + (instance.extraction_kit.mutable_cost * ((instance.sample_count / 8)/12))
|
# ceil(instance.sample_count / 8) will get number of columns
|
||||||
except (TypeError, AttributeError):
|
# the cost of a full run multiplied by (that number / 12) is x twelfths the cost of a full run
|
||||||
logger.debug(f"Looks like that kit doesn't have cost breakdown yet, using full plate cost.")
|
logger.debug(f"Instance extraction kit details: {instance.extraction_kit.__dict__}")
|
||||||
|
cols_count = ceil(int(instance.sample_count) / 8)
|
||||||
|
instance.run_cost = instance.extraction_kit.constant_cost + (instance.extraction_kit.mutable_cost * (cols_count / 12))
|
||||||
|
except (TypeError, AttributeError) as e:
|
||||||
|
logger.debug(f"Looks like that kit doesn't have cost breakdown yet due to: {e}, using full plate cost.")
|
||||||
instance.run_cost = instance.extraction_kit.cost_per_run
|
instance.run_cost = instance.extraction_kit.cost_per_run
|
||||||
# We need to make sure there's a proper rsl plate number
|
# We need to make sure there's a proper rsl plate number
|
||||||
try:
|
try:
|
||||||
|
|||||||
@@ -105,6 +105,9 @@ class Control(Base):
|
|||||||
for key in data[genus]:
|
for key in data[genus]:
|
||||||
_dict[key] = data[genus][key]
|
_dict[key] = data[genus][key]
|
||||||
output.append(_dict)
|
output.append(_dict)
|
||||||
|
# Have to triage kraken data to keep program from getting overwhelmed
|
||||||
|
if "kraken" in mode:
|
||||||
|
output = sorted(output, key=lambda d: d[f"{mode}_count"], reverse=True)[:49]
|
||||||
return output
|
return output
|
||||||
|
|
||||||
def create_dummy_data(self, mode:str) -> dict:
|
def create_dummy_data(self, mode:str) -> dict:
|
||||||
|
|||||||
@@ -25,8 +25,8 @@ class KitType(Base):
|
|||||||
submissions = relationship("BasicSubmission", back_populates="extraction_kit") #: submissions this kit was used for
|
submissions = relationship("BasicSubmission", back_populates="extraction_kit") #: submissions this kit was used for
|
||||||
used_for = Column(JSON) #: list of names of sample types this kit can process
|
used_for = Column(JSON) #: list of names of sample types this kit can process
|
||||||
cost_per_run = Column(FLOAT(2)) #: dollar amount for each full run of this kit NOTE: deprecated, use the constant and mutable costs instead
|
cost_per_run = Column(FLOAT(2)) #: dollar amount for each full run of this kit NOTE: deprecated, use the constant and mutable costs instead
|
||||||
mutable_cost = Column(FLOAT(2)) #: dollar amount that can change with number of columns (reagents, tips, etc)
|
mutable_cost = Column(FLOAT(2)) #: dollar amount per plate that can change with number of columns (reagents, tips, etc)
|
||||||
constant_cost = Column(FLOAT(2)) #: dollar amount that will remain constant (plates, man hours, etc)
|
constant_cost = Column(FLOAT(2)) #: dollar amount per plate that will remain constant (plates, man hours, etc)
|
||||||
reagent_types = relationship("ReagentType", back_populates="kits", uselist=True, secondary=reagenttypes_kittypes) #: reagent types this kit contains
|
reagent_types = relationship("ReagentType", back_populates="kits", uselist=True, secondary=reagenttypes_kittypes) #: reagent types this kit contains
|
||||||
reagent_types_id = Column(INTEGER, ForeignKey("_reagent_types.id", ondelete='SET NULL', use_alter=True, name="fk_KT_reagentstype_id")) #: joined reagent type id
|
reagent_types_id = Column(INTEGER, ForeignKey("_reagent_types.id", ondelete='SET NULL', use_alter=True, name="fk_KT_reagentstype_id")) #: joined reagent type id
|
||||||
|
|
||||||
@@ -111,3 +111,15 @@ class Reagent(Base):
|
|||||||
"lot": self.lot,
|
"lot": self.lot,
|
||||||
"expiry": place_holder.strftime("%Y-%m-%d")
|
"expiry": place_holder.strftime("%Y-%m-%d")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# class Discounts(Base):
|
||||||
|
# """
|
||||||
|
# Relationship table for client labs for certain kits.
|
||||||
|
# """
|
||||||
|
# __tablename__ = "_discounts"
|
||||||
|
|
||||||
|
# id = Column(INTEGER, primary_key=True) #: primary key
|
||||||
|
# kit = relationship("KitType") #: joined parent reagent type
|
||||||
|
# kit_id = Column(INTEGER, ForeignKey("_kits.id", ondelete='SET NULL', name="fk_kit_type_id"))
|
||||||
|
# client = relationship("Organization")
|
||||||
@@ -107,8 +107,8 @@ class SheetParser(object):
|
|||||||
"""
|
"""
|
||||||
for ii, row in df.iterrows():
|
for ii, row in df.iterrows():
|
||||||
# skip positive control
|
# skip positive control
|
||||||
if ii == 11:
|
# if ii == 12:
|
||||||
continue
|
# continue
|
||||||
logger.debug(f"Running reagent parse for {row[1]} with type {type(row[1])} and value: {row[2]} with type {type(row[2])}")
|
logger.debug(f"Running reagent parse for {row[1]} with type {type(row[1])} and value: {row[2]} with type {type(row[2])}")
|
||||||
if not isinstance(row[2], float) and check_not_nan(row[1]):
|
if not isinstance(row[2], float) and check_not_nan(row[1]):
|
||||||
# must be prefixed with 'lot_' to be recognized by gui
|
# must be prefixed with 'lot_' to be recognized by gui
|
||||||
@@ -117,7 +117,10 @@ class SheetParser(object):
|
|||||||
except AttributeError:
|
except AttributeError:
|
||||||
pass
|
pass
|
||||||
if reagent_type == "//":
|
if reagent_type == "//":
|
||||||
|
if check_not_nan(row[2]):
|
||||||
reagent_type = row[0].replace(' ', '_').lower().strip()
|
reagent_type = row[0].replace(' ', '_').lower().strip()
|
||||||
|
else:
|
||||||
|
continue
|
||||||
try:
|
try:
|
||||||
output_var = row[2].upper()
|
output_var = row[2].upper()
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
@@ -142,10 +145,11 @@ class SheetParser(object):
|
|||||||
# reagents
|
# reagents
|
||||||
# must be prefixed with 'lot_' to be recognized by gui
|
# must be prefixed with 'lot_' to be recognized by gui
|
||||||
# Todo: find a more adaptable way to read reagents.
|
# Todo: find a more adaptable way to read reagents.
|
||||||
reagent_range = submission_info.iloc[1:13, 4:8]
|
reagent_range = submission_info.iloc[1:14, 4:8]
|
||||||
|
logger.debug(reagent_range)
|
||||||
parse_reagents(reagent_range)
|
parse_reagents(reagent_range)
|
||||||
# get individual sample info
|
# get individual sample info
|
||||||
sample_parser = SampleParser(submission_info.iloc[15:111])
|
sample_parser = SampleParser(submission_info.iloc[16:112])
|
||||||
sample_parse = getattr(sample_parser, f"parse_{self.sub['submission_type'].lower()}_samples")
|
sample_parse = getattr(sample_parser, f"parse_{self.sub['submission_type'].lower()}_samples")
|
||||||
logger.debug(f"Parser result: {self.sub}")
|
logger.debug(f"Parser result: {self.sub}")
|
||||||
self.sub['samples'] = sample_parse()
|
self.sub['samples'] = sample_parse()
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ import sys
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import re
|
import re
|
||||||
from tools import check_if_app
|
from tools import check_if_app
|
||||||
|
import asyncio
|
||||||
|
|
||||||
logger = logging.getLogger(f"submissions.{__name__}")
|
logger = logging.getLogger(f"submissions.{__name__}")
|
||||||
|
|
||||||
@@ -109,9 +110,10 @@ def convert_data_list_to_df(ctx:dict, input:list[dict], subtype:str|None=None) -
|
|||||||
if column not in safe:
|
if column not in safe:
|
||||||
if subtype != None and column != subtype:
|
if subtype != None and column != subtype:
|
||||||
del df[column]
|
del df[column]
|
||||||
|
# logger.debug(df)
|
||||||
# move date of sample submitted on same date as previous ahead one.
|
# move date of sample submitted on same date as previous ahead one.
|
||||||
df = displace_date(df)
|
df = displace_date(df)
|
||||||
df.sort_values('submitted_date').to_excel("controls.xlsx", engine="openpyxl")
|
# df.sort_values('submitted_date').to_excel("controls.xlsx", engine="openpyxl")
|
||||||
# ad hoc method to make data labels more accurate.
|
# ad hoc method to make data labels more accurate.
|
||||||
df = df_column_renamer(df=df)
|
df = df_column_renamer(df=df)
|
||||||
return df
|
return df
|
||||||
@@ -131,8 +133,8 @@ def df_column_renamer(df:DataFrame) -> DataFrame:
|
|||||||
return df.rename(columns = {
|
return df.rename(columns = {
|
||||||
"contains_ratio":"contains_shared_hashes_ratio",
|
"contains_ratio":"contains_shared_hashes_ratio",
|
||||||
"matches_ratio":"matches_shared_hashes_ratio",
|
"matches_ratio":"matches_shared_hashes_ratio",
|
||||||
"kraken_count":"kraken2_read_count",
|
"kraken_count":"kraken2_read_count_(top_20)",
|
||||||
"kraken_percent":"kraken2_read_percent"
|
"kraken_percent":"kraken2_read_percent_(top_20)"
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -35,6 +35,7 @@ from .custom_widgets.pop_ups import AlertPop, QuestionAsker
|
|||||||
from .custom_widgets import ReportDatePicker, ReagentTypeForm
|
from .custom_widgets import ReportDatePicker, ReagentTypeForm
|
||||||
from .custom_widgets.misc import ImportReagent
|
from .custom_widgets.misc import ImportReagent
|
||||||
from .visualizations.control_charts import create_charts, construct_html
|
from .visualizations.control_charts import create_charts, construct_html
|
||||||
|
import asyncio
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger(f"submissions.{__name__}")
|
logger = logging.getLogger(f"submissions.{__name__}")
|
||||||
@@ -111,11 +112,14 @@ def import_submission_function(obj:QMainWindow) -> Tuple[QMainWindow, dict|None]
|
|||||||
add_widget = QComboBox()
|
add_widget = QComboBox()
|
||||||
# lookup existing kits by 'submission_type' decided on by sheetparser
|
# lookup existing kits by 'submission_type' decided on by sheetparser
|
||||||
uses = [item.__str__() for item in lookup_kittype_by_use(ctx=obj.ctx, used_by=prsr.sub['submission_type'])]
|
uses = [item.__str__() for item in lookup_kittype_by_use(ctx=obj.ctx, used_by=prsr.sub['submission_type'])]
|
||||||
add_widget.addItems(uses)
|
|
||||||
if check_not_nan(prsr.sub[item]):
|
if check_not_nan(prsr.sub[item]):
|
||||||
|
logger.debug(f"The extraction kit in parser was: {prsr.sub[item]}")
|
||||||
|
uses.insert(0, uses.pop(uses.index(prsr.sub[item])))
|
||||||
obj.ext_kit = prsr.sub[item]
|
obj.ext_kit = prsr.sub[item]
|
||||||
else:
|
else:
|
||||||
obj.ext_kit = add_widget.currentText()
|
logger.error(f"Couldn't find prsr.sub[extraction_kit]")
|
||||||
|
obj.ext_kit = uses[0]
|
||||||
|
add_widget.addItems(uses)
|
||||||
case 'submitted_date':
|
case 'submitted_date':
|
||||||
# create label
|
# create label
|
||||||
obj.table_widget.formlayout.addWidget(QLabel(item.replace("_", " ").title()))
|
obj.table_widget.formlayout.addWidget(QLabel(item.replace("_", " ").title()))
|
||||||
@@ -265,7 +269,7 @@ def submit_new_sample_function(obj:QMainWindow) -> QMainWindow:
|
|||||||
# reset form
|
# reset form
|
||||||
for item in obj.table_widget.formlayout.parentWidget().findChildren(QWidget):
|
for item in obj.table_widget.formlayout.parentWidget().findChildren(QWidget):
|
||||||
item.setParent(None)
|
item.setParent(None)
|
||||||
print(dir(obj))
|
# print(dir(obj))
|
||||||
if hasattr(obj, 'csv'):
|
if hasattr(obj, 'csv'):
|
||||||
dlg = QuestionAsker("Export CSV?", "Would you like to export the csv file?")
|
dlg = QuestionAsker("Export CSV?", "Would you like to export the csv file?")
|
||||||
if dlg.exec():
|
if dlg.exec():
|
||||||
@@ -426,6 +430,8 @@ def chart_maker_function(obj:QMainWindow) -> QMainWindow:
|
|||||||
# flatten data to one dimensional list
|
# flatten data to one dimensional list
|
||||||
data = [item for sublist in data for item in sublist]
|
data = [item for sublist in data for item in sublist]
|
||||||
logger.debug(f"Control objects going into df conversion: {data}")
|
logger.debug(f"Control objects going into df conversion: {data}")
|
||||||
|
if data == []:
|
||||||
|
return obj, dict(status="Critical", message="No data found for controls in given date range.")
|
||||||
# send to dataframe creator
|
# send to dataframe creator
|
||||||
df = convert_data_list_to_df(ctx=obj.ctx, input=data, subtype=obj.subtype)
|
df = convert_data_list_to_df(ctx=obj.ctx, input=data, subtype=obj.subtype)
|
||||||
if obj.subtype == None:
|
if obj.subtype == None:
|
||||||
|
|||||||
@@ -39,7 +39,7 @@ def create_charts(ctx:dict, df:pd.DataFrame, ytitle:str|None=None) -> Figure:
|
|||||||
genera.append("")
|
genera.append("")
|
||||||
df['genus'] = df['genus'].replace({'\*':''}, regex=True).replace({"NaN":"Unknown"})
|
df['genus'] = df['genus'].replace({'\*':''}, regex=True).replace({"NaN":"Unknown"})
|
||||||
df['genera'] = genera
|
df['genera'] = genera
|
||||||
df = df.dropna()
|
# df = df.dropna()
|
||||||
# remove original runs, using reruns if applicable
|
# remove original runs, using reruns if applicable
|
||||||
df = drop_reruns_from_df(ctx=ctx, df=df)
|
df = drop_reruns_from_df(ctx=ctx, df=df)
|
||||||
# sort by and exclude from
|
# sort by and exclude from
|
||||||
@@ -49,6 +49,7 @@ def create_charts(ctx:dict, df:pd.DataFrame, ytitle:str|None=None) -> Figure:
|
|||||||
# Set descending for any columns that have "{mode}" in the header.
|
# Set descending for any columns that have "{mode}" in the header.
|
||||||
ascending = [False if item == "target" else True for item in sorts]
|
ascending = [False if item == "target" else True for item in sorts]
|
||||||
df = df.sort_values(by=sorts, ascending=ascending)
|
df = df.sort_values(by=sorts, ascending=ascending)
|
||||||
|
logger.debug(df[df.isna().any(axis=1)])
|
||||||
# actual chart construction is done by construct_chart
|
# actual chart construction is done by construct_chart
|
||||||
fig = construct_chart(ctx=ctx, df=df, modes=modes, ytitle=ytitle)
|
fig = construct_chart(ctx=ctx, df=df, modes=modes, ytitle=ytitle)
|
||||||
return fig
|
return fig
|
||||||
@@ -245,6 +246,8 @@ def construct_kraken_chart(settings:dict, df:pd.DataFrame, group_name:str, mode:
|
|||||||
Figure: initial figure with traces for modes
|
Figure: initial figure with traces for modes
|
||||||
"""
|
"""
|
||||||
df[f'{mode}_count'] = pd.to_numeric(df[f'{mode}_count'],errors='coerce')
|
df[f'{mode}_count'] = pd.to_numeric(df[f'{mode}_count'],errors='coerce')
|
||||||
|
df = df.groupby('submitted_date')[f'{mode}_count'].nlargest(2)
|
||||||
|
|
||||||
# The actual percentage from kraken was off due to exclusion of NaN, recalculating.
|
# The actual percentage from kraken was off due to exclusion of NaN, recalculating.
|
||||||
df[f'{mode}_percent'] = 100 * df[f'{mode}_count'] / df.groupby('submitted_date')[f'{mode}_count'].transform('sum')
|
df[f'{mode}_percent'] = 100 * df[f'{mode}_count'] / df.groupby('submitted_date')[f'{mode}_count'].transform('sum')
|
||||||
modes = settings['modes'][mode]
|
modes = settings['modes'][mode]
|
||||||
|
|||||||
Reference in New Issue
Block a user