mid refactor for improved robustness and readability
This commit is contained in:
@@ -2,7 +2,6 @@ from pandas import DataFrame
|
||||
import re
|
||||
|
||||
|
||||
|
||||
def get_unique_values_in_df_column(df: DataFrame, column_name: str) -> list:
|
||||
"""
|
||||
get all unique values in a dataframe column by name
|
||||
@@ -40,3 +39,5 @@ def drop_reruns_from_df(ctx:dict, df: DataFrame) -> DataFrame:
|
||||
# logger.debug(f"First run: {first_run}")
|
||||
df = df.drop(df[df.name == first_run].index)
|
||||
return df
|
||||
else:
|
||||
return None
|
||||
|
||||
@@ -74,16 +74,15 @@ class SheetParser(object):
|
||||
|
||||
Returns:
|
||||
pd.DataFrame: relevant dataframe from excel sheet
|
||||
"""
|
||||
"""
|
||||
# self.xl is a pd.ExcelFile so we need to parse it into a df
|
||||
submission_info = self.xl.parse(sheet_name=sheet_name, dtype=object)
|
||||
|
||||
self.sub['submitter_plate_num'] = submission_info.iloc[0][1]
|
||||
self.sub['rsl_plate_num'] = submission_info.iloc[10][1]
|
||||
self.sub['submitted_date'] = submission_info.iloc[1][1]
|
||||
self.sub['submitting_lab'] = submission_info.iloc[0][3]
|
||||
self.sub['sample_count'] = submission_info.iloc[2][3]
|
||||
self.sub['extraction_kit'] = submission_info.iloc[3][3]
|
||||
|
||||
return submission_info
|
||||
|
||||
|
||||
@@ -104,10 +103,6 @@ class SheetParser(object):
|
||||
if ii == 11:
|
||||
continue
|
||||
logger.debug(f"Running reagent parse for {row[1]} with type {type(row[1])} and value: {row[2]} with type {type(row[2])}")
|
||||
# try:
|
||||
# check = not np.isnan(row[1])
|
||||
# except TypeError:
|
||||
# check = True
|
||||
if not isinstance(row[2], float) and check_not_nan(row[1]):
|
||||
# must be prefixed with 'lot_' to be recognized by gui
|
||||
try:
|
||||
@@ -122,13 +117,7 @@ class SheetParser(object):
|
||||
logger.debug(f"Couldn't upperize {row[2]}, must be a number")
|
||||
output_var = row[2]
|
||||
logger.debug(f"Output variable is {output_var}")
|
||||
# self.sub[f"lot_{reagent_type}"] = output_var
|
||||
# update 2023-02-10 to above allowing generation of expiry date in adding reagent to db.
|
||||
logger.debug(f"Expiry date for imported reagent: {row[3]}")
|
||||
# try:
|
||||
# check = not np.isnan(row[3])
|
||||
# except TypeError:
|
||||
# check = True
|
||||
if check_not_nan(row[3]):
|
||||
expiry = row[3].date()
|
||||
else:
|
||||
@@ -146,19 +135,8 @@ class SheetParser(object):
|
||||
# reagents
|
||||
# must be prefixed with 'lot_' to be recognized by gui
|
||||
# Todo: find a more adaptable way to read reagents.
|
||||
|
||||
reagent_range = submission_info.iloc[1:13, 4:8]
|
||||
_parse_reagents(reagent_range)
|
||||
# self.sub['lot_wash_1'] = submission_info.iloc[1][6] #if pd.isnull(submission_info.iloc[1][6]) else string_formatter(submission_info.iloc[1][6])
|
||||
# self.sub['lot_wash_2'] = submission_info.iloc[2][6] #if pd.isnull(submission_info.iloc[2][6]) else string_formatter(submission_info.iloc[2][6])
|
||||
# self.sub['lot_binding_buffer'] = submission_info.iloc[3][6] #if pd.isnull(submission_info.iloc[3][6]) else string_formatter(submission_info.iloc[3][6])
|
||||
# self.sub['lot_magnetic_beads'] = submission_info.iloc[4][6] #if pd.isnull(submission_info.iloc[4][6]) else string_formatter(submission_info.iloc[4][6])
|
||||
# self.sub['lot_lysis_buffer'] = submission_info.iloc[5][6] #if np.nan(submission_info.iloc[5][6]) else string_formatter(submission_info.iloc[5][6])
|
||||
# self.sub['lot_elution_buffer'] = submission_info.iloc[6][6] #if pd.isnull(submission_info.iloc[6][6]) else string_formatter(submission_info.iloc[6][6])
|
||||
# self.sub['lot_isopropanol'] = submission_info.iloc[9][6] #if pd.isnull(submission_info.iloc[9][6]) else string_formatter(submission_info.iloc[9][6])
|
||||
# self.sub['lot_ethanol'] = submission_info.iloc[10][6] #if pd.isnull(submission_info.iloc[10][6]) else string_formatter(submission_info.iloc[10][6])
|
||||
# self.sub['lot_positive_control'] = submission_info.iloc[103][1] #if pd.isnull(submission_info.iloc[103][1]) else string_formatter(submission_info.iloc[103][1])
|
||||
# self.sub['lot_plate'] = submission_info.iloc[12][6] #if pd.isnull(submission_info.iloc[12][6]) else string_formatter(submission_info.iloc[12][6])
|
||||
# get individual sample info
|
||||
sample_parser = SampleParser(submission_info.iloc[15:111])
|
||||
sample_parse = getattr(sample_parser, f"parse_{self.sub['submission_type'].lower()}_samples")
|
||||
@@ -178,12 +156,8 @@ class SheetParser(object):
|
||||
Args:
|
||||
df (pd.DataFrame): input sub dataframe
|
||||
"""
|
||||
# logger.debug(df)
|
||||
# iterate through sub-df rows
|
||||
for ii, row in df.iterrows():
|
||||
# try:
|
||||
# check = not np.isnan(row[5])
|
||||
# except TypeError:
|
||||
# check = True
|
||||
if not isinstance(row[5], float) and check_not_nan(row[5]):
|
||||
# must be prefixed with 'lot_' to be recognized by gui
|
||||
# regex below will remove 80% from 80% ethanol in the Wastewater kit.
|
||||
@@ -202,34 +176,26 @@ class SheetParser(object):
|
||||
else:
|
||||
expiry = date.today()
|
||||
self.sub[f"lot_{output_key}"] = {'lot':output_var, 'exp':expiry}
|
||||
# parse submission sheet
|
||||
submission_info = self._parse_generic("WW Submissions (ENTER HERE)")
|
||||
# parse enrichment sheet
|
||||
enrichment_info = self.xl.parse("Enrichment Worksheet", dtype=object)
|
||||
# set enrichment reagent range
|
||||
enr_reagent_range = enrichment_info.iloc[0:4, 9:20]
|
||||
# parse extraction sheet
|
||||
extraction_info = self.xl.parse("Extraction Worksheet", dtype=object)
|
||||
# set extraction reagent range
|
||||
ext_reagent_range = extraction_info.iloc[0:5, 9:20]
|
||||
# parse qpcr sheet
|
||||
qprc_info = self.xl.parse("qPCR Worksheet", dtype=object)
|
||||
# set qpcr reagent range
|
||||
pcr_reagent_range = qprc_info.iloc[0:5, 9:20]
|
||||
# compile technician info
|
||||
self.sub['technician'] = f"Enr: {enrichment_info.columns[2]}, Ext: {extraction_info.columns[2]}, PCR: {qprc_info.columns[2]}"
|
||||
_parse_reagents(enr_reagent_range)
|
||||
_parse_reagents(ext_reagent_range)
|
||||
_parse_reagents(pcr_reagent_range)
|
||||
# reagents
|
||||
# logger.debug(qprc_info)
|
||||
# self.sub['lot_lysis_buffer'] = enrichment_info.iloc[0][14] #if pd.isnull(enrichment_info.iloc[0][14]) else string_formatter(enrichment_info.iloc[0][14])
|
||||
# self.sub['lot_proteinase_K'] = enrichment_info.iloc[1][14] #if pd.isnull(enrichment_info.iloc[1][14]) else string_formatter(enrichment_info.iloc[1][14])
|
||||
# self.sub['lot_magnetic_virus_particles'] = enrichment_info.iloc[2][14] #if pd.isnull(enrichment_info.iloc[2][14]) else string_formatter(enrichment_info.iloc[2][14])
|
||||
# self.sub['lot_enrichment_reagent_1'] = enrichment_info.iloc[3][14] #if pd.isnull(enrichment_info.iloc[3][14]) else string_formatter(enrichment_info.iloc[3][14])
|
||||
# self.sub['lot_binding_buffer'] = extraction_info.iloc[0][14] #if pd.isnull(extraction_info.iloc[0][14]) else string_formatter(extraction_info.iloc[0][14])
|
||||
# self.sub['lot_magnetic_beads'] = extraction_info.iloc[1][14] #if pd.isnull(extraction_info.iloc[1][14]) else string_formatter(extraction_info.iloc[1][14])
|
||||
# self.sub['lot_wash'] = extraction_info.iloc[2][14] #if pd.isnull(extraction_info.iloc[2][14]) else string_formatter(extraction_info.iloc[2][14])
|
||||
# self.sub['lot_ethanol'] = extraction_info.iloc[3][14] #if pd.isnull(extraction_info.iloc[3][14]) else string_formatter(extraction_info.iloc[3][14])
|
||||
# self.sub['lot_elution_buffer'] = extraction_info.iloc[4][14] #if pd.isnull(extraction_info.iloc[4][14]) else string_formatter(extraction_info.iloc[4][14])
|
||||
# self.sub['lot_master_mix'] = qprc_info.iloc[0][14] #if pd.isnull(qprc_info.iloc[0][14]) else string_formatter(qprc_info.iloc[0][14])
|
||||
# self.sub['lot_pre_mix_1'] = qprc_info.iloc[1][14] #if pd.isnull(qprc_info.iloc[1][14]) else string_formatter(qprc_info.iloc[1][14])
|
||||
# self.sub['lot_pre_mix_2'] = qprc_info.iloc[2][14] #if pd.isnull(qprc_info.iloc[2][14]) else string_formatter(qprc_info.iloc[2][14])
|
||||
# self.sub['lot_positive_control'] = qprc_info.iloc[3][14] #if pd.isnull(qprc_info.iloc[3][14]) else string_formatter(qprc_info.iloc[3][14])
|
||||
# self.sub['lot_ddh2o'] = qprc_info.iloc[4][14] #if pd.isnull(qprc_info.iloc[4][14]) else string_formatter(qprc_info.iloc[4][14])
|
||||
# get individual sample info
|
||||
# parse samples
|
||||
sample_parser = SampleParser(submission_info.iloc[16:40])
|
||||
sample_parse = getattr(sample_parser, f"parse_{self.sub['submission_type'].lower()}_samples")
|
||||
self.sub['samples'] = sample_parse()
|
||||
@@ -241,6 +207,12 @@ class SampleParser(object):
|
||||
"""
|
||||
|
||||
def __init__(self, df:pd.DataFrame) -> None:
|
||||
"""
|
||||
convert sample sub-dataframe to dictionary of records
|
||||
|
||||
Args:
|
||||
df (pd.DataFrame): input sample dataframe
|
||||
"""
|
||||
self.samples = df.to_dict("records")
|
||||
|
||||
|
||||
@@ -287,6 +259,7 @@ class SampleParser(object):
|
||||
not_a_nan = not np.isnan(sample['Unnamed: 3'])
|
||||
except TypeError:
|
||||
not_a_nan = True
|
||||
# if we don't have a sample full id, make one up
|
||||
if not_a_nan:
|
||||
new.ww_sample_full_id = sample['Unnamed: 3']
|
||||
else:
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
|
||||
from pandas import DataFrame, concat
|
||||
from operator import itemgetter
|
||||
from pandas import DataFrame
|
||||
# from backend.db import models
|
||||
import json
|
||||
import logging
|
||||
from jinja2 import Environment, FileSystemLoader
|
||||
from datetime import date, timedelta
|
||||
@@ -38,13 +36,8 @@ def make_report_xlsx(records:list[dict]) -> DataFrame:
|
||||
df2 = df.groupby(["Submitting Lab", "Extraction Kit"]).agg({'Extraction Kit':'count', 'Cost': 'sum', 'Sample Count':'sum'})
|
||||
df2 = df2.rename(columns={"Extraction Kit": 'Kit Count'})
|
||||
logger.debug(f"Output daftaframe for xlsx: {df2.columns}")
|
||||
# apply formatting to cost column
|
||||
# df2.iloc[:, (df2.columns.get_level_values(1)=='sum') & (df2.columns.get_level_values(0)=='Cost')] = df2.iloc[:, (df2.columns.get_level_values(1)=='sum') & (df2.columns.get_level_values(0)=='Cost')].applymap('${:,.2f}'.format)
|
||||
return df2
|
||||
|
||||
# def split_row_item(item:str) -> float:
|
||||
# return item.split(" ")[-1]
|
||||
|
||||
|
||||
def make_report_html(df:DataFrame, start_date:date, end_date:date) -> str:
|
||||
|
||||
@@ -63,23 +56,20 @@ def make_report_html(df:DataFrame, start_date:date, end_date:date) -> str:
|
||||
output = []
|
||||
logger.debug(f"Report DataFrame: {df}")
|
||||
for ii, row in enumerate(df.iterrows()):
|
||||
# row = [item for item in row]
|
||||
logger.debug(f"Row {ii}: {row}")
|
||||
lab = row[0][0]
|
||||
logger.debug(type(row))
|
||||
logger.debug(f"Old lab: {old_lab}, Current lab: {lab}")
|
||||
logger.debug(f"Name: {row[0][1]}")
|
||||
data = [item for item in row[1]]
|
||||
# logger.debug(data)
|
||||
# logger.debug(f"Cost: {split_row_item(data[1])}")
|
||||
# logger.debug(f"Kit count: {split_row_item(data[0])}")
|
||||
# logger.debug(f"Sample Count: {split_row_item(data[2])}")
|
||||
kit = dict(name=row[0][1], cost=data[1], plate_count=int(data[0]), sample_count=int(data[2]))
|
||||
# if this is the same lab as before add together
|
||||
if lab == old_lab:
|
||||
output[-1]['kits'].append(kit)
|
||||
output[-1]['total_cost'] += kit['cost']
|
||||
output[-1]['total_samples'] += kit['sample_count']
|
||||
output[-1]['total_plates'] += kit['plate_count']
|
||||
# if not the same lab, make a new one
|
||||
else:
|
||||
adder = dict(lab=lab, kits=[kit], total_cost=kit['cost'], total_samples=kit['sample_count'], total_plates=kit['plate_count'])
|
||||
output.append(adder)
|
||||
@@ -91,83 +81,6 @@ def make_report_html(df:DataFrame, start_date:date, end_date:date) -> str:
|
||||
return html
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# def split_controls_dictionary(ctx:dict, input_dict) -> list[dict]:
|
||||
# # this will be the date in string form
|
||||
# dict_name = list(input_dict.keys())[0]
|
||||
# # the data associated with the date key
|
||||
# sub_dict = input_dict[dict_name]
|
||||
# # How many "count", "Percent", etc are in the dictionary
|
||||
# data_size = get_dict_size(sub_dict)
|
||||
# output = []
|
||||
# for ii in range(data_size):
|
||||
# new_dict = {}
|
||||
# for genus in sub_dict:
|
||||
# logger.debug(genus)
|
||||
# sub_name = list(sub_dict[genus].keys())[ii]
|
||||
# new_dict[genus] = sub_dict[genus][sub_name]
|
||||
# output.append({"date":dict_name, "name": sub_name, "data": new_dict})
|
||||
# return output
|
||||
|
||||
|
||||
# def get_dict_size(input:dict):
|
||||
# return max(len(input[item]) for item in input)
|
||||
|
||||
|
||||
# def convert_all_controls(ctx:dict, data:list) -> dict:
|
||||
# dfs = {}
|
||||
# dict_list = [split_controls_dictionary(ctx, datum) for datum in data]
|
||||
# dict_list = [item for sublist in dict_list for item in sublist]
|
||||
# names = list(set([datum['name'] for datum in dict_list]))
|
||||
# for name in names:
|
||||
|
||||
|
||||
# # df = DataFrame()
|
||||
# # entries = [{item['date']:item['data']} for item in dict_list if item['name']==name]
|
||||
# # series_list = []
|
||||
# # df = pd.json_normalize(entries)
|
||||
# # for entry in entries:
|
||||
# # col_name = list(entry.keys())[0]
|
||||
# # col_dict = entry[col_name]
|
||||
# # series = pd.Series(data=col_dict.values(), index=col_dict.keys(), name=col_name)
|
||||
# # # df[col_name] = series.values
|
||||
# # # logger.debug(df.index)
|
||||
# # series_list.append(series)
|
||||
# # df = DataFrame(series_list).T.fillna(0)
|
||||
# # logger.debug(df)
|
||||
# dfs['name'] = df
|
||||
# return dfs
|
||||
|
||||
# def convert_control_by_mode(ctx:dict, control:models.Control, mode:str) -> list[dict]:
|
||||
# """
|
||||
# split control object into analysis types... can I move this into the class itself?
|
||||
# turns out I can
|
||||
|
||||
# Args:
|
||||
# ctx (dict): settings passed from gui
|
||||
# control (models.Control): control to be parsed into list
|
||||
# mode (str): analysis type
|
||||
|
||||
# Returns:
|
||||
# list[dict]: list of records
|
||||
# """
|
||||
# output = []
|
||||
# data = json.loads(getattr(control, mode))
|
||||
# for genus in data:
|
||||
# _dict = {}
|
||||
# _dict['name'] = control.name
|
||||
# _dict['submitted_date'] = control.submitted_date
|
||||
# _dict['genus'] = genus
|
||||
# _dict['target'] = 'Target' if genus.strip("*") in control.controltype.targets else "Off-target"
|
||||
# for key in data[genus]:
|
||||
# _dict[key] = data[genus][key]
|
||||
# output.append(_dict)
|
||||
# # logger.debug(output)
|
||||
# return output
|
||||
|
||||
|
||||
def convert_data_list_to_df(ctx:dict, input:list[dict], subtype:str|None=None) -> DataFrame:
|
||||
"""
|
||||
Convert list of control records to dataframe
|
||||
|
||||
Reference in New Issue
Block a user