controls working
This commit is contained in:
@@ -3,10 +3,13 @@ import pandas as pd
|
||||
# from sqlite3 import IntegrityError
|
||||
from sqlalchemy.exc import IntegrityError
|
||||
import logging
|
||||
import datetime
|
||||
from datetime import date, datetime
|
||||
from sqlalchemy import and_
|
||||
import uuid
|
||||
import base64
|
||||
from sqlalchemy import JSON
|
||||
import json
|
||||
from dateutil.relativedelta import relativedelta
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -226,4 +229,52 @@ def create_kit_from_yaml(ctx:dict, exp:dict) -> None:
|
||||
def lookup_all_sample_types(ctx:dict) -> list[str]:
    """
    Returns all distinct sample types any kit type is used for.

    Args:
        ctx (dict): Settings passed down from gui.

    Returns:
        list[str]: Deduplicated sample-type names (order is arbitrary).
    """
    uses = [item.used_for for item in ctx['database_session'].query(models.KitType).all()]
    # Flatten the list-of-lists and deduplicate in one pass.
    # (Original had a duplicated, unreachable `return uses` line — removed.)
    uses = list({item for sublist in uses for item in sublist})
    return uses
|
||||
|
||||
|
||||
|
||||
def get_all_available_modes(ctx:dict) -> list[str]:
    """
    Returns the names of the JSON columns on the Control model; each JSON
    column represents one analysis mode.

    Args:
        ctx (dict): Settings passed down from gui.

    Returns:
        list[str]: JSON column names; empty list when no Control rows exist.
    """
    rel = ctx['database_session'].query(models.Control).first()
    # .first() returns None on an empty table; the original then raised
    # AttributeError on rel.__table__.
    if rel is None:
        return []
    return [item.name for item in rel.__table__.columns if isinstance(item.type, JSON)]
|
||||
|
||||
|
||||
|
||||
def get_all_controls_by_type(ctx:dict, con_type:str, start_date:date|None=None, end_date:date|None=None) -> list:
    """
    Returns a list of control objects that are instances of the input controltype.

    Args:
        ctx (dict): Settings passed down from gui.
        con_type (str): Name of the control type.
        start_date (date|None): Exclusive lower bound on submitted date.
        end_date (date|None): Exclusive upper bound on submitted date.

    Returns:
        list: Control instances, or None when con_type matches no ControlType.
    """
    query = ctx['database_session'].query(models.ControlType).filter_by(name=con_type)
    try:
        output = query.first().instances
    except AttributeError:
        # query.first() returned None -> unknown control type.
        output = None
    # Hacky solution to my not being able to get the sql query to work.
    # Filter in Python instead; both bounds stay exclusive as before.
    # Guard output too: the original iterated None when the type was unknown.
    if output is not None and start_date is not None and end_date is not None:
        output = [item for item in output if start_date < item.submitted_date.date() < end_date]
    return output
|
||||
|
||||
|
||||
def get_control_subtypes(ctx:dict, type:str, mode:str):
    """
    Returns the subtype keys available for a control type under a given mode.

    Args:
        ctx (dict): Settings passed down from gui.
        type (str): Name of the control type.
        mode (str): Name of the JSON column (mode) to inspect.

    Returns:
        list: Subtype names with bookkeeping keys removed; empty list when
            the type is unknown or has no instances/data.
    """
    try:
        outs = get_all_controls_by_type(ctx=ctx, con_type=type)[0]
    except (TypeError, IndexError):
        # TypeError: lookup returned None (unknown type, not subscriptable).
        # IndexError: type exists but has no instances — original crashed here.
        return []
    jsoner = json.loads(getattr(outs, mode))
    # Use the module logger instead of a bare print for debug output.
    logger.debug(f"JSON out: {jsoner}")
    try:
        genera = list(jsoner.keys())[0]
    except IndexError:
        # Empty JSON payload for this mode.
        return []
    subtypes = [item for item in jsoner[genera] if "_hashes" not in item and "_ratio" not in item]
    return subtypes
|
||||
|
||||
@@ -0,0 +1,43 @@
|
||||
|
||||
from pandas import DataFrame
|
||||
import re
|
||||
|
||||
|
||||
|
||||
def get_unique_values_in_df_column(df: DataFrame, column_name: str) -> list:
    """
    Returns the distinct values of a dataframe column in ascending order.

    Args:
        df (DataFrame): Dataframe to inspect.
        column_name (str): Name of the column of interest.

    Returns:
        list: Sorted unique values found in the column.
    """
    distinct = df[column_name].unique()
    return sorted(distinct)
|
||||
|
||||
|
||||
def drop_reruns_from_df(ctx:dict, df: DataFrame) -> DataFrame:
    """
    Removes semi-duplicates from dataframe after finding sequencing repeats.

    Args:
        ctx (dict): settings passed down from click
        df (DataFrame): initial dataframe

    Returns:
        DataFrame: dataframe with originals removed in favour of repeats.
    """
    sample_names = get_unique_values_in_df_column(df, column_name="name")
    # Without a configured rerun pattern there is nothing to drop.
    if 'rerun_regex' not in ctx:
        return df
    rerun_regex = re.compile(fr"{ctx['rerun_regex']}")
    for sample in sample_names:
        if not rerun_regex.search(sample):
            continue
        # Strip the rerun marker to recover the original run's name,
        # then drop the original in favour of the repeat.
        first_run = re.sub(rerun_regex, "", sample)
        df = df.drop(df[df.name == first_run].index)
    return df
|
||||
|
||||
@@ -54,7 +54,6 @@ class SheetParser(object):
|
||||
|
||||
|
||||
def _parse_bacterial_culture(self):
|
||||
# submission_info = self.xl.parse("Sample List")
|
||||
submission_info = self._parse_generic("Sample List")
|
||||
# iloc is [row][column] and the first row is set as header row so -2
|
||||
tech = str(submission_info.iloc[11][1])
|
||||
@@ -86,13 +85,6 @@ class SheetParser(object):
|
||||
enrichment_info = self.xl.parse("Enrichment Worksheet")
|
||||
extraction_info = self.xl.parse("Extraction Worksheet")
|
||||
qprc_info = self.xl.parse("qPCR Worksheet")
|
||||
# iloc is [row][column] and the first row is set as header row so -2
|
||||
# self.sub['submitter_plate_num'] = submission_info.iloc[0][1]
|
||||
# self.sub['rsl_plate_num'] = str(submission_info.iloc[10][1])
|
||||
# self.sub['submitted_date'] = submission_info.iloc[1][1].date()#.strftime("%Y-%m-%d")
|
||||
# self.sub['submitting_lab'] = submission_info.iloc[0][3]
|
||||
# self.sub['sample_count'] = str(submission_info.iloc[2][3])
|
||||
# self.sub['extraction_kit'] = submission_info.iloc[3][3]
|
||||
self.sub['technician'] = f"Enr: {enrichment_info.columns[2]}, Ext: {extraction_info.columns[2]}, PCR: {qprc_info.columns[2]}"
|
||||
# reagents
|
||||
self.sub['lot_lysis_buffer'] = enrichment_info.iloc[0][14]
|
||||
@@ -112,24 +104,6 @@ class SheetParser(object):
|
||||
sample_parser = SampleParser(submission_info.iloc[16:40])
|
||||
sample_parse = getattr(sample_parser, f"parse_{self.sub['submission_type'].lower()}_samples")
|
||||
self.sub['samples'] = sample_parse()
|
||||
# tech = str(submission_info.iloc[11][1])
|
||||
# if tech == "nan":
|
||||
# tech = "Unknown"
|
||||
# elif len(tech.split(",")) > 1:
|
||||
# tech_reg = re.compile(r"[A-Z]{2}")
|
||||
# tech = ", ".join(tech_reg.findall(tech))
|
||||
|
||||
|
||||
# self.sub['lot_wash_1'] = submission_info.iloc[1][6]
|
||||
# self.sub['lot_wash_2'] = submission_info.iloc[2][6]
|
||||
# self.sub['lot_binding_buffer'] = submission_info.iloc[3][6]
|
||||
# self.sub['lot_magnetic_beads'] = submission_info.iloc[4][6]
|
||||
# self.sub['lot_lysis_buffer'] = submission_info.iloc[5][6]
|
||||
# self.sub['lot_elution_buffer'] = submission_info.iloc[6][6]
|
||||
# self.sub['lot_isopropanol'] = submission_info.iloc[9][6]
|
||||
# self.sub['lot_ethanol'] = submission_info.iloc[10][6]
|
||||
# self.sub['lot_positive_control'] = None #submission_info.iloc[103][1]
|
||||
# self.sub['lot_plate'] = submission_info.iloc[12][6]
|
||||
|
||||
|
||||
class SampleParser(object):
|
||||
@@ -147,9 +121,9 @@ class SampleParser(object):
|
||||
new.sample_id = sample['Unnamed: 1']
|
||||
new.organism = sample['Unnamed: 2']
|
||||
new.concentration = sample['Unnamed: 3']
|
||||
print(f"Sample object: {new.sample_id} = {type(new.sample_id)}")
|
||||
# print(f"Sample object: {new.sample_id} = {type(new.sample_id)}")
|
||||
try:
|
||||
not_a_nan = not np.isnan(new.sample_id)
|
||||
not_a_nan = not np.isnan(new.sample_id) and new.sample_id.lower() != 'blank'
|
||||
except TypeError:
|
||||
not_a_nan = True
|
||||
if not_a_nan:
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
import pandas as pd
|
||||
from pandas import DataFrame
|
||||
import numpy as np
|
||||
from backend.db import models
|
||||
import json
|
||||
|
||||
def make_report_xlsx(records:list[dict]) -> DataFrame:
|
||||
df = DataFrame.from_records(records)
|
||||
@@ -10,4 +13,82 @@ def make_report_xlsx(records:list[dict]) -> DataFrame:
|
||||
print(df2.columns)
|
||||
# df2['Cost']['sum'] = df2['Cost']['sum'].apply('${:,.2f}'.format)
|
||||
df2.iloc[:, (df2.columns.get_level_values(1)=='sum') & (df2.columns.get_level_values(0)=='Cost')] = df2.iloc[:, (df2.columns.get_level_values(1)=='sum') & (df2.columns.get_level_values(0)=='Cost')].applymap('${:,.2f}'.format)
|
||||
return df2
|
||||
return df2
|
||||
|
||||
|
||||
# def split_controls_dictionary(ctx:dict, input_dict) -> list[dict]:
|
||||
# # this will be the date in string form
|
||||
# dict_name = list(input_dict.keys())[0]
|
||||
# # the data associated with the date key
|
||||
# sub_dict = input_dict[dict_name]
|
||||
# # How many "count", "Percent", etc are in the dictionary
|
||||
# data_size = get_dict_size(sub_dict)
|
||||
# output = []
|
||||
# for ii in range(data_size):
|
||||
# new_dict = {}
|
||||
# for genus in sub_dict:
|
||||
# print(genus)
|
||||
# sub_name = list(sub_dict[genus].keys())[ii]
|
||||
# new_dict[genus] = sub_dict[genus][sub_name]
|
||||
# output.append({"date":dict_name, "name": sub_name, "data": new_dict})
|
||||
# return output
|
||||
|
||||
|
||||
# def get_dict_size(input:dict):
|
||||
# return max(len(input[item]) for item in input)
|
||||
|
||||
|
||||
# def convert_all_controls(ctx:dict, data:list) -> dict:
|
||||
# dfs = {}
|
||||
# dict_list = [split_controls_dictionary(ctx, datum) for datum in data]
|
||||
# dict_list = [item for sublist in dict_list for item in sublist]
|
||||
# names = list(set([datum['name'] for datum in dict_list]))
|
||||
# for name in names:
|
||||
|
||||
|
||||
# # df = DataFrame()
|
||||
# # entries = [{item['date']:item['data']} for item in dict_list if item['name']==name]
|
||||
# # series_list = []
|
||||
# # df = pd.json_normalize(entries)
|
||||
# # for entry in entries:
|
||||
# # col_name = list(entry.keys())[0]
|
||||
# # col_dict = entry[col_name]
|
||||
# # series = pd.Series(data=col_dict.values(), index=col_dict.keys(), name=col_name)
|
||||
# # # df[col_name] = series.values
|
||||
# # # print(df.index)
|
||||
# # series_list.append(series)
|
||||
# # df = DataFrame(series_list).T.fillna(0)
|
||||
# # print(df)
|
||||
# dfs['name'] = df
|
||||
# return dfs
|
||||
|
||||
def convert_control_by_mode(ctx:dict, control:models.Control, mode:str):
    """
    Flattens one control's JSON payload for a given mode into row dicts.

    Args:
        ctx (dict): Settings passed down from gui.
        control (models.Control): Control record whose JSON column is read.
        mode (str): Name of the JSON column to unpack.

    Returns:
        list[dict]: One dict per genus carrying name, submitted_date, genus,
            a Target/Off-target flag, and every metric key from the payload.
    """
    data = json.loads(getattr(control, mode))
    output = []
    for genus in data:
        row = {
            'name': control.name,
            'submitted_date': control.submitted_date,
            'genus': genus,
            # A genus may carry "*" markers; strip them before the target check.
            'target': 'Target' if genus.strip("*") in control.controltype.targets else "Off-target",
        }
        for key in data[genus]:
            row[key] = data[genus][key]
        output.append(row)
    return output
|
||||
|
||||
|
||||
def convert_data_list_to_df(ctx:dict, input:list[dict], subtype:str|None=None) -> DataFrame:
|
||||
df = DataFrame.from_records(input)
|
||||
safe = ['name', 'submitted_date', 'genus', 'target']
|
||||
print(df)
|
||||
for column in df.columns:
|
||||
if "percent" in column:
|
||||
count_col = [item for item in df.columns if "count" in item][0]
|
||||
# The actual percentage from kraken was off due to exclusion of NaN, recalculating.
|
||||
df[column] = 100 * df[count_col] / df.groupby('submitted_date')[count_col].transform('sum')
|
||||
if column not in safe:
|
||||
if subtype != None and column != subtype:
|
||||
del df[column]
|
||||
# print(df)
|
||||
return df
|
||||
|
||||
Reference in New Issue
Block a user