Checking kit integrity on import.
@@ -576,17 +576,23 @@ def get_all_controls_by_type(ctx:dict, con_type:str, start_date:date|None=None,
        list: Control instances.
    """
    # logger.debug(f"Using dates: {start_date} to {end_date}")
    query = ctx['database_session'].query(models.ControlType).filter_by(name=con_type)
    try:
        output = query.first().instances
    except AttributeError:
        output = None
    # Hacky solution to my not being able to get the sql query to work.
    logger.debug(f"Using dates: {start_date} to {end_date}")
    if start_date != None and end_date != None:
        output = [item for item in output if item.submitted_date.date() > start_date and item.submitted_date.date() < end_date]
        # logger.debug(f"Type {con_type}: {query.first()}")
        output = ctx['database_session'].query(models.Control).join(models.ControlType).filter_by(name=con_type).filter(models.Control.submitted_date.between(start_date.strftime("%Y-%m-%d"), end_date.strftime("%Y-%m-%d"))).all()
    else:
        output = ctx['database_session'].query(models.Control).join(models.ControlType).filter_by(name=con_type).all()
    logger.debug(f"Returned controls between dates: {output}")
    return output
    # query = ctx['database_session'].query(models.ControlType).filter_by(name=con_type)
    # try:
    #     output = query.first().instances
    # except AttributeError:
    #     output = None
    # # Hacky solution to my not being able to get the sql query to work.
    # if start_date != None and end_date != None:
    #     output = [item for item in output if item.submitted_date.date() > start_date and item.submitted_date.date() < end_date]
    # # logger.debug(f"Type {con_type}: {query.first()}")
    # return output


def get_control_subtypes(ctx:dict, type:str, mode:str) -> list[str]:
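
A minimal standalone sketch of the date-window query the new branch builds, assuming the same models and an open session (`controls_in_window` is a hypothetical name, not part of this commit):

    from datetime import date

    def controls_in_window(session, con_type: str, start: date, end: date) -> list:
        # Join Control -> ControlType, match the type by name, and bound
        # submitted_date in SQL instead of filtering the rows in Python.
        return (
            session.query(models.Control)
            .join(models.ControlType)
            .filter(models.ControlType.name == con_type)
            .filter(models.Control.submitted_date.between(start, end))
            .all()
        )

Two small caveats: SQLAlchemy can bind date objects directly, so the strftime("%Y-%m-%d") round-trip above is likely unnecessary, and SQL BETWEEN is inclusive at both ends where the replaced list comprehension used strict > and < comparisons.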
@@ -39,9 +39,18 @@ class Control(Base):
    # UniqueConstraint('name', name='uq_control_name')
    submission_id = Column(INTEGER, ForeignKey("_submissions.id")) #: parent submission id
    submission = relationship("BacterialCulture", back_populates="controls", foreign_keys=[submission_id]) #: parent submission
    refseq_version = Column(String(16))
    kraken2_version = Column(String(16))
    kraken2_db_version = Column(String(32))

    def to_sub_dict(self):
    def to_sub_dict(self) -> dict:
        """
        Converts object into convenient dictionary for use in submission summary

        Returns:
            dict: output dictionary containing: Name, Type, Targets, Top Kraken results
        """
        kraken = json.loads(self.kraken)
        kraken_cnt_total = sum([kraken[item]['kraken_count'] for item in kraken])
        new_kraken = []
@@ -61,3 +70,46 @@ class Control(Base):
        }
        return output

    def convert_by_mode(self, mode:str) -> list[dict]:
        """
        Split this control's stored JSON for one analysis type into flat records

        Args:
            mode (str): analysis type

        Returns:
            list[dict]: list of records
        """
        output = []
        data = json.loads(getattr(self, mode))
        # if len(data) == 0:
        #     data = self.create_dummy_data(mode)
        logger.debug(f"Length of data: {len(data)}")
        for genus in data:
            _dict = {}
            _dict['name'] = self.name
            _dict['submitted_date'] = self.submitted_date
            _dict['genus'] = genus
            _dict['target'] = 'Target' if genus.strip("*") in self.controltype.targets else "Off-target"

            for key in data[genus]:
                _dict[key] = data[genus][key]
                if _dict[key] == {}:
                    print(self.name, mode)
            output.append(_dict)
        # logger.debug(output)
        return output

    def create_dummy_data(self, mode):
        match mode:
            case "contains":
                data = {"Nothing": {"contains_hashes":"0/400", "contains_ratio":0.0}}
            case "matches":
                data = {"Nothing": {"matches_hashes":"0/400", "matches_ratio":0.0}}
            case "kraken":
                data = {"Nothing": {"kraken_percent":0.0, "kraken_count":0}}
            case _:
                data = {}
        return data
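
A hypothetical usage sketch for the new method, assuming a control whose kraken column stores JSON keyed by genus (the names and numbers here are illustrative only):

    # Stored JSON, one genus per key:
    #   {"Escherichia": {"kraken_count": 380, "kraken_percent": 95.0}, ...}
    rows = control.convert_by_mode(mode="kraken")
    # Each record comes back flat, ready for DataFrame.from_records:
    #   {"name": control.name, "submitted_date": control.submitted_date,
    #    "genus": "Escherichia", "target": "Target",
    #    "kraken_count": 380, "kraken_percent": 95.0}

As written, create_dummy_data is only reachable from the commented-out empty-data guard at the top of convert_by_mode.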
@@ -4,6 +4,7 @@ from sqlalchemy.orm import relationship
from datetime import datetime as dt
import logging
import json
from json.decoder import JSONDecodeError

logger = logging.getLogger(f"submissions.{__name__}")

@@ -74,6 +75,9 @@ class BasicSubmission(Base):
            ext_info = json.loads(self.extraction_info)
        except TypeError:
            ext_info = None
        except JSONDecodeError as e:
            ext_info = None
            logger.debug(f"Json error in {self.rsl_plate_num}: {e}")
        try:
            reagents = [item.to_sub_dict() for item in self.reagents]
        except:
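
The same tolerant parse, isolated as a sketch (parse_json_column is a hypothetical helper, not part of this commit): a NULL column reaches json.loads as None and raises TypeError, while malformed text raises JSONDecodeError.

    import json
    from json.decoder import JSONDecodeError

    def parse_json_column(raw):
        # None (NULL column) -> TypeError; malformed text -> JSONDecodeError.
        try:
            return json.loads(raw)
        except (TypeError, JSONDecodeError):
            return None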
@@ -1,10 +1,11 @@
from pandas import DataFrame, concat
from backend.db import models
from operator import itemgetter
# from backend.db import models
import json
import logging
from jinja2 import Environment, FileSystemLoader
from datetime import date
from datetime import date, timedelta
import sys
from pathlib import Path

@@ -139,31 +140,32 @@ def make_report_html(df:DataFrame, start_date:date, end_date:date) -> str:
    # dfs['name'] = df
    # return dfs

def convert_control_by_mode(ctx:dict, control:models.Control, mode:str) -> list[dict]:
    """
    split control object into analysis types
# def convert_control_by_mode(ctx:dict, control:models.Control, mode:str) -> list[dict]:
#     """
#     split control object into analysis types... can I move this into the class itself?
#     turns out I can

    Args:
        ctx (dict): settings passed from gui
        control (models.Control): control to be parsed into list
        mode (str): analysis type
#     Args:
#         ctx (dict): settings passed from gui
#         control (models.Control): control to be parsed into list
#         mode (str): analysis type

    Returns:
        list[dict]: list of records
    """
    output = []
    data = json.loads(getattr(control, mode))
    for genus in data:
        _dict = {}
        _dict['name'] = control.name
        _dict['submitted_date'] = control.submitted_date
        _dict['genus'] = genus
        _dict['target'] = 'Target' if genus.strip("*") in control.controltype.targets else "Off-target"
        for key in data[genus]:
            _dict[key] = data[genus][key]
        output.append(_dict)
    # logger.debug(output)
    return output
#     Returns:
#         list[dict]: list of records
#     """
#     output = []
#     data = json.loads(getattr(control, mode))
#     for genus in data:
#         _dict = {}
#         _dict['name'] = control.name
#         _dict['submitted_date'] = control.submitted_date
#         _dict['genus'] = genus
#         _dict['target'] = 'Target' if genus.strip("*") in control.controltype.targets else "Off-target"
#         for key in data[genus]:
#             _dict[key] = data[genus][key]
#         output.append(_dict)
#     # logger.debug(output)
#     return output


def convert_data_list_to_df(ctx:dict, input:list[dict], subtype:str|None=None) -> DataFrame:
@@ -178,17 +180,81 @@ def convert_data_list_to_df(ctx:dict, input:list[dict], subtype:str|None=None) -
    Returns:
        DataFrame: dataframe built from the input control records
    """
    # copy = input
    # for item in copy:
    #     item['submitted_date'] = item['submitted_date'].strftime("%Y-%m-%d")
    # with open("controls.json", "w") as f:
    #     f.write(json.dumps(copy))
    # for item in input:
    #     logger.debug(item.keys())
    df = DataFrame.from_records(input)
    df.to_excel("test.xlsx", engine="openpyxl")
    safe = ['name', 'submitted_date', 'genus', 'target']
    # logger.debug(df)
    for column in df.columns:
        if "percent" in column:
            count_col = [item for item in df.columns if "count" in item][0]
            # The actual percentage from kraken was off due to exclusion of NaN, recalculating.
            df[column] = 100 * df[count_col] / df.groupby('submitted_date')[count_col].transform('sum')
            # df[column] = 100 * df[count_col] / df.groupby('submitted_date')[count_col].transform('sum')
            df[column] = 100 * df[count_col] / df.groupby('name')[count_col].transform('sum')
        if column not in safe:
            if subtype != None and column != subtype:
                del df[column]
    # logger.debug(df)
    # df.sort_values('submitted_date').to_excel("controls.xlsx", engine="openpyxl")
    df = displace_date(df)
    df.sort_values('submitted_date').to_excel("controls.xlsx", engine="openpyxl")
    df = df_column_renamer(df=df)
    return df
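
A self-contained sketch of the percentage recalculation performed above (toy values; grouping by name gives each control its share of its own total reads):

    from pandas import DataFrame

    df = DataFrame({
        "name":         ["EN-1", "EN-1", "EN-2"],   # hypothetical control names
        "kraken_count": [380, 20, 100],
    })
    df["kraken_percent"] = 100 * df["kraken_count"] / df.groupby("name")["kraken_count"].transform("sum")
    # EN-1 rows become 95.0 and 5.0; the lone EN-2 row becomes 100.0

Note that the first groupby('submitted_date') assignment above is immediately overwritten by the groupby('name') version, so only the latter takes effect.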
def df_column_renamer(df:DataFrame) -> DataFrame:
    """
    Ad hoc function I created to clarify some fields

    Args:
        df (DataFrame): input dataframe

    Returns:
        DataFrame: dataframe with 'clarified' column names
    """
    df = df[df.columns.drop(list(df.filter(regex='_hashes')))]
    return df.rename(columns = {
        "contains_ratio":"contains_shared_hashes_ratio",
        "matches_ratio":"matches_shared_hashes_ratio",
        "kraken_count":"kraken2_read_count",
        "kraken_percent":"kraken2_read_percent"
    })
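
The drop-then-rename behaviour on a toy frame (columns only, values omitted):

    from pandas import DataFrame

    df = DataFrame(columns=["name", "contains_hashes", "contains_ratio", "kraken_count"])
    df = df[df.columns.drop(list(df.filter(regex="_hashes")))]   # drops contains_hashes
    df = df.rename(columns={"contains_ratio": "contains_shared_hashes_ratio",
                            "kraken_count": "kraken2_read_count"})
    # Remaining columns: name, contains_shared_hashes_ratio, kraken2_read_count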
def displace_date(df:DataFrame) -> DataFrame:
    """
    Splits apart samples that were submitted on the same date by incrementing their dates.

    Args:
        df (DataFrame): input dataframe composed of control records

    Returns:
        DataFrame: output dataframe with dates incremented.
    """
    # dict_list = []
    # for item in df['name'].unique():
    #     dict_list.append(dict(name=item, date=df[df.name == item].iloc[0]['submitted_date']))
    logger.debug(f"Unique items: {df['name'].unique()}")
    # logger.debug(df.to_string())
    # the assumption is that closest names will have closest dates...
    dict_list = [dict(name=item, date=df[df.name == item].iloc[0]['submitted_date']) for item in sorted(df['name'].unique())]
    for ii, item in enumerate(dict_list):
        # if ii > 0:
        try:
            check = item['date'] == dict_list[ii-1]['date']
        except IndexError:
            check = False
        if check:
            logger.debug(f"We found one! Increment date!\n{item['date'] - timedelta(days=1)}")
            mask = df['name'] == item['name']
            # logger.debug(f"We will increment dates in: {df.loc[mask, 'submitted_date']}")
            df.loc[mask, 'submitted_date'] = df.loc[mask, 'submitted_date'].apply(lambda x: x + timedelta(days=1))
            # logger.debug(f"Do these look incremented: {df.loc[mask, 'submitted_date']}")
    return df
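
A worked sketch of the displacement on toy records (hypothetical names and dates). One subtlety in the loop above: for ii == 0 the dict_list[ii-1] lookup wraps around to the last element rather than raising IndexError, so the first name can be compared against the last one; the commented-out `if ii > 0` guard would be the stricter check.

    from datetime import date, timedelta
    from pandas import DataFrame

    df = DataFrame({
        "name": ["EN-1", "EN-1", "EN-2"],
        "submitted_date": [date(2022, 3, 1)] * 3,   # EN-2 collides with EN-1
    })
    # displace_date pushes every row of the colliding name forward one day:
    mask = df["name"] == "EN-2"
    df.loc[mask, "submitted_date"] = df.loc[mask, "submitted_date"].apply(lambda x: x + timedelta(days=1))
    # EN-1 keeps 2022-03-01; EN-2 rows move to 2022-03-02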