Before control chart update.
This commit is contained in:
1
TODO.md
1
TODO.md
@@ -1,3 +1,4 @@
|
||||
- [ ] Revamp frontend.widgets.controls_chart to include visualizations?
|
||||
- [x] Convert Parsers to using openpyxl.
|
||||
- The hardest part of this is going to be the sample parsing. I'm onto using the cell formulas in the plate map to suss out the location in the lookup table, but it could get a little recursive up in here.
|
||||
- [ ] Create a default info return function.
|
||||
|
||||
@@ -3,6 +3,7 @@ All database related operations.
|
||||
"""
|
||||
from sqlalchemy import event
|
||||
from sqlalchemy.engine import Engine
|
||||
from tools import ctx
|
||||
|
||||
|
||||
@event.listens_for(Engine, "connect")
|
||||
@@ -17,7 +18,8 @@ def set_sqlite_pragma(dbapi_connection, connection_record):
|
||||
connection_record (_type_): _description_
|
||||
"""
|
||||
cursor = dbapi_connection.cursor()
|
||||
# cursor.execute("PRAGMA foreign_keys=ON")
|
||||
if ctx.database_schema == "sqlite":
|
||||
cursor.execute("PRAGMA foreign_keys=ON")
|
||||
cursor.close()
|
||||
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@ Contains all models for sqlalchemy
|
||||
"""
|
||||
from __future__ import annotations
|
||||
import sys, logging
|
||||
from sqlalchemy import Column, INTEGER, String, JSON, inspect
|
||||
from sqlalchemy import Column, INTEGER, String, JSON
|
||||
from sqlalchemy.orm import DeclarativeMeta, declarative_base, Query, Session
|
||||
from sqlalchemy.ext.declarative import declared_attr
|
||||
from sqlalchemy.exc import ArgumentError
|
||||
@@ -163,7 +163,6 @@ class BaseClass(Base):
|
||||
try:
|
||||
self.__database_session__.add(self)
|
||||
self.__database_session__.commit()
|
||||
# self.__database_session__.merge(self)
|
||||
except Exception as e:
|
||||
logger.critical(f"Problem saving object: {e}")
|
||||
self.__database_session__.rollback()
|
||||
@@ -203,4 +202,5 @@ from .organizations import *
|
||||
from .kits import *
|
||||
from .submissions import *
|
||||
|
||||
# NOTE: Add a creator to the submission for reagent association.
|
||||
BasicSubmission.reagents.creator = lambda reg: SubmissionReagentAssociation(reagent=reg)
|
||||
|
||||
@@ -467,6 +467,7 @@ class Reagent(BaseClass):
|
||||
Lookup a list of reagents from the database.
|
||||
|
||||
Args:
|
||||
id (int | None, optional): reagent id number
|
||||
reagent_role (str | models.ReagentType | None, optional): Reagent type. Defaults to None.
|
||||
lot_number (str | None, optional): Reagent lot number. Defaults to None.
|
||||
name (str | None, optional): Reagent name. Defaults to None.
|
||||
@@ -1468,7 +1469,7 @@ class SubmissionTypeEquipmentRoleAssociation(BaseClass):
|
||||
List[Process]: All associated processes
|
||||
"""
|
||||
processes = [equipment.get_processes(self.submission_type) for equipment in self.equipment_role.instances]
|
||||
# flatten list
|
||||
# NOTE: flatten list
|
||||
processes = [item for items in processes for item in items if item is not None]
|
||||
match extraction_kit:
|
||||
case str():
|
||||
@@ -1523,6 +1524,7 @@ class Process(BaseClass):
|
||||
Lookup Processes
|
||||
|
||||
Args:
|
||||
id (int | None, optional): Process id. Defaults to None.
|
||||
name (str | None, optional): Process name. Defaults to None.
|
||||
limit (int, optional): Maximum number of results to return (0=all). Defaults to 0.
|
||||
|
||||
@@ -1666,7 +1668,6 @@ class SubmissionTipsAssociation(BaseClass):
|
||||
back_populates="tips_submission_associations") #: associated equipment
|
||||
role_name = Column(String(32), primary_key=True) #, ForeignKey("_tiprole.name"))
|
||||
|
||||
# role = relationship(TipRole)
|
||||
|
||||
def to_sub_dict(self) -> dict:
|
||||
"""
|
||||
|
||||
@@ -27,7 +27,6 @@ from tools import row_map, setup_lookup, jinja_template_loading, rreplace, row_k
|
||||
from datetime import datetime, date
|
||||
from typing import List, Any, Tuple, Literal
|
||||
from dateutil.parser import parse
|
||||
# from dateutil.parser import ParserError
|
||||
from pathlib import Path
|
||||
from jinja2.exceptions import TemplateNotFound
|
||||
from jinja2 import Template
|
||||
@@ -370,16 +369,16 @@ class BasicSubmission(BaseClass):
|
||||
"""
|
||||
Calculates cost of the plate
|
||||
"""
|
||||
# Calculate number of columns based on largest column number
|
||||
# NOTE: Calculate number of columns based on largest column number
|
||||
try:
|
||||
cols_count_96 = self.calculate_column_count()
|
||||
except Exception as e:
|
||||
logger.error(f"Column count error: {e}")
|
||||
# Get kit associated with this submission
|
||||
# NOTE: Get kit associated with this submission
|
||||
assoc = [item for item in self.extraction_kit.kit_submissiontype_associations if
|
||||
item.submission_type == self.submission_type][0]
|
||||
# logger.debug(f"Came up with association: {assoc}")
|
||||
# If every individual cost is 0 this is probably an old plate.
|
||||
# NOTE: If every individual cost is 0 this is probably an old plate.
|
||||
if all(item == 0.0 for item in [assoc.constant_cost, assoc.mutable_cost_column, assoc.mutable_cost_sample]):
|
||||
try:
|
||||
self.run_cost = self.extraction_kit.cost_per_run
|
||||
@@ -446,6 +445,7 @@ class BasicSubmission(BaseClass):
|
||||
Convert all submissions to dataframe
|
||||
|
||||
Args:
|
||||
chronologic (bool, optional): Sort submissions in chronologic order. Defaults to True.
|
||||
submission_type (str | None, optional): Filter by SubmissionType. Defaults to None.
|
||||
limit (int, optional): Maximum number of results to return. Defaults to 0.
|
||||
|
||||
@@ -1047,7 +1047,7 @@ class BasicSubmission(BaseClass):
|
||||
ValueError: Raised if disallowed key is passed.
|
||||
|
||||
Returns:
|
||||
cls: _description_
|
||||
cls: A BasicSubmission subclass.
|
||||
"""
|
||||
code = 0
|
||||
msg = ""
|
||||
@@ -1056,10 +1056,6 @@ class BasicSubmission(BaseClass):
|
||||
if kwargs == {}:
|
||||
raise ValueError("Need to narrow down query or the first available instance will be returned.")
|
||||
sanitized_kwargs = {k: v for k, v in kwargs.items() if k not in disallowed}
|
||||
# for key in kwargs.keys():
|
||||
# if key in disallowed:
|
||||
# raise ValueError(
|
||||
# f"{key} is not allowed as a query argument as it could lead to creation of duplicate objects. Use .query() instead.")
|
||||
instance = cls.query(submission_type=submission_type, limit=1, **sanitized_kwargs)
|
||||
# logger.debug(f"Retrieved instance: {instance}")
|
||||
if instance is None:
|
||||
@@ -1102,7 +1098,7 @@ class BasicSubmission(BaseClass):
|
||||
obj (_type_, optional): Parent widget. Defaults to None.
|
||||
|
||||
Raises:
|
||||
e: _description_
|
||||
e: SQLIntegrityError or SQLOperationalError if problem with commit.
|
||||
"""
|
||||
from frontend.widgets.pop_ups import QuestionAsker
|
||||
# logger.debug("Hello from delete")
|
||||
@@ -1123,7 +1119,7 @@ class BasicSubmission(BaseClass):
|
||||
Creates Widget for showing submission details.
|
||||
|
||||
Args:
|
||||
obj (_type_): Parent widget
|
||||
obj (Widget): Parent widget
|
||||
"""
|
||||
# logger.debug("Hello from details")
|
||||
from frontend.widgets.submission_details import SubmissionDetails
|
||||
@@ -1139,9 +1135,9 @@ class BasicSubmission(BaseClass):
|
||||
obj (Widget): Parent widget
|
||||
"""
|
||||
from frontend.widgets.submission_widget import SubmissionFormWidget
|
||||
for widg in obj.app.table_widget.formwidget.findChildren(SubmissionFormWidget):
|
||||
# logger.debug(widg)
|
||||
widg.setParent(None)
|
||||
for widget in obj.app.table_widget.formwidget.findChildren(SubmissionFormWidget):
|
||||
# logger.debug(widget)
|
||||
widget.setParent(None)
|
||||
pyd = self.to_pydantic(backup=True)
|
||||
form = pyd.to_form(parent=obj, disable=['rsl_plate_num'])
|
||||
obj.app.table_widget.formwidget.layout().addWidget(form)
|
||||
@@ -1271,7 +1267,6 @@ class BacterialCulture(BasicSubmission):
|
||||
input_dict (dict): _description_
|
||||
xl (pd.ExcelFile | None, optional): _description_. Defaults to None.
|
||||
info_map (dict | None, optional): _description_. Defaults to None.
|
||||
plate_map (dict | None, optional): _description_. Defaults to None.
|
||||
|
||||
Returns:
|
||||
dict: Updated dictionary.
|
||||
@@ -1560,7 +1555,6 @@ class Wastewater(BasicSubmission):
|
||||
well_24 = []
|
||||
samples_copy = deepcopy(input_dict['samples'])
|
||||
for sample in sorted(samples_copy, key=itemgetter('column', 'row')):
|
||||
# for sample in input_dict['samples']:
|
||||
try:
|
||||
row = sample['source_row']
|
||||
except KeyError:
|
||||
@@ -1742,6 +1736,7 @@ class WastewaterArtic(BasicSubmission):
|
||||
processed = rreplace(processed, plate_num, "")
|
||||
except AttributeError:
|
||||
plate_num = "1"
|
||||
# NOTE: plate_num is not currently used, but is kept in case it is needed in the future
|
||||
plate_num = plate_num.strip("-")
|
||||
# logger.debug(f"Processed after plate-num: {processed}")
|
||||
day = re.search(r"\d{2}$", processed).group()
|
||||
@@ -1827,7 +1822,6 @@ class WastewaterArtic(BasicSubmission):
|
||||
"""
|
||||
input_dict = super().finalize_parse(input_dict, xl, info_map)
|
||||
# logger.debug(f"Incoming input_dict: {pformat(input_dict)}")
|
||||
# TODO: Move to validator?
|
||||
for sample in input_dict['samples']:
|
||||
# logger.debug(f"Sample: {sample}")
|
||||
if re.search(r"^NTC", sample['submitter_id']):
|
||||
@@ -1978,7 +1972,7 @@ class WastewaterArtic(BasicSubmission):
|
||||
self.comment = [com]
|
||||
# logger.debug(pformat(self.gel_info))
|
||||
with ZipFile(self.__directory_path__.joinpath("submission_imgs.zip"), 'a') as zipf:
|
||||
# Add a file located at the source_path to the destination within the zip
|
||||
# NOTE: Add a file located at the source_path to the destination within the zip
|
||||
# file. It will overwrite existing files if the names collide, but it
|
||||
# will give a warning
|
||||
zipf.write(img_path, self.gel_image)
|
||||
@@ -1997,6 +1991,7 @@ class WastewaterArtic(BasicSubmission):
|
||||
dict: Dictionary with information added.
|
||||
"""
|
||||
input_dict = super().custom_docx_writer(input_dict)
|
||||
# NOTE: if there's a gel image, extract it.
|
||||
if check_key_or_attr(key='gel_image_path', interest=input_dict, check_none=True):
|
||||
with ZipFile(cls.__directory_path__.joinpath("submission_imgs.zip")) as zipped:
|
||||
img = zipped.read(input_dict['gel_image_path'])
|
||||
@@ -2246,9 +2241,7 @@ class BasicSample(BaseClass):
|
||||
|
||||
@classmethod
|
||||
def fuzzy_search(cls,
|
||||
# submitter_id: str | None = None,
|
||||
sample_type: str | BasicSample | None = None,
|
||||
# limit: int = 0,
|
||||
**kwargs
|
||||
) -> List[BasicSample]:
|
||||
"""
|
||||
@@ -2315,8 +2308,8 @@ class BasicSample(BaseClass):
|
||||
'equipment', 'gel_info', 'gel_image', 'dna_core_submission_number', 'gel_controls']:
|
||||
try:
|
||||
df = df.drop(item, axis=1)
|
||||
except:
|
||||
logger.warning(f"Couldn't drop '{item}' column from submissionsheet df.")
|
||||
except KeyError as e:
|
||||
logger.warning(f"Couldn't drop '{item}' column from submissionsheet df due to {e}.")
|
||||
return df
|
||||
|
||||
def show_details(self, obj):
|
||||
@@ -2408,7 +2401,7 @@ class WastewaterSample(BasicSample):
|
||||
# logger.debug(f"Initial sample dict: {pformat(output_dict)}")
|
||||
disallowed = ["", None, "None"]
|
||||
try:
|
||||
check = output_dict['rsl_number'] in [None, "None"]
|
||||
check = output_dict['rsl_number'] in disallowed
|
||||
except KeyError:
|
||||
check = True
|
||||
if check:
|
||||
@@ -2451,7 +2444,6 @@ class BacterialCultureSample(BasicSample):
|
||||
Returns:
|
||||
dict: well location and name (sample id, organism) NOTE: keys must sync with WWSample to_sub_dict above
|
||||
"""
|
||||
# start = time()
|
||||
sample = super().to_sub_dict(full_data=full_data)
|
||||
sample['name'] = self.submitter_id
|
||||
sample['organism'] = self.organism
|
||||
@@ -2553,7 +2545,7 @@ class SubmissionSampleAssociation(BaseClass):
|
||||
Returns:
|
||||
dict: dictionary of sample id, row and column in elution plate
|
||||
"""
|
||||
# Since there is no PCR, negliable result is necessary.
|
||||
# NOTE: Since there is no PCR, negliable result is necessary.
|
||||
sample = self.to_sub_dict()
|
||||
# logger.debug(f"Sample dict to hitpick: {sample}")
|
||||
env = jinja_template_loading()
|
||||
@@ -2728,7 +2720,6 @@ class SubmissionSampleAssociation(BaseClass):
|
||||
except StatementError:
|
||||
instance = None
|
||||
if instance is None:
|
||||
# sanitized_kwargs = {k:v for k,v in kwargs.items() if k not in ['id']}
|
||||
used_cls = cls.find_polymorphic_subclass(polymorphic_identity=association_type)
|
||||
instance = used_cls(submission=submission, sample=sample, id=id, **kwargs)
|
||||
return instance
|
||||
|
||||
@@ -6,7 +6,6 @@ from copy import copy
|
||||
from getpass import getuser
|
||||
from pprint import pformat
|
||||
from typing import List
|
||||
import pandas as pd
|
||||
from openpyxl import load_workbook, Workbook
|
||||
from pathlib import Path
|
||||
from backend.db.models import *
|
||||
@@ -17,7 +16,6 @@ from datetime import date
|
||||
from dateutil.parser import parse, ParserError
|
||||
from tools import check_not_nan, convert_nans_to_nones, is_missing, remove_key_from_list_of_dicts, check_key_or_attr
|
||||
|
||||
|
||||
logger = logging.getLogger(f"submissions.{__name__}")
|
||||
|
||||
|
||||
@@ -70,8 +68,8 @@ class SheetParser(object):
|
||||
self.info_map = parser.map
|
||||
for k, v in info.items():
|
||||
match k:
|
||||
# NOTE: exclude samples.
|
||||
case "sample":
|
||||
# case item if
|
||||
pass
|
||||
case _:
|
||||
self.sub[k] = v
|
||||
@@ -169,7 +167,8 @@ class InfoParser(object):
|
||||
"""
|
||||
Object to parse generic info from excel sheet.
|
||||
"""
|
||||
def __init__(self, xl: Workbook, submission_type: str|SubmissionType, sub_object: BasicSubmission|None=None):
|
||||
|
||||
def __init__(self, xl: Workbook, submission_type: str | SubmissionType, sub_object: BasicSubmission | None = None):
|
||||
"""
|
||||
Args:
|
||||
xl (Workbook): Openpyxl workbook from submitted excel file.
|
||||
@@ -191,9 +190,6 @@ class InfoParser(object):
|
||||
"""
|
||||
Gets location of basic info from the submission_type object in the database.
|
||||
|
||||
Args:
|
||||
submission_type (str|dict): name of the submission type or parsed object with value=submission_type
|
||||
|
||||
Returns:
|
||||
dict: Location map of all info for this submission type
|
||||
"""
|
||||
@@ -266,7 +262,7 @@ class InfoParser(object):
|
||||
dicto[item['name']] = dict(value=value, missing=missing)
|
||||
except (KeyError, IndexError):
|
||||
continue
|
||||
# Return after running the parser components held in submission object.
|
||||
# NOTE: Return after running the parser components held in submission object.
|
||||
return self.sub_object.custom_info_parser(input_dict=dicto, xl=self.xl, custom_fields=self.map['custom'])
|
||||
|
||||
|
||||
@@ -275,7 +271,8 @@ class ReagentParser(object):
|
||||
Object to pull reagents from excel sheet.
|
||||
"""
|
||||
|
||||
def __init__(self, xl: Workbook, submission_type: str, extraction_kit: str, sub_object:BasicSubmission|None=None):
|
||||
def __init__(self, xl: Workbook, submission_type: str, extraction_kit: str,
|
||||
sub_object: BasicSubmission | None = None):
|
||||
"""
|
||||
Args:
|
||||
xl (Workbook): Openpyxl workbook from submitted excel file.
|
||||
@@ -368,7 +365,8 @@ class SampleParser(object):
|
||||
Object to pull data for samples in excel sheet and construct individual sample objects
|
||||
"""
|
||||
|
||||
def __init__(self, xl: Workbook, submission_type: SubmissionType, sample_map: dict | None = None, sub_object:BasicSubmission|None=None) -> None:
|
||||
def __init__(self, xl: Workbook, submission_type: SubmissionType, sample_map: dict | None = None,
|
||||
sub_object: BasicSubmission | None = None) -> None:
|
||||
"""
|
||||
Args:
|
||||
xl (Workbook): Openpyxl workbook from submitted excel file.
|
||||
@@ -452,8 +450,8 @@ class SampleParser(object):
|
||||
lmap = self.sample_info_map['lookup_table']
|
||||
ws = self.xl[lmap['sheet']]
|
||||
lookup_samples = []
|
||||
for ii, row in enumerate(range(lmap['start_row'], lmap['end_row']+1), start=1):
|
||||
row_dict = {k:ws.cell(row=row, column=v).value for k, v in lmap['sample_columns'].items()}
|
||||
for ii, row in enumerate(range(lmap['start_row'], lmap['end_row'] + 1), start=1):
|
||||
row_dict = {k: ws.cell(row=row, column=v).value for k, v in lmap['sample_columns'].items()}
|
||||
try:
|
||||
row_dict[lmap['merge_on_id']] = str(row_dict[lmap['merge_on_id']])
|
||||
except KeyError:
|
||||
@@ -533,10 +531,6 @@ class SampleParser(object):
|
||||
break
|
||||
else:
|
||||
new = psample
|
||||
# try:
|
||||
# check = new['submitter_id'] is None
|
||||
# except KeyError:
|
||||
# check = True
|
||||
if not check_key_or_attr(key='submitter_id', interest=new, check_none=True):
|
||||
new['submitter_id'] = psample['id']
|
||||
new = self.sub_object.parse_samples(new)
|
||||
@@ -549,7 +543,8 @@ class EquipmentParser(object):
|
||||
"""
|
||||
Object to pull data for equipment in excel sheet
|
||||
"""
|
||||
def __init__(self, xl: Workbook, submission_type: str|SubmissionType) -> None:
|
||||
|
||||
def __init__(self, xl: Workbook, submission_type: str | SubmissionType) -> None:
|
||||
"""
|
||||
Args:
|
||||
xl (Workbook): Openpyxl workbook from submitted excel file.
|
||||
@@ -601,7 +596,7 @@ class EquipmentParser(object):
|
||||
for sheet in self.xl.sheetnames:
|
||||
ws = self.xl[sheet]
|
||||
try:
|
||||
relevant = {k:v for k,v in self.map.items() if v['sheet'] == sheet}
|
||||
relevant = {k: v for k, v in self.map.items() if v['sheet'] == sheet}
|
||||
except (TypeError, KeyError) as e:
|
||||
logger.error(f"Error creating relevant equipment list: {e}")
|
||||
continue
|
||||
@@ -634,7 +629,8 @@ class TipParser(object):
|
||||
"""
|
||||
Object to pull data for tips in excel sheet
|
||||
"""
|
||||
def __init__(self, xl: Workbook, submission_type: str|SubmissionType) -> None:
|
||||
|
||||
def __init__(self, xl: Workbook, submission_type: str | SubmissionType) -> None:
|
||||
"""
|
||||
Args:
|
||||
xl (Workbook): Openpyxl workbook from submitted excel file.
|
||||
@@ -698,7 +694,7 @@ class TipParser(object):
|
||||
class PCRParser(object):
|
||||
"""Object to pull data from Design and Analysis PCR export file."""
|
||||
|
||||
def __init__(self, filepath: Path | None=None, submission: BasicSubmission | None=None) -> None:
|
||||
def __init__(self, filepath: Path | None = None, submission: BasicSubmission | None = None) -> None:
|
||||
"""
|
||||
Args:
|
||||
filepath (Path | None, optional): file to parse. Defaults to None.
|
||||
|
||||
@@ -7,7 +7,7 @@ from pathlib import Path
|
||||
from datetime import date, timedelta
|
||||
from typing import List, Tuple, Any
|
||||
from backend.db.models import BasicSubmission
|
||||
from tools import jinja_template_loading, Settings, get_unique_values_in_df_column, html_to_pdf, get_first_blank_df_row, \
|
||||
from tools import jinja_template_loading, html_to_pdf, get_first_blank_df_row, \
|
||||
row_map
|
||||
from PyQt6.QtWidgets import QWidget
|
||||
from openpyxl.worksheet.worksheet import Worksheet
|
||||
@@ -71,20 +71,20 @@ class ReportMaker(object):
|
||||
# logger.debug(f"Name: {row[0][1]}")
|
||||
data = [item for item in row[1]]
|
||||
kit = dict(name=row[0][1], cost=data[1], run_count=int(data[0]), sample_count=int(data[2]))
|
||||
# if this is the same lab as before add together
|
||||
# NOTE: if this is the same lab as before add together
|
||||
if lab == old_lab:
|
||||
output[-1]['kits'].append(kit)
|
||||
output[-1]['total_cost'] += kit['cost']
|
||||
output[-1]['total_samples'] += kit['sample_count']
|
||||
output[-1]['total_runs'] += kit['run_count']
|
||||
# if not the same lab, make a new one
|
||||
# NOTE: if not the same lab, make a new one
|
||||
else:
|
||||
adder = dict(lab=lab, kits=[kit], total_cost=kit['cost'], total_samples=kit['sample_count'],
|
||||
total_runs=kit['run_count'])
|
||||
output.append(adder)
|
||||
old_lab = lab
|
||||
# logger.debug(output)
|
||||
dicto = {'start_date': start_date, 'end_date': end_date, 'labs': output} # , "table":table}
|
||||
dicto = {'start_date': start_date, 'end_date': end_date, 'labs': output}
|
||||
temp = env.get_template('summary_report.html')
|
||||
html = temp.render(input=dicto)
|
||||
return html
|
||||
@@ -120,11 +120,11 @@ class ReportMaker(object):
|
||||
"""
|
||||
# logger.debug(f"Updating worksheet")
|
||||
worksheet: Worksheet = self.writer.sheets['Report']
|
||||
for idx, col in enumerate(self.summary_df, start=1): # loop through all columns
|
||||
for idx, col in enumerate(self.summary_df, start=1): # NOTE: loop through all columns
|
||||
series = self.summary_df[col]
|
||||
max_len = max((
|
||||
series.astype(str).map(len).max(), # len of largest item
|
||||
len(str(series.name)) # len of column name/header
|
||||
series.astype(str).map(len).max(), # NOTE: len of largest item
|
||||
len(str(series.name)) # NOTE: len of column name/header
|
||||
)) + 20 # NOTE: adding a little extra space
|
||||
try:
|
||||
# NOTE: Convert idx to letter
|
||||
@@ -142,224 +142,3 @@ class ReportMaker(object):
|
||||
cell.style = 'Currency'
|
||||
|
||||
|
||||
def make_report_xlsx(records: list[dict]) -> Tuple[DataFrame, DataFrame]:
    """
    Create the detail and summary dataframes for a report.

    Args:
        records (list[dict]): list of dictionaries created from submissions

    Returns:
        Tuple[DataFrame, DataFrame]: the per-submission dataframe (without the 'id' column,
        sorted by 'submitting_lab' then 'submitted_date') and the summary dataframe indexed by
        ('submitting_lab', 'extraction_kit') with 'run_count', 'cost' and 'sample_count' columns.
    """
    df = DataFrame.from_records(records)
    # NOTE: put submissions with the same lab together
    df = df.sort_values("submitting_lab")
    # NOTE: aggregate cost and sample count columns; counting 'extraction_kit' gives runs per kit
    df2 = df.groupby(["submitting_lab", "extraction_kit"]).agg(
        {'extraction_kit': 'count', 'cost': 'sum', 'sample_count': 'sum'})
    df2 = df2.rename(columns={"extraction_kit": 'run_count'})
    # NOTE: 'id' is removed from the detail sheet; records lacking it are tolerated rather than raising
    df = df.drop(columns='id', errors='ignore')
    df = df.sort_values(['submitting_lab', "submitted_date"])
    return df, df2
|
||||
|
||||
|
||||
def make_report_html(df: DataFrame, start_date: date, end_date: date) -> str:
    """
    Generates html from the report summary dataframe.

    Consecutive rows belonging to the same lab are merged into a single lab entry whose
    totals are the sums over its kits.

    Args:
        df (DataFrame): summary dataframe generated by 'make_report_xlsx'; each row index is read
            as (lab, kit name) and the row values positionally as (run count, cost, sample count).
        start_date (date): starting date of the report period
        end_date (date): ending date of the report period

    Returns:
        str: html string rendered from the 'summary_report.html' template
    """
    output = []
    for index, values in df.iterrows():
        lab = index[0]
        data = list(values)
        kit = dict(name=index[1], cost=data[1], run_count=int(data[0]), sample_count=int(data[2]))
        # NOTE: compare against the last emitted lab instead of a "" sentinel, so a first lab
        # whose name is an empty string cannot index into an empty output list.
        if output and lab == output[-1]['lab']:
            # NOTE: if this is the same lab as before add together
            output[-1]['kits'].append(kit)
            output[-1]['total_cost'] += kit['cost']
            output[-1]['total_samples'] += kit['sample_count']
            output[-1]['total_runs'] += kit['run_count']
        else:
            # NOTE: if not the same lab, make a new one
            adder = dict(lab=lab, kits=[kit], total_cost=kit['cost'], total_samples=kit['sample_count'],
                         total_runs=kit['run_count'])
            output.append(adder)
    dicto = {'start_date': start_date, 'end_date': end_date, 'labs': output}
    # NOTE: 'env' is the jinja environment defined at module level
    temp = env.get_template('summary_report.html')
    html = temp.render(input=dicto)
    return html
|
||||
|
||||
|
||||
# TODO: move this into a classmethod of Controls?
|
||||
def convert_data_list_to_df(input: list[dict], subtype: str | None = None) -> DataFrame:
    """
    Convert list of control records to dataframe

    Args:
        input (list[dict]): list of dictionaries containing records
        subtype (str | None, optional): name of the single data column to keep alongside the
            identifying columns. Defaults to None (keep every column).

    Returns:
        DataFrame: dataframe of controls

    Raises:
        IndexError: if a column containing "percent" exists but no column containing "count" does.
    """
    df = DataFrame.from_records(input)
    safe = ['name', 'submitted_date', 'genus', 'target']
    percent_columns = [column for column in df.columns if "percent" in column]
    if percent_columns:
        # NOTE: resolve the count column BEFORE any column is dropped; doing the lookup after
        # deletions could fail or pick a different column when subtype is a percent column.
        count_col = [item for item in df.columns if "count" in item][0]
        # NOTE: The actual percentage from kraken was off due to exclusion of NaN, recalculating.
        recalculated = 100 * df[count_col] / df.groupby('name')[count_col].transform('sum')
        for column in percent_columns:
            df[column] = recalculated
    if subtype is not None:
        # NOTE: keep only the identifying columns and the requested subtype column
        unwanted = [column for column in df.columns if column not in safe and column != subtype]
        df = df.drop(columns=unwanted)
    # NOTE: move date of sample submitted on same date as previous ahead one.
    df = displace_date(df)
    # NOTE: ad hoc method to make data labels more accurate.
    df = df_column_renamer(df=df)
    return df
|
||||
|
||||
|
||||
def df_column_renamer(df: DataFrame) -> DataFrame:
    """
    Drop hash columns and give the remaining data columns more descriptive names.

    Args:
        df (DataFrame): input dataframe

    Returns:
        DataFrame: dataframe without columns matching '_hashes' and with 'clarified' column names
    """
    # NOTE: columns whose name matches '_hashes' are removed from the output
    hash_columns = list(df.filter(regex='_hashes'))
    kept_columns = df.columns.drop(hash_columns)
    trimmed = df[kept_columns]
    clarified_names = {
        "contains_ratio": "contains_shared_hashes_ratio",
        "matches_ratio": "matches_shared_hashes_ratio",
        "kraken_count": "kraken2_read_count_(top_50)",
        "kraken_percent": "kraken2_read_percent_(top_50)"
    }
    return trimmed.rename(columns=clarified_names)
|
||||
|
||||
|
||||
def displace_date(df: DataFrame) -> DataFrame:
    """
    Separate controls that share a submitted date by pushing later ones forward.

    Each control name is visited in sorted order; check_date moves a control's date
    forward when that date was already seen for an earlier control.

    Args:
        df (DataFrame): input dataframe composed of control records

    Returns:
        DataFrame: output dataframe with dates incremented.
    """
    # NOTE: collect the first submitted date of every control before any date is shifted
    controls = []
    for control_name in sorted(df['name'].unique()):
        first_row = df[df.name == control_name].iloc[0]
        controls.append(dict(name=control_name, date=first_row['submitted_date']))
    previous_dates = []
    for control in controls:
        df, previous_dates = check_date(df=df, item=control, previous_dates=previous_dates)
    return df
|
||||
|
||||
|
||||
def check_date(df: DataFrame, item: dict, previous_dates: list) -> Tuple[DataFrame, list]:
    """
    Checks if an item's date is already present in previous_dates and adjusts df accordingly.

    While the item's date collides with a previously seen date, every row of df whose 'name'
    equals item['name'] has its 'submitted_date' moved forward by one day, and item['date'] is
    moved with it. Every date that is checked (colliding or not) is appended to previous_dates.
    Both item and previous_dates are mutated in place.

    Args:
        df (DataFrame): input dataframe
        item (dict): control for checking, with 'name' and 'date' keys
        previous_dates (list): list of dates found in previous controls

    Returns:
        Tuple[DataFrame, list]: Output dataframe and appended list of previous dates
    """
    # NOTE: implemented as a loop rather than recursion so a long run of occupied dates
    # cannot hit the interpreter recursion limit.
    while True:
        # NOTE: list membership cannot raise IndexError, so no exception handling is needed here
        collided = item['date'] in previous_dates
        previous_dates.append(item['date'])
        if not collided:
            return df, previous_dates
        # NOTE: get df locations where name == item name
        mask = df['name'] == item['name']
        # NOTE: increment date in dataframe
        df.loc[mask, 'submitted_date'] = df.loc[mask, 'submitted_date'].apply(lambda x: x + timedelta(days=1))
        item['date'] += timedelta(days=1)
        # NOTE: message text kept unchanged for log compatibility; the new date is rechecked on the next pass
        logger.warning("Date check failed, running recursion")
|
||||
|
||||
|
||||
# def get_unique_values_in_df_column(df: DataFrame, column_name: str) -> list:
|
||||
# """
|
||||
# get all unique values in a dataframe column by name
|
||||
#
|
||||
# Args:
|
||||
# df (DataFrame): input dataframe
|
||||
# column_name (str): name of column of interest
|
||||
#
|
||||
# Returns:
|
||||
# list: sorted list of unique values
|
||||
# """
|
||||
# return sorted(df[column_name].unique())
|
||||
|
||||
|
||||
def drop_reruns_from_df(ctx: Settings, df: DataFrame) -> DataFrame:
    """
    Removes original runs from the dataframe when a sequencing rerun of the same sample exists.

    Args:
        ctx (Settings): settings passed from gui; 'rerun_regex' is read from it when present
        df (DataFrame): initial dataframe

    Returns:
        DataFrame: dataframe with originals removed in favour of repeats.
    """
    # NOTE: without a configured rerun pattern there is nothing to do
    if 'rerun_regex' not in ctx:
        return df
    sample_names = get_unique_values_in_df_column(df, column_name="name")
    rerun_regex = re.compile(fr"{ctx.rerun_regex}")
    for sample in sample_names:
        if not rerun_regex.search(sample):
            continue
        # NOTE: stripping the rerun marker yields the name of the original run to discard
        first_run = rerun_regex.sub("", sample)
        original_rows = df[df.name == first_run].index
        df = df.drop(original_rows)
    return df
|
||||
|
||||
# def make_hitpicks(input:List[dict]) -> DataFrame:
|
||||
# """
|
||||
# Converts list of dictionaries constructed by hitpicking to dataframe
|
||||
#
|
||||
# Args:
|
||||
# input (List[dict]): list of hitpicked dictionaries
|
||||
#
|
||||
# Returns:
|
||||
# DataFrame: constructed dataframe.
|
||||
# """
|
||||
# return DataFrame.from_records(input)
|
||||
|
||||
@@ -1,19 +1,29 @@
|
||||
'''
|
||||
"""
|
||||
Functions for constructing controls graphs using plotly.
|
||||
'''
|
||||
TODO: Move these functions to widgets.controls_charts
|
||||
"""
|
||||
import re
|
||||
import plotly
|
||||
import plotly.express as px
|
||||
import pandas as pd
|
||||
from pandas import DataFrame
|
||||
from plotly.graph_objects import Figure
|
||||
import logging
|
||||
# from backend.excel import get_unique_values_in_df_column
|
||||
from tools import Settings, get_unique_values_in_df_column
|
||||
from tools import Settings, get_unique_values_in_df_column, divide_chunks
|
||||
from frontend.widgets.functions import select_save_file
|
||||
|
||||
logger = logging.getLogger(f"submissions.{__name__}")
|
||||
|
||||
|
||||
def create_charts(ctx:Settings, df:pd.DataFrame, ytitle:str|None=None) -> Figure:
|
||||
class CustomFigure(Figure):
|
||||
|
||||
def __init__(self, ctx: Settings, df: pd.DataFrame, ytitle: str | None = None):
|
||||
super().__init__()
|
||||
|
||||
|
||||
# NOTE: Start here.
|
||||
def create_charts(ctx: Settings, df: pd.DataFrame, ytitle: str | None = None) -> Figure:
|
||||
"""
|
||||
Constructs figures based on parsed pandas dataframe.
|
||||
|
||||
@@ -25,7 +35,7 @@ def create_charts(ctx:Settings, df:pd.DataFrame, ytitle:str|None=None) -> Figure
|
||||
Returns:
|
||||
Figure: Plotly figure
|
||||
"""
|
||||
from backend.excel import drop_reruns_from_df
|
||||
# from backend.excel import drop_reruns_from_df
|
||||
# converts starred genera to normal and splits off list of starred
|
||||
genera = []
|
||||
if df.empty:
|
||||
@@ -38,23 +48,45 @@ def create_charts(ctx:Settings, df:pd.DataFrame, ytitle:str|None=None) -> Figure
|
||||
genera.append("")
|
||||
except IndexError:
|
||||
genera.append("")
|
||||
df['genus'] = df['genus'].replace({'\*':''}, regex=True).replace({"NaN":"Unknown"})
|
||||
df['genus'] = df['genus'].replace({'\*': ''}, regex=True).replace({"NaN": "Unknown"})
|
||||
df['genera'] = genera
|
||||
# remove original runs, using reruns if applicable
|
||||
# NOTE: remove original runs, using reruns if applicable
|
||||
df = drop_reruns_from_df(ctx=ctx, df=df)
|
||||
# sort by and exclude from
|
||||
# NOTE: sort by and exclude from
|
||||
sorts = ['submitted_date', "target", "genus"]
|
||||
exclude = ['name', 'genera']
|
||||
modes = [item for item in df.columns if item not in sorts and item not in exclude]# and "_hashes" not in item]
|
||||
# Set descending for any columns that have "{mode}" in the header.
|
||||
modes = [item for item in df.columns if item not in sorts and item not in exclude] # and "_hashes" not in item]
|
||||
# NOTE: Set descending for any columns that have "{mode}" in the header.
|
||||
ascending = [False if item == "target" else True for item in sorts]
|
||||
df = df.sort_values(by=sorts, ascending=ascending)
|
||||
# logger.debug(df[df.isna().any(axis=1)])
|
||||
# actual chart construction is done by
|
||||
# NOTE: actual chart construction is done by
|
||||
fig = construct_chart(df=df, modes=modes, ytitle=ytitle)
|
||||
return fig
|
||||
|
||||
def generic_figure_markers(fig:Figure, modes:list=[], ytitle:str|None=None) -> Figure:
|
||||
|
||||
def drop_reruns_from_df(ctx: Settings, df: DataFrame) -> DataFrame:
    """
    Drops the original run of every sample that also appears as a rerun.

    A sample name counts as a rerun when it matches ctx.rerun_regex; removing the match
    from the name gives the name of the run it replaces.

    Args:
        ctx (Settings): settings passed from gui; 'rerun_regex' is read from it when present
        df (DataFrame): initial dataframe, must have a 'name' column

    Returns:
        DataFrame: dataframe with originals removed in favour of repeats.
    """
    # NOTE: without a rerun pattern there is nothing to compare against.
    if 'rerun_regex' not in ctx:
        return df
    names = get_unique_values_in_df_column(df, column_name="name")
    pattern = re.compile(fr"{ctx.rerun_regex}")
    for name in names:
        if not pattern.search(name):
            continue
        # NOTE: strip the rerun marker to find the run this sample supersedes.
        original_name = pattern.sub("", name)
        df = df.drop(df[df.name == original_name].index)
    return df
|
||||
|
||||
|
||||
def generic_figure_markers(fig: Figure, modes: list = [], ytitle: str | None = None) -> Figure:
|
||||
"""
|
||||
Adds standard layout to figure.
|
||||
|
||||
@@ -101,7 +133,8 @@ def generic_figure_markers(fig:Figure, modes:list=[], ytitle:str|None=None) -> F
|
||||
assert type(fig) == Figure
|
||||
return fig
|
||||
|
||||
def make_buttons(modes:list, fig_len:int) -> list:
|
||||
|
||||
def make_buttons(modes: list, fig_len: int) -> list:
|
||||
"""
|
||||
Creates list of buttons with one for each mode to be used in showing/hiding mode traces.
|
||||
|
||||
@@ -133,7 +166,8 @@ def make_buttons(modes:list, fig_len:int) -> list:
|
||||
))
|
||||
return buttons
|
||||
|
||||
def output_figures(figs:list, group_name:str):
|
||||
|
||||
def output_figures(figs: list, group_name: str):
|
||||
"""
|
||||
Writes plotly figure to html file.
|
||||
|
||||
@@ -150,7 +184,8 @@ def output_figures(figs:list, group_name:str):
|
||||
except AttributeError:
|
||||
logger.error(f"The following figure was a string: {fig}")
|
||||
|
||||
def construct_chart(df:pd.DataFrame, modes:list, ytitle:str|None=None) -> Figure:
|
||||
|
||||
def construct_chart(df: pd.DataFrame, modes: list, ytitle: str | None = None) -> Figure:
|
||||
"""
|
||||
Creates a plotly chart for controls from a pandas dataframe
|
||||
|
||||
@@ -165,21 +200,21 @@ def construct_chart(df:pd.DataFrame, modes:list, ytitle:str|None=None) -> Figure
|
||||
fig = Figure()
|
||||
for ii, mode in enumerate(modes):
|
||||
if "count" in mode:
|
||||
df[mode] = pd.to_numeric(df[mode],errors='coerce')
|
||||
df[mode] = pd.to_numeric(df[mode], errors='coerce')
|
||||
color = "genus"
|
||||
color_discrete_sequence=None
|
||||
color_discrete_sequence = None
|
||||
elif 'percent' in mode:
|
||||
color = "genus"
|
||||
color_discrete_sequence=None
|
||||
color_discrete_sequence = None
|
||||
else:
|
||||
color = "target"
|
||||
match get_unique_values_in_df_column(df, 'target'):
|
||||
case ['Target']:
|
||||
color_discrete_sequence=["blue"]
|
||||
color_discrete_sequence = ["blue"]
|
||||
case ['Off-target']:
|
||||
color_discrete_sequence=['red']
|
||||
color_discrete_sequence = ['red']
|
||||
case _:
|
||||
color_discrete_sequence=['blue', 'red']
|
||||
color_discrete_sequence = ['blue', 'red']
|
||||
bar = px.bar(df, x="submitted_date",
|
||||
y=mode,
|
||||
color=color,
|
||||
@@ -189,25 +224,12 @@ def construct_chart(df:pd.DataFrame, modes:list, ytitle:str|None=None) -> Figure
|
||||
text="genera",
|
||||
color_discrete_sequence=color_discrete_sequence
|
||||
)
|
||||
bar.update_traces(visible = ii == 0)
|
||||
bar.update_traces(visible=ii == 0)
|
||||
fig.add_traces(bar.data)
|
||||
return generic_figure_markers(fig=fig, modes=modes, ytitle=ytitle)
|
||||
|
||||
def divide_chunks(input_list: list, chunk_count: int):
    """
    Divides a list into {chunk_count} consecutive parts of near-equal size.

    When the list does not divide evenly, the leading chunks each get one extra item.
    If chunk_count is larger than the list, the trailing chunks are empty lists.

    Args:
        input_list (list): Initial list
        chunk_count (int): number of chunks to produce (not the size of each chunk)

    Returns:
        generator: yields the sublists in order.

    Raises:
        ZeroDivisionError: if chunk_count is 0.
    """
    # NOTE: every chunk holds base_size items; the first `extra` chunks hold one more.
    base_size, extra = divmod(len(input_list), chunk_count)
    return (input_list[i * base_size + min(i, extra):(i + 1) * base_size + min(i + 1, extra)]
            for i in range(chunk_count))
|
||||
|
||||
def construct_html(figure:Figure) -> str:
|
||||
def construct_html(figure: Figure) -> str:
|
||||
"""
|
||||
Creates final html code from plotly
|
||||
|
||||
@@ -219,7 +241,8 @@ def construct_html(figure:Figure) -> str:
|
||||
"""
|
||||
html = '<html><body>'
|
||||
if figure is not None:
|
||||
html += plotly.offline.plot(figure, output_type='div', include_plotlyjs='cdn')#, image = 'png', auto_open=True, image_filename='plot_image')
|
||||
html += plotly.offline.plot(figure, output_type='div',
|
||||
include_plotlyjs='cdn') #, image = 'png', auto_open=True, image_filename='plot_image')
|
||||
else:
|
||||
html += "<h1>No data was retrieved for the given parameters.</h1>"
|
||||
html += '</body></html>'
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
'''
|
||||
"""
|
||||
Handles display of control charts
|
||||
'''
|
||||
"""
|
||||
from datetime import timedelta
|
||||
from typing import Tuple
|
||||
|
||||
from PyQt6.QtWebEngineWidgets import QWebEngineView
|
||||
from PyQt6.QtWidgets import (
|
||||
QWidget, QVBoxLayout, QComboBox, QHBoxLayout,
|
||||
@@ -10,8 +13,9 @@ from PyQt6.QtCore import QSignalBlocker
|
||||
from backend.db import ControlType, Control
|
||||
from PyQt6.QtCore import QDate, QSize
|
||||
import logging
|
||||
from pandas import DataFrame
|
||||
from tools import Report, Result
|
||||
from backend.excel.reports import convert_data_list_to_df
|
||||
# from backend.excel.reports import convert_data_list_to_df
|
||||
from frontend.visualizations.control_charts import create_charts, construct_html
|
||||
|
||||
logger = logging.getLogger(f"submissions.{__name__}")
|
||||
@@ -28,17 +32,17 @@ class ControlsViewer(QWidget):
|
||||
# set tab2 layout
|
||||
self.layout = QVBoxLayout(self)
|
||||
self.control_typer = QComboBox()
|
||||
# fetch types of controls
|
||||
# NOTE: fetch types of controls
|
||||
con_types = [item.name for item in ControlType.query()]
|
||||
self.control_typer.addItems(con_types)
|
||||
# create custom widget to get types of analysis
|
||||
# NOTE: create custom widget to get types of analysis
|
||||
self.mode_typer = QComboBox()
|
||||
mode_types = Control.get_modes()
|
||||
self.mode_typer.addItems(mode_types)
|
||||
# create custom widget to get subtypes of analysis
|
||||
# NOTE: create custom widget to get subtypes of analysis
|
||||
self.sub_typer = QComboBox()
|
||||
self.sub_typer.setEnabled(False)
|
||||
# add widgets to tab2 layout
|
||||
# NOTE: add widgets to tab2 layout
|
||||
self.layout.addWidget(self.datepicker)
|
||||
self.layout.addWidget(self.control_typer)
|
||||
self.layout.addWidget(self.mode_typer)
|
||||
@@ -118,8 +122,8 @@ class ControlsViewer(QWidget):
|
||||
Tuple[QMainWindow, dict]: Collection of new main app window and result dict
|
||||
"""
|
||||
report = Report()
|
||||
# logger.debug(f"Control getter context: \n\tControl type: {self.con_type}\n\tMode: {self.mode}\n\tStart Date: {self.start_date}\n\tEnd Date: {self.end_date}")
|
||||
# NOTE: set the subtype for kraken
|
||||
# logger.debug(f"Control getter context: \n\tControl type: {self.con_type}\n\tMode: {self.mode}\n\tStart Date: {self.start_date}\n\tEnd Date: {self.end_date}")
|
||||
# NOTE: set the subtype for kraken
|
||||
if self.sub_typer.currentText() == "":
|
||||
self.subtype = None
|
||||
else:
|
||||
@@ -140,7 +144,7 @@ class ControlsViewer(QWidget):
|
||||
self.report.add_result(Result(status="Critical", msg="No data found for controls in given date range."))
|
||||
return
|
||||
# NOTE send to dataframe creator
|
||||
df = convert_data_list_to_df(input=data, subtype=self.subtype)
|
||||
df = self.convert_data_list_to_df(input_df=data)
|
||||
if self.subtype is None:
|
||||
title = self.mode
|
||||
else:
|
||||
@@ -156,6 +160,116 @@ class ControlsViewer(QWidget):
|
||||
# logger.debug("Figure updated... I hope.")
|
||||
self.report.add_result(report)
|
||||
|
||||
def convert_data_list_to_df(self, input_df: list[dict]) -> DataFrame:
    """
    Convert list of control records to dataframe

    Percent columns are recalculated from the first count column, then, when a subtype is set
    on this widget, every column that is neither a 'safe' column nor the subtype is removed.

    Args:
        input_df (list[dict]): list of dictionaries containing records

    Returns:
        DataFrame: dataframe of controls

    Raises:
        IndexError: if a percent column exists but no count column does.
    """
    df = DataFrame.from_records(input_df)
    safe = ['name', 'submitted_date', 'genus', 'target']
    # NOTE: snapshot the columns so that removing columns below cannot affect what is visited.
    original_columns = list(df.columns)
    count_columns = [item for item in original_columns if "count" in item]
    # NOTE: The actual percentage from kraken was off due to exclusion of NaN, recalculating.
    # NOTE: This happens before any column is removed so the count column is still available.
    for column in original_columns:
        if "percent" in column:
            count_col = count_columns[0]
            df[column] = 100 * df[count_col] / df.groupby('name')[count_col].transform('sum')
    # NOTE: with a subtype selected, keep only the safe columns and the subtype itself.
    if self.subtype is not None:
        for column in original_columns:
            if column not in safe and column != self.subtype:
                del df[column]
    # NOTE: move date of sample submitted on same date as previous ahead one.
    df = self.displace_date(df=df)
    # NOTE: ad hoc method to make data labels more accurate.
    df = self.df_column_renamer(df=df)
    return df
|
||||
|
||||
def df_column_renamer(self, df: DataFrame) -> DataFrame:
    """
    Drops the '_hashes' columns and gives the remaining metric columns clearer names.

    Args:
        df (DataFrame): input dataframe

    Returns:
        DataFrame: dataframe with 'clarified' column names
    """
    clarified_names = {
        "contains_ratio": "contains_shared_hashes_ratio",
        "matches_ratio": "matches_shared_hashes_ratio",
        "kraken_count": "kraken2_read_count_(top_50)",
        "kraken_percent": "kraken2_read_percent_(top_50)"
    }
    # NOTE: any column whose name matches '_hashes' is left out of the output.
    hash_columns = list(df.filter(regex='_hashes'))
    kept_columns = df.columns.drop(hash_columns)
    trimmed = df[kept_columns]
    return trimmed.rename(columns=clarified_names)
|
||||
|
||||
def displace_date(self, df: DataFrame) -> DataFrame:
    """
    Separates controls that share a submitted date by pushing later ones forward.

    Controls are visited in sorted name order; each one is handed to check_date together
    with the dates already taken, and check_date does the actual shifting.

    Args:
        df (DataFrame): input dataframe composed of control records

    Returns:
        DataFrame: output dataframe with dates incremented.
    """
    # NOTE: get submitted dates for each control (taken from the first row of each name)
    controls = []
    for control_name in sorted(df['name'].unique()):
        first_row = df[df.name == control_name].iloc[0]
        controls.append(dict(name=control_name, date=first_row['submitted_date']))
    seen_dates = []
    for control in controls:
        df, seen_dates = self.check_date(df=df, item=control, previous_dates=seen_dates)
    return df
|
||||
|
||||
def check_date(self, df: DataFrame, item: dict, previous_dates: list) -> Tuple[DataFrame, list]:
    """
    Moves a control's date forward one day at a time until it no longer clashes with an earlier one.

    Every date that is tried (clashing or not) is appended to previous_dates, and both
    item['date'] and the matching rows of df are updated in place on each shift.

    Args:
        df (DataFrame): input dataframe
        item (dict): control for checking, with 'name' and 'date' keys
        previous_dates (list): list of dates found in previous controls

    Returns:
        Tuple[DataFrame, list]: Output dataframe and appended list of previous dates
    """
    one_day = timedelta(days=1)
    while True:
        clash = item['date'] in previous_dates
        previous_dates.append(item['date'])
        # NOTE: if run didn't lead to changed date, return values
        if not clash:
            return df, previous_dates
        # NOTE: get df locations where name == item name
        rows = df['name'] == item['name']
        # NOTE: increment date in dataframe
        df.loc[rows, 'submitted_date'] = df.loc[rows, 'submitted_date'].apply(lambda x: x + one_day)
        item['date'] += one_day
        # NOTE: date was changed, so go around again with the new date
        logger.warning(f"Date check failed, running recursion")
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
class ControlsDatePicker(QWidget):
|
||||
"""
|
||||
custom widget to pick start and end dates for controls graphs
|
||||
|
||||
@@ -55,6 +55,21 @@ main_form_style = '''
|
||||
'''
|
||||
|
||||
|
||||
def divide_chunks(input_list: list, chunk_count: int):
    """
    Divides a list into {chunk_count} consecutive parts of near-equal size.

    When the list does not divide evenly, the leading chunks each get one extra item.
    If chunk_count is larger than the list, the trailing chunks are empty lists.

    Args:
        input_list (list): Initial list
        chunk_count (int): number of chunks to produce (not the size of each chunk)

    Returns:
        generator: yields the sublists in order.

    Raises:
        ZeroDivisionError: if chunk_count is 0.
    """
    # NOTE: every chunk holds base_size items; the first `extra` chunks hold one more.
    base_size, extra = divmod(len(input_list), chunk_count)
    return (input_list[i * base_size + min(i, extra):(i + 1) * base_size + min(i + 1, extra)]
            for i in range(chunk_count))
|
||||
|
||||
|
||||
def get_unique_values_in_df_column(df: pd.DataFrame, column_name: str) -> list:
|
||||
"""
|
||||
get all unique values in a dataframe column by name
|
||||
@@ -423,7 +438,6 @@ class Settings(BaseSettings, extra="allow"):
|
||||
if not hasattr(self, k):
|
||||
self.__setattr__(k, v)
|
||||
|
||||
|
||||
@classmethod
|
||||
def get_alembic_db_path(cls, alembic_path, mode=Literal['path', 'schema', 'user', 'pass']) -> Path | str:
|
||||
c = ConfigParser()
|
||||
@@ -490,6 +504,7 @@ def get_config(settings_path: Path | str | None = None) -> Settings:
|
||||
# logger.debug(f"Creating settings...")
|
||||
if isinstance(settings_path, str):
|
||||
settings_path = Path(settings_path)
|
||||
|
||||
# NOTE: custom pyyaml constructor to join fields
|
||||
def join(loader, node):
|
||||
seq = loader.construct_sequence(node)
|
||||
@@ -637,11 +652,13 @@ def setup_logger(verbosity: int = 3):
|
||||
Returns:
|
||||
logger: logger object
|
||||
"""
|
||||
|
||||
def handle_exception(exc_type, exc_value, exc_traceback):
|
||||
if issubclass(exc_type, KeyboardInterrupt):
|
||||
sys.__excepthook__(exc_type, exc_value, exc_traceback)
|
||||
return
|
||||
logger.critical("Uncaught exception", exc_info=(exc_type, exc_value, exc_traceback))
|
||||
|
||||
logger = logging.getLogger("submissions")
|
||||
logger.setLevel(logging.DEBUG)
|
||||
# NOTE: create file handler which logs even debug messages
|
||||
@@ -672,23 +689,6 @@ def setup_logger(verbosity: int = 3):
|
||||
return logger
|
||||
|
||||
|
||||
def copy_settings(settings_path: Path, settings: dict) -> dict:
    """
    Writes the settings dictionary to settings_path as yaml unless a file is already there.

    Args:
        settings_path (Path): path to write the file to
        settings (dict): settings dictionary obtained from default config.yml

    Returns:
        dict: the same settings dictionary that was passed in
    """
    # NOTE: an existing file is never overwritten.
    if settings_path.exists():
        return settings
    with open(settings_path, 'w') as handle:
        yaml.dump(settings, handle)
    return settings
|
||||
|
||||
|
||||
def jinja_template_loading() -> Environment:
|
||||
"""
|
||||
Returns jinja2 template environment.
|
||||
@@ -840,10 +840,6 @@ def html_to_pdf(html: str, output_file: Path | str):
|
||||
printer.setPageSize(QPageSize(QPageSize.PageSizeId.A4))
|
||||
document.print(printer)
|
||||
# document.close()
|
||||
# HTML(string=html).write_pdf(output_file)
|
||||
# new_parser = HtmlToDocx()
|
||||
# docx = new_parser.parse_html_string(html)
|
||||
# docx.save(output_file)
|
||||
|
||||
|
||||
def remove_key_from_list_of_dicts(input: list, key: str) -> list:
|
||||
@@ -862,18 +858,18 @@ def remove_key_from_list_of_dicts(input: list, key: str) -> list:
|
||||
return input
|
||||
|
||||
|
||||
def workbook_2_csv(worksheet: Worksheet, filename: Path):
    """
    Writes the cell values of a worksheet (not a whole workbook, despite the name) to a csv file.

    Args:
        worksheet (Worksheet): Incoming worksheet
        filename (Path): Output csv filepath.
    """
    with open(filename, 'w', newline="") as csv_file:
        # NOTE: one csv row per worksheet row, holding each cell's value.
        csv.writer(csv_file).writerows([cell.value for cell in row] for row in worksheet.rows)
|
||||
# def workbook_2_csv(worksheet: Worksheet, filename: Path):
|
||||
# """
|
||||
# Export an excel worksheet (workbook is not correct) to csv file.
|
||||
#
|
||||
# Args:
|
||||
# worksheet (Worksheet): Incoming worksheet
|
||||
# filename (Path): Output csv filepath.
|
||||
# """
|
||||
# with open(filename, 'w', newline="") as f:
|
||||
# c = csv.writer(f)
|
||||
# for r in worksheet.rows:
|
||||
# c.writerow([cell.value for cell in r])
|
||||
|
||||
|
||||
ctx = get_config(None)
|
||||
@@ -917,9 +913,15 @@ def report_result(func):
|
||||
logger.debug(f"Arguments: {args}")
|
||||
logger.debug(f"Keyword arguments: {kwargs}")
|
||||
output = func(*args, **kwargs)
|
||||
if isinstance(output, tuple):
|
||||
match output:
|
||||
case Report():
|
||||
report = output
|
||||
case tuple():
|
||||
try:
|
||||
report = [item for item in output if isinstance(item, Report)][0]
|
||||
else:
|
||||
except IndexError:
|
||||
report = None
|
||||
case _:
|
||||
report = None
|
||||
logger.debug(f"Got report: {report}")
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user