Post code clean-up, before attempt to upgrade controls to FigureWidgets

This commit is contained in:
lwark
2024-10-21 08:17:43 -05:00
parent 495d1a5a7f
commit 1f83b61c81
29 changed files with 441 additions and 1089 deletions

View File

@@ -1,23 +1,17 @@
"""
Handles display of control charts
"""
import re
import sys
from datetime import timedelta, date
from datetime import date
from pprint import pformat
from typing import Tuple
from PyQt6.QtWebEngineWidgets import QWebEngineView
from PyQt6.QtWidgets import (
QWidget, QVBoxLayout, QComboBox, QHBoxLayout,
QDateEdit, QLabel, QSizePolicy, QPushButton, QGridLayout
QWidget, QComboBox, QPushButton, QGridLayout
)
from PyQt6.QtCore import QSignalBlocker
from backend.db import ControlType, IridaControl
from PyQt6.QtCore import QDate, QSize
import logging
from pandas import DataFrame
from tools import Report, Result, get_unique_values_in_df_column, Settings, report_result
from frontend.visualizations import IridaFigure, PCRFigure, CustomFigure
from tools import Report, report_result
from frontend.visualizations import CustomFigure
from .misc import StartEndDatePicker
logger = logging.getLogger(f"submissions.{__name__}")
@@ -70,19 +64,12 @@ class ControlsViewer(QWidget):
self.save_button.pressed.connect(self.save_chart_function)
self.export_button.pressed.connect(self.save_data_function)
def save_chart_function(self):
self.fig.save_figure(parent=self)
def save_data_function(self):
self.fig.save_data(parent=self)
# def controls_getter(self):
# """
# Lookup controls from database and send to chartmaker
# """
# self.controls_getter_function()
@report_result
def controls_getter_function(self, *args, **kwargs):
"""
@@ -128,7 +115,18 @@ class ControlsViewer(QWidget):
self.chart_maker_function()
return report
def diff_month(self, d1: date, d2: date):
@classmethod
def diff_month(self, d1: date, d2: date) -> float:
"""
Gets the number of months difference between two different dates
Args:
d1 (date): Start date.
d2 (date): End date.
Returns:
float: Number of months difference
"""
return abs((d1.year - d2.year) * 12 + d1.month - d2.month)
@report_result
@@ -169,164 +167,3 @@ class ControlsViewer(QWidget):
# logger.debug("Figure updated... I hope.")
return report
# def convert_data_list_to_df(self, input_df: list[dict]) -> DataFrame:
# """
# Convert list of control records to dataframe
#
# Args:
# ctx (dict): settings passed from gui
# input_df (list[dict]): list of dictionaries containing records
# mode_sub_type (str | None, optional): sub_type of submission type. Defaults to None.
#
# Returns:
# DataFrame: dataframe of controls
# """
#
# df = DataFrame.from_records(input_df)
# safe = ['name', 'submitted_date', 'genus', 'target']
# for column in df.columns:
# if column not in safe:
# if self.mode_sub_type is not None and column != self.mode_sub_type:
# continue
# else:
# safe.append(column)
# if "percent" in column:
# # count_col = [item for item in df.columns if "count" in item][0]
# try:
# count_col = next(item for item in df.columns if "count" in item)
# except StopIteration:
# continue
# # NOTE: The actual percentage from kraken was off due to exclusion of NaN, recalculating.
# df[column] = 100 * df[count_col] / df.groupby('name')[count_col].transform('sum')
# df = df[[c for c in df.columns if c in safe]]
# # NOTE: move date of sample submitted on same date as previous ahead one.
# df = self.displace_date(df=df)
# # NOTE: ad hoc method to make data labels more accurate.
# df = self.df_column_renamer(df=df)
# return df
#
# def df_column_renamer(self, df: DataFrame) -> DataFrame:
# """
# Ad hoc function I created to clarify some fields
#
# Args:
# df (DataFrame): input dataframe
#
# Returns:
# DataFrame: dataframe with 'clarified' column names
# """
# df = df[df.columns.drop(list(df.filter(regex='_hashes')))]
# return df.rename(columns={
# "contains_ratio": "contains_shared_hashes_ratio",
# "matches_ratio": "matches_shared_hashes_ratio",
# "kraken_count": "kraken2_read_count_(top_50)",
# "kraken_percent": "kraken2_read_percent_(top_50)"
# })
#
# def displace_date(self, df: DataFrame) -> DataFrame:
# """
# This function serves to split samples that were submitted on the same date by incrementing dates.
# It will shift the date forward by one day if it is the same day as an existing date in a list.
#
# Args:
# df (DataFrame): input dataframe composed of control records
#
# Returns:
# DataFrame: output dataframe with dates incremented.
# """
# # logger.debug(f"Unique items: {df['name'].unique()}")
# # NOTE: get submitted dates for each control
# dict_list = [dict(name=item, date=df[df.name == item].iloc[0]['submitted_date']) for item in
# sorted(df['name'].unique())]
# previous_dates = set()
# # for _, item in enumerate(dict_list):
# for item in dict_list:
# df, previous_dates = self.check_date(df=df, item=item, previous_dates=previous_dates)
# return df
#
# def check_date(self, df: DataFrame, item: dict, previous_dates: set) -> Tuple[DataFrame, list]:
# """
# Checks if an items date is already present in df and adjusts df accordingly
#
# Args:
# df (DataFrame): input dataframe
# item (dict): control for checking
# previous_dates (list): list of dates found in previous controls
#
# Returns:
# Tuple[DataFrame, list]: Output dataframe and appended list of previous dates
# """
# try:
# check = item['date'] in previous_dates
# except IndexError:
# check = False
# previous_dates.add(item['date'])
# if check:
# # logger.debug(f"We found one! Increment date!\n\t{item['date']} to {item['date'] + timedelta(days=1)}")
# # NOTE: get df locations where name == item name
# mask = df['name'] == item['name']
# # NOTE: increment date in dataframe
# df.loc[mask, 'submitted_date'] = df.loc[mask, 'submitted_date'].apply(lambda x: x + timedelta(days=1))
# item['date'] += timedelta(days=1)
# passed = False
# else:
# passed = True
# # logger.debug(f"\n\tCurrent date: {item['date']}\n\tPrevious dates:{previous_dates}")
# # logger.debug(f"DF: {type(df)}, previous_dates: {type(previous_dates)}")
# # NOTE: if run didn't lead to changed date, return values
# if passed:
# # logger.debug(f"Date check passed, returning.")
# return df, previous_dates
# # NOTE: if date was changed, rerun with new date
# else:
# logger.warning(f"Date check failed, running recursion")
# df, previous_dates = self.check_date(df, item, previous_dates)
# return df, previous_dates
#
# def prep_df(self, ctx: Settings, df: DataFrame) -> Tuple[DataFrame, list]:
# """
# Constructs figures based on parsed pandas dataframe.
#
# Args:
# ctx (Settings): settings passed down from gui
# df (pd.DataFrame): input dataframe
# ytitle (str | None, optional): title for the y-axis. Defaults to None.
#
# Returns:
# Figure: Plotly figure
# """
# # NOTE: converts starred genera to normal and splits off list of starred
# if df.empty:
# return None
# df['genus'] = df['genus'].replace({'\*': ''}, regex=True).replace({"NaN": "Unknown"})
# df['genera'] = [item[-1] if item and item[-1] == "*" else "" for item in df['genus'].to_list()]
# # NOTE: remove original runs, using reruns if applicable
# df = self.drop_reruns_from_df(ctx=ctx, df=df)
# # NOTE: sort by and exclude from
# sorts = ['submitted_date', "target", "genus"]
# exclude = ['name', 'genera']
# modes = [item for item in df.columns if item not in sorts and item not in exclude]
# # NOTE: Set descending for any columns that have "{mode}" in the header.
# ascending = [False if item == "target" else True for item in sorts]
# df = df.sort_values(by=sorts, ascending=ascending)
# # logger.debug(df[df.isna().any(axis=1)])
# # NOTE: actual chart construction is done by
# return df, modes
#
# def drop_reruns_from_df(self, ctx: Settings, df: DataFrame) -> DataFrame:
# """
# Removes semi-duplicates from dataframe after finding sequencing repeats.
#
# Args:
# settings (dict): settings passed from gui
# df (DataFrame): initial dataframe
#
# Returns:
# DataFrame: dataframe with originals removed in favour of repeats.
# """
# if 'rerun_regex' in ctx:
# sample_names = get_unique_values_in_df_column(df, column_name="name")
# rerun_regex = re.compile(fr"{ctx.rerun_regex}")
# exclude = [re.sub(rerun_regex, "", sample) for sample in sample_names if rerun_regex.search(sample)]
# df = df[df.name not in exclude]
# return df