Before control chart update.
This commit is contained in:
@@ -1,19 +1,29 @@
|
||||
'''
|
||||
"""
|
||||
Functions for constructing controls graphs using plotly.
|
||||
'''
|
||||
TODO: Move these functions to widgets.controls_charts
|
||||
"""
|
||||
import re
|
||||
import plotly
|
||||
import plotly.express as px
|
||||
import pandas as pd
|
||||
from pandas import DataFrame
|
||||
from plotly.graph_objects import Figure
|
||||
import logging
|
||||
# from backend.excel import get_unique_values_in_df_column
|
||||
from tools import Settings, get_unique_values_in_df_column
|
||||
from tools import Settings, get_unique_values_in_df_column, divide_chunks
|
||||
from frontend.widgets.functions import select_save_file
|
||||
|
||||
logger = logging.getLogger(f"submissions.{__name__}")
|
||||
|
||||
|
||||
def create_charts(ctx:Settings, df:pd.DataFrame, ytitle:str|None=None) -> Figure:
|
||||
class CustomFigure(Figure):
|
||||
|
||||
def __init__(self, ctx: Settings, df: pd.DataFrame, ytitle: str | None = None):
|
||||
super().__init__()
|
||||
|
||||
|
||||
# NOTE: Start here.
|
||||
def create_charts(ctx: Settings, df: pd.DataFrame, ytitle: str | None = None) -> Figure:
|
||||
"""
|
||||
Constructs figures based on parsed pandas dataframe.
|
||||
|
||||
@@ -24,8 +34,8 @@ def create_charts(ctx:Settings, df:pd.DataFrame, ytitle:str|None=None) -> Figure
|
||||
|
||||
Returns:
|
||||
Figure: Plotly figure
|
||||
"""
|
||||
from backend.excel import drop_reruns_from_df
|
||||
"""
|
||||
# from backend.excel import drop_reruns_from_df
|
||||
# converts starred genera to normal and splits off list of starred
|
||||
genera = []
|
||||
if df.empty:
|
||||
@@ -33,28 +43,50 @@ def create_charts(ctx:Settings, df:pd.DataFrame, ytitle:str|None=None) -> Figure
|
||||
for item in df['genus'].to_list():
|
||||
try:
|
||||
if item[-1] == "*":
|
||||
genera.append(item[-1])
|
||||
genera.append(item[-1])
|
||||
else:
|
||||
genera.append("")
|
||||
except IndexError:
|
||||
genera.append("")
|
||||
df['genus'] = df['genus'].replace({'\*':''}, regex=True).replace({"NaN":"Unknown"})
|
||||
df['genus'] = df['genus'].replace({'\*': ''}, regex=True).replace({"NaN": "Unknown"})
|
||||
df['genera'] = genera
|
||||
# remove original runs, using reruns if applicable
|
||||
# NOTE: remove original runs, using reruns if applicable
|
||||
df = drop_reruns_from_df(ctx=ctx, df=df)
|
||||
# sort by and exclude from
|
||||
# NOTE: sort by and exclude from
|
||||
sorts = ['submitted_date', "target", "genus"]
|
||||
exclude = ['name', 'genera']
|
||||
modes = [item for item in df.columns if item not in sorts and item not in exclude]# and "_hashes" not in item]
|
||||
# Set descending for any columns that have "{mode}" in the header.
|
||||
modes = [item for item in df.columns if item not in sorts and item not in exclude] # and "_hashes" not in item]
|
||||
# NOTE: Set descending for any columns that have "{mode}" in the header.
|
||||
ascending = [False if item == "target" else True for item in sorts]
|
||||
df = df.sort_values(by=sorts, ascending=ascending)
|
||||
# logger.debug(df[df.isna().any(axis=1)])
|
||||
# actual chart construction is done by
|
||||
# NOTE: actual chart construction is done by
|
||||
fig = construct_chart(df=df, modes=modes, ytitle=ytitle)
|
||||
return fig
|
||||
|
||||
def generic_figure_markers(fig:Figure, modes:list=[], ytitle:str|None=None) -> Figure:
|
||||
|
||||
|
||||
def drop_reruns_from_df(ctx: Settings, df: DataFrame) -> DataFrame:
|
||||
"""
|
||||
Removes semi-duplicates from dataframe after finding sequencing repeats.
|
||||
|
||||
Args:
|
||||
settings (dict): settings passed from gui
|
||||
df (DataFrame): initial dataframe
|
||||
|
||||
Returns:
|
||||
DataFrame: dataframe with originals removed in favour of repeats.
|
||||
"""
|
||||
if 'rerun_regex' in ctx:
|
||||
sample_names = get_unique_values_in_df_column(df, column_name="name")
|
||||
rerun_regex = re.compile(fr"{ctx.rerun_regex}")
|
||||
for sample in sample_names:
|
||||
if rerun_regex.search(sample):
|
||||
first_run = re.sub(rerun_regex, "", sample)
|
||||
df = df.drop(df[df.name == first_run].index)
|
||||
return df
|
||||
|
||||
|
||||
def generic_figure_markers(fig: Figure, modes: list = [], ytitle: str | None = None) -> Figure:
|
||||
"""
|
||||
Adds standard layout to figure.
|
||||
|
||||
@@ -101,7 +133,8 @@ def generic_figure_markers(fig:Figure, modes:list=[], ytitle:str|None=None) -> F
|
||||
assert type(fig) == Figure
|
||||
return fig
|
||||
|
||||
def make_buttons(modes:list, fig_len:int) -> list:
|
||||
|
||||
def make_buttons(modes: list, fig_len: int) -> list:
|
||||
"""
|
||||
Creates list of buttons with one for each mode to be used in showing/hiding mode traces.
|
||||
|
||||
@@ -127,13 +160,14 @@ def make_buttons(modes:list, fig_len:int) -> list:
|
||||
mode_vis = [item for sublist in mode_vis for item in sublist]
|
||||
# Now, make button to add to list
|
||||
buttons.append(dict(label=mode, method="update", args=[
|
||||
{"visible": mode_vis},
|
||||
{"yaxis.title.text": mode},
|
||||
]
|
||||
))
|
||||
{"visible": mode_vis},
|
||||
{"yaxis.title.text": mode},
|
||||
]
|
||||
))
|
||||
return buttons
|
||||
|
||||
def output_figures(figs:list, group_name:str):
|
||||
|
||||
def output_figures(figs: list, group_name: str):
|
||||
"""
|
||||
Writes plotly figure to html file.
|
||||
|
||||
@@ -150,7 +184,8 @@ def output_figures(figs:list, group_name:str):
|
||||
except AttributeError:
|
||||
logger.error(f"The following figure was a string: {fig}")
|
||||
|
||||
def construct_chart(df:pd.DataFrame, modes:list, ytitle:str|None=None) -> Figure:
|
||||
|
||||
def construct_chart(df: pd.DataFrame, modes: list, ytitle: str | None = None) -> Figure:
|
||||
"""
|
||||
Creates a plotly chart for controls from a pandas dataframe
|
||||
|
||||
@@ -161,53 +196,40 @@ def construct_chart(df:pd.DataFrame, modes:list, ytitle:str|None=None) -> Figure
|
||||
|
||||
Returns:
|
||||
Figure: output stacked bar chart.
|
||||
"""
|
||||
"""
|
||||
fig = Figure()
|
||||
for ii, mode in enumerate(modes):
|
||||
if "count" in mode:
|
||||
df[mode] = pd.to_numeric(df[mode],errors='coerce')
|
||||
df[mode] = pd.to_numeric(df[mode], errors='coerce')
|
||||
color = "genus"
|
||||
color_discrete_sequence=None
|
||||
color_discrete_sequence = None
|
||||
elif 'percent' in mode:
|
||||
color = "genus"
|
||||
color_discrete_sequence=None
|
||||
color_discrete_sequence = None
|
||||
else:
|
||||
color = "target"
|
||||
match get_unique_values_in_df_column(df, 'target'):
|
||||
case ['Target']:
|
||||
color_discrete_sequence=["blue"]
|
||||
color_discrete_sequence = ["blue"]
|
||||
case ['Off-target']:
|
||||
color_discrete_sequence=['red']
|
||||
color_discrete_sequence = ['red']
|
||||
case _:
|
||||
color_discrete_sequence=['blue', 'red']
|
||||
bar = px.bar(df, x="submitted_date",
|
||||
y=mode,
|
||||
color=color,
|
||||
title=mode,
|
||||
barmode='stack',
|
||||
hover_data=["genus", "name", "target", mode],
|
||||
text="genera",
|
||||
color_discrete_sequence=color_discrete_sequence
|
||||
)
|
||||
bar.update_traces(visible = ii == 0)
|
||||
color_discrete_sequence = ['blue', 'red']
|
||||
bar = px.bar(df, x="submitted_date",
|
||||
y=mode,
|
||||
color=color,
|
||||
title=mode,
|
||||
barmode='stack',
|
||||
hover_data=["genus", "name", "target", mode],
|
||||
text="genera",
|
||||
color_discrete_sequence=color_discrete_sequence
|
||||
)
|
||||
bar.update_traces(visible=ii == 0)
|
||||
fig.add_traces(bar.data)
|
||||
return generic_figure_markers(fig=fig, modes=modes, ytitle=ytitle)
|
||||
|
||||
def divide_chunks(input_list:list, chunk_count:int):
|
||||
"""
|
||||
Divides a list into {chunk_count} equal parts
|
||||
|
||||
Args:
|
||||
input_list (list): Initials list
|
||||
chunk_count (int): size of each chunk
|
||||
|
||||
Returns:
|
||||
tuple: tuple containing sublists.
|
||||
"""
|
||||
k, m = divmod(len(input_list), chunk_count)
|
||||
return (input_list[i*k+min(i, m):(i+1)*k+min(i+1, m)] for i in range(chunk_count))
|
||||
|
||||
def construct_html(figure:Figure) -> str:
|
||||
def construct_html(figure: Figure) -> str:
|
||||
"""
|
||||
Creates final html code from plotly
|
||||
|
||||
@@ -216,10 +238,11 @@ def construct_html(figure:Figure) -> str:
|
||||
|
||||
Returns:
|
||||
str: html string
|
||||
"""
|
||||
"""
|
||||
html = '<html><body>'
|
||||
if figure is not None:
|
||||
html += plotly.offline.plot(figure, output_type='div', include_plotlyjs='cdn')#, image = 'png', auto_open=True, image_filename='plot_image')
|
||||
html += plotly.offline.plot(figure, output_type='div',
|
||||
include_plotlyjs='cdn') #, image = 'png', auto_open=True, image_filename='plot_image')
|
||||
else:
|
||||
html += "<h1>No data was retrieved for the given parameters.</h1>"
|
||||
html += '</body></html>'
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
'''
|
||||
"""
|
||||
Handles display of control charts
|
||||
'''
|
||||
"""
|
||||
from datetime import timedelta
|
||||
from typing import Tuple
|
||||
|
||||
from PyQt6.QtWebEngineWidgets import QWebEngineView
|
||||
from PyQt6.QtWidgets import (
|
||||
QWidget, QVBoxLayout, QComboBox, QHBoxLayout,
|
||||
@@ -10,8 +13,9 @@ from PyQt6.QtCore import QSignalBlocker
|
||||
from backend.db import ControlType, Control
|
||||
from PyQt6.QtCore import QDate, QSize
|
||||
import logging
|
||||
from pandas import DataFrame
|
||||
from tools import Report, Result
|
||||
from backend.excel.reports import convert_data_list_to_df
|
||||
# from backend.excel.reports import convert_data_list_to_df
|
||||
from frontend.visualizations.control_charts import create_charts, construct_html
|
||||
|
||||
logger = logging.getLogger(f"submissions.{__name__}")
|
||||
@@ -28,17 +32,17 @@ class ControlsViewer(QWidget):
|
||||
# set tab2 layout
|
||||
self.layout = QVBoxLayout(self)
|
||||
self.control_typer = QComboBox()
|
||||
# fetch types of controls
|
||||
# NOTE: fetch types of controls
|
||||
con_types = [item.name for item in ControlType.query()]
|
||||
self.control_typer.addItems(con_types)
|
||||
# create custom widget to get types of analysis
|
||||
# NOTE: create custom widget to get types of analysis
|
||||
self.mode_typer = QComboBox()
|
||||
mode_types = Control.get_modes()
|
||||
self.mode_typer.addItems(mode_types)
|
||||
# create custom widget to get subtypes of analysis
|
||||
# NOTE: create custom widget to get subtypes of analysis
|
||||
self.sub_typer = QComboBox()
|
||||
self.sub_typer.setEnabled(False)
|
||||
# add widgets to tab2 layout
|
||||
# NOTE: add widgets to tab2 layout
|
||||
self.layout.addWidget(self.datepicker)
|
||||
self.layout.addWidget(self.control_typer)
|
||||
self.layout.addWidget(self.mode_typer)
|
||||
@@ -118,8 +122,8 @@ class ControlsViewer(QWidget):
|
||||
Tuple[QMainWindow, dict]: Collection of new main app window and result dict
|
||||
"""
|
||||
report = Report()
|
||||
# logger.debug(f"Control getter context: \n\tControl type: {self.con_type}\n\tMode: {self.mode}\n\tStart Date: {self.start_date}\n\tEnd Date: {self.end_date}")
|
||||
# NOTE: set the subtype for kraken
|
||||
# logger.debug(f"Control getter context: \n\tControl type: {self.con_type}\n\tMode: {self.mode}\n\tStart
|
||||
# Date: {self.start_date}\n\tEnd Date: {self.end_date}") NOTE: set the subtype for kraken
|
||||
if self.sub_typer.currentText() == "":
|
||||
self.subtype = None
|
||||
else:
|
||||
@@ -140,7 +144,7 @@ class ControlsViewer(QWidget):
|
||||
self.report.add_result(Result(status="Critical", msg="No data found for controls in given date range."))
|
||||
return
|
||||
# NOTE send to dataframe creator
|
||||
df = convert_data_list_to_df(input=data, subtype=self.subtype)
|
||||
df = self.convert_data_list_to_df(input_df=data)
|
||||
if self.subtype is None:
|
||||
title = self.mode
|
||||
else:
|
||||
@@ -156,6 +160,116 @@ class ControlsViewer(QWidget):
|
||||
# logger.debug("Figure updated... I hope.")
|
||||
self.report.add_result(report)
|
||||
|
||||
def convert_data_list_to_df(self, input_df: list[dict]) -> DataFrame:
|
||||
"""
|
||||
Convert list of control records to dataframe
|
||||
|
||||
Args:
|
||||
ctx (dict): settings passed from gui
|
||||
input_df (list[dict]): list of dictionaries containing records
|
||||
subtype (str | None, optional): name of submission type. Defaults to None.
|
||||
|
||||
Returns:
|
||||
DataFrame: dataframe of controls
|
||||
"""
|
||||
|
||||
df = DataFrame.from_records(input_df)
|
||||
safe = ['name', 'submitted_date', 'genus', 'target']
|
||||
for column in df.columns:
|
||||
if "percent" in column:
|
||||
count_col = [item for item in df.columns if "count" in item][0]
|
||||
# NOTE: The actual percentage from kraken was off due to exclusion of NaN, recalculating.
|
||||
df[column] = 100 * df[count_col] / df.groupby('name')[count_col].transform('sum')
|
||||
if column not in safe:
|
||||
if self.subtype is not None and column != self.subtype:
|
||||
del df[column]
|
||||
# NOTE: move date of sample submitted on same date as previous ahead one.
|
||||
df = self.displace_date(df=df)
|
||||
# NOTE: ad hoc method to make data labels more accurate.
|
||||
df = self.df_column_renamer(df=df)
|
||||
return df
|
||||
|
||||
def df_column_renamer(self, df: DataFrame) -> DataFrame:
|
||||
"""
|
||||
Ad hoc function I created to clarify some fields
|
||||
|
||||
Args:
|
||||
df (DataFrame): input dataframe
|
||||
|
||||
Returns:
|
||||
DataFrame: dataframe with 'clarified' column names
|
||||
"""
|
||||
df = df[df.columns.drop(list(df.filter(regex='_hashes')))]
|
||||
return df.rename(columns={
|
||||
"contains_ratio": "contains_shared_hashes_ratio",
|
||||
"matches_ratio": "matches_shared_hashes_ratio",
|
||||
"kraken_count": "kraken2_read_count_(top_50)",
|
||||
"kraken_percent": "kraken2_read_percent_(top_50)"
|
||||
})
|
||||
|
||||
def displace_date(self, df: DataFrame) -> DataFrame:
|
||||
"""
|
||||
This function serves to split samples that were submitted on the same date by incrementing dates.
|
||||
It will shift the date forward by one day if it is the same day as an existing date in a list.
|
||||
|
||||
Args:
|
||||
df (DataFrame): input dataframe composed of control records
|
||||
|
||||
Returns:
|
||||
DataFrame: output dataframe with dates incremented.
|
||||
"""
|
||||
# logger.debug(f"Unique items: {df['name'].unique()}")
|
||||
# NOTE: get submitted dates for each control
|
||||
dict_list = [dict(name=item, date=df[df.name == item].iloc[0]['submitted_date']) for item in
|
||||
sorted(df['name'].unique())]
|
||||
previous_dates = []
|
||||
for _, item in enumerate(dict_list):
|
||||
df, previous_dates = self.check_date(df=df, item=item, previous_dates=previous_dates)
|
||||
return df
|
||||
|
||||
def check_date(self, df: DataFrame, item: dict, previous_dates: list) -> Tuple[DataFrame, list]:
|
||||
"""
|
||||
Checks if an items date is already present in df and adjusts df accordingly
|
||||
|
||||
Args:
|
||||
df (DataFrame): input dataframe
|
||||
item (dict): control for checking
|
||||
previous_dates (list): list of dates found in previous controls
|
||||
|
||||
Returns:
|
||||
Tuple[DataFrame, list]: Output dataframe and appended list of previous dates
|
||||
"""
|
||||
try:
|
||||
check = item['date'] in previous_dates
|
||||
except IndexError:
|
||||
check = False
|
||||
previous_dates.append(item['date'])
|
||||
if check:
|
||||
# logger.debug(f"We found one! Increment date!\n\t{item['date']} to {item['date'] + timedelta(days=1)}")
|
||||
# NOTE: get df locations where name == item name
|
||||
mask = df['name'] == item['name']
|
||||
# NOTE: increment date in dataframe
|
||||
df.loc[mask, 'submitted_date'] = df.loc[mask, 'submitted_date'].apply(lambda x: x + timedelta(days=1))
|
||||
item['date'] += timedelta(days=1)
|
||||
passed = False
|
||||
else:
|
||||
passed = True
|
||||
# logger.debug(f"\n\tCurrent date: {item['date']}\n\tPrevious dates:{previous_dates}")
|
||||
# logger.debug(f"DF: {type(df)}, previous_dates: {type(previous_dates)}")
|
||||
# NOTE: if run didn't lead to changed date, return values
|
||||
if passed:
|
||||
# logger.debug(f"Date check passed, returning.")
|
||||
return df, previous_dates
|
||||
# NOTE: if date was changed, rerun with new date
|
||||
else:
|
||||
logger.warning(f"Date check failed, running recursion")
|
||||
df, previous_dates = self.check_date(df, item, previous_dates)
|
||||
return df, previous_dates
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
class ControlsDatePicker(QWidget):
|
||||
"""
|
||||
custom widget to pick start and end dates for controls graphs
|
||||
|
||||
Reference in New Issue
Block a user