Conversion of some functions to generators.

This commit is contained in:
lwark
2024-07-31 13:08:56 -05:00
parent eb6cdc63e2
commit 8266275354
15 changed files with 374 additions and 369 deletions

View File

@@ -1,16 +1,12 @@
"""
Functions for constructing controls graphs using plotly.
TODO: Move these functions to widgets.controls_charts
"""
import re
import plotly
import plotly.express as px
import pandas as pd
from pandas import DataFrame
from plotly.graph_objects import Figure
import logging
# from backend.excel import get_unique_values_in_df_column
from tools import Settings, get_unique_values_in_df_column, divide_chunks
from tools import get_unique_values_in_df_column, divide_chunks
from frontend.widgets.functions import select_save_file
logger = logging.getLogger(f"submissions.{__name__}")
@@ -18,232 +14,164 @@ logger = logging.getLogger(f"submissions.{__name__}")
class CustomFigure(Figure):
def __init__(self, ctx: Settings, df: pd.DataFrame, ytitle: str | None = None):
def __init__(self, df: pd.DataFrame, modes: list, ytitle: str | None = None):
super().__init__()
self.construct_chart(df=df, modes=modes)
self.generic_figure_markers(modes=modes, ytitle=ytitle)
def construct_chart(self, df: pd.DataFrame, modes: list):
"""
Creates a plotly chart for controls from a pandas dataframe
# NOTE: Start here.
def create_charts(ctx: Settings, df: pd.DataFrame, ytitle: str | None = None) -> Figure:
"""
Constructs figures based on parsed pandas dataframe.
Args:
df (pd.DataFrame): input dataframe of controls
modes (list): analysis modes to construct charts for
ytitle (str | None, optional): title on the y-axis. Defaults to None.
Args:
ctx (Settings): settings passed down from gui
df (pd.DataFrame): input dataframe
ytitle (str | None, optional): title for the y-axis. Defaults to None.
Returns:
Figure: Plotly figure
"""
# from backend.excel import drop_reruns_from_df
# converts starred genera to normal and splits off list of starred
genera = []
if df.empty:
return None
for item in df['genus'].to_list():
try:
if item[-1] == "*":
genera.append(item[-1])
else:
genera.append("")
except IndexError:
genera.append("")
df['genus'] = df['genus'].replace({'\*': ''}, regex=True).replace({"NaN": "Unknown"})
df['genera'] = genera
# NOTE: remove original runs, using reruns if applicable
df = drop_reruns_from_df(ctx=ctx, df=df)
# NOTE: sort by and exclude from
sorts = ['submitted_date', "target", "genus"]
exclude = ['name', 'genera']
modes = [item for item in df.columns if item not in sorts and item not in exclude] # and "_hashes" not in item]
# NOTE: Set descending for any columns that have "{mode}" in the header.
ascending = [False if item == "target" else True for item in sorts]
df = df.sort_values(by=sorts, ascending=ascending)
# logger.debug(df[df.isna().any(axis=1)])
# NOTE: actual chart construction is done by
fig = construct_chart(df=df, modes=modes, ytitle=ytitle)
return fig
def drop_reruns_from_df(ctx: Settings, df: DataFrame) -> DataFrame:
    """
    Drops the original run of every sample that also appears as a rerun.

    Args:
        ctx (Settings): settings passed from gui; only 'rerun_regex' is read here
        df (DataFrame): initial dataframe

    Returns:
        DataFrame: dataframe with originals removed in favour of repeats.
    """
    # NOTE: with no rerun pattern configured the frame is handed back untouched.
    if 'rerun_regex' not in ctx:
        return df
    unique_names = get_unique_values_in_df_column(df, column_name="name")
    pattern = re.compile(fr"{ctx.rerun_regex}")
    for name in unique_names:
        if not pattern.search(name):
            continue
        # NOTE: removing the matched rerun marker leaves the name of the run being replaced.
        original_name = re.sub(pattern, "", name)
        superseded = df[df.name == original_name]
        df = df.drop(superseded.index)
    return df
def generic_figure_markers(fig: Figure, modes: list | None = None, ytitle: str | None = None) -> Figure:
    """
    Adds standard layout to figure.

    Args:
        fig (Figure): Input figure.
        modes (list | None, optional): List of modes included in figure. Defaults to None, which is treated as no modes.
        ytitle (str | None, optional): Title for the y-axis; replaced by the first mode whenever modes is non-empty. Defaults to None.

    Returns:
        Figure: Output figure with updated titles, rangeslider, buttons.
    """
    # NOTE: None replaces the old `[]` default so no list object is shared between calls.
    modes = [] if modes is None else modes
    if modes:
        ytitle = modes[0]
    # NOTE: one update-menu button per mode; make_buttons needs the trace count to build visibility masks.
    fig.update_layout(
        xaxis_title="Submitted Date (* - Date parsed from fastq file creation date)",
        yaxis_title=ytitle,
        showlegend=True,
        barmode='stack',
        updatemenus=[
            dict(
                type="buttons",
                direction="right",
                x=0.7,
                y=1.2,
                showactive=True,
                buttons=make_buttons(modes=modes, fig_len=len(fig.data)),
            )
        ]
    )
    fig.update_xaxes(
        rangeslider_visible=True,
        rangeselector=dict(
            buttons=[
                dict(count=1, label="1m", step="month", stepmode="backward"),
                dict(count=3, label="3m", step="month", stepmode="backward"),
                dict(count=6, label="6m", step="month", stepmode="backward"),
                dict(count=1, label="YTD", step="year", stepmode="todate"),
                dict(count=1, label="1y", step="year", stepmode="backward"),
                dict(step="all")
            ]
        )
    )
    # NOTE: isinstance (rather than an exact type comparison) also accepts Figure subclasses.
    assert isinstance(fig, Figure)
    return fig
def make_buttons(modes: list, fig_len: int) -> list:
"""
Creates list of buttons with one for each mode to be used in showing/hiding mode traces.
Args:
modes (list): list of modes used by main parser.
fig_len (int): number of traces in the figure
Returns:
list: list of buttons.
"""
buttons = []
if len(modes) > 1:
Returns:
Figure: output stacked bar chart.
"""
# fig = Figure()
for ii, mode in enumerate(modes):
# What I need to do is create a list of bools with the same length as the fig.data
mode_vis = [True] * fig_len
# And break it into {len(modes)} chunks
mode_vis = list(divide_chunks(mode_vis, len(modes)))
# Then, for each chunk, if the chunk index isn't equal to the index of the current mode, set to false
for jj, sublist in enumerate(mode_vis):
if jj != ii:
mode_vis[jj] = [not elem for elem in mode_vis[jj]]
# Finally, flatten list.
mode_vis = [item for sublist in mode_vis for item in sublist]
# Now, make button to add to list
buttons.append(dict(label=mode, method="update", args=[
{"visible": mode_vis},
{"yaxis.title.text": mode},
if "count" in mode:
df[mode] = pd.to_numeric(df[mode], errors='coerce')
color = "genus"
color_discrete_sequence = None
elif 'percent' in mode:
color = "genus"
color_discrete_sequence = None
else:
color = "target"
match get_unique_values_in_df_column(df, 'target'):
case ['Target']:
color_discrete_sequence = ["blue"]
case ['Off-target']:
color_discrete_sequence = ['red']
case _:
color_discrete_sequence = ['blue', 'red']
bar = px.bar(df,
x="submitted_date",
y=mode,
color=color,
title=mode,
barmode='stack',
hover_data=["genus", "name", "target", mode],
text="genera",
color_discrete_sequence=color_discrete_sequence
)
bar.update_traces(visible=ii == 0)
self.add_traces(bar.data)
# return generic_figure_markers(modes=modes, ytitle=ytitle)
def generic_figure_markers(self, modes: list = [], ytitle: str | None = None):
"""
Adds standard layout to figure.
Args:
fig (Figure): Input figure.
modes (list, optional): List of modes included in figure. Defaults to [].
ytitle (str, optional): Title for the y-axis. Defaults to None.
Returns:
Figure: Output figure with updated titles, rangeslider, buttons.
"""
if modes:
ytitle = modes[0]
# Creating visibles list for each mode.
self.update_layout(
xaxis_title="Submitted Date (* - Date parsed from fastq file creation date)",
yaxis_title=ytitle,
showlegend=True,
barmode='stack',
updatemenus=[
dict(
type="buttons",
direction="right",
x=0.7,
y=1.2,
showactive=True,
buttons=[button for button in self.make_buttons(modes=modes)],
)
]
))
return buttons
)
self.update_xaxes(
rangeslider_visible=True,
rangeselector=dict(
buttons=list([
dict(count=1, label="1m", step="month", stepmode="backward"),
dict(count=3, label="3m", step="month", stepmode="backward"),
dict(count=6, label="6m", step="month", stepmode="backward"),
dict(count=1, label="YTD", step="year", stepmode="todate"),
dict(count=1, label="1y", step="year", stepmode="backward"),
dict(step="all")
])
)
)
assert isinstance(self, Figure)
# return fig
def make_buttons(self, modes: list) -> list:
"""
Creates list of buttons with one for each mode to be used in showing/hiding mode traces.
def output_figures(figs: list, group_name: str):
"""
Writes plotly figure to html file.
Args:
modes (list): list of modes used by main parser.
fig_len (int): number of traces in the figure
Args:
settings (dict): settings passed down from click
fig (Figure): input figure object
group_name (str): controltype
"""
output = select_save_file(None, default_name=group_name, extension="html")
with open(output, "w") as f:
for fig in figs:
Returns:
list: list of buttons.
"""
fig_len = len(self.data)
if len(modes) > 1:
for ii, mode in enumerate(modes):
# What I need to do is create a list of bools with the same length as the fig.data
mode_vis = [True] * fig_len
# And break it into {len(modes)} chunks
mode_vis = list(divide_chunks(mode_vis, len(modes)))
# Then, for each chunk, if the chunk index isn't equal to the index of the current mode, set to false
for jj, sublist in enumerate(mode_vis):
if jj != ii:
mode_vis[jj] = [not elem for elem in mode_vis[jj]]
# Finally, flatten list.
mode_vis = [item for sublist in mode_vis for item in sublist]
# Now, yield button to add to list
yield dict(label=mode, method="update", args=[
{"visible": mode_vis},
{"yaxis.title.text": mode},
])
def save_figure(self, group_name: str = "plotly_output"):
"""
Writes plotly figure to html file.
Args:
figs ():
settings (dict): settings passed down from click
fig (Figure): input figure object
group_name (str): controltype
"""
output = select_save_file(None, default_name=group_name, extension="html")
with open(output, "w") as f:
try:
f.write(fig.to_html(full_html=False, include_plotlyjs='cdn'))
f.write(self.to_html())
except AttributeError:
logger.error(f"The following figure was a string: {fig}")
logger.error(f"The following figure was a string: {self}")
def to_html(self) -> str:
"""
Creates final html code from plotly
def construct_chart(df: pd.DataFrame, modes: list, ytitle: str | None = None) -> Figure:
"""
Creates a plotly chart for controls from a pandas dataframe
Args:
figure (Figure): input figure
Args:
df (pd.DataFrame): input dataframe of controls
modes (list): analysis modes to construct charts for
ytitle (str | None, optional): title on the y-axis. Defaults to None.
Returns:
Figure: output stacked bar chart.
"""
fig = Figure()
for ii, mode in enumerate(modes):
if "count" in mode:
df[mode] = pd.to_numeric(df[mode], errors='coerce')
color = "genus"
color_discrete_sequence = None
elif 'percent' in mode:
color = "genus"
color_discrete_sequence = None
Returns:
str: html string
"""
html = '<html><body>'
if self is not None:
html += plotly.offline.plot(self, output_type='div',
include_plotlyjs='cdn') #, image = 'png', auto_open=True, image_filename='plot_image')
else:
color = "target"
match get_unique_values_in_df_column(df, 'target'):
case ['Target']:
color_discrete_sequence = ["blue"]
case ['Off-target']:
color_discrete_sequence = ['red']
case _:
color_discrete_sequence = ['blue', 'red']
bar = px.bar(df, x="submitted_date",
y=mode,
color=color,
title=mode,
barmode='stack',
hover_data=["genus", "name", "target", mode],
text="genera",
color_discrete_sequence=color_discrete_sequence
)
bar.update_traces(visible=ii == 0)
fig.add_traces(bar.data)
return generic_figure_markers(fig=fig, modes=modes, ytitle=ytitle)
def construct_html(figure: Figure) -> str:
    """
    Wraps a plotly figure in a minimal html document.

    Args:
        figure (Figure): figure to embed; None produces a "no data" page instead

    Returns:
        str: html string
    """
    if figure is None:
        body = "<h1>No data was retrieved for the given parameters.</h1>"
    else:
        # NOTE: only the chart <div> is generated here; plotly.js itself is pulled from the cdn.
        body = plotly.offline.plot(figure, output_type='div', include_plotlyjs='cdn')
    return '<html><body>' + body + '</body></html>'
html += "<h1>No data was retrieved for the given parameters.</h1>"
html += '</body></html>'
return html

View File

@@ -1,9 +1,9 @@
"""
Handles display of control charts
"""
import re
from datetime import timedelta
from typing import Tuple
from PyQt6.QtWebEngineWidgets import QWebEngineView
from PyQt6.QtWidgets import (
QWidget, QVBoxLayout, QComboBox, QHBoxLayout,
@@ -14,9 +14,9 @@ from backend.db import ControlType, Control
from PyQt6.QtCore import QDate, QSize
import logging
from pandas import DataFrame
from tools import Report, Result
from tools import Report, Result, get_unique_values_in_df_column, Settings, report_result
# from backend.excel.reports import convert_data_list_to_df
from frontend.visualizations.control_charts import create_charts, construct_html
from frontend.visualizations.control_charts import CustomFigure
logger = logging.getLogger(f"submissions.{__name__}")
@@ -60,7 +60,8 @@ class ControlsViewer(QWidget):
Lookup controls from database and send to chartmaker
"""
self.controls_getter_function()
@report_result
def controls_getter_function(self):
"""
Get controls based on start/end dates
@@ -103,7 +104,7 @@ class ControlsViewer(QWidget):
self.sub_typer.clear()
self.sub_typer.setEnabled(False)
self.chart_maker()
self.report.add_result(report)
return report
def chart_maker(self):
"""
@@ -111,6 +112,7 @@ class ControlsViewer(QWidget):
"""
self.chart_maker_function()
@report_result
def chart_maker_function(self):
"""
Create html chart for controls reporting
@@ -141,7 +143,7 @@ class ControlsViewer(QWidget):
data = [item for sublist in data for item in sublist]
# logger.debug(f"Control objects going into df conversion: {type(data)}")
if not data:
self.report.add_result(Result(status="Critical", msg="No data found for controls in given date range."))
report.add_result(Result(status="Critical", msg="No data found for controls in given date range."))
return
# NOTE send to dataframe creator
df = self.convert_data_list_to_df(input_df=data)
@@ -150,15 +152,16 @@ class ControlsViewer(QWidget):
else:
title = f"{self.mode} - {self.subtype}"
# NOTE: send dataframe to chart maker
fig = create_charts(ctx=self.app.ctx, df=df, ytitle=title)
df, modes = self.prep_df(ctx=self.app.ctx, df=df)
fig = CustomFigure(df=df, ytitle=title, modes=modes)
# logger.debug(f"Updating figure...")
# NOTE: construct html for webview
html = construct_html(figure=fig)
html = fig.to_html()
# logger.debug(f"The length of html code is: {len(html)}")
self.webengineview.setHtml(html)
self.webengineview.update()
# logger.debug("Figure updated... I hope.")
self.report.add_result(report)
return report
def convert_data_list_to_df(self, input_df: list[dict]) -> DataFrame:
"""
@@ -266,8 +269,65 @@ class ControlsViewer(QWidget):
df, previous_dates = self.check_date(df, item, previous_dates)
return df, previous_dates
def prep_df(self, ctx: Settings, df: DataFrame) -> tuple[DataFrame, list]:
    """
    Prepares a parsed controls dataframe for charting.

    Args:
        ctx (Settings): settings passed down from gui
        df (DataFrame): input dataframe

    Returns:
        tuple[DataFrame, list]: prepared dataframe and the list of mode columns found in it.
            An empty input is returned unchanged, paired with an empty mode list.
    """
    # NOTE: an empty frame still has to come back as a (df, modes) pair, since callers unpack the result.
    if df.empty:
        return df, []
    # NOTE: remember which genera were starred before the star is stripped from the genus itself.
    genera = ["*" if isinstance(item, str) and item.endswith("*") else "" for item in df['genus'].to_list()]
    df['genus'] = df['genus'].replace({r'\*': ''}, regex=True).replace({"NaN": "Unknown"})
    df['genera'] = genera
    # NOTE: remove original runs, using reruns if applicable
    df = self.drop_reruns_from_df(ctx=ctx, df=df)
    # NOTE: every column that is neither a sort key nor excluded is treated as a mode.
    sorts = ['submitted_date', "target", "genus"]
    exclude = ['name', 'genera']
    modes = [item for item in df.columns if item not in sorts and item not in exclude]
    # NOTE: "target" sorts descending; the other sort keys sort ascending.
    ascending = [item != "target" for item in sorts]
    df = df.sort_values(by=sorts, ascending=ascending)
    return df, modes
def drop_reruns_from_df(self, ctx: Settings, df: DataFrame) -> DataFrame:
    """
    Removes first-run rows for samples that were later rerun.

    Args:
        ctx (Settings): settings passed from gui; only 'rerun_regex' is read here
        df (DataFrame): initial dataframe

    Returns:
        DataFrame: dataframe with originals removed in favour of repeats.
    """
    # NOTE: nothing to do when no rerun pattern has been configured.
    if 'rerun_regex' not in ctx:
        return df
    names = get_unique_values_in_df_column(df, column_name="name")
    rerun_pattern = re.compile(fr"{ctx.rerun_regex}")
    # NOTE: only names that match the rerun pattern trigger a removal.
    for rerun_name in (name for name in names if rerun_pattern.search(name)):
        # NOTE: the name with the rerun marker removed identifies the earlier run to discard.
        earlier_run = re.sub(rerun_pattern, "", rerun_name)
        df = df.drop(df[df.name == earlier_run].index)
    return df
class ControlsDatePicker(QWidget):

View File

@@ -97,10 +97,10 @@ class RoleComboBox(QWidget):
self.layout = QGridLayout()
self.role = role
self.check = QCheckBox()
if role.name in used:
self.check.setChecked(False)
else:
self.check.setChecked(True)
# if role.name in used:
self.check.setChecked(False)
# else:
# self.check.setChecked(True)
self.check.stateChanged.connect(self.toggle_checked)
self.box = QComboBox()
self.box.setMaximumWidth(200)
@@ -157,6 +157,7 @@ class RoleComboBox(QWidget):
widget.setMinimumWidth(200)
widget.setMaximumWidth(200)
self.layout.addWidget(widget, 0, 4)
widget.setEnabled(self.check.isChecked())
def parse_form(self) -> PydEquipment | None:
"""
@@ -190,7 +191,4 @@ class RoleComboBox(QWidget):
case QCheckBox():
continue
case _:
if self.check.isChecked():
widget.setEnabled(True)
else:
widget.setEnabled(False)
widget.setEnabled(self.check.isChecked())

View File

@@ -1,6 +1,6 @@
'''
"""
Webview to show submission and sample details.
'''
"""
from PyQt6.QtWidgets import (QDialog, QPushButton, QVBoxLayout,
QDialogButtonBox, QTextEdit)
from PyQt6.QtWebEngineWidgets import QWebEngineView
@@ -84,7 +84,7 @@ class SubmissionDetails(QDialog):
if isinstance(submission, str):
submission = BasicSubmission.query(rsl_plate_num=submission)
self.base_dict = submission.to_dict(full_data=True)
# logger.debug(f"Submission details data:\n{pformat({k:v for k,v in self.base_dict.items() if k != 'samples'})}")
logger.debug(f"Submission details data:\n{pformat({k:v for k,v in self.base_dict.items() if k == 'reagents'})}")
# NOTE: don't want id
self.base_dict = submission.finalize_details(self.base_dict)
# logger.debug(f"Creating barcode.")

View File

@@ -134,10 +134,10 @@ class SubmissionsSheet(QTableView):
"""
Pull extraction logs into the db
"""
self.report = Report()
self.link_extractions_function()
self.report.add_result(self.report)
return self.report
report = Report()
result = self.link_extractions_function()
report.add_result(result)
return report
def link_extractions_function(self):
"""
@@ -148,7 +148,8 @@ class SubmissionsSheet(QTableView):
Returns:
Tuple[QMainWindow, dict]: Collection of new main app window and result dict
"""
"""
report = Report()
fname = select_open_file(self, file_extension="csv")
with open(fname.__str__(), 'r') as f:
# split csv on commas
@@ -178,17 +179,18 @@ class SubmissionsSheet(QTableView):
continue
sub.set_attribute('extraction_info', new_run)
sub.save()
self.report.add_result(Result(msg=f"We added {count} logs to the database.", status='Information'))
report.add_result(Result(msg=f"We added {count} logs to the database.", status='Information'))
return report
@report_result
def link_pcr(self):
"""
Pull pcr logs into the db
"""
self.link_pcr_function()
self.app.report.add_result(self.report)
self.report = Report()
return self.report
"""
report = Report()
result = self.link_pcr_function()
report.add_result(result)
return report
def link_pcr_function(self):
"""
@@ -199,7 +201,8 @@ class SubmissionsSheet(QTableView):
Returns:
Tuple[QMainWindow, dict]: Collection of new main app window and result dict
"""
"""
report = Report()
fname = select_open_file(self, file_extension="csv")
with open(fname.__str__(), 'r') as f:
# NOTE: split csv rows on comma
@@ -226,16 +229,18 @@ class SubmissionsSheet(QTableView):
sub.set_attribute('pcr_info', new_run)
# NOTE: check if pcr_info already exists
sub.save()
self.report.add_result(Result(msg=f"We added {count} logs to the database.", status='Information'))
report.add_result(Result(msg=f"We added {count} logs to the database.", status='Information'))
return report
@report_result
def generate_report(self, *args):
"""
Make a report
"""
self.report = Report()
self.generate_report_function()
return self.report
report = Report()
result = self.generate_report_function()
report.add_result(result)
return report
def generate_report_function(self):
"""
@@ -255,4 +260,4 @@ class SubmissionsSheet(QTableView):
fname = select_save_file(obj=self, default_name=f"Submissions_Report_{info['start_date']}-{info['end_date']}.docx", extension="docx")
rp = ReportMaker(start_date=info['start_date'], end_date=info['end_date'])
rp.write_report(filename=fname, obj=self)
self.report.add_result(report)
return report

View File

@@ -121,8 +121,7 @@ class SubmissionFormContainer(QWidget):
# logger.debug(f"Attempting to parse file: {fname}")
if not fname.exists():
report.add_result(Result(msg=f"File {fname.__str__()} not found.", status="critical"))
self.report.add_result(report)
return
return report
# NOTE: create sheetparser using excel sheet and context from gui
try:
self.prsr = SheetParser(filepath=fname)
@@ -136,7 +135,7 @@ class SubmissionFormContainer(QWidget):
# logger.debug(f"Pydantic result: \n\n{pformat(self.pyd)}\n\n")
self.form = self.pyd.to_form(parent=self)
self.layout().addWidget(self.form)
self.report.add_result(report)
return report
# logger.debug(f"Outgoing report: {self.report.results}")
# logger.debug(f"All attributes of submission container:\n{pformat(self.__dict__)}")