Before control chart update.

This commit is contained in:
lwark
2024-07-31 07:50:23 -05:00
parent 2a07265cbc
commit eb6cdc63e2
10 changed files with 305 additions and 396 deletions

View File

@@ -1,19 +1,29 @@
'''
"""
Functions for constructing controls graphs using plotly.
'''
TODO: Move these functions to widgets.controls_charts
"""
import re
import plotly
import plotly.express as px
import pandas as pd
from pandas import DataFrame
from plotly.graph_objects import Figure
import logging
# from backend.excel import get_unique_values_in_df_column
from tools import Settings, get_unique_values_in_df_column
from tools import Settings, get_unique_values_in_df_column, divide_chunks
from frontend.widgets.functions import select_save_file
logger = logging.getLogger(f"submissions.{__name__}")
def create_charts(ctx:Settings, df:pd.DataFrame, ytitle:str|None=None) -> Figure:
class CustomFigure(Figure):
def __init__(self, ctx: Settings, df: pd.DataFrame, ytitle: str | None = None):
super().__init__()
# NOTE: Start here.
def create_charts(ctx: Settings, df: pd.DataFrame, ytitle: str | None = None) -> Figure:
"""
Constructs figures based on parsed pandas dataframe.
@@ -24,8 +34,8 @@ def create_charts(ctx:Settings, df:pd.DataFrame, ytitle:str|None=None) -> Figure
Returns:
Figure: Plotly figure
"""
from backend.excel import drop_reruns_from_df
"""
# from backend.excel import drop_reruns_from_df
# converts starred genera to normal and splits off list of starred
genera = []
if df.empty:
@@ -33,28 +43,50 @@ def create_charts(ctx:Settings, df:pd.DataFrame, ytitle:str|None=None) -> Figure
for item in df['genus'].to_list():
try:
if item[-1] == "*":
genera.append(item[-1])
genera.append(item[-1])
else:
genera.append("")
except IndexError:
genera.append("")
df['genus'] = df['genus'].replace({'\*':''}, regex=True).replace({"NaN":"Unknown"})
df['genus'] = df['genus'].replace({'\*': ''}, regex=True).replace({"NaN": "Unknown"})
df['genera'] = genera
# remove original runs, using reruns if applicable
# NOTE: remove original runs, using reruns if applicable
df = drop_reruns_from_df(ctx=ctx, df=df)
# sort by and exclude from
# NOTE: sort by and exclude from
sorts = ['submitted_date', "target", "genus"]
exclude = ['name', 'genera']
modes = [item for item in df.columns if item not in sorts and item not in exclude]# and "_hashes" not in item]
# Set descending for any columns that have "{mode}" in the header.
modes = [item for item in df.columns if item not in sorts and item not in exclude] # and "_hashes" not in item]
# NOTE: Set descending for any columns that have "{mode}" in the header.
ascending = [False if item == "target" else True for item in sorts]
df = df.sort_values(by=sorts, ascending=ascending)
# logger.debug(df[df.isna().any(axis=1)])
# actual chart construction is done by
# NOTE: actual chart construction is done by
fig = construct_chart(df=df, modes=modes, ytitle=ytitle)
return fig
def generic_figure_markers(fig:Figure, modes:list=[], ytitle:str|None=None) -> Figure:
def drop_reruns_from_df(ctx: Settings, df: DataFrame) -> DataFrame:
"""
Removes semi-duplicates from dataframe after finding sequencing repeats.
Args:
settings (dict): settings passed from gui
df (DataFrame): initial dataframe
Returns:
DataFrame: dataframe with originals removed in favour of repeats.
"""
if 'rerun_regex' in ctx:
sample_names = get_unique_values_in_df_column(df, column_name="name")
rerun_regex = re.compile(fr"{ctx.rerun_regex}")
for sample in sample_names:
if rerun_regex.search(sample):
first_run = re.sub(rerun_regex, "", sample)
df = df.drop(df[df.name == first_run].index)
return df
def generic_figure_markers(fig: Figure, modes: list = [], ytitle: str | None = None) -> Figure:
"""
Adds standard layout to figure.
@@ -101,7 +133,8 @@ def generic_figure_markers(fig:Figure, modes:list=[], ytitle:str|None=None) -> F
assert type(fig) == Figure
return fig
def make_buttons(modes:list, fig_len:int) -> list:
def make_buttons(modes: list, fig_len: int) -> list:
"""
Creates list of buttons with one for each mode to be used in showing/hiding mode traces.
@@ -127,13 +160,14 @@ def make_buttons(modes:list, fig_len:int) -> list:
mode_vis = [item for sublist in mode_vis for item in sublist]
# Now, make button to add to list
buttons.append(dict(label=mode, method="update", args=[
{"visible": mode_vis},
{"yaxis.title.text": mode},
]
))
{"visible": mode_vis},
{"yaxis.title.text": mode},
]
))
return buttons
def output_figures(figs:list, group_name:str):
def output_figures(figs: list, group_name: str):
"""
Writes plotly figure to html file.
@@ -150,7 +184,8 @@ def output_figures(figs:list, group_name:str):
except AttributeError:
logger.error(f"The following figure was a string: {fig}")
def construct_chart(df:pd.DataFrame, modes:list, ytitle:str|None=None) -> Figure:
def construct_chart(df: pd.DataFrame, modes: list, ytitle: str | None = None) -> Figure:
"""
Creates a plotly chart for controls from a pandas dataframe
@@ -161,53 +196,40 @@ def construct_chart(df:pd.DataFrame, modes:list, ytitle:str|None=None) -> Figure
Returns:
Figure: output stacked bar chart.
"""
"""
fig = Figure()
for ii, mode in enumerate(modes):
if "count" in mode:
df[mode] = pd.to_numeric(df[mode],errors='coerce')
df[mode] = pd.to_numeric(df[mode], errors='coerce')
color = "genus"
color_discrete_sequence=None
color_discrete_sequence = None
elif 'percent' in mode:
color = "genus"
color_discrete_sequence=None
color_discrete_sequence = None
else:
color = "target"
match get_unique_values_in_df_column(df, 'target'):
case ['Target']:
color_discrete_sequence=["blue"]
color_discrete_sequence = ["blue"]
case ['Off-target']:
color_discrete_sequence=['red']
color_discrete_sequence = ['red']
case _:
color_discrete_sequence=['blue', 'red']
bar = px.bar(df, x="submitted_date",
y=mode,
color=color,
title=mode,
barmode='stack',
hover_data=["genus", "name", "target", mode],
text="genera",
color_discrete_sequence=color_discrete_sequence
)
bar.update_traces(visible = ii == 0)
color_discrete_sequence = ['blue', 'red']
bar = px.bar(df, x="submitted_date",
y=mode,
color=color,
title=mode,
barmode='stack',
hover_data=["genus", "name", "target", mode],
text="genera",
color_discrete_sequence=color_discrete_sequence
)
bar.update_traces(visible=ii == 0)
fig.add_traces(bar.data)
return generic_figure_markers(fig=fig, modes=modes, ytitle=ytitle)
def divide_chunks(input_list:list, chunk_count:int):
"""
Divides a list into {chunk_count} equal parts
Args:
input_list (list): Initials list
chunk_count (int): size of each chunk
Returns:
tuple: tuple containing sublists.
"""
k, m = divmod(len(input_list), chunk_count)
return (input_list[i*k+min(i, m):(i+1)*k+min(i+1, m)] for i in range(chunk_count))
def construct_html(figure:Figure) -> str:
def construct_html(figure: Figure) -> str:
"""
Creates final html code from plotly
@@ -216,10 +238,11 @@ def construct_html(figure:Figure) -> str:
Returns:
str: html string
"""
"""
html = '<html><body>'
if figure is not None:
html += plotly.offline.plot(figure, output_type='div', include_plotlyjs='cdn')#, image = 'png', auto_open=True, image_filename='plot_image')
html += plotly.offline.plot(figure, output_type='div',
include_plotlyjs='cdn') #, image = 'png', auto_open=True, image_filename='plot_image')
else:
html += "<h1>No data was retrieved for the given parameters.</h1>"
html += '</body></html>'