Pre code clean-up

This commit is contained in:
lwark
2024-10-17 13:09:35 -05:00
parent c3a4aac68b
commit 495d1a5a7f
15 changed files with 322 additions and 263 deletions

View File

@@ -10,9 +10,12 @@ from frontend.widgets.functions import select_save_file
class CustomFigure(Figure):
df = None
def __init__(self, df: pd.DataFrame, modes: list, ytitle: str | None = None, parent: QWidget | None = None,
months: int = 6):
super().__init__()
self.df = df
def save_figure(self, group_name: str = "plotly_output", parent: QWidget | None = None):
"""
@@ -28,6 +31,11 @@ class CustomFigure(Figure):
output = select_save_file(obj=parent, default_name=group_name, extension="png")
self.write_image(output.absolute().__str__(), engine="kaleido")
def save_data(self, group_name: str = "plotly_export", parent:QWidget|None=None):
output = select_save_file(obj=parent, default_name=group_name, extension="xlsx")
self.df.to_excel(output.absolute().__str__(), engine="openpyxl", index=False)
def to_html(self) -> str:
"""
Creates final html code from plotly

View File

@@ -21,6 +21,7 @@ class PCRFigure(CustomFigure):
def __init__(self, df: pd.DataFrame, modes: list, ytitle: str | None = None, parent: QWidget | None = None,
months: int = 6):
super().__init__(df=df, modes=modes)
logger.debug(f"DF: {self.df}")
self.construct_chart(df=df)
# self.generic_figure_markers(modes=modes, ytitle=ytitle, months=months)

View File

@@ -17,7 +17,7 @@ from PyQt6.QtCore import QDate, QSize
import logging
from pandas import DataFrame
from tools import Report, Result, get_unique_values_in_df_column, Settings, report_result
from frontend.visualizations import IridaFigure, PCRFigure
from frontend.visualizations import IridaFigure, PCRFigure, CustomFigure
from .misc import StartEndDatePicker
logger = logging.getLogger(f"submissions.{__name__}")
@@ -54,11 +54,13 @@ class ControlsViewer(QWidget):
self.layout.addWidget(self.datepicker, 0, 0, 1, 2)
self.save_button = QPushButton("Save Chart", parent=self)
self.layout.addWidget(self.save_button, 0, 2, 1, 1)
self.layout.addWidget(self.control_sub_typer, 1, 0, 1, 3)
self.layout.addWidget(self.mode_typer, 2, 0, 1, 3)
self.layout.addWidget(self.mode_sub_typer, 3, 0, 1, 3)
self.export_button = QPushButton("Save Data", parent=self)
self.layout.addWidget(self.export_button, 0, 3, 1, 1)
self.layout.addWidget(self.control_sub_typer, 1, 0, 1, 4)
self.layout.addWidget(self.mode_typer, 2, 0, 1, 4)
self.layout.addWidget(self.mode_sub_typer, 3, 0, 1, 4)
self.archetype.get_instance_class().make_parent_buttons(parent=self)
self.layout.addWidget(self.webengineview, self.layout.rowCount(), 0, 1, 3)
self.layout.addWidget(self.webengineview, self.layout.rowCount(), 0, 1, 4)
self.setLayout(self.layout)
self.controls_getter_function()
self.control_sub_typer.currentIndexChanged.connect(self.controls_getter_function)
@@ -66,11 +68,15 @@ class ControlsViewer(QWidget):
self.datepicker.start_date.dateChanged.connect(self.controls_getter_function)
self.datepicker.end_date.dateChanged.connect(self.controls_getter_function)
self.save_button.pressed.connect(self.save_chart_function)
self.export_button.pressed.connect(self.save_data_function)
def save_chart_function(self):
self.fig.save_figure(parent=self)
def save_data_function(self):
self.fig.save_data(parent=self)
# def controls_getter(self):
# """
# Lookup controls from database and send to chartmaker
@@ -152,10 +158,9 @@ class ControlsViewer(QWidget):
mode=self.mode,
sub_mode=self.mode_sub_type, parent=self, months=months)
_, self.fig = self.archetype.get_instance_class().make_chart(chart_settings=chart_settings, parent=self, ctx=self.app.ctx)
# if isinstance(self.fig, IridaFigure):
# self.save_button.setEnabled(True)
if issubclass(self.fig.__class__, CustomFigure):
self.save_button.setEnabled(True)
# logger.debug(f"Updating figure...")
# self.fig = fig
# NOTE: construct html for webview
html = self.fig.to_html()
# logger.debug(f"The length of html code is: {len(html)}")
@@ -164,164 +169,164 @@ class ControlsViewer(QWidget):
# logger.debug("Figure updated... I hope.")
return report
def convert_data_list_to_df(self, input_df: list[dict]) -> DataFrame:
"""
Convert list of control records to dataframe
Args:
ctx (dict): settings passed from gui
input_df (list[dict]): list of dictionaries containing records
mode_sub_type (str | None, optional): sub_type of submission type. Defaults to None.
Returns:
DataFrame: dataframe of controls
"""
df = DataFrame.from_records(input_df)
safe = ['name', 'submitted_date', 'genus', 'target']
for column in df.columns:
if column not in safe:
if self.mode_sub_type is not None and column != self.mode_sub_type:
continue
else:
safe.append(column)
if "percent" in column:
# count_col = [item for item in df.columns if "count" in item][0]
try:
count_col = next(item for item in df.columns if "count" in item)
except StopIteration:
continue
# NOTE: The actual percentage from kraken was off due to exclusion of NaN, recalculating.
df[column] = 100 * df[count_col] / df.groupby('name')[count_col].transform('sum')
df = df[[c for c in df.columns if c in safe]]
# NOTE: move date of sample submitted on same date as previous ahead one.
df = self.displace_date(df=df)
# NOTE: ad hoc method to make data labels more accurate.
df = self.df_column_renamer(df=df)
return df
def df_column_renamer(self, df: DataFrame) -> DataFrame:
"""
Ad hoc function I created to clarify some fields
Args:
df (DataFrame): input dataframe
Returns:
DataFrame: dataframe with 'clarified' column names
"""
df = df[df.columns.drop(list(df.filter(regex='_hashes')))]
return df.rename(columns={
"contains_ratio": "contains_shared_hashes_ratio",
"matches_ratio": "matches_shared_hashes_ratio",
"kraken_count": "kraken2_read_count_(top_50)",
"kraken_percent": "kraken2_read_percent_(top_50)"
})
def displace_date(self, df: DataFrame) -> DataFrame:
"""
This function serves to split samples that were submitted on the same date by incrementing dates.
It will shift the date forward by one day if it is the same day as an existing date in a list.
Args:
df (DataFrame): input dataframe composed of control records
Returns:
DataFrame: output dataframe with dates incremented.
"""
# logger.debug(f"Unique items: {df['name'].unique()}")
# NOTE: get submitted dates for each control
dict_list = [dict(name=item, date=df[df.name == item].iloc[0]['submitted_date']) for item in
sorted(df['name'].unique())]
previous_dates = set()
# for _, item in enumerate(dict_list):
for item in dict_list:
df, previous_dates = self.check_date(df=df, item=item, previous_dates=previous_dates)
return df
def check_date(self, df: DataFrame, item: dict, previous_dates: set) -> Tuple[DataFrame, list]:
"""
Checks if an items date is already present in df and adjusts df accordingly
Args:
df (DataFrame): input dataframe
item (dict): control for checking
previous_dates (list): list of dates found in previous controls
Returns:
Tuple[DataFrame, list]: Output dataframe and appended list of previous dates
"""
try:
check = item['date'] in previous_dates
except IndexError:
check = False
previous_dates.add(item['date'])
if check:
# logger.debug(f"We found one! Increment date!\n\t{item['date']} to {item['date'] + timedelta(days=1)}")
# NOTE: get df locations where name == item name
mask = df['name'] == item['name']
# NOTE: increment date in dataframe
df.loc[mask, 'submitted_date'] = df.loc[mask, 'submitted_date'].apply(lambda x: x + timedelta(days=1))
item['date'] += timedelta(days=1)
passed = False
else:
passed = True
# logger.debug(f"\n\tCurrent date: {item['date']}\n\tPrevious dates:{previous_dates}")
# logger.debug(f"DF: {type(df)}, previous_dates: {type(previous_dates)}")
# NOTE: if run didn't lead to changed date, return values
if passed:
# logger.debug(f"Date check passed, returning.")
return df, previous_dates
# NOTE: if date was changed, rerun with new date
else:
logger.warning(f"Date check failed, running recursion")
df, previous_dates = self.check_date(df, item, previous_dates)
return df, previous_dates
def prep_df(self, ctx: Settings, df: DataFrame) -> Tuple[DataFrame, list]:
"""
Constructs figures based on parsed pandas dataframe.
Args:
ctx (Settings): settings passed down from gui
df (pd.DataFrame): input dataframe
ytitle (str | None, optional): title for the y-axis. Defaults to None.
Returns:
Figure: Plotly figure
"""
# NOTE: converts starred genera to normal and splits off list of starred
if df.empty:
return None
df['genus'] = df['genus'].replace({'\*': ''}, regex=True).replace({"NaN": "Unknown"})
df['genera'] = [item[-1] if item and item[-1] == "*" else "" for item in df['genus'].to_list()]
# NOTE: remove original runs, using reruns if applicable
df = self.drop_reruns_from_df(ctx=ctx, df=df)
# NOTE: sort by and exclude from
sorts = ['submitted_date', "target", "genus"]
exclude = ['name', 'genera']
modes = [item for item in df.columns if item not in sorts and item not in exclude]
# NOTE: Set descending for any columns that have "{mode}" in the header.
ascending = [False if item == "target" else True for item in sorts]
df = df.sort_values(by=sorts, ascending=ascending)
# logger.debug(df[df.isna().any(axis=1)])
# NOTE: actual chart construction is done by
return df, modes
def drop_reruns_from_df(self, ctx: Settings, df: DataFrame) -> DataFrame:
"""
Removes semi-duplicates from dataframe after finding sequencing repeats.
Args:
settings (dict): settings passed from gui
df (DataFrame): initial dataframe
Returns:
DataFrame: dataframe with originals removed in favour of repeats.
"""
if 'rerun_regex' in ctx:
sample_names = get_unique_values_in_df_column(df, column_name="name")
rerun_regex = re.compile(fr"{ctx.rerun_regex}")
exclude = [re.sub(rerun_regex, "", sample) for sample in sample_names if rerun_regex.search(sample)]
df = df[df.name not in exclude]
return df
# def convert_data_list_to_df(self, input_df: list[dict]) -> DataFrame:
# """
# Convert list of control records to dataframe
#
# Args:
# ctx (dict): settings passed from gui
# input_df (list[dict]): list of dictionaries containing records
# mode_sub_type (str | None, optional): sub_type of submission type. Defaults to None.
#
# Returns:
# DataFrame: dataframe of controls
# """
#
# df = DataFrame.from_records(input_df)
# safe = ['name', 'submitted_date', 'genus', 'target']
# for column in df.columns:
# if column not in safe:
# if self.mode_sub_type is not None and column != self.mode_sub_type:
# continue
# else:
# safe.append(column)
# if "percent" in column:
# # count_col = [item for item in df.columns if "count" in item][0]
# try:
# count_col = next(item for item in df.columns if "count" in item)
# except StopIteration:
# continue
# # NOTE: The actual percentage from kraken was off due to exclusion of NaN, recalculating.
# df[column] = 100 * df[count_col] / df.groupby('name')[count_col].transform('sum')
# df = df[[c for c in df.columns if c in safe]]
# # NOTE: move date of sample submitted on same date as previous ahead one.
# df = self.displace_date(df=df)
# # NOTE: ad hoc method to make data labels more accurate.
# df = self.df_column_renamer(df=df)
# return df
#
# def df_column_renamer(self, df: DataFrame) -> DataFrame:
# """
# Ad hoc function I created to clarify some fields
#
# Args:
# df (DataFrame): input dataframe
#
# Returns:
# DataFrame: dataframe with 'clarified' column names
# """
# df = df[df.columns.drop(list(df.filter(regex='_hashes')))]
# return df.rename(columns={
# "contains_ratio": "contains_shared_hashes_ratio",
# "matches_ratio": "matches_shared_hashes_ratio",
# "kraken_count": "kraken2_read_count_(top_50)",
# "kraken_percent": "kraken2_read_percent_(top_50)"
# })
#
# def displace_date(self, df: DataFrame) -> DataFrame:
# """
# This function serves to split samples that were submitted on the same date by incrementing dates.
# It will shift the date forward by one day if it is the same day as an existing date in a list.
#
# Args:
# df (DataFrame): input dataframe composed of control records
#
# Returns:
# DataFrame: output dataframe with dates incremented.
# """
# # logger.debug(f"Unique items: {df['name'].unique()}")
# # NOTE: get submitted dates for each control
# dict_list = [dict(name=item, date=df[df.name == item].iloc[0]['submitted_date']) for item in
# sorted(df['name'].unique())]
# previous_dates = set()
# # for _, item in enumerate(dict_list):
# for item in dict_list:
# df, previous_dates = self.check_date(df=df, item=item, previous_dates=previous_dates)
# return df
#
# def check_date(self, df: DataFrame, item: dict, previous_dates: set) -> Tuple[DataFrame, list]:
# """
# Checks if an items date is already present in df and adjusts df accordingly
#
# Args:
# df (DataFrame): input dataframe
# item (dict): control for checking
# previous_dates (list): list of dates found in previous controls
#
# Returns:
# Tuple[DataFrame, list]: Output dataframe and appended list of previous dates
# """
# try:
# check = item['date'] in previous_dates
# except IndexError:
# check = False
# previous_dates.add(item['date'])
# if check:
# # logger.debug(f"We found one! Increment date!\n\t{item['date']} to {item['date'] + timedelta(days=1)}")
# # NOTE: get df locations where name == item name
# mask = df['name'] == item['name']
# # NOTE: increment date in dataframe
# df.loc[mask, 'submitted_date'] = df.loc[mask, 'submitted_date'].apply(lambda x: x + timedelta(days=1))
# item['date'] += timedelta(days=1)
# passed = False
# else:
# passed = True
# # logger.debug(f"\n\tCurrent date: {item['date']}\n\tPrevious dates:{previous_dates}")
# # logger.debug(f"DF: {type(df)}, previous_dates: {type(previous_dates)}")
# # NOTE: if run didn't lead to changed date, return values
# if passed:
# # logger.debug(f"Date check passed, returning.")
# return df, previous_dates
# # NOTE: if date was changed, rerun with new date
# else:
# logger.warning(f"Date check failed, running recursion")
# df, previous_dates = self.check_date(df, item, previous_dates)
# return df, previous_dates
#
# def prep_df(self, ctx: Settings, df: DataFrame) -> Tuple[DataFrame, list]:
# """
# Constructs figures based on parsed pandas dataframe.
#
# Args:
# ctx (Settings): settings passed down from gui
# df (pd.DataFrame): input dataframe
# ytitle (str | None, optional): title for the y-axis. Defaults to None.
#
# Returns:
# Figure: Plotly figure
# """
# # NOTE: converts starred genera to normal and splits off list of starred
# if df.empty:
# return None
# df['genus'] = df['genus'].replace({'\*': ''}, regex=True).replace({"NaN": "Unknown"})
# df['genera'] = [item[-1] if item and item[-1] == "*" else "" for item in df['genus'].to_list()]
# # NOTE: remove original runs, using reruns if applicable
# df = self.drop_reruns_from_df(ctx=ctx, df=df)
# # NOTE: sort by and exclude from
# sorts = ['submitted_date', "target", "genus"]
# exclude = ['name', 'genera']
# modes = [item for item in df.columns if item not in sorts and item not in exclude]
# # NOTE: Set descending for any columns that have "{mode}" in the header.
# ascending = [False if item == "target" else True for item in sorts]
# df = df.sort_values(by=sorts, ascending=ascending)
# # logger.debug(df[df.isna().any(axis=1)])
# # NOTE: actual chart construction is done by
# return df, modes
#
# def drop_reruns_from_df(self, ctx: Settings, df: DataFrame) -> DataFrame:
# """
# Removes semi-duplicates from dataframe after finding sequencing repeats.
#
# Args:
# settings (dict): settings passed from gui
# df (DataFrame): initial dataframe
#
# Returns:
# DataFrame: dataframe with originals removed in favour of repeats.
# """
# if 'rerun_regex' in ctx:
# sample_names = get_unique_values_in_df_column(df, column_name="name")
# rerun_regex = re.compile(fr"{ctx.rerun_regex}")
# exclude = [re.sub(rerun_regex, "", sample) for sample in sample_names if rerun_regex.search(sample)]
# df = df[df.name not in exclude]
# return df

View File

@@ -76,11 +76,11 @@ class SubmissionDetails(QDialog):
if "Submission" in title:
self.btn.setEnabled(True)
self.export_plate = title.split(" ")[-1]
logger.debug(f"Updating export plate to: {self.export_plate}")
# logger.debug(f"Updating export plate to: {self.export_plate}")
else:
self.btn.setEnabled(False)
if title == self.webview.history().items()[0].title():
logger.debug("Disabling back button")
# logger.debug("Disabling back button")
self.back.setEnabled(False)
else:
self.back.setEnabled(True)
@@ -93,7 +93,7 @@ class SubmissionDetails(QDialog):
Args:
sample (str): Submitter Id of the sample.
"""
logger.debug(f"Details: {sample}")
# logger.debug(f"Details: {sample}")
if isinstance(sample, str):
sample = BasicSample.query(submitter_id=sample)
base_dict = sample.to_sub_dict(full_data=True)

View File

@@ -731,10 +731,8 @@ class SubmissionFormWidget(QWidget):
self.setEditable(True)
looked_up_rt = KitTypeReagentRoleAssociation.query(reagent_role=reagent.role,
kit_type=extraction_kit)
# relevant_reagents = [str(item.lot) for item in
# self.relevant_reagents(assoc=looked_up_rt)]
relevant_reagents = [str(item.lot) for item in looked_up_rt.get_all_relevant_reagents()]
# logger.debug(f"Relevant reagents for {reagent.lot}: {relevant_reagents}")
logger.debug(f"Relevant reagents for {reagent}: {relevant_reagents}")
# NOTE: if reagent in sheet is not found insert it into the front of relevant reagents so it shows
if str(reagent.lot) not in relevant_reagents:
if check_not_nan(reagent.lot):
@@ -749,12 +747,13 @@ class SubmissionFormWidget(QWidget):
if isinstance(looked_up_reg, list):
looked_up_reg = None
# logger.debug(f"Because there was no reagent listed for {reagent.lot}, we will insert the last lot used: {looked_up_reg}")
if looked_up_reg is not None:
if looked_up_reg:
try:
relevant_reagents.remove(str(looked_up_reg.lot))
relevant_reagents.insert(0, str(looked_up_reg.lot))
except ValueError as e:
logger.error(f"Error reordering relevant reagents: {e}")
relevant_reagents.insert(0, str(looked_up_reg.lot))
else:
if len(relevant_reagents) > 1:
# logger.debug(f"Found {reagent.lot} in relevant reagents: {relevant_reagents}. Moving to front of list.")
@@ -765,26 +764,26 @@ class SubmissionFormWidget(QWidget):
else:
# logger.debug(f"Found {reagent.lot} in relevant reagents: {relevant_reagents}. But no need to move due to short list.")
pass
# logger.debug(f"New relevant reagents: {relevant_reagents}")
logger.debug(f"New relevant reagents: {relevant_reagents}")
self.setObjectName(f"lot_{reagent.role}")
self.addItems(relevant_reagents)
self.setToolTip(f"Enter lot number for the reagent used for {reagent.role}")
# self.setStyleSheet(main_form_style)
def relevant_reagents(self, assoc: KitTypeReagentRoleAssociation):
# logger.debug(f"Attempting lookup of reagents by type: {reagent.type}")
lookup = Reagent.query(reagent_role=assoc.reagent_role)
try:
regex = assoc.uses['exclude_regex']
except KeyError:
regex = "^$"
relevant_reagents = [item for item in lookup if
not check_regex_match(pattern=regex, check=str(item.lot))]
for rel_reagent in relevant_reagents:
# # NOTE: extract strings from any sets.
# if isinstance(rel_reagent, set):
# for thing in rel_reagent:
# yield thing
# elif isinstance(rel_reagent, str):
# yield rel_reagent
yield rel_reagent
# def relevant_reagents(self, assoc: KitTypeReagentRoleAssociation):
# # logger.debug(f"Attempting lookup of reagents by type: {reagent.type}")
# lookup = Reagent.query(reagent_role=assoc.reagent_role)
# try:
# regex = assoc.uses['exclude_regex']
# except KeyError:
# regex = "^$"
# relevant_reagents = [item for item in lookup if
# not check_regex_match(pattern=regex, check=str(item.lot))]
# for rel_reagent in relevant_reagents:
# # # NOTE: extract strings from any sets.
# # if isinstance(rel_reagent, set):
# # for thing in rel_reagent:
# # yield thing
# # elif isinstance(rel_reagent, str):
# # yield rel_reagent
# yield rel_reagent