Pre code clean-up
This commit is contained in:
@@ -17,7 +17,7 @@ from PyQt6.QtCore import QDate, QSize
|
||||
import logging
|
||||
from pandas import DataFrame
|
||||
from tools import Report, Result, get_unique_values_in_df_column, Settings, report_result
|
||||
from frontend.visualizations import IridaFigure, PCRFigure
|
||||
from frontend.visualizations import IridaFigure, PCRFigure, CustomFigure
|
||||
from .misc import StartEndDatePicker
|
||||
|
||||
logger = logging.getLogger(f"submissions.{__name__}")
|
||||
@@ -54,11 +54,13 @@ class ControlsViewer(QWidget):
|
||||
self.layout.addWidget(self.datepicker, 0, 0, 1, 2)
|
||||
self.save_button = QPushButton("Save Chart", parent=self)
|
||||
self.layout.addWidget(self.save_button, 0, 2, 1, 1)
|
||||
self.layout.addWidget(self.control_sub_typer, 1, 0, 1, 3)
|
||||
self.layout.addWidget(self.mode_typer, 2, 0, 1, 3)
|
||||
self.layout.addWidget(self.mode_sub_typer, 3, 0, 1, 3)
|
||||
self.export_button = QPushButton("Save Data", parent=self)
|
||||
self.layout.addWidget(self.export_button, 0, 3, 1, 1)
|
||||
self.layout.addWidget(self.control_sub_typer, 1, 0, 1, 4)
|
||||
self.layout.addWidget(self.mode_typer, 2, 0, 1, 4)
|
||||
self.layout.addWidget(self.mode_sub_typer, 3, 0, 1, 4)
|
||||
self.archetype.get_instance_class().make_parent_buttons(parent=self)
|
||||
self.layout.addWidget(self.webengineview, self.layout.rowCount(), 0, 1, 3)
|
||||
self.layout.addWidget(self.webengineview, self.layout.rowCount(), 0, 1, 4)
|
||||
self.setLayout(self.layout)
|
||||
self.controls_getter_function()
|
||||
self.control_sub_typer.currentIndexChanged.connect(self.controls_getter_function)
|
||||
@@ -66,11 +68,15 @@ class ControlsViewer(QWidget):
|
||||
self.datepicker.start_date.dateChanged.connect(self.controls_getter_function)
|
||||
self.datepicker.end_date.dateChanged.connect(self.controls_getter_function)
|
||||
self.save_button.pressed.connect(self.save_chart_function)
|
||||
self.export_button.pressed.connect(self.save_data_function)
|
||||
|
||||
|
||||
def save_chart_function(self):
|
||||
self.fig.save_figure(parent=self)
|
||||
|
||||
def save_data_function(self):
|
||||
self.fig.save_data(parent=self)
|
||||
|
||||
# def controls_getter(self):
|
||||
# """
|
||||
# Lookup controls from database and send to chartmaker
|
||||
@@ -152,10 +158,9 @@ class ControlsViewer(QWidget):
|
||||
mode=self.mode,
|
||||
sub_mode=self.mode_sub_type, parent=self, months=months)
|
||||
_, self.fig = self.archetype.get_instance_class().make_chart(chart_settings=chart_settings, parent=self, ctx=self.app.ctx)
|
||||
# if isinstance(self.fig, IridaFigure):
|
||||
# self.save_button.setEnabled(True)
|
||||
if issubclass(self.fig.__class__, CustomFigure):
|
||||
self.save_button.setEnabled(True)
|
||||
# logger.debug(f"Updating figure...")
|
||||
# self.fig = fig
|
||||
# NOTE: construct html for webview
|
||||
html = self.fig.to_html()
|
||||
# logger.debug(f"The length of html code is: {len(html)}")
|
||||
@@ -164,164 +169,164 @@ class ControlsViewer(QWidget):
|
||||
# logger.debug("Figure updated... I hope.")
|
||||
return report
|
||||
|
||||
def convert_data_list_to_df(self, input_df: list[dict]) -> DataFrame:
|
||||
"""
|
||||
Convert list of control records to dataframe
|
||||
|
||||
Args:
|
||||
ctx (dict): settings passed from gui
|
||||
input_df (list[dict]): list of dictionaries containing records
|
||||
mode_sub_type (str | None, optional): sub_type of submission type. Defaults to None.
|
||||
|
||||
Returns:
|
||||
DataFrame: dataframe of controls
|
||||
"""
|
||||
|
||||
df = DataFrame.from_records(input_df)
|
||||
safe = ['name', 'submitted_date', 'genus', 'target']
|
||||
for column in df.columns:
|
||||
if column not in safe:
|
||||
if self.mode_sub_type is not None and column != self.mode_sub_type:
|
||||
continue
|
||||
else:
|
||||
safe.append(column)
|
||||
if "percent" in column:
|
||||
# count_col = [item for item in df.columns if "count" in item][0]
|
||||
try:
|
||||
count_col = next(item for item in df.columns if "count" in item)
|
||||
except StopIteration:
|
||||
continue
|
||||
# NOTE: The actual percentage from kraken was off due to exclusion of NaN, recalculating.
|
||||
df[column] = 100 * df[count_col] / df.groupby('name')[count_col].transform('sum')
|
||||
df = df[[c for c in df.columns if c in safe]]
|
||||
# NOTE: move date of sample submitted on same date as previous ahead one.
|
||||
df = self.displace_date(df=df)
|
||||
# NOTE: ad hoc method to make data labels more accurate.
|
||||
df = self.df_column_renamer(df=df)
|
||||
return df
|
||||
|
||||
def df_column_renamer(self, df: DataFrame) -> DataFrame:
|
||||
"""
|
||||
Ad hoc function I created to clarify some fields
|
||||
|
||||
Args:
|
||||
df (DataFrame): input dataframe
|
||||
|
||||
Returns:
|
||||
DataFrame: dataframe with 'clarified' column names
|
||||
"""
|
||||
df = df[df.columns.drop(list(df.filter(regex='_hashes')))]
|
||||
return df.rename(columns={
|
||||
"contains_ratio": "contains_shared_hashes_ratio",
|
||||
"matches_ratio": "matches_shared_hashes_ratio",
|
||||
"kraken_count": "kraken2_read_count_(top_50)",
|
||||
"kraken_percent": "kraken2_read_percent_(top_50)"
|
||||
})
|
||||
|
||||
def displace_date(self, df: DataFrame) -> DataFrame:
|
||||
"""
|
||||
This function serves to split samples that were submitted on the same date by incrementing dates.
|
||||
It will shift the date forward by one day if it is the same day as an existing date in a list.
|
||||
|
||||
Args:
|
||||
df (DataFrame): input dataframe composed of control records
|
||||
|
||||
Returns:
|
||||
DataFrame: output dataframe with dates incremented.
|
||||
"""
|
||||
# logger.debug(f"Unique items: {df['name'].unique()}")
|
||||
# NOTE: get submitted dates for each control
|
||||
dict_list = [dict(name=item, date=df[df.name == item].iloc[0]['submitted_date']) for item in
|
||||
sorted(df['name'].unique())]
|
||||
previous_dates = set()
|
||||
# for _, item in enumerate(dict_list):
|
||||
for item in dict_list:
|
||||
df, previous_dates = self.check_date(df=df, item=item, previous_dates=previous_dates)
|
||||
return df
|
||||
|
||||
def check_date(self, df: DataFrame, item: dict, previous_dates: set) -> Tuple[DataFrame, list]:
|
||||
"""
|
||||
Checks if an items date is already present in df and adjusts df accordingly
|
||||
|
||||
Args:
|
||||
df (DataFrame): input dataframe
|
||||
item (dict): control for checking
|
||||
previous_dates (list): list of dates found in previous controls
|
||||
|
||||
Returns:
|
||||
Tuple[DataFrame, list]: Output dataframe and appended list of previous dates
|
||||
"""
|
||||
try:
|
||||
check = item['date'] in previous_dates
|
||||
except IndexError:
|
||||
check = False
|
||||
previous_dates.add(item['date'])
|
||||
if check:
|
||||
# logger.debug(f"We found one! Increment date!\n\t{item['date']} to {item['date'] + timedelta(days=1)}")
|
||||
# NOTE: get df locations where name == item name
|
||||
mask = df['name'] == item['name']
|
||||
# NOTE: increment date in dataframe
|
||||
df.loc[mask, 'submitted_date'] = df.loc[mask, 'submitted_date'].apply(lambda x: x + timedelta(days=1))
|
||||
item['date'] += timedelta(days=1)
|
||||
passed = False
|
||||
else:
|
||||
passed = True
|
||||
# logger.debug(f"\n\tCurrent date: {item['date']}\n\tPrevious dates:{previous_dates}")
|
||||
# logger.debug(f"DF: {type(df)}, previous_dates: {type(previous_dates)}")
|
||||
# NOTE: if run didn't lead to changed date, return values
|
||||
if passed:
|
||||
# logger.debug(f"Date check passed, returning.")
|
||||
return df, previous_dates
|
||||
# NOTE: if date was changed, rerun with new date
|
||||
else:
|
||||
logger.warning(f"Date check failed, running recursion")
|
||||
df, previous_dates = self.check_date(df, item, previous_dates)
|
||||
return df, previous_dates
|
||||
|
||||
def prep_df(self, ctx: Settings, df: DataFrame) -> Tuple[DataFrame, list]:
|
||||
"""
|
||||
Constructs figures based on parsed pandas dataframe.
|
||||
|
||||
Args:
|
||||
ctx (Settings): settings passed down from gui
|
||||
df (pd.DataFrame): input dataframe
|
||||
ytitle (str | None, optional): title for the y-axis. Defaults to None.
|
||||
|
||||
Returns:
|
||||
Figure: Plotly figure
|
||||
"""
|
||||
# NOTE: converts starred genera to normal and splits off list of starred
|
||||
if df.empty:
|
||||
return None
|
||||
df['genus'] = df['genus'].replace({'\*': ''}, regex=True).replace({"NaN": "Unknown"})
|
||||
df['genera'] = [item[-1] if item and item[-1] == "*" else "" for item in df['genus'].to_list()]
|
||||
# NOTE: remove original runs, using reruns if applicable
|
||||
df = self.drop_reruns_from_df(ctx=ctx, df=df)
|
||||
# NOTE: sort by and exclude from
|
||||
sorts = ['submitted_date', "target", "genus"]
|
||||
exclude = ['name', 'genera']
|
||||
modes = [item for item in df.columns if item not in sorts and item not in exclude]
|
||||
# NOTE: Set descending for any columns that have "{mode}" in the header.
|
||||
ascending = [False if item == "target" else True for item in sorts]
|
||||
df = df.sort_values(by=sorts, ascending=ascending)
|
||||
# logger.debug(df[df.isna().any(axis=1)])
|
||||
# NOTE: actual chart construction is done by
|
||||
return df, modes
|
||||
|
||||
def drop_reruns_from_df(self, ctx: Settings, df: DataFrame) -> DataFrame:
|
||||
"""
|
||||
Removes semi-duplicates from dataframe after finding sequencing repeats.
|
||||
|
||||
Args:
|
||||
settings (dict): settings passed from gui
|
||||
df (DataFrame): initial dataframe
|
||||
|
||||
Returns:
|
||||
DataFrame: dataframe with originals removed in favour of repeats.
|
||||
"""
|
||||
if 'rerun_regex' in ctx:
|
||||
sample_names = get_unique_values_in_df_column(df, column_name="name")
|
||||
rerun_regex = re.compile(fr"{ctx.rerun_regex}")
|
||||
exclude = [re.sub(rerun_regex, "", sample) for sample in sample_names if rerun_regex.search(sample)]
|
||||
df = df[df.name not in exclude]
|
||||
return df
|
||||
# def convert_data_list_to_df(self, input_df: list[dict]) -> DataFrame:
|
||||
# """
|
||||
# Convert list of control records to dataframe
|
||||
#
|
||||
# Args:
|
||||
# ctx (dict): settings passed from gui
|
||||
# input_df (list[dict]): list of dictionaries containing records
|
||||
# mode_sub_type (str | None, optional): sub_type of submission type. Defaults to None.
|
||||
#
|
||||
# Returns:
|
||||
# DataFrame: dataframe of controls
|
||||
# """
|
||||
#
|
||||
# df = DataFrame.from_records(input_df)
|
||||
# safe = ['name', 'submitted_date', 'genus', 'target']
|
||||
# for column in df.columns:
|
||||
# if column not in safe:
|
||||
# if self.mode_sub_type is not None and column != self.mode_sub_type:
|
||||
# continue
|
||||
# else:
|
||||
# safe.append(column)
|
||||
# if "percent" in column:
|
||||
# # count_col = [item for item in df.columns if "count" in item][0]
|
||||
# try:
|
||||
# count_col = next(item for item in df.columns if "count" in item)
|
||||
# except StopIteration:
|
||||
# continue
|
||||
# # NOTE: The actual percentage from kraken was off due to exclusion of NaN, recalculating.
|
||||
# df[column] = 100 * df[count_col] / df.groupby('name')[count_col].transform('sum')
|
||||
# df = df[[c for c in df.columns if c in safe]]
|
||||
# # NOTE: move date of sample submitted on same date as previous ahead one.
|
||||
# df = self.displace_date(df=df)
|
||||
# # NOTE: ad hoc method to make data labels more accurate.
|
||||
# df = self.df_column_renamer(df=df)
|
||||
# return df
|
||||
#
|
||||
# def df_column_renamer(self, df: DataFrame) -> DataFrame:
|
||||
# """
|
||||
# Ad hoc function I created to clarify some fields
|
||||
#
|
||||
# Args:
|
||||
# df (DataFrame): input dataframe
|
||||
#
|
||||
# Returns:
|
||||
# DataFrame: dataframe with 'clarified' column names
|
||||
# """
|
||||
# df = df[df.columns.drop(list(df.filter(regex='_hashes')))]
|
||||
# return df.rename(columns={
|
||||
# "contains_ratio": "contains_shared_hashes_ratio",
|
||||
# "matches_ratio": "matches_shared_hashes_ratio",
|
||||
# "kraken_count": "kraken2_read_count_(top_50)",
|
||||
# "kraken_percent": "kraken2_read_percent_(top_50)"
|
||||
# })
|
||||
#
|
||||
# def displace_date(self, df: DataFrame) -> DataFrame:
|
||||
# """
|
||||
# This function serves to split samples that were submitted on the same date by incrementing dates.
|
||||
# It will shift the date forward by one day if it is the same day as an existing date in a list.
|
||||
#
|
||||
# Args:
|
||||
# df (DataFrame): input dataframe composed of control records
|
||||
#
|
||||
# Returns:
|
||||
# DataFrame: output dataframe with dates incremented.
|
||||
# """
|
||||
# # logger.debug(f"Unique items: {df['name'].unique()}")
|
||||
# # NOTE: get submitted dates for each control
|
||||
# dict_list = [dict(name=item, date=df[df.name == item].iloc[0]['submitted_date']) for item in
|
||||
# sorted(df['name'].unique())]
|
||||
# previous_dates = set()
|
||||
# # for _, item in enumerate(dict_list):
|
||||
# for item in dict_list:
|
||||
# df, previous_dates = self.check_date(df=df, item=item, previous_dates=previous_dates)
|
||||
# return df
|
||||
#
|
||||
# def check_date(self, df: DataFrame, item: dict, previous_dates: set) -> Tuple[DataFrame, list]:
|
||||
# """
|
||||
# Checks if an items date is already present in df and adjusts df accordingly
|
||||
#
|
||||
# Args:
|
||||
# df (DataFrame): input dataframe
|
||||
# item (dict): control for checking
|
||||
# previous_dates (list): list of dates found in previous controls
|
||||
#
|
||||
# Returns:
|
||||
# Tuple[DataFrame, list]: Output dataframe and appended list of previous dates
|
||||
# """
|
||||
# try:
|
||||
# check = item['date'] in previous_dates
|
||||
# except IndexError:
|
||||
# check = False
|
||||
# previous_dates.add(item['date'])
|
||||
# if check:
|
||||
# # logger.debug(f"We found one! Increment date!\n\t{item['date']} to {item['date'] + timedelta(days=1)}")
|
||||
# # NOTE: get df locations where name == item name
|
||||
# mask = df['name'] == item['name']
|
||||
# # NOTE: increment date in dataframe
|
||||
# df.loc[mask, 'submitted_date'] = df.loc[mask, 'submitted_date'].apply(lambda x: x + timedelta(days=1))
|
||||
# item['date'] += timedelta(days=1)
|
||||
# passed = False
|
||||
# else:
|
||||
# passed = True
|
||||
# # logger.debug(f"\n\tCurrent date: {item['date']}\n\tPrevious dates:{previous_dates}")
|
||||
# # logger.debug(f"DF: {type(df)}, previous_dates: {type(previous_dates)}")
|
||||
# # NOTE: if run didn't lead to changed date, return values
|
||||
# if passed:
|
||||
# # logger.debug(f"Date check passed, returning.")
|
||||
# return df, previous_dates
|
||||
# # NOTE: if date was changed, rerun with new date
|
||||
# else:
|
||||
# logger.warning(f"Date check failed, running recursion")
|
||||
# df, previous_dates = self.check_date(df, item, previous_dates)
|
||||
# return df, previous_dates
|
||||
#
|
||||
# def prep_df(self, ctx: Settings, df: DataFrame) -> Tuple[DataFrame, list]:
|
||||
# """
|
||||
# Constructs figures based on parsed pandas dataframe.
|
||||
#
|
||||
# Args:
|
||||
# ctx (Settings): settings passed down from gui
|
||||
# df (pd.DataFrame): input dataframe
|
||||
# ytitle (str | None, optional): title for the y-axis. Defaults to None.
|
||||
#
|
||||
# Returns:
|
||||
# Figure: Plotly figure
|
||||
# """
|
||||
# # NOTE: converts starred genera to normal and splits off list of starred
|
||||
# if df.empty:
|
||||
# return None
|
||||
# df['genus'] = df['genus'].replace({'\*': ''}, regex=True).replace({"NaN": "Unknown"})
|
||||
# df['genera'] = [item[-1] if item and item[-1] == "*" else "" for item in df['genus'].to_list()]
|
||||
# # NOTE: remove original runs, using reruns if applicable
|
||||
# df = self.drop_reruns_from_df(ctx=ctx, df=df)
|
||||
# # NOTE: sort by and exclude from
|
||||
# sorts = ['submitted_date', "target", "genus"]
|
||||
# exclude = ['name', 'genera']
|
||||
# modes = [item for item in df.columns if item not in sorts and item not in exclude]
|
||||
# # NOTE: Set descending for any columns that have "{mode}" in the header.
|
||||
# ascending = [False if item == "target" else True for item in sorts]
|
||||
# df = df.sort_values(by=sorts, ascending=ascending)
|
||||
# # logger.debug(df[df.isna().any(axis=1)])
|
||||
# # NOTE: actual chart construction is done by
|
||||
# return df, modes
|
||||
#
|
||||
# def drop_reruns_from_df(self, ctx: Settings, df: DataFrame) -> DataFrame:
|
||||
# """
|
||||
# Removes semi-duplicates from dataframe after finding sequencing repeats.
|
||||
#
|
||||
# Args:
|
||||
# settings (dict): settings passed from gui
|
||||
# df (DataFrame): initial dataframe
|
||||
#
|
||||
# Returns:
|
||||
# DataFrame: dataframe with originals removed in favour of repeats.
|
||||
# """
|
||||
# if 'rerun_regex' in ctx:
|
||||
# sample_names = get_unique_values_in_df_column(df, column_name="name")
|
||||
# rerun_regex = re.compile(fr"{ctx.rerun_regex}")
|
||||
# exclude = [re.sub(rerun_regex, "", sample) for sample in sample_names if rerun_regex.search(sample)]
|
||||
# df = df[df.name not in exclude]
|
||||
# return df
|
||||
|
||||
Reference in New Issue
Block a user