From 7a53cfd9a1e725769e8264a8c8e26a972d0b1c31 Mon Sep 17 00:00:00 2001 From: Landon Wark Date: Mon, 23 Jan 2023 14:28:24 -0600 Subject: [PATCH] controls working --- src/submissions/__main__.py | 7 +- src/submissions/backend/db/__init__.py | 55 +++- src/submissions/backend/excel/__init__.py | 43 +++ src/submissions/backend/excel/parser.py | 30 +- src/submissions/backend/excel/reports.py | 83 +++++- src/submissions/frontend/__init__.py | 257 ++++++++-------- .../frontend/custom_widgets/__init__.py | 31 +- .../frontend/visualizations/__init__.py | 0 .../frontend/visualizations/charts.py | 278 ++++++++++++++++++ 9 files changed, 625 insertions(+), 159 deletions(-) create mode 100644 src/submissions/frontend/visualizations/__init__.py create mode 100644 src/submissions/frontend/visualizations/charts.py diff --git a/src/submissions/__main__.py b/src/submissions/__main__.py index 3ff7971..ddd8b21 100644 --- a/src/submissions/__main__.py +++ b/src/submissions/__main__.py @@ -1,5 +1,10 @@ import sys from pathlib import Path +import os +if getattr(sys, 'frozen', False): + os.environ['QTWEBENGINE_DISABLE_SANDBOX'] = "1" +else : + pass from configure import get_config, create_database_session, setup_logger ctx = get_config(None) from PyQt6.QtWidgets import QApplication @@ -10,7 +15,7 @@ logger = setup_logger(verbose=True) ctx["database_session"] = create_database_session(Path(ctx['database'])) if __name__ == '__main__': - app = QApplication(sys.argv) + app = QApplication(['', '--no-sandbox']) ex = App(ctx=ctx) sys.exit(app.exec()) diff --git a/src/submissions/backend/db/__init__.py b/src/submissions/backend/db/__init__.py index 09ef3f9..725a6c5 100644 --- a/src/submissions/backend/db/__init__.py +++ b/src/submissions/backend/db/__init__.py @@ -3,10 +3,13 @@ import pandas as pd # from sqlite3 import IntegrityError from sqlalchemy.exc import IntegrityError import logging -import datetime +from datetime import date, datetime from sqlalchemy import and_ import uuid import base64 +from sqlalchemy import JSON +import json +from dateutil.relativedelta import relativedelta logger = logging.getLogger(__name__) @@ -226,4 +229,52 @@ def create_kit_from_yaml(ctx:dict, exp:dict) -> None: def lookup_all_sample_types(ctx:dict) -> list[str]: uses = [item.used_for for item in ctx['database_session'].query(models.KitType).all()] uses = list(set([item for sublist in uses for item in sublist])) - return uses \ No newline at end of file + return uses + + + +def get_all_available_modes(ctx:dict) -> list[str]: + rel = ctx['database_session'].query(models.Control).first() + cols = [item.name for item in list(rel.__table__.columns) if isinstance(item.type, JSON)] + return cols + + + +def get_all_controls_by_type(ctx:dict, con_type:str, start_date:date|None=None, end_date:date|None=None) -> list: + """ + Returns a list of control objects that are instances of the input controltype. + + Args: + con_type (str): Name of the control type. + ctx (dict): Settings passed down from gui. + + Returns: + list: Control instances. + """ + + # print(f"Using dates: {start_date} to {end_date}") + query = ctx['database_session'].query(models.ControlType).filter_by(name=con_type) + try: + output = query.first().instances + except AttributeError: + output = None + # Hacky solution to my not being able to get the sql query to work. + if start_date != None and end_date != None: + output = [item for item in output if item.submitted_date.date() > start_date and item.submitted_date.date() < end_date] + # print(f"Type {con_type}: {query.first()}") + return output + + +def get_control_subtypes(ctx:dict, type:str, mode:str): + try: + outs = get_all_controls_by_type(ctx=ctx, con_type=type)[0] + except TypeError: + return [] + jsoner = json.loads(getattr(outs, mode)) + print(f"JSON out: {jsoner}") + try: + genera = list(jsoner.keys())[0] + except IndexError: + return [] + subtypes = [item for item in jsoner[genera] if "_hashes" not in item and "_ratio" not in item] + return subtypes diff --git a/src/submissions/backend/excel/__init__.py b/src/submissions/backend/excel/__init__.py index e69de29..5842e3d 100644 --- a/src/submissions/backend/excel/__init__.py +++ b/src/submissions/backend/excel/__init__.py @@ -0,0 +1,43 @@ + +from pandas import DataFrame +import re + + + +def get_unique_values_in_df_column(df: DataFrame, column_name: str) -> list: + """ + _summary_ + + Args: + df (DataFrame): _description_ + column_name (str): _description_ + + Returns: + list: _description_ + """ + return sorted(df[column_name].unique()) + + +def drop_reruns_from_df(ctx:dict, df: DataFrame) -> DataFrame: + """ + Removes semi-duplicates from dataframe after finding sequencing repeats. + + Args: + settings (dict): settings passed down from click + df (DataFrame): initial dataframe + + Returns: + DataFrame: dataframe with originals removed in favour of repeats. + """ + sample_names = get_unique_values_in_df_column(df, column_name="name") + if 'rerun_regex' in ctx: + # logger.debug(f"Compiling regex from: {settings['rerun_regex']}") + rerun_regex = re.compile(fr"{ctx['rerun_regex']}") + for sample in sample_names: + # logger.debug(f'Running search on {sample}') + if rerun_regex.search(sample): + # logger.debug(f'Match on {sample}') + first_run = re.sub(rerun_regex, "", sample) + # logger.debug(f"First run: {first_run}") + df = df.drop(df[df.name == first_run].index) + return df diff --git a/src/submissions/backend/excel/parser.py b/src/submissions/backend/excel/parser.py index 9d8615e..fb7eb6e 100644 --- a/src/submissions/backend/excel/parser.py +++ b/src/submissions/backend/excel/parser.py @@ -54,7 +54,6 @@ class SheetParser(object): def _parse_bacterial_culture(self): - # submission_info = self.xl.parse("Sample List") submission_info = self._parse_generic("Sample List") # iloc is [row][column] and the first row is set as header row so -2 tech = str(submission_info.iloc[11][1]) @@ -86,13 +85,6 @@ class SheetParser(object): enrichment_info = self.xl.parse("Enrichment Worksheet") extraction_info = self.xl.parse("Extraction Worksheet") qprc_info = self.xl.parse("qPCR Worksheet") - # iloc is [row][column] and the first row is set as header row so -2 - # self.sub['submitter_plate_num'] = submission_info.iloc[0][1] - # self.sub['rsl_plate_num'] = str(submission_info.iloc[10][1]) - # self.sub['submitted_date'] = submission_info.iloc[1][1].date()#.strftime("%Y-%m-%d") - # self.sub['submitting_lab'] = submission_info.iloc[0][3] - # self.sub['sample_count'] = str(submission_info.iloc[2][3]) - # self.sub['extraction_kit'] = submission_info.iloc[3][3] self.sub['technician'] = f"Enr: {enrichment_info.columns[2]}, Ext: {extraction_info.columns[2]}, PCR: {qprc_info.columns[2]}" # reagents self.sub['lot_lysis_buffer'] = enrichment_info.iloc[0][14] @@ -112,24 +104,6 @@ class SheetParser(object): sample_parser = SampleParser(submission_info.iloc[16:40]) sample_parse = getattr(sample_parser, f"parse_{self.sub['submission_type'].lower()}_samples") self.sub['samples'] = sample_parse() - # tech = str(submission_info.iloc[11][1]) - # if tech == "nan": - # tech = "Unknown" - # elif len(tech.split(",")) > 1: - # tech_reg = re.compile(r"[A-Z]{2}") - # tech = ", ".join(tech_reg.findall(tech)) - - - # self.sub['lot_wash_1'] = submission_info.iloc[1][6] - # self.sub['lot_wash_2'] = submission_info.iloc[2][6] - # self.sub['lot_binding_buffer'] = submission_info.iloc[3][6] - # self.sub['lot_magnetic_beads'] = submission_info.iloc[4][6] - # self.sub['lot_lysis_buffer'] = submission_info.iloc[5][6] - # self.sub['lot_elution_buffer'] = submission_info.iloc[6][6] - # self.sub['lot_isopropanol'] = submission_info.iloc[9][6] - # self.sub['lot_ethanol'] = submission_info.iloc[10][6] - # self.sub['lot_positive_control'] = None #submission_info.iloc[103][1] - # self.sub['lot_plate'] = submission_info.iloc[12][6] class SampleParser(object): @@ -147,9 +121,9 @@ class SampleParser(object): new.sample_id = sample['Unnamed: 1'] new.organism = sample['Unnamed: 2'] new.concentration = sample['Unnamed: 3'] - print(f"Sample object: {new.sample_id} = {type(new.sample_id)}") + # print(f"Sample object: {new.sample_id} = {type(new.sample_id)}") try: - not_a_nan = not np.isnan(new.sample_id) + not_a_nan = not np.isnan(new.sample_id) and new.sample_id.lower() != 'blank' except TypeError: not_a_nan = True if not_a_nan: diff --git a/src/submissions/backend/excel/reports.py b/src/submissions/backend/excel/reports.py index 48a55b6..e9daad0 100644 --- a/src/submissions/backend/excel/reports.py +++ b/src/submissions/backend/excel/reports.py @@ -1,5 +1,8 @@ +import pandas as pd from pandas import DataFrame import numpy as np +from backend.db import models +import json def make_report_xlsx(records:list[dict]) -> DataFrame: df = DataFrame.from_records(records) @@ -10,4 +13,82 @@ def make_report_xlsx(records:list[dict]) -> DataFrame: print(df2.columns) # df2['Cost']['sum'] = df2['Cost']['sum'].apply('${:,.2f}'.format) df2.iloc[:, (df2.columns.get_level_values(1)=='sum') & (df2.columns.get_level_values(0)=='Cost')] = df2.iloc[:, (df2.columns.get_level_values(1)=='sum') & (df2.columns.get_level_values(0)=='Cost')].applymap('${:,.2f}'.format) - return df2 \ No newline at end of file + return df2 + + +# def split_controls_dictionary(ctx:dict, input_dict) -> list[dict]: +# # this will be the date in string form +# dict_name = list(input_dict.keys())[0] +# # the data associated with the date key +# sub_dict = input_dict[dict_name] +# # How many "count", "Percent", etc are in the dictionary +# data_size = get_dict_size(sub_dict) +# output = [] +# for ii in range(data_size): +# new_dict = {} +# for genus in sub_dict: +# print(genus) +# sub_name = list(sub_dict[genus].keys())[ii] +# new_dict[genus] = sub_dict[genus][sub_name] +# output.append({"date":dict_name, "name": sub_name, "data": new_dict}) +# return output + + +# def get_dict_size(input:dict): +# return max(len(input[item]) for item in input) + + +# def convert_all_controls(ctx:dict, data:list) -> dict: +# dfs = {} +# dict_list = [split_controls_dictionary(ctx, datum) for datum in data] +# dict_list = [item for sublist in dict_list for item in sublist] +# names = list(set([datum['name'] for datum in dict_list])) +# for name in names: + + +# # df = DataFrame() +# # entries = [{item['date']:item['data']} for item in dict_list if item['name']==name] +# # series_list = [] +# # df = pd.json_normalize(entries) +# # for entry in entries: +# # col_name = list(entry.keys())[0] +# # col_dict = entry[col_name] +# # series = pd.Series(data=col_dict.values(), index=col_dict.keys(), name=col_name) +# # # df[col_name] = series.values +# # # print(df.index) +# # series_list.append(series) +# # df = DataFrame(series_list).T.fillna(0) +# # print(df) +# dfs['name'] = df +# return dfs + +def convert_control_by_mode(ctx:dict, control:models.Control, mode:str): + output = [] + data = json.loads(getattr(control, mode)) + for genus in data: + _dict = {} + _dict['name'] = control.name + _dict['submitted_date'] = control.submitted_date + _dict['genus'] = genus + _dict['target'] = 'Target' if genus.strip("*") in control.controltype.targets else "Off-target" + for key in data[genus]: + _dict[key] = data[genus][key] + output.append(_dict) + # print(output) + return output + + +def convert_data_list_to_df(ctx:dict, input:list[dict], subtype:str|None=None) -> DataFrame: + df = DataFrame.from_records(input) + safe = ['name', 'submitted_date', 'genus', 'target'] + print(df) + for column in df.columns: + if "percent" in column: + count_col = [item for item in df.columns if "count" in item][0] + # The actual percentage from kraken was off due to exclusion of NaN, recalculating. + df[column] = 100 * df[count_col] / df.groupby('submitted_date')[count_col].transform('sum') + if column not in safe: + if subtype != None and column != subtype: + del df[column] + # print(df) + return df diff --git a/src/submissions/frontend/__init__.py b/src/submissions/frontend/__init__.py index b3cad65..44dfaa6 100644 --- a/src/submissions/frontend/__init__.py +++ b/src/submissions/frontend/__init__.py @@ -7,7 +7,7 @@ from PyQt6.QtWidgets import ( QSpinBox ) from PyQt6.QtGui import QAction, QIcon -from PyQt6.QtCore import QDateTime, QDate +from PyQt6.QtCore import QDateTime, QDate, QSignalBlocker from PyQt6.QtCore import pyqtSlot from PyQt6.QtWebEngineWidgets import QWebEngineView @@ -19,17 +19,21 @@ import plotly.express as px import yaml from backend.excel.parser import SheetParser +from backend.excel.reports import convert_control_by_mode, convert_data_list_to_df from backend.db import (construct_submission_info, lookup_reagent, construct_reagent, store_reagent, store_submission, lookup_kittype_by_use, lookup_regent_by_type_name_and_kit_name, lookup_all_orgs, lookup_submissions_by_date_range, - get_all_Control_Types_names, create_kit_from_yaml + get_all_Control_Types_names, create_kit_from_yaml, get_all_available_modes, get_all_controls_by_type, + get_control_subtypes ) from backend.excel.reports import make_report_xlsx import numpy -from frontend.custom_widgets import AddReagentQuestion, AddReagentForm, SubmissionsSheet, ReportDatePicker, KitAdder +from frontend.custom_widgets import AddReagentQuestion, AddReagentForm, SubmissionsSheet, ReportDatePicker, KitAdder, ControlsDatePicker import logging import difflib +from frontend.visualizations.charts import create_charts + logger = logging.getLogger(__name__) logger.info("Hello, I am a logger") @@ -54,7 +58,8 @@ class App(QMainWindow): self._createMenuBar() self._createToolBar() self._connectActions() - self.renderPage() + # self.renderPage() + self.controls_getter() self.show() def _createMenuBar(self): @@ -86,6 +91,10 @@ class App(QMainWindow): self.addReagentAction.triggered.connect(self.add_reagent) self.generateReportAction.triggered.connect(self.generateReport) self.addKitAction.triggered.connect(self.add_kit) + self.table_widget.control_typer.currentIndexChanged.connect(self.controls_getter) + self.table_widget.mode_typer.currentIndexChanged.connect(self.controls_getter) + self.table_widget.datepicker.start_date.dateChanged.connect(self.controls_getter) + self.table_widget.datepicker.end_date.dateChanged.connect(self.controls_getter) def importSubmission(self): @@ -207,6 +216,28 @@ class App(QMainWindow): html += '' self.table_widget.webengineview.setHtml(html) self.table_widget.webengineview.update() + # type = self.table_widget.control_typer.currentText() + # mode = self.table_widget.mode_typer.currentText() + # controls = get_all_controls_by_type(ctx=self.ctx, type=type) + # data = [] + # for control in controls: + # dicts = convert_control_by_mode(ctx=self.ctx, control=control, mode=mode) + # data.append(dicts) + # data = [item for sublist in data for item in sublist] + # # print(data) + # df = convert_data_list_to_df(ctx=self.ctx, input=data) + # fig = create_charts(ctx=self.ctx, df=df) + + # print(fig) + # html = '' + # html += plotly.offline.plot(fig, output_type='div', auto_open=True, image = 'png', image_filename='plot_image') + # html += '' + # html = plotly.io.to_html(fig) + # # print(html) + # # with open("C:\\Users\\lwark\\Desktop\\test.html", "w") as f: + # # f.write(html) + # self.table_widget.webengineview.setHtml(html) + # self.table_widget.webengineview.update() def submit_new_sample(self): @@ -294,19 +325,101 @@ class App(QMainWindow): def add_kit(self): home_dir = str(Path(self.ctx["directory_path"])) - fname = Path(QFileDialog.getOpenFileName(self, 'Open file', home_dir)[0]) + fname = Path(QFileDialog.getOpenFileName(self, 'Open file', home_dir, filter = "yml(*.yml)")[0]) assert fname.exists() - with open(fname.__str__(), "r") as stream: - try: - exp = yaml.load(stream, Loader=yaml.Loader) - except yaml.YAMLError as exc: - logger.error(f'Error reading yaml file {fname}: {exc}') - return {} + try: + with open(fname.__str__(), "r") as stream: + try: + exp = yaml.load(stream, Loader=yaml.Loader) + except yaml.YAMLError as exc: + logger.error(f'Error reading yaml file {fname}: {exc}') + return {} + except PermissionError: + return create_kit_from_yaml(ctx=self.ctx, exp=exp) + + + def controls_getter(self): + # self.table_widget.webengineview.setHtml("") + try: + self.table_widget.sub_typer.disconnect() + except TypeError: + pass + if self.table_widget.datepicker.start_date.date() > self.table_widget.datepicker.end_date.date(): + print("that is not allowed!") + # self.table_widget.datepicker.start_date.setDate(e_date) + threemonthsago = self.table_widget.datepicker.end_date.date().addDays(-90) + with QSignalBlocker(self.table_widget.datepicker.start_date) as blocker: + self.table_widget.datepicker.start_date.setDate(threemonthsago) + self.controls_getter() + return + self.start_date = self.table_widget.datepicker.start_date.date().toPyDate() + self.end_date = self.table_widget.datepicker.end_date.date().toPyDate() + self.con_type = self.table_widget.control_typer.currentText() + self.mode = self.table_widget.mode_typer.currentText() + self.table_widget.sub_typer.clear() + sub_types = get_control_subtypes(ctx=self.ctx, type=self.con_type, mode=self.mode) + if sub_types != []: + with QSignalBlocker(self.table_widget.sub_typer) as blocker: + self.table_widget.sub_typer.addItems(sub_types) + self.table_widget.sub_typer.setEnabled(True) + self.table_widget.sub_typer.currentTextChanged.connect(self.chart_maker) + else: + + self.table_widget.sub_typer.clear() + self.table_widget.sub_typer.setEnabled(False) + self.chart_maker() + def chart_maker(self): + print(f"Control getter context: \n\tControl type: {self.con_type}\n\tMode: {self.mode}\n\tStart Date: {self.start_date}\n\tEnd Date: {self.end_date}") + if self.table_widget.sub_typer.currentText() == "": + self.subtype = None + else: + self.subtype = self.table_widget.sub_typer.currentText() + print(f"Subtype: {self.subtype}") + controls = get_all_controls_by_type(ctx=self.ctx, con_type=self.con_type, start_date=self.start_date, end_date=self.end_date) + if controls == None: + return + data = [] + for control in controls: + dicts = convert_control_by_mode(ctx=self.ctx, control=control, mode=self.mode) + data.append(dicts) + data = [item for sublist in data for item in sublist] + # print(data) + df = convert_data_list_to_df(ctx=self.ctx, input=data, subtype=self.subtype) + if self.subtype == None: + title = self.mode + else: + title = f"{self.mode} - {self.subtype}" + fig = create_charts(ctx=self.ctx, df=df, ytitle=title) + print(f"Updating figure...") + html = '' + if fig != None: + html += plotly.offline.plot(fig, output_type='div', include_plotlyjs='cdn')#, image = 'png', auto_open=True, image_filename='plot_image') + else: + html += "

No data was retrieved for the given parameters.

" + html += '' + # with open("C:\\Users\\lwark\\Desktop\\test.html", "w") as f: + # f.write(html) + self.table_widget.webengineview.setHtml(html) + self.table_widget.webengineview.update() + print("Figure updated... I hope.") + + + # def datechange(self): - + # s_date = self.table_widget.datepicker.start_date.date() + # e_date = self.table_widget.datepicker.end_date.date() + # if s_date > e_date: + # print("that is not allowed!") + # # self.table_widget.datepicker.start_date.setDate(e_date) + # threemonthsago = e_date.addDays(-90) + # self.table_widget.datepicker.start_date.setDate(threemonthsago) + # self.chart_maker() + + + class AddSubForm(QWidget): def __init__(self, parent): @@ -354,7 +467,7 @@ class AddSubForm(QWidget): # self.tab1.layout.addWidget(self.scroller) # self.tab1.setWidget(self.scroller) # self.tab1.setMinimumHeight(300) - + self.datepicker = ControlsDatePicker() self.webengineview = QWebEngineView() # data = '''Hello World''' # self.webengineview.setHtml(data) @@ -362,7 +475,15 @@ class AddSubForm(QWidget): self.control_typer = QComboBox() con_types = get_all_Control_Types_names(ctx=parent.ctx) self.control_typer.addItems(con_types) + self.mode_typer = QComboBox() + mode_types = get_all_available_modes(ctx=parent.ctx) + self.mode_typer.addItems(mode_types) + self.sub_typer = QComboBox() + self.sub_typer.setEnabled(False) + self.tab2.layout.addWidget(self.datepicker) self.tab2.layout.addWidget(self.control_typer) + self.tab2.layout.addWidget(self.mode_typer) + self.tab2.layout.addWidget(self.sub_typer) self.tab2.layout.addWidget(self.webengineview) self.tab2.setLayout(self.tab2.layout) # Add tabs to widget @@ -372,113 +493,3 @@ class AddSubForm(QWidget): self.tab3.setLayout(self.tab3.layout) self.layout.addWidget(self.tabs) self.setLayout(self.layout) - - - - # @pyqtSlot() - # def on_click(self): - # print("\n") - # for currentQTableWidgetItem in self.tableWidget.selectedItems(): - # print(currentQTableWidgetItem.row(), currentQTableWidgetItem.column(), currentQTableWidgetItem.text()) - - - -# import sys -# from pathlib import Path - -# from textual import events -# from textual.app import App, ComposeResult -# from textual.containers import Container, Vertical -# from textual.reactive import var -# from textual.widgets import DirectoryTree, Footer, Header, Input, Label -# from textual.css.query import NoMatches -# sys.path.append(Path(__file__).absolute().parents[1].__str__()) -# from backend.excel.parser import SheetParser - - -# class FormField(Input): - -# def on_mount(self): -# self.placeholder = "Value not set." - -# def update(self, input:str): -# self.value = input - - -# class DataBrowser(App): -# """ -# File browser input -# """ - -# CSS_PATH = "static/css/data_browser.css" -# BINDINGS = [ -# ("ctrl+f", "toggle_files", "Toggle Files"), -# ("ctrl+q", "quit", "Quit"), -# ] - -# show_tree = var(True) -# context = {} - -# def watch_show_tree(self, show_tree: bool) -> None: -# """Called when show_tree is modified.""" -# self.set_class(show_tree, "-show-tree") - -# def compose(self) -> ComposeResult: -# """Compose our UI.""" -# if 'directory_path' in self.context: -# path = self.context['directory_path'] -# else: -# path = "." -# yield Header() -# yield Container( -# DirectoryTree(path, id="tree-view"), -# Vertical( -# Label("[b]File Name[/b]", classes='box'), FormField(id="file-name", classes='box'), -# # Label("[b]Sample Type[/b]", classes='box'), FormField(id="sample-type", classes='box'), -# id="form-view" -# ) -# ) -# yield Footer() - - -# def on_mount(self, event: events.Mount) -> None: -# self.query_one(DirectoryTree).focus() - - -# def on_directory_tree_file_selected(self, event: DirectoryTree.FileSelected) -> None: -# """Called when the user click a file in the directory tree.""" -# event.stop() -# sample = SheetParser(Path(event.path), **self.context) -# sample_view = self.query_one("#file-name", FormField) -# # sample_type = self.query_one("#sample-type", FormField) -# sample_view.update(event.path) -# # sample_type.update(sample.sub['sample_type']) -# form_view = self.query_one("#form-view", Vertical) -# if sample.sub != None: -# for var in sample.sub.keys(): -# # if var == "sample_type": -# # continue -# try: -# deleter = self.query_one(f"#{var}_label") -# deleter.remove() -# except NoMatches: -# pass -# try: -# deleter = self.query_one(f"#{var}") -# deleter.remove() -# except NoMatches: -# pass -# form_view.mount(Label(var.replace("_", " ").upper(), id=f"{var}_label", classes='box added')) -# form_view.mount(FormField(id=var, classes='box added', value=sample.sub[var])) -# else: -# adds = self.query(".added") -# for add in adds: -# add.remove() - -# def action_toggle_files(self) -> None: -# """Called in response to key binding.""" -# self.show_tree = not self.show_tree - -# if __name__ == "__main__": -# app = DataBrowser() -# app.run() \ No newline at end of file diff --git a/src/submissions/frontend/custom_widgets/__init__.py b/src/submissions/frontend/custom_widgets/__init__.py index a0c228e..7a21032 100644 --- a/src/submissions/frontend/custom_widgets/__init__.py +++ b/src/submissions/frontend/custom_widgets/__init__.py @@ -4,9 +4,9 @@ from PyQt6.QtWidgets import ( QDialogButtonBox, QDateEdit, QTableView, QTextEdit, QSizePolicy, QWidget, QGridLayout, QPushButton, QSpinBox, - QScrollBar, QScrollArea + QScrollBar, QScrollArea, QHBoxLayout ) -from PyQt6.QtCore import Qt, QDate, QAbstractTableModel +from PyQt6.QtCore import Qt, QDate, QAbstractTableModel, QSize from PyQt6.QtGui import QFontMetrics from backend.db import get_all_reagenttype_names, submissions_to_df, lookup_submission_by_id, lookup_all_sample_types, create_kit_from_yaml @@ -35,7 +35,7 @@ class AddReagentQuestion(QDialog): self.buttonBox.rejected.connect(self.reject) self.layout = QVBoxLayout() - message = QLabel(f"Couldn't find reagent type {reagent_type.replace('_', ' ').title()}: {reagent_lot} in the database.\nWould you like to add it?") + message = QLabel(f"Couldn't find reagent type {reagent_type.replace('_', ' ').title().strip('Lot')}: {reagent_lot} in the database.\nWould you like to add it?") self.layout.addWidget(message) self.layout.addWidget(self.buttonBox) self.setLayout(self.layout) @@ -151,6 +151,7 @@ class SubmissionDetails(QDialog): interior.setParent(self) data = lookup_submission_by_id(ctx=ctx, id=id) base_dict = data.to_dict() + del base_dict['id'] base_dict['reagents'] = [item.to_sub_dict() for item in data.reagents] base_dict['samples'] = [item.to_sub_dict() for item in data.samples] template = env.get_template("submission_details.txt") @@ -306,4 +307,26 @@ class ReagentTypeForm(QWidget): grid.addWidget(QLabel("Extension of Life (months):"),0,2) eol = QSpinBox() eol.setMinimum(0) - grid.addWidget(eol, 0,3) \ No newline at end of file + grid.addWidget(eol, 0,3) + + +class ControlsDatePicker(QWidget): + def __init__(self) -> None: + super().__init__() + + self.start_date = QDateEdit(calendarPopup=True) + threemonthsago = QDate.currentDate().addDays(-90) + self.start_date.setDate(threemonthsago) + self.end_date = QDateEdit(calendarPopup=True) + self.end_date.setDate(QDate.currentDate()) + self.layout = QHBoxLayout() + self.layout.addWidget(QLabel("Start Date")) + self.layout.addWidget(self.start_date) + self.layout.addWidget(QLabel("End Date")) + self.layout.addWidget(self.end_date) + + self.setLayout(self.layout) + self.setSizePolicy(QSizePolicy.Policy.Expanding, QSizePolicy.Policy.Fixed) + + def sizeHint(self): + return QSize(80,20) diff --git a/src/submissions/frontend/visualizations/__init__.py b/src/submissions/frontend/visualizations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/submissions/frontend/visualizations/charts.py b/src/submissions/frontend/visualizations/charts.py new file mode 100644 index 0000000..97e8d2c --- /dev/null +++ b/src/submissions/frontend/visualizations/charts.py @@ -0,0 +1,278 @@ +import plotly.express as px +import pandas as pd +from pathlib import Path +from plotly.graph_objects import Figure +import logging +from backend.excel import get_unique_values_in_df_column + +logger = logging.getLogger("controls.tools.vis_functions") + + +def create_charts(ctx:dict, df:pd.DataFrame, ytitle:str|None=None) -> Figure: + """ + Constructs figures based on parsed pandas dataframe. + + Args: + settings (dict): settings passed down from click + df (pd.DataFrame): input dataframe + group_name (str): controltype + + Returns: + Figure: _description_ + """ + from backend.excel import drop_reruns_from_df + genera = [] + if df.empty: + return None + for item in df['genus'].to_list(): + try: + if item[-1] == "*": + genera.append(item[-1]) + else: + genera.append("") + except IndexError: + genera.append("") + df['genus'] = df['genus'].replace({'\*':''}, regex=True) + df['genera'] = genera + df = df.dropna() + df = drop_reruns_from_df(ctx=ctx, df=df) + sorts = ['submitted_date', "target", "genus"] + exclude = ['name', 'genera'] + modes = [item for item in df.columns if item not in sorts and item not in exclude and "_hashes" not in item] + # Set descending for any columns that have "{mode}" in the header. + ascending = [False if item == "target" else True for item in sorts] + df = df.sort_values(by=sorts, ascending=ascending) + fig = construct_chart(ctx=ctx, df=df, modes=modes, ytitle=ytitle) + + return fig + + + +def generic_figure_markers(fig:Figure, modes:list=[], ytitle:str|None=None) -> Figure: + """ + Adds standard layout to figure. + + Args: + fig (Figure): Input figure. + modes (list, optional): List of modes included in figure. Defaults to []. + + Returns: + Figure: Output figure with updated titles, rangeslider, buttons. + """ + if modes != []: + ytitle = modes[0] + # Creating visibles list for each mode. + fig.update_layout( + xaxis_title="Submitted Date (* - Date parsed from fastq file creation date)", + yaxis_title=ytitle, + showlegend=True, + barmode='stack', + updatemenus=[ + dict( + type="buttons", + direction="right", + x=0.7, + y=1.2, + showactive=True, + buttons=make_buttons(modes=modes, fig_len=len(fig.data)), + ) + ] + ) + fig.update_xaxes( + rangeslider_visible=True, + rangeselector=dict( + buttons=list([ + dict(count=1, label="1m", step="month", stepmode="backward"), + dict(count=3, label="3m", step="month", stepmode="backward"), + dict(count=6, label="6m", step="month", stepmode="backward"), + dict(count=1, label="YTD", step="year", stepmode="todate"), + dict(count=1, label="1y", step="year", stepmode="backward"), + dict(step="all") + ]) + ) + ) + logger.debug(f"Returning figure {fig}") + assert type(fig) == Figure + return fig + + +def make_buttons(modes:list, fig_len:int) -> list: + """ + Creates list of buttons with one for each mode to be used in showing/hiding mode traces. + + Args: + modes (list): list of modes used by main parser. + fig_len (int): number of traces in the figure + + Returns: + list: list of buttons. + """ + buttons = [] + if len(modes) > 1: + for ii, mode in enumerate(modes): + # What I need to do is create a list of bools with the same length as the fig.data + mode_vis = [True] * fig_len + # And break it into {len(modes)} chunks + mode_vis = list(divide_chunks(mode_vis, len(modes))) + # Then, for each chunk, if the chunk index isn't equal to the index of the current mode, set to false + for jj, sublist in enumerate(mode_vis): + if jj != ii: + mode_vis[jj] = [not elem for elem in mode_vis[jj]] + # Finally, flatten list. + mode_vis = [item for sublist in mode_vis for item in sublist] + # Now, make button to add to list + buttons.append(dict(label=mode, method="update", args=[ + {"visible": mode_vis}, + {"yaxis.title.text": mode}, + ] + )) + return buttons + +def output_figures(settings:dict, figs:list, group_name:str): + """ + Writes plotly figure to html file. + + Args: + settings (dict): settings passed down from click + fig (Figure): input figure object + group_name (str): controltype + """ + with open(Path(settings['folder']['output']).joinpath(f'{group_name}.html'), "w") as f: + for fig in figs: + try: + f.write(fig.to_html(full_html=False, include_plotlyjs='cdn')) + except AttributeError: + logger.error(f"The following figure was a string: {fig}") + +# Below are the individual construction functions. They must be named "construct_{mode}_chart" and +# take only json_in and mode to hook into the main processor. + +def construct_chart(ctx:dict, df:pd.DataFrame, modes:list, ytitle:str|None=None) -> Figure: + fig = Figure() + + for ii, mode in enumerate(modes): + if "count" in mode: + df[mode] = pd.to_numeric(df[mode],errors='coerce') + color = "genus" + color_discrete_sequence=None + elif 'percent' in mode: + color = "genus" + color_discrete_sequence=None + else: + color = "target" + print(get_unique_values_in_df_column(df, 'target')) + match get_unique_values_in_df_column(df, 'target'): + case ['Target']: + color_discrete_sequence=["blue"] + case ['Off-target']: + color_discrete_sequence=['red'] + case _: + color_discrete_sequence=['blue', 'red'] + bar = px.bar(df, x="submitted_date", + y=mode, + color=color, + title=mode, + barmode='stack', + hover_data=["genus", "name", "target", mode], + text="genera", + color_discrete_sequence=color_discrete_sequence + ) + bar.update_traces(visible = ii == 0) + fig.add_traces(bar.data) + # sys.exit(f"number of traces={len(fig.data)}") + return generic_figure_markers(fig=fig, modes=modes, ytitle=ytitle) + + + +def construct_refseq_chart(settings:dict, df:pd.DataFrame, group_name:str, mode:str) -> Figure: + """ + Constructs intial refseq chart for both contains and matches. + + Args: + settings (dict): settings passed down from click. + df (pd.DataFrame): dataframe containing all sample data for the group. + group_name (str): name of the group being processed. + mode (str): contains or matches, overwritten by hardcoding, so don't think about it too hard. + + Returns: + Figure: initial figure with contains and matches traces. + """ + # This overwrites the mode from the signature, might get confusing. + fig = Figure() + modes = ['contains', 'matches'] + for ii, mode in enumerate(modes): + bar = px.bar(df, x="submitted_date", + y=f"{mode}_ratio", + color="target", + title=f"{group_name}_{mode}", + barmode='stack', + hover_data=["genus", "name", f"{mode}_hashes"], + text="genera" + ) + bar.update_traces(visible = ii == 0) + # Plotly express returns a full figure, so we have to use the data from that figure only. + fig.add_traces(bar.data) + # sys.exit(f"number of traces={len(fig.data)}") + return generic_figure_markers(fig=fig, modes=modes) + + +def construct_kraken_chart(settings:dict, df:pd.DataFrame, group_name:str, mode:str) -> Figure: + """ + Constructs intial refseq chart for each mode in the kraken config settings. + + Args: + settings (dict): settings passed down from click. + df (pd.DataFrame): dataframe containing all sample data for the group. + group_name (str): name of the group being processed. + mode (str): kraken modes retrieved from config file by setup. + + Returns: + Figure: initial figure with traces for modes + """ + df[f'{mode}_count'] = pd.to_numeric(df[f'{mode}_count'],errors='coerce') + # The actual percentage from kraken was off due to exclusion of NaN, recalculating. + df[f'{mode}_percent'] = 100 * df[f'{mode}_count'] / df.groupby('submitted_date')[f'{mode}_count'].transform('sum') + modes = settings['modes'][mode] + # This overwrites the mode from the signature, might get confusing. + fig = Figure() + for ii, entry in enumerate(modes): + bar = px.bar(df, x="submitted_date", + y=entry, + color="genus", + title=f"{group_name}_{entry}", + barmode="stack", + hover_data=["genus", "name", "target"], + text="genera", + ) + bar.update_traces(visible = ii == 0) + fig.add_traces(bar.data) + return generic_figure_markers(fig=fig, modes=modes) + + +def divide_chunks(input_list:list, chunk_count:int): + """ + Divides a list into {chunk_count} equal parts + + Args: + input_list (list): Initials list + chunk_count (int): size of each chunk + + Returns: + tuple: tuple containing sublists. + """ + k, m = divmod(len(input_list), chunk_count) + return (input_list[i*k+min(i, m):(i+1)*k+min(i+1, m)] for i in range(chunk_count)) + +########This must be at bottom of module########### + +function_map = {} +for item in dict(locals().items()): + try: + if dict(locals().items())[item].__module__ == __name__: + try: + function_map[item] = dict(locals().items())[item] + except KeyError: + pass + except AttributeError: + pass +################################################### \ No newline at end of file