diff --git a/CHANGELOG.md b/CHANGELOG.md
index d99510f..4e4144c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,8 @@
 ## 202410.03

 - Added code for cataloging of PCR controls.
+- Data from control charts now exportable.
+- Irida parser updated.

 ## 202410.02

diff --git a/src/submissions/backend/db/models/__init__.py b/src/submissions/backend/db/models/__init__.py
index d1a4eaf..cf6114d 100644
--- a/src/submissions/backend/db/models/__init__.py
+++ b/src/submissions/backend/db/models/__init__.py
@@ -135,7 +135,7 @@ class BaseClass(Base):
         singles = model.get_default_info('singles')
         logger.info(f"Querying: {model}, with kwargs: {kwargs}")
         for k, v in kwargs.items():
-            logger.debug(f"Using key: {k} with value: {v}")
+            logger.info(f"Using key: {k} with value: {v}")
         try:
             attr = getattr(model, k)
             query = query.filter(attr == v)
diff --git a/src/submissions/backend/db/models/controls.py b/src/submissions/backend/db/models/controls.py
index a484be4..6ff8000 100644
--- a/src/submissions/backend/db/models/controls.py
+++ b/src/submissions/backend/db/models/controls.py
@@ -12,7 +12,7 @@ import logging, re
 from operator import itemgetter
 from . import BaseClass
-from tools import setup_lookup, report_result, Result, Report, Settings, get_unique_values_in_df_column
+from tools import setup_lookup, report_result, Result, Report, Settings, get_unique_values_in_df_column, super_splitter
 from datetime import date, datetime, timedelta
 from typing import List, Literal, Tuple, Generator
 from dateutil.parser import parse
@@ -81,31 +81,33 @@ class ControlType(BaseClass):
             return []
         # NOTE: remove items that don't have relevant data
         subtypes = [item for item in jsoner[genera] if "_hashes" not in item and "_ratio" not in item]
-        logger.debug(f"subtypes out: {pformat(subtypes)}")
+        # logger.debug(f"subtypes out: {pformat(subtypes)}")
         return subtypes

     def get_instance_class(self):
         return Control.find_polymorphic_subclass(polymorphic_identity=self.name)

     @classmethod
-    def get_positive_control_types(cls) -> Generator[ControlType, None, None]:
+    def get_positive_control_types(cls, control_type: str) -> Generator[str, None, None]:
         """
         Gets list of Control types if they have targets

         Returns:
             List[ControlType]: Control types that have targets
         """
-        return (item for item in cls.query() if item.targets)
+        ct = cls.query(name=control_type).targets
+        return (item for item in ct.keys() if ct[item])

     @classmethod
-    def build_positive_regex(cls) -> Pattern:
+    def build_positive_regex(cls, control_type: str) -> Pattern:
         """
         Creates a re.Pattern that will look for positive control types

         Returns:
             Pattern: Constructed pattern
         """
-        strings = list(set([item.name.split("-")[0] for item in cls.get_positive_control_types()]))
+        # strings = list(set([item.name.split("-")[0] for item in cls.get_positive_control_types()]))
+        strings = list(set([super_splitter(item, "-", 0) for item in cls.get_positive_control_types(control_type)]))
         return re.compile(rf"(^{'|^'.join(strings)})-.*", flags=re.IGNORECASE)

@@ -298,7 +300,8 @@ class PCRControl(Control):
             parent.mode_typer.clear()
             parent.mode_typer.setEnabled(False)
         report = Report()
-        controls = cls.query(sub_type=chart_settings['sub_type'], start_date=chart_settings['start_date'], end_date=chart_settings['end_date'])
+        controls = cls.query(sub_type=chart_settings['sub_type'], start_date=chart_settings['start_date'],
+                             end_date=chart_settings['end_date'])
         data = [control.to_sub_dict() for control in controls]
         df = DataFrame.from_records(data)
         try:
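Reviewer note: a minimal sketch of what the reworked `build_positive_regex` now produces, using `super_splitter` (added to `tools/__init__.py` at the bottom of this diff). The target names here are hypothetical; the real ones come from the named `ControlType`'s `targets` mapping.

```python
import re

def super_splitter(input: str, ioi: str, idx: int) -> str:
    # Mirrors the helper added in tools/__init__.py.
    try:
        return input.split(ioi)[idx]
    except IndexError:
        return input

# Hypothetical targets for an "Irida Control" type.
targets = ["ATCC49226-A", "ATCC49619-B"]
strings = list(set([super_splitter(item, "-", 0) for item in targets]))
pattern = re.compile(rf"(^{'|^'.join(strings)})-.*", flags=re.IGNORECASE)

assert pattern.match("atcc49226-20241003")  # prefixes match case-insensitively
assert pattern.match("EN-123") is None      # non-control names fall through
```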
diff --git a/src/submissions/backend/db/models/kits.py b/src/submissions/backend/db/models/kits.py
index fc5e539..df79404 100644
--- a/src/submissions/backend/db/models/kits.py
+++ b/src/submissions/backend/db/models/kits.py
@@ -198,7 +198,7 @@ class KitType(BaseClass):
         # logger.debug("Get all KitTypeReagentTypeAssociation for SubmissionType")
         for assoc in assocs:
             try:
-                logger.debug(f"Yielding: {assoc.reagent_role.name}, {assoc.uses}")
+                # logger.debug(f"Yielding: {assoc.reagent_role.name}, {assoc.uses}")
                 yield assoc.reagent_role.name, assoc.uses
             except TypeError:
                 continue
@@ -1156,7 +1156,7 @@ class KitTypeReagentRoleAssociation(BaseClass):
                 base_dict[k] = v
         return base_dict

-    def get_all_relevant_reagents(self) -> Generator[Reagent, None, None]:
+    def get_all_relevant_reagents(self, override: Reagent | None = None) -> Generator[Reagent, None, None]:
         """
         Creates a generator that will resolve into a list filling the role associated with this object.
diff --git a/src/submissions/backend/db/models/organizations.py b/src/submissions/backend/db/models/organizations.py
index a784fc7..31c2e97 100644
--- a/src/submissions/backend/db/models/organizations.py
+++ b/src/submissions/backend/db/models/organizations.py
@@ -102,7 +102,6 @@ class Organization(BaseClass):
         else:
             raise Exception(f"Filetype {filepath.suffix} not supported.")
         data = import_dict['orgs']
-        logger.debug(pformat(import_dict))
         for org in data:
             organ = Organization.query(name=org['name'])
             if organ is None:
diff --git a/src/submissions/backend/db/models/submissions.py b/src/submissions/backend/db/models/submissions.py
index e3cf596..6bcfc5b 100644
--- a/src/submissions/backend/db/models/submissions.py
+++ b/src/submissions/backend/db/models/submissions.py
@@ -235,7 +235,7 @@ class BasicSubmission(BaseClass):
         Returns:
             SubmissionType: SubmissionType with name equal to this polymorphic identity
         """
-        logger.debug(f"Running search for {sub_type}")
+        # logger.debug(f"Running search for {sub_type}")
         if isinstance(sub_type, dict):
             try:
                 sub_type = sub_type['value']
@@ -521,7 +521,7 @@ class BasicSubmission(BaseClass):
         Returns:
             pd.DataFrame: Pandas Dataframe of all relevant submissions
         """
-        logger.debug(f"Querying Type: {submission_type}")
+        # logger.debug(f"Querying Type: {submission_type}")
         # logger.debug(f"Using limit: {limit}")
         # NOTE: use lookup function to create list of dicts
         subs = [item.to_dict() for item in
@@ -827,7 +827,7 @@ class BasicSubmission(BaseClass):
         return input_dict

     @classmethod
-    def custom_validation(cls, pyd: "PydSubmission") -> dict:
+    def custom_validation(cls, pyd: "PydSubmission") -> "PydSubmission":
         """
         Performs any final custom parsing of the excel file.

@@ -1412,14 +1412,16 @@ class BacterialCulture(BasicSubmission):
             dict: Updated dictionary.
         """
         from . import ControlType
+        # logger.debug(f"\n\nHello from BacterialCulture custom_validation")
         pyd = super().custom_validation(pyd)
         # NOTE: build regex for all control types that have targets
-        regex = ControlType.build_positive_regex()
+        regex = ControlType.build_positive_regex(control_type="Irida Control")
+        logger.debug(regex)
         # NOTE: search samples for match
         for sample in pyd.samples:
             matched = regex.match(sample.submitter_id)
             if bool(matched):
-                # logger.debug(f"Control match found: {sample['submitter_id']}")
+                # logger.debug(f"Control match found: {sample.submitter_id}")
                 new_lot = matched.group()
                 try:
                     pos_control_reg = \
@@ -1429,6 +1431,7 @@ class BacterialCulture(BasicSubmission):
                     return pyd
                 pos_control_reg.lot = new_lot
                 pos_control_reg.missing = False
+                # logger.debug(f"Got positive control: {pos_control_reg}")
         return pyd

     @classmethod
@@ -1785,6 +1788,7 @@ class WastewaterArtic(BasicSubmission):
                 ii in range(source_plates_section['start_row'], source_plates_section['end_row'] + 1)]
         for datum in data:
+            logger.debug(f"Datum: {datum}")
             if datum['plate'] in ["None", None, ""]:
                 continue
             else:
@@ -1869,7 +1873,13 @@ class WastewaterArtic(BasicSubmission):
             dict: Updated sample dictionary
         """
         input_dict = super().parse_samples(input_dict)
+        logger.debug(f"WWA input dict: {pformat(input_dict)}")
         input_dict['sample_type'] = "Wastewater Sample"
+        # NOTE: Stop-gap solution because WW is sloppy with their naming schemes
+        try:
+            input_dict['source_plate'] = input_dict['source_plate'].replace("WW20", "WW-20")
+        except KeyError:
+            pass
         # NOTE: Because generate_sample_object needs the submitter_id and the artic has the "({origin well})"
         # at the end, this has to be done here. No moving to sqlalchemy object :(
         input_dict['submitter_id'] = re.sub(r"\s\(.+\)\s?$", "", str(input_dict['submitter_id'])).strip()
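For reviewers: the two normalizations `WastewaterArtic.parse_samples` now applies, shown in isolation (the sample values are hypothetical):

```python
import re

input_dict = {"submitter_id": "EN123-241003 (A1)", "source_plate": "RSL-WW20241003"}

# Stop-gap for inconsistent plate naming: "WW20..." becomes "WW-20...".
input_dict['source_plate'] = input_dict['source_plate'].replace("WW20", "WW-20")

# Strip the trailing "({origin well})" so submitter_id matches the database.
input_dict['submitter_id'] = re.sub(r"\s\(.+\)\s?$", "", str(input_dict['submitter_id'])).strip()

print(input_dict)
# {'submitter_id': 'EN123-241003', 'source_plate': 'RSL-WW-20241003'}
```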
""" - logger.debug("\n\nHello from ReagentParser!\n\n") + logger.info("\n\nHello from ReagentParser!\n\n") if isinstance(submission_type, str): submission_type = SubmissionType.query(name=submission_type) self.submission_type_obj = submission_type @@ -303,11 +303,11 @@ class ReagentParser(object): for sheet in self.xl.sheetnames: ws = self.xl[sheet] relevant = {k.strip(): v for k, v in self.map.items() if sheet in self.map[k]['sheet']} - logger.debug(f"relevant map for {sheet}: {pformat(relevant)}") + # logger.debug(f"relevant map for {sheet}: {pformat(relevant)}") if relevant == {}: continue for item in relevant: - logger.debug(f"Attempting to scrape: {item}") + # logger.debug(f"Attempting to scrape: {item}") try: reagent = relevant[item] name = ws.cell(row=reagent['name']['row'], column=reagent['name']['column']).value @@ -325,10 +325,10 @@ class ReagentParser(object): missing = False else: missing = True - logger.debug(f"Got lot for {item}-{name}: {lot} as {type(lot)}") + # logger.debug(f"Got lot for {item}-{name}: {lot} as {type(lot)}") lot = str(lot) - logger.debug( - f"Going into pydantic: name: {name}, lot: {lot}, expiry: {expiry}, type: {item.strip()}, comment: {comment}") + # logger.debug( + # f"Going into pydantic: name: {name}, lot: {lot}, expiry: {expiry}, type: {item.strip()}, comment: {comment}") try: check = name.lower() != "not applicable" except AttributeError: @@ -353,12 +353,12 @@ class SampleParser(object): sample_map (dict | None, optional): Locations in database where samples are found. Defaults to None. sub_object (BasicSubmission | None, optional): Submission object holding methods. Defaults to None. """ - logger.debug("\n\nHello from SampleParser!\n\n") + logger.info("\n\nHello from SampleParser!\n\n") self.samples = [] self.xl = xl if isinstance(submission_type, str): submission_type = SubmissionType.query(name=submission_type) - logger.debug(f"Sample parser is using submission type: {submission_type}") + # logger.debug(f"Sample parser is using submission type: {submission_type}") self.submission_type = submission_type.name self.submission_type_obj = submission_type if sub_object is None: @@ -456,40 +456,49 @@ class SampleParser(object): List[dict]: Reconciled samples """ if not self.plate_map_samples or not self.lookup_samples: - logger.error(f"No separate samples, returning") - self.samples = self.lookup_samples or self.plate_map_samples - return - merge_on_id = self.sample_info_map['lookup_table']['merge_on_id'] - plate_map_samples = sorted(copy(self.plate_map_samples), key=lambda d: d['id']) - lookup_samples = sorted(copy(self.lookup_samples), key=lambda d: d[merge_on_id]) - for ii, psample in enumerate(plate_map_samples): - # NOTE: See if we can do this the easy way and just use the same list index. 
         if not self.plate_map_samples or not self.lookup_samples:
-            logger.error(f"No separate samples, returning")
-            self.samples = self.lookup_samples or self.plate_map_samples
-            return
-        merge_on_id = self.sample_info_map['lookup_table']['merge_on_id']
-        plate_map_samples = sorted(copy(self.plate_map_samples), key=lambda d: d['id'])
-        lookup_samples = sorted(copy(self.lookup_samples), key=lambda d: d[merge_on_id])
-        for ii, psample in enumerate(plate_map_samples):
-            # NOTE: See if we can do this the easy way and just use the same list index.
-            try:
-                check = psample['id'] == lookup_samples[ii][merge_on_id]
-            except (KeyError, IndexError):
-                check = False
-            if check:
-                # logger.debug(f"Direct match found for {psample['id']}")
-                new = lookup_samples[ii] | psample
-                lookup_samples[ii] = {}
-            else:
-                logger.warning(f"Match for {psample['id']} not direct, running search.")
-                for jj, lsample in enumerate(lookup_samples):
-                    try:
-                        check = lsample[merge_on_id] == psample['id']
-                    except KeyError:
-                        check = False
-                    if check:
-                        new = lsample | psample
-                        lookup_samples[jj] = {}
-                        break
-                else:
-                    new = psample
-            if not check_key_or_attr(key='submitter_id', interest=new, check_none=True):
-                new['submitter_id'] = psample['id']
-            new = self.sub_object.parse_samples(new)
-            del new['id']
-            yield new
+            logger.warning(f"No separate samples")
+            samples = self.lookup_samples or self.plate_map_samples
+            for new in samples:
+                if not check_key_or_attr(key='submitter_id', interest=new, check_none=True):
+                    new['submitter_id'] = new['id']
+                new = self.sub_object.parse_samples(new)
+                try:
+                    del new['id']
+                except KeyError:
+                    pass
+                yield new
+        else:
+            merge_on_id = self.sample_info_map['lookup_table']['merge_on_id']
+            plate_map_samples = sorted(copy(self.plate_map_samples), key=lambda d: d['id'])
+            lookup_samples = sorted(copy(self.lookup_samples), key=lambda d: d[merge_on_id])
+            for ii, psample in enumerate(plate_map_samples):
+                # NOTE: See if we can do this the easy way and just use the same list index.
+                try:
+                    check = psample['id'] == lookup_samples[ii][merge_on_id]
+                except (KeyError, IndexError):
+                    check = False
+                if check:
+                    # logger.debug(f"Direct match found for {psample['id']}")
+                    new = lookup_samples[ii] | psample
+                    lookup_samples[ii] = {}
+                else:
+                    logger.warning(f"Match for {psample['id']} not direct, running search.")
+                    for jj, lsample in enumerate(lookup_samples):
+                        try:
+                            check = lsample[merge_on_id] == psample['id']
+                        except KeyError:
+                            check = False
+                        if check:
+                            new = lsample | psample
+                            lookup_samples[jj] = {}
+                            break
+                    else:
+                        new = psample
+                if not check_key_or_attr(key='submitter_id', interest=new, check_none=True):
+                    new['submitter_id'] = psample['id']
+                new = self.sub_object.parse_samples(new)
+                del new['id']
+                yield new


 class EquipmentParser(object):
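The rewritten branch above keeps the original merge logic: each plate-map entry is unioned onto its lookup-table row with the dict `|` operator (Python 3.9+), and the consumed lookup row is blanked so it cannot match twice. A toy run with hypothetical sample dicts:

```python
lookup = {"sample_id": "EN1", "submitter_id": "EN1", "concentration": 1.2}
plate_map = {"id": "EN1", "row": 1, "column": 1}

# The right-hand operand wins key collisions, so plate-map values take precedence.
new = lookup | plate_map
print(new)
# {'sample_id': 'EN1', 'submitter_id': 'EN1', 'concentration': 1.2, 'id': 'EN1', 'row': 1, 'column': 1}
```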
diff --git a/src/submissions/backend/validators/__init__.py b/src/submissions/backend/validators/__init__.py
index ec67eac..58f853d 100644
--- a/src/submissions/backend/validators/__init__.py
+++ b/src/submissions/backend/validators/__init__.py
@@ -49,12 +49,13 @@ class RSLNamer(object):
             str: parsed submission type
         """
         def st_from_path(filename: Path) -> str:
-            logger.debug(f"Using path method for {filename}.")
+            logger.info(f"Using path method for {filename}.")
             if filename.exists():
                 wb = load_workbook(filename)
                 try:
                     # NOTE: Gets first category in the metadata.
-                    submission_type = next(item.strip().title() for item in wb.properties.category.split(";"))
+                    categories = wb.properties.category.split(";")
+                    submission_type = next(item.strip().title() for item in categories)
                 except (StopIteration, AttributeError):
                     sts = {item.name: item.get_template_file_sheets() for item in SubmissionType.query()}
                     try:
@@ -67,12 +68,12 @@ class RSLNamer(object):
             return submission_type

         def st_from_str(filename: str) -> str:
             regex = BasicSubmission.construct_regex()
-            logger.debug(f"Using string method for {filename}.")
-            logger.debug(f"Using regex: {regex}")
+            logger.info(f"Using string method for {filename}.")
+            # logger.debug(f"Using regex: {regex}")
             m = regex.search(filename)
             try:
                 submission_type = m.lastgroup
-                logger.debug(f"Got submission type: {submission_type}")
+                # logger.debug(f"Got submission type: {submission_type}")
             except AttributeError as e:
                 submission_type = None
                 logger.critical(f"No submission type found or submission type found!: {e}")
@@ -118,7 +119,7 @@ class RSLNamer(object):
             regex = re.compile(rf'{regex}', re.IGNORECASE | re.VERBOSE)
         except re.error as e:
             regex = BasicSubmission.construct_regex()
-        logger.debug(f"Using regex: {regex}")
+        logger.info(f"Using regex: {regex}")
         match filename:
             case Path():
                 m = regex.search(filename.stem)
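For context, `st_from_path` reads the submission type out of the workbook's document properties; the split-then-`next` simply takes the first semicolon-separated category, and an unset `category` raises `AttributeError` into the fallback branch. A sketch with a hypothetical file:

```python
from pathlib import Path
from openpyxl import load_workbook

filename = Path("submission.xlsx")  # hypothetical path
wb = load_workbook(filename)
# "Bacterial Culture;Other" -> "Bacterial Culture"
categories = wb.properties.category.split(";")
submission_type = next(item.strip().title() for item in categories)
```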
diff --git a/src/submissions/backend/validators/pydant.py b/src/submissions/backend/validators/pydant.py
index 98946cc..6831ffd 100644
--- a/src/submissions/backend/validators/pydant.py
+++ b/src/submissions/backend/validators/pydant.py
@@ -57,14 +57,14 @@ class PydReagent(BaseModel):
     @classmethod
     def rescue_lot_string(cls, value):
         if value is not None:
-            return convert_nans_to_nones(str(value))
+            return convert_nans_to_nones(str(value).strip())
         return value

     @field_validator("lot")
     @classmethod
     def enforce_lot_string(cls, value):
         if value is not None:
-            return value.upper()
+            return value.upper().strip()
         return value

     @field_validator("expiry", mode="before")
@@ -97,9 +97,9 @@ class PydReagent(BaseModel):
     @classmethod
     def enforce_name(cls, value, values):
         if value is not None:
-            return convert_nans_to_nones(str(value))
+            return convert_nans_to_nones(str(value).strip())
         else:
-            return values.data['role']
+            return values.data['role'].strip()

     def improved_dict(self) -> dict:
         """
         Constructs a dictionary consisting of model.fields and model.extras
@@ -210,6 +210,18 @@ class PydSample(BaseModel, extra='allow'):
     def int_to_str(cls, value):
         return str(value)

+    @field_validator("submitter_id")
+    @classmethod
+    def strip_sub_id(cls, value):
+        match value:
+            case dict():
+                value['value'] = value['value'].strip().upper()
+            case str():
+                value = value.strip().upper()
+            case _:
+                pass
+        return value
+
     def improved_dict(self) -> dict:
         """
         Constructs a dictionary consisting of model.fields and model.extras
@@ -439,6 +451,7 @@ class PydSubmission(BaseModel, extra='allow'):
         if value['value'] in [None, "None"]:
             return dict(value=uuid.uuid4().hex.upper(), missing=True)
         else:
+            value['value'] = value['value'].strip()
             return value

     @field_validator("submitted_date", mode="before")
@@ -523,6 +536,7 @@ class PydSubmission(BaseModel, extra='allow'):
         # logger.debug(f"RSL-plate initial value: {value['value']} and other values: {values.data}")
         sub_type = values.data['submission_type']['value']
         if check_not_nan(value['value']):
+            value['value'] = value['value'].strip()
             return value
         else:
             # logger.debug("Constructing plate sub_type.")
@@ -808,6 +822,7 @@ class PydSubmission(BaseModel, extra='allow'):
         for sample in self.samples:
             sample, associations, _ = sample.toSQL(submission=instance)
             # logger.debug(f"Sample SQL object to be added to submission: {sample.__dict__}")
+            logger.debug(associations)
             for assoc in associations:
                 if assoc is not None and assoc not in instance.submission_sample_associations:
                     instance.submission_sample_associations.append(assoc)
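The new `strip_sub_id` validator accepts submitter IDs either as bare strings or wrapped in `{'value': ...}` dicts. A trimmed-down stand-in for `PydSample` (only this field is modelled, so this is a sketch rather than the real class):

```python
from pydantic import BaseModel, field_validator

class MiniSample(BaseModel, extra='allow'):
    submitter_id: str | dict

    @field_validator("submitter_id")
    @classmethod
    def strip_sub_id(cls, value):
        match value:
            case dict():
                value['value'] = value['value'].strip().upper()
            case str():
                value = value.strip().upper()
            case _:
                pass
        return value

print(MiniSample(submitter_id=" en123-abc ").submitter_id)         # EN123-ABC
print(MiniSample(submitter_id={"value": " en123 "}).submitter_id)  # {'value': 'EN123'}
```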
diff --git a/src/submissions/frontend/visualizations/__init__.py b/src/submissions/frontend/visualizations/__init__.py
index 56d54df..3c247fa 100644
--- a/src/submissions/frontend/visualizations/__init__.py
+++ b/src/submissions/frontend/visualizations/__init__.py
@@ -10,9 +10,12 @@ from frontend.widgets.functions import select_save_file


 class CustomFigure(Figure):
+    df = None
+
     def __init__(self, df: pd.DataFrame, modes: list, ytitle: str | None = None, parent: QWidget | None = None,
                  months: int = 6):
         super().__init__()
+        self.df = df

     def save_figure(self, group_name: str = "plotly_output", parent: QWidget | None = None):
         """
@@ -28,6 +31,11 @@ class CustomFigure(Figure):
         output = select_save_file(obj=parent, default_name=group_name, extension="png")
         self.write_image(output.absolute().__str__(), engine="kaleido")

+    def save_data(self, group_name: str = "plotly_export", parent: QWidget | None = None):
+        output = select_save_file(obj=parent, default_name=group_name, extension="xlsx")
+        self.df.to_excel(output.absolute().__str__(), engine="openpyxl", index=False)
+
+
     def to_html(self) -> str:
         """
         Creates final html code from plotly
diff --git a/src/submissions/frontend/visualizations/pcr_charts.py b/src/submissions/frontend/visualizations/pcr_charts.py
index 77bc024..06a119b 100644
--- a/src/submissions/frontend/visualizations/pcr_charts.py
+++ b/src/submissions/frontend/visualizations/pcr_charts.py
@@ -21,6 +21,7 @@ class PCRFigure(CustomFigure):
     def __init__(self, df: pd.DataFrame, modes: list, ytitle: str | None = None, parent: QWidget | None = None,
                  months: int = 6):
         super().__init__(df=df, modes=modes)
+        logger.debug(f"DF: {self.df}")
         self.construct_chart(df=df)
         # self.generic_figure_markers(modes=modes, ytitle=ytitle, months=months)
diff --git a/src/submissions/frontend/widgets/controls_chart.py b/src/submissions/frontend/widgets/controls_chart.py
index a39f15f..271057a 100644
--- a/src/submissions/frontend/widgets/controls_chart.py
+++ b/src/submissions/frontend/widgets/controls_chart.py
@@ -17,7 +17,7 @@ from PyQt6.QtCore import QDate, QSize
 import logging
 from pandas import DataFrame
 from tools import Report, Result, get_unique_values_in_df_column, Settings, report_result
-from frontend.visualizations import IridaFigure, PCRFigure
+from frontend.visualizations import IridaFigure, PCRFigure, CustomFigure
 from .misc import StartEndDatePicker

 logger = logging.getLogger(f"submissions.{__name__}")

@@ -54,11 +54,13 @@ class ControlsViewer(QWidget):
         self.layout.addWidget(self.datepicker, 0, 0, 1, 2)
         self.save_button = QPushButton("Save Chart", parent=self)
         self.layout.addWidget(self.save_button, 0, 2, 1, 1)
-        self.layout.addWidget(self.control_sub_typer, 1, 0, 1, 3)
-        self.layout.addWidget(self.mode_typer, 2, 0, 1, 3)
-        self.layout.addWidget(self.mode_sub_typer, 3, 0, 1, 3)
+        self.export_button = QPushButton("Save Data", parent=self)
+        self.layout.addWidget(self.export_button, 0, 3, 1, 1)
+        self.layout.addWidget(self.control_sub_typer, 1, 0, 1, 4)
+        self.layout.addWidget(self.mode_typer, 2, 0, 1, 4)
+        self.layout.addWidget(self.mode_sub_typer, 3, 0, 1, 4)
         self.archetype.get_instance_class().make_parent_buttons(parent=self)
-        self.layout.addWidget(self.webengineview, self.layout.rowCount(), 0, 1, 3)
+        self.layout.addWidget(self.webengineview, self.layout.rowCount(), 0, 1, 4)
         self.setLayout(self.layout)
         self.controls_getter_function()
         self.control_sub_typer.currentIndexChanged.connect(self.controls_getter_function)
@@ -66,11 +68,15 @@ class ControlsViewer(QWidget):
         self.datepicker.start_date.dateChanged.connect(self.controls_getter_function)
         self.datepicker.end_date.dateChanged.connect(self.controls_getter_function)
         self.save_button.pressed.connect(self.save_chart_function)
+        self.export_button.pressed.connect(self.save_data_function)

     def save_chart_function(self):
         self.fig.save_figure(parent=self)

+    def save_data_function(self):
+        self.fig.save_data(parent=self)
+
     # def controls_getter(self):
     #     """
     #     Lookup controls from database and send to chartmaker
@@ -152,10 +158,9 @@ class ControlsViewer(QWidget):
                                                  mode=self.mode, sub_mode=self.mode_sub_type, parent=self, months=months)
         _, self.fig = self.archetype.get_instance_class().make_chart(chart_settings=chart_settings, parent=self,
                                                                      ctx=self.app.ctx)
-        # if isinstance(self.fig, IridaFigure):
-        #     self.save_button.setEnabled(True)
+        if issubclass(self.fig.__class__, CustomFigure):
+            self.save_button.setEnabled(True)
         # logger.debug(f"Updating figure...")
-        # self.fig = fig
         # NOTE: construct html for webview
         html = self.fig.to_html()
         # logger.debug(f"The length of html code is: {len(html)}")
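One stylistic note on the guard above: `issubclass(self.fig.__class__, CustomFigure)` is equivalent to `isinstance(self.fig, CustomFigure)`, and either form covers `IridaFigure` and `PCRFigure`. This is also the switch behind the new export path: `export_button` → `save_data_function` → `CustomFigure.save_data` → `DataFrame.to_excel`. A minimal check with stub classes (not the real ones):

```python
class CustomFigure: ...
class PCRFigure(CustomFigure): ...

fig = PCRFigure()
# isinstance already walks the class hierarchy, so both checks agree.
assert issubclass(fig.__class__, CustomFigure)
assert isinstance(fig, CustomFigure)
```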
@@ -164,164 +169,164 @@ class ControlsViewer(QWidget):
         # logger.debug("Figure updated... I hope.")
         return report

-    def convert_data_list_to_df(self, input_df: list[dict]) -> DataFrame:
-        """
-        Convert list of control records to dataframe
-
-        Args:
-            ctx (dict): settings passed from gui
-            input_df (list[dict]): list of dictionaries containing records
-            mode_sub_type (str | None, optional): sub_type of submission type. Defaults to None.
-
-        Returns:
-            DataFrame: dataframe of controls
-        """
-
-        df = DataFrame.from_records(input_df)
-        safe = ['name', 'submitted_date', 'genus', 'target']
-        for column in df.columns:
-            if column not in safe:
-                if self.mode_sub_type is not None and column != self.mode_sub_type:
-                    continue
-                else:
-                    safe.append(column)
-            if "percent" in column:
-                # count_col = [item for item in df.columns if "count" in item][0]
-                try:
-                    count_col = next(item for item in df.columns if "count" in item)
-                except StopIteration:
-                    continue
-                # NOTE: The actual percentage from kraken was off due to exclusion of NaN, recalculating.
-                df[column] = 100 * df[count_col] / df.groupby('name')[count_col].transform('sum')
-        df = df[[c for c in df.columns if c in safe]]
-        # NOTE: move date of sample submitted on same date as previous ahead one.
-        df = self.displace_date(df=df)
-        # NOTE: ad hoc method to make data labels more accurate.
-        df = self.df_column_renamer(df=df)
-        return df
-
-    def df_column_renamer(self, df: DataFrame) -> DataFrame:
-        """
-        Ad hoc function I created to clarify some fields
-
-        Args:
-            df (DataFrame): input dataframe
-
-        Returns:
-            DataFrame: dataframe with 'clarified' column names
-        """
-        df = df[df.columns.drop(list(df.filter(regex='_hashes')))]
-        return df.rename(columns={
-            "contains_ratio": "contains_shared_hashes_ratio",
-            "matches_ratio": "matches_shared_hashes_ratio",
-            "kraken_count": "kraken2_read_count_(top_50)",
-            "kraken_percent": "kraken2_read_percent_(top_50)"
-        })
-
-    def displace_date(self, df: DataFrame) -> DataFrame:
-        """
-        This function serves to split samples that were submitted on the same date by incrementing dates.
-        It will shift the date forward by one day if it is the same day as an existing date in a list.
-
-        Args:
-            df (DataFrame): input dataframe composed of control records
-
-        Returns:
-            DataFrame: output dataframe with dates incremented.
-        """
-        # logger.debug(f"Unique items: {df['name'].unique()}")
-        # NOTE: get submitted dates for each control
-        dict_list = [dict(name=item, date=df[df.name == item].iloc[0]['submitted_date']) for item in
-                     sorted(df['name'].unique())]
-        previous_dates = set()
-        # for _, item in enumerate(dict_list):
-        for item in dict_list:
-            df, previous_dates = self.check_date(df=df, item=item, previous_dates=previous_dates)
-        return df
-
-    def check_date(self, df: DataFrame, item: dict, previous_dates: set) -> Tuple[DataFrame, list]:
-        """
-        Checks if an items date is already present in df and adjusts df accordingly
-
-        Args:
-            df (DataFrame): input dataframe
-            item (dict): control for checking
-            previous_dates (list): list of dates found in previous controls
-
-        Returns:
-            Tuple[DataFrame, list]: Output dataframe and appended list of previous dates
-        """
-        try:
-            check = item['date'] in previous_dates
-        except IndexError:
-            check = False
-        previous_dates.add(item['date'])
-        if check:
-            # logger.debug(f"We found one! Increment date!\n\t{item['date']} to {item['date'] + timedelta(days=1)}")
-            # NOTE: get df locations where name == item name
-            mask = df['name'] == item['name']
-            # NOTE: increment date in dataframe
-            df.loc[mask, 'submitted_date'] = df.loc[mask, 'submitted_date'].apply(lambda x: x + timedelta(days=1))
-            item['date'] += timedelta(days=1)
-            passed = False
-        else:
-            passed = True
-        # logger.debug(f"\n\tCurrent date: {item['date']}\n\tPrevious dates:{previous_dates}")
-        # logger.debug(f"DF: {type(df)}, previous_dates: {type(previous_dates)}")
-        # NOTE: if run didn't lead to changed date, return values
-        if passed:
-            # logger.debug(f"Date check passed, returning.")
-            return df, previous_dates
-        # NOTE: if date was changed, rerun with new date
-        else:
-            logger.warning(f"Date check failed, running recursion")
-            df, previous_dates = self.check_date(df, item, previous_dates)
-            return df, previous_dates
-
-    def prep_df(self, ctx: Settings, df: DataFrame) -> Tuple[DataFrame, list]:
-        """
-        Constructs figures based on parsed pandas dataframe.
-
-        Args:
-            ctx (Settings): settings passed down from gui
-            df (pd.DataFrame): input dataframe
-            ytitle (str | None, optional): title for the y-axis. Defaults to None.
-
-        Returns:
-            Figure: Plotly figure
-        """
-        # NOTE: converts starred genera to normal and splits off list of starred
-        if df.empty:
-            return None
-        df['genus'] = df['genus'].replace({'\*': ''}, regex=True).replace({"NaN": "Unknown"})
-        df['genera'] = [item[-1] if item and item[-1] == "*" else "" for item in df['genus'].to_list()]
-        # NOTE: remove original runs, using reruns if applicable
-        df = self.drop_reruns_from_df(ctx=ctx, df=df)
-        # NOTE: sort by and exclude from
-        sorts = ['submitted_date', "target", "genus"]
-        exclude = ['name', 'genera']
-        modes = [item for item in df.columns if item not in sorts and item not in exclude]
-        # NOTE: Set descending for any columns that have "{mode}" in the header.
-        ascending = [False if item == "target" else True for item in sorts]
-        df = df.sort_values(by=sorts, ascending=ascending)
-        # logger.debug(df[df.isna().any(axis=1)])
-        # NOTE: actual chart construction is done by
-        return df, modes
-
-    def drop_reruns_from_df(self, ctx: Settings, df: DataFrame) -> DataFrame:
-        """
-        Removes semi-duplicates from dataframe after finding sequencing repeats.
-
-        Args:
-            settings (dict): settings passed from gui
-            df (DataFrame): initial dataframe
-
-        Returns:
-            DataFrame: dataframe with originals removed in favour of repeats.
-        """
-        if 'rerun_regex' in ctx:
-            sample_names = get_unique_values_in_df_column(df, column_name="name")
-            rerun_regex = re.compile(fr"{ctx.rerun_regex}")
-            exclude = [re.sub(rerun_regex, "", sample) for sample in sample_names if rerun_regex.search(sample)]
-            df = df[df.name not in exclude]
-        return df
+    # def convert_data_list_to_df(self, input_df: list[dict]) -> DataFrame:
+    #     """
+    #     Convert list of control records to dataframe
+    #
+    #     Args:
+    #         ctx (dict): settings passed from gui
+    #         input_df (list[dict]): list of dictionaries containing records
+    #         mode_sub_type (str | None, optional): sub_type of submission type. Defaults to None.
+    #
+    #     Returns:
+    #         DataFrame: dataframe of controls
+    #     """
+    #
+    #     df = DataFrame.from_records(input_df)
+    #     safe = ['name', 'submitted_date', 'genus', 'target']
+    #     for column in df.columns:
+    #         if column not in safe:
+    #             if self.mode_sub_type is not None and column != self.mode_sub_type:
+    #                 continue
+    #             else:
+    #                 safe.append(column)
+    #         if "percent" in column:
+    #             # count_col = [item for item in df.columns if "count" in item][0]
+    #             try:
+    #                 count_col = next(item for item in df.columns if "count" in item)
+    #             except StopIteration:
+    #                 continue
+    #             # NOTE: The actual percentage from kraken was off due to exclusion of NaN, recalculating.
+    #             df[column] = 100 * df[count_col] / df.groupby('name')[count_col].transform('sum')
+    #     df = df[[c for c in df.columns if c in safe]]
+    #     # NOTE: move date of sample submitted on same date as previous ahead one.
+    #     df = self.displace_date(df=df)
+    #     # NOTE: ad hoc method to make data labels more accurate.
+    #     df = self.df_column_renamer(df=df)
+    #     return df
+    #
+    # def df_column_renamer(self, df: DataFrame) -> DataFrame:
+    #     """
+    #     Ad hoc function I created to clarify some fields
+    #
+    #     Args:
+    #         df (DataFrame): input dataframe
+    #
+    #     Returns:
+    #         DataFrame: dataframe with 'clarified' column names
+    #     """
+    #     df = df[df.columns.drop(list(df.filter(regex='_hashes')))]
+    #     return df.rename(columns={
+    #         "contains_ratio": "contains_shared_hashes_ratio",
+    #         "matches_ratio": "matches_shared_hashes_ratio",
+    #         "kraken_count": "kraken2_read_count_(top_50)",
+    #         "kraken_percent": "kraken2_read_percent_(top_50)"
+    #     })
+    #
+    # def displace_date(self, df: DataFrame) -> DataFrame:
+    #     """
+    #     This function serves to split samples that were submitted on the same date by incrementing dates.
+    #     It will shift the date forward by one day if it is the same day as an existing date in a list.
+    #
+    #     Args:
+    #         df (DataFrame): input dataframe composed of control records
+    #
+    #     Returns:
+    #         DataFrame: output dataframe with dates incremented.
+    #     """
+    #     # logger.debug(f"Unique items: {df['name'].unique()}")
+    #     # NOTE: get submitted dates for each control
+    #     dict_list = [dict(name=item, date=df[df.name == item].iloc[0]['submitted_date']) for item in
+    #                  sorted(df['name'].unique())]
+    #     previous_dates = set()
+    #     # for _, item in enumerate(dict_list):
+    #     for item in dict_list:
+    #         df, previous_dates = self.check_date(df=df, item=item, previous_dates=previous_dates)
+    #     return df
+    #
+    # def check_date(self, df: DataFrame, item: dict, previous_dates: set) -> Tuple[DataFrame, list]:
+    #     """
+    #     Checks if an items date is already present in df and adjusts df accordingly
+    #
+    #     Args:
+    #         df (DataFrame): input dataframe
+    #         item (dict): control for checking
+    #         previous_dates (list): list of dates found in previous controls
+    #
+    #     Returns:
+    #         Tuple[DataFrame, list]: Output dataframe and appended list of previous dates
+    #     """
+    #     try:
+    #         check = item['date'] in previous_dates
+    #     except IndexError:
+    #         check = False
+    #     previous_dates.add(item['date'])
+    #     if check:
+    #         # logger.debug(f"We found one! Increment date!\n\t{item['date']} to {item['date'] + timedelta(days=1)}")
+    #         # NOTE: get df locations where name == item name
+    #         mask = df['name'] == item['name']
+    #         # NOTE: increment date in dataframe
+    #         df.loc[mask, 'submitted_date'] = df.loc[mask, 'submitted_date'].apply(lambda x: x + timedelta(days=1))
+    #         item['date'] += timedelta(days=1)
+    #         passed = False
+    #     else:
+    #         passed = True
+    #     # logger.debug(f"\n\tCurrent date: {item['date']}\n\tPrevious dates:{previous_dates}")
+    #     # logger.debug(f"DF: {type(df)}, previous_dates: {type(previous_dates)}")
+    #     # NOTE: if run didn't lead to changed date, return values
+    #     if passed:
+    #         # logger.debug(f"Date check passed, returning.")
+    #         return df, previous_dates
+    #     # NOTE: if date was changed, rerun with new date
+    #     else:
+    #         logger.warning(f"Date check failed, running recursion")
+    #         df, previous_dates = self.check_date(df, item, previous_dates)
+    #         return df, previous_dates
+    #
+    # def prep_df(self, ctx: Settings, df: DataFrame) -> Tuple[DataFrame, list]:
+    #     """
+    #     Constructs figures based on parsed pandas dataframe.
+    #
+    #     Args:
+    #         ctx (Settings): settings passed down from gui
+    #         df (pd.DataFrame): input dataframe
+    #         ytitle (str | None, optional): title for the y-axis. Defaults to None.
+    #
+    #     Returns:
+    #         Figure: Plotly figure
+    #     """
+    #     # NOTE: converts starred genera to normal and splits off list of starred
+    #     if df.empty:
+    #         return None
+    #     df['genus'] = df['genus'].replace({'\*': ''}, regex=True).replace({"NaN": "Unknown"})
+    #     df['genera'] = [item[-1] if item and item[-1] == "*" else "" for item in df['genus'].to_list()]
+    #     # NOTE: remove original runs, using reruns if applicable
+    #     df = self.drop_reruns_from_df(ctx=ctx, df=df)
+    #     # NOTE: sort by and exclude from
+    #     sorts = ['submitted_date', "target", "genus"]
+    #     exclude = ['name', 'genera']
+    #     modes = [item for item in df.columns if item not in sorts and item not in exclude]
+    #     # NOTE: Set descending for any columns that have "{mode}" in the header.
+    #     ascending = [False if item == "target" else True for item in sorts]
+    #     df = df.sort_values(by=sorts, ascending=ascending)
+    #     # logger.debug(df[df.isna().any(axis=1)])
+    #     # NOTE: actual chart construction is done by
+    #     return df, modes
+    #
+    # def drop_reruns_from_df(self, ctx: Settings, df: DataFrame) -> DataFrame:
+    #     """
+    #     Removes semi-duplicates from dataframe after finding sequencing repeats.
+    #
+    #     Args:
+    #         settings (dict): settings passed from gui
+    #         df (DataFrame): initial dataframe
+    #
+    #     Returns:
+    #         DataFrame: dataframe with originals removed in favour of repeats.
+    #     """
+    #     if 'rerun_regex' in ctx:
+    #         sample_names = get_unique_values_in_df_column(df, column_name="name")
+    #         rerun_regex = re.compile(fr"{ctx.rerun_regex}")
+    #         exclude = [re.sub(rerun_regex, "", sample) for sample in sample_names if rerun_regex.search(sample)]
+    #         df = df[df.name not in exclude]
+    #     return df
diff --git a/src/submissions/frontend/widgets/submission_details.py b/src/submissions/frontend/widgets/submission_details.py
index 903f89f..bccd39a 100644
--- a/src/submissions/frontend/widgets/submission_details.py
+++ b/src/submissions/frontend/widgets/submission_details.py
@@ -76,11 +76,11 @@ class SubmissionDetails(QDialog):
         if "Submission" in title:
             self.btn.setEnabled(True)
             self.export_plate = title.split(" ")[-1]
-            logger.debug(f"Updating export plate to: {self.export_plate}")
+            # logger.debug(f"Updating export plate to: {self.export_plate}")
         else:
             self.btn.setEnabled(False)
         if title == self.webview.history().items()[0].title():
-            logger.debug("Disabling back button")
+            # logger.debug("Disabling back button")
             self.back.setEnabled(False)
         else:
             self.back.setEnabled(True)
@@ -93,7 +93,7 @@ class SubmissionDetails(QDialog):
         Args:
             sample (str): Submitter Id of the sample.
         """
-        logger.debug(f"Details: {sample}")
+        # logger.debug(f"Details: {sample}")
         if isinstance(sample, str):
             sample = BasicSample.query(submitter_id=sample)
         base_dict = sample.to_sub_dict(full_data=True)
diff --git a/src/submissions/frontend/widgets/submission_widget.py b/src/submissions/frontend/widgets/submission_widget.py
index 93e5d41..4be9c65 100644
--- a/src/submissions/frontend/widgets/submission_widget.py
+++ b/src/submissions/frontend/widgets/submission_widget.py
@@ -731,10 +731,8 @@ class SubmissionFormWidget(QWidget):
             self.setEditable(True)
             looked_up_rt = KitTypeReagentRoleAssociation.query(reagent_role=reagent.role,
                                                                kit_type=extraction_kit)
-            # relevant_reagents = [str(item.lot) for item in
-            #                      self.relevant_reagents(assoc=looked_up_rt)]
             relevant_reagents = [str(item.lot) for item in looked_up_rt.get_all_relevant_reagents()]
-            # logger.debug(f"Relevant reagents for {reagent.lot}: {relevant_reagents}")
+            logger.debug(f"Relevant reagents for {reagent}: {relevant_reagents}")
             # NOTE: if reagent in sheet is not found insert it into the front of relevant reagents so it shows
             if str(reagent.lot) not in relevant_reagents:
                 if check_not_nan(reagent.lot):
@@ -749,12 +747,13 @@ class SubmissionFormWidget(QWidget):
                 if isinstance(looked_up_reg, list):
                     looked_up_reg = None
                 # logger.debug(f"Because there was no reagent listed for {reagent.lot}, we will insert the last lot used: {looked_up_reg}")
-                if looked_up_reg is not None:
+                if looked_up_reg:
                     try:
                         relevant_reagents.remove(str(looked_up_reg.lot))
-                        relevant_reagents.insert(0, str(looked_up_reg.lot))
+
                     except ValueError as e:
                         logger.error(f"Error reordering relevant reagents: {e}")
+                    relevant_reagents.insert(0, str(looked_up_reg.lot))
             else:
                 if len(relevant_reagents) > 1:
                     # logger.debug(f"Found {reagent.lot} in relevant reagents: {relevant_reagents}. Moving to front of list.")
@@ -765,26 +764,26 @@ class SubmissionFormWidget(QWidget):
                 else:
                     # logger.debug(f"Found {reagent.lot} in relevant reagents: {relevant_reagents}. But no need to move due to short list.")
                     pass
-            # logger.debug(f"New relevant reagents: {relevant_reagents}")
+            logger.debug(f"New relevant reagents: {relevant_reagents}")
             self.setObjectName(f"lot_{reagent.role}")
             self.addItems(relevant_reagents)
             self.setToolTip(f"Enter lot number for the reagent used for {reagent.role}")
             # self.setStyleSheet(main_form_style)

-        def relevant_reagents(self, assoc: KitTypeReagentRoleAssociation):
-            # logger.debug(f"Attempting lookup of reagents by type: {reagent.type}")
-            lookup = Reagent.query(reagent_role=assoc.reagent_role)
-            try:
-                regex = assoc.uses['exclude_regex']
-            except KeyError:
-                regex = "^$"
-            relevant_reagents = [item for item in lookup if
-                                 not check_regex_match(pattern=regex, check=str(item.lot))]
-            for rel_reagent in relevant_reagents:
-                # # NOTE: extract strings from any sets.
-                # if isinstance(rel_reagent, set):
-                #     for thing in rel_reagent:
-                #         yield thing
-                # elif isinstance(rel_reagent, str):
-                #     yield rel_reagent
-                yield rel_reagent
+        # def relevant_reagents(self, assoc: KitTypeReagentRoleAssociation):
+        #     # logger.debug(f"Attempting lookup of reagents by type: {reagent.type}")
+        #     lookup = Reagent.query(reagent_role=assoc.reagent_role)
+        #     try:
+        #         regex = assoc.uses['exclude_regex']
+        #     except KeyError:
+        #         regex = "^$"
+        #     relevant_reagents = [item for item in lookup if
+        #                          not check_regex_match(pattern=regex, check=str(item.lot))]
+        #     for rel_reagent in relevant_reagents:
+        #         # # NOTE: extract strings from any sets.
+        #         # if isinstance(rel_reagent, set):
+        #         #     for thing in rel_reagent:
+        #         #         yield thing
+        #         # elif isinstance(rel_reagent, str):
+        #         #     yield rel_reagent
+        #         yield rel_reagent
diff --git a/src/submissions/tools/__init__.py b/src/submissions/tools/__init__.py
index d13a0f6..79e35e1 100644
--- a/src/submissions/tools/__init__.py
+++ b/src/submissions/tools/__init__.py
@@ -884,6 +884,13 @@ def yaml_regex_creator(loader, node):
     return f"(?P<{name}>RSL(?:-|_)?{abbr}(?:-|_)?20\d{2}-?\d{2}-?\d{2}(?:(_|-)?\d?([^_0123456789\sA-QS-Z]|$)?R?\d?)?)"


+def super_splitter(input: str, ioi: str, idx: int) -> str:
+    try:
+        return input.split(ioi)[idx]
+    except IndexError:
+        return input
+
+
 ctx = get_config(None)
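Finally, a quick contract check for the `super_splitter` helper defined just above: it behaves like `str.split` with a safety net, returning the original string when the requested piece does not exist. The inputs here are hypothetical:

```python
assert super_splitter("ATCC49226-A", "-", 0) == "ATCC49226"   # normal split
assert super_splitter("ATCC49226", "-", 1) == "ATCC49226"     # index out of range: falls back
assert super_splitter("RSL-WW-2024", "-", 1) == "WW"
```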