From 6f134d387022ab30d82d3432e658b49b4a447748 Mon Sep 17 00:00:00 2001 From: lwark Date: Fri, 25 Jul 2025 08:05:18 -0500 Subject: [PATCH] Moments before disaster. --- src/submissions/backend/db/models/kits.py | 2 +- .../backend/excel/parsers/__init__.py | 103 +++++++++++------- .../excel/parsers/clientsubmission_parser.py | 23 +++- .../results_parsers/pcr_results_parser.py | 2 +- src/submissions/backend/managers/__init__.py | 6 +- .../backend/managers/clientsubmissions.py | 13 ++- .../backend/validators/__init__.py | 2 +- src/submissions/backend/validators/pydant.py | 16 +++ .../frontend/widgets/submission_widget.py | 2 +- 9 files changed, 112 insertions(+), 57 deletions(-) diff --git a/src/submissions/backend/db/models/kits.py b/src/submissions/backend/db/models/kits.py index db3cf6b..5a0a23f 100644 --- a/src/submissions/backend/db/models/kits.py +++ b/src/submissions/backend/db/models/kits.py @@ -1558,7 +1558,7 @@ class Procedure(BaseClass): def to_pydantic(self, **kwargs): from backend.validators.pydant import PydResults, PydReagent output = super().to_pydantic() - print(f"Pydantic output: \n\n{pformat(output.__dict__)}\n\n") + logger.debug(f"Pydantic output: \n\n{pformat(output.__dict__)}\n\n") try: output.kittype = dict(value=output.kittype['name'], missing=False) except KeyError: diff --git a/src/submissions/backend/excel/parsers/__init__.py b/src/submissions/backend/excel/parsers/__init__.py index 463020b..b42dc13 100644 --- a/src/submissions/backend/excel/parsers/__init__.py +++ b/src/submissions/backend/excel/parsers/__init__.py @@ -7,14 +7,16 @@ from pathlib import Path from typing import Generator, Tuple, TYPE_CHECKING from openpyxl.reader.excel import load_workbook +from openpyxl.worksheet.worksheet import Worksheet from pandas import DataFrame from backend.validators import pydant + if TYPE_CHECKING: from backend.db.models import ProcedureType - logger = logging.getLogger(f"submissions.{__name__}") + class DefaultParser(object): def __repr__(self): @@ -32,8 +34,8 @@ class DefaultParser(object): instance.filepath = filepath return instance - - def __init__(self, filepath: Path | str, proceduretype: ProcedureType|None=None, range_dict: dict | None = None, *args, **kwargs): + def __init__(self, filepath: Path | str, proceduretype: ProcedureType | None = None, range_dict: dict | None = None, + *args, **kwargs): """ Args: @@ -43,23 +45,30 @@ class DefaultParser(object): *args (): **kwargs (): """ + + logger.debug(f"\n\nHello from {self.__class__.__name__}\n\n") self.proceduretype = proceduretype try: - self._pyd_object = getattr(pydant, f"Pyd{self.__class__.__name__.replace('Parser', '').replace('Info', '')}") + self._pyd_object = getattr(pydant, + f"Pyd{self.__class__.__name__.replace('Parser', '').replace('Info', '')}") except AttributeError as e: - logger.error(f"Couldn't get pyd object: Pyd{self.__class__.__name__.replace('Parser', '').replace('Info', '')}") + logger.error( + f"Couldn't get pyd object: Pyd{self.__class__.__name__.replace('Parser', '').replace('Info', '')}, using {self.__class__.pyd_name}") self._pyd_object = getattr(pydant, self.__class__.pyd_name) self.workbook = load_workbook(self.filepath, data_only=True) if not range_dict: self.range_dict = self.__class__.default_range_dict else: self.range_dict = range_dict + logger.debug(f"Default parser range dict: {self.range_dict}") for item in self.range_dict: item['worksheet'] = self.workbook[item['sheet']] def to_pydantic(self): - data = {key: value for key, value in self.parsed_info} + # data = {key: value['value'] for key, value in self.parsed_info.items()} + data = self.parsed_info data['filepath'] = self.filepath + return self._pyd_object(**data) @classmethod @@ -69,48 +78,61 @@ class DefaultParser(object): proceduretype = ProcedureType.query(name=proceduretype) return proceduretype + @classmethod + def delineate_end_row(cls, worksheet: Worksheet, start_row: int = 1): + for iii, row in enumerate(worksheet.iter_rows(min_row=start_row), start=1): + if all([item.value is None for item in row]): + return iii + class DefaultKEYVALUEParser(DefaultParser): + # default_range_dict = [dict( + # start_row=2, + # end_row=18, + # key_column=1, + # value_column=2, + # sheet="Sample List" + # )] - default_range_dict = [dict( - start_row=2, - end_row=18, - key_column=1, - value_column=2, - sheet="Sample List" - )] - - @property - def parsed_info(self) -> Generator[Tuple, None, None]: - for item in self.range_dict: - rows = range(item['start_row'], item['end_row'] + 1) - for row in rows: - key = item['worksheet'].cell(row, item['key_column']).value - if key: - # Note: Remove anything in brackets. - key = re.sub(r"\(.*\)", "", key) - key = key.lower().replace(":", "").strip().replace(" ", "_") - value = item['worksheet'].cell(row, item['value_column']).value - missing = False if value else True - location_map = dict(row=row, key_column=item['key_column'], value_column=item['value_column'], sheet=item['sheet']) - value = dict(value=value, location=location_map, missing=missing) - logger.debug(f"Yieldings {value} for {key}") - yield key, value - - -class DefaultTABLEParser(DefaultParser): - - default_range_dict = [dict( - header_row=20, - sheet="Sample List" - )] + # default_range_dict = [dict(sheet="Sample List", start_row=2)] @property def parsed_info(self): for item in self.range_dict: - list_worksheet = self.workbook[item['sheet']] + item['end_row'] = self.delineate_end_row(item['worksheet'], start_row=item['start_row']) + rows = range(item['start_row'], item['end_row']) + # item['start_row'] = item['end_row'] + # del item['end_row'] + for row in rows: + key = item['worksheet'].cell(row, 1).value + if key: + # Note: Remove anything in brackets. + key = re.sub(r"\(.*\)", "", key) + key = key.lower().replace(":", "").strip().replace(" ", "_") + value = item['worksheet'].cell(row, 2).value + missing = False if value else True + location_map = dict(row=row, key_column=1, value_column=2, + sheet=item['sheet']) + value = dict(value=value, location=location_map, missing=missing) + logger.debug(f"Yielding {value} for {key}") + yield key, value + + + +class DefaultTABLEParser(DefaultParser): + default_range_dict = [dict( + header_row=18, + sheet="Sample List" + )] + + @property + def parsed_info(self) -> Generator[dict, None, None]: + for item in self.range_dict: + # list_worksheet = self.workbook[item['sheet']] + list_worksheet = item['worksheet'] if "end_row" in item.keys(): - list_df = DataFrame([item for item in list_worksheet.values][item['header_row'] - 1:item['end_row']-1]) + list_df = DataFrame( + [item for item in list_worksheet.values][item['header_row'] - 1:item['end_row'] - 1]) else: list_df = DataFrame([item for item in list_worksheet.values][item['header_row'] - 1:]) list_df.columns = list_df.iloc[0] @@ -129,5 +151,6 @@ class DefaultTABLEParser(DefaultParser): def to_pydantic(self, **kwargs): return [self._pyd_object(**output) for output in self.parsed_info] + from .clientsubmission_parser import ClientSubmissionSampleParser, ClientSubmissionInfoParser from backend.excel.parsers.results_parsers.pcr_results_parser import PCRInfoParser, PCRSampleParser diff --git a/src/submissions/backend/excel/parsers/clientsubmission_parser.py b/src/submissions/backend/excel/parsers/clientsubmission_parser.py index daf72a6..8a9e2ad 100644 --- a/src/submissions/backend/excel/parsers/clientsubmission_parser.py +++ b/src/submissions/backend/excel/parsers/clientsubmission_parser.py @@ -50,7 +50,7 @@ class SubmissionTyperMixin(object): def get_subtype_from_preparse(cls, filepath: Path): from backend.db.models import SubmissionType parser = ClientSubmissionInfoParser(filepath) - sub_type = next((value for k, value in parser.parsed_info if k == "submissiontype"), None) + sub_type = next((value for k, value in parser.parsed_info.items() if k == "submissiontype"), None) sub_type = SubmissionType.query(name=sub_type) if isinstance(sub_type, list): sub_type = None @@ -91,9 +91,9 @@ class ClientSubmissionInfoParser(DefaultKEYVALUEParser, SubmissionTyperMixin): self.submissiontype = self.retrieve_submissiontype(filepath=filepath) else: self.submissiontype = submissiontype - if "range_dict" not in kwargs: - kwargs['range_dict'] = self.submissiontype.info_map - super().__init__(filepath=filepath, **kwargs) + # if "range_dict" not in kwargs: + # kwargs['range_dict'] = self.submissiontype.info_map + super().__init__(filepath=filepath, range_dict=[dict(sheet="Client Info")], **kwargs) allowed_procedure_types = [item.name for item in self.submissiontype.proceduretype] for name in allowed_procedure_types: if name in self.workbook.sheetnames: @@ -108,6 +108,18 @@ class ClientSubmissionInfoParser(DefaultKEYVALUEParser, SubmissionTyperMixin): self.manager = manager(proceduretype=name) pass + @property + def parsed_info(self): + output = {k:v for k, v in super().parsed_info} + try: + output['clientlab'] = output['client_lab'] + except KeyError: + pass + logger.debug(f"Data: {output}") + output['submissiontype'] = self.submissiontype.name + return output + + class ClientSubmissionSampleParser(DefaultTABLEParser, SubmissionTyperMixin): """ @@ -135,7 +147,7 @@ class ClientSubmissionSampleParser(DefaultTABLEParser, SubmissionTyperMixin): def parsed_info(self) -> Generator[dict, None, None]: output = super().parsed_info for ii, sample in enumerate(output): - # logger.debug(f"Parsed info sample: {sample}") + logger.debug(f"Parsed info sample: {sample}") if isinstance(sample["row"], str) and sample["row"].lower() in ascii_lowercase[0:8]: try: sample["row"] = row_keys[sample["row"]] @@ -145,4 +157,5 @@ class ClientSubmissionSampleParser(DefaultTABLEParser, SubmissionTyperMixin): yield sample def to_pydantic(self): + logger.debug(f"Attempting to pydantify: {self._pyd_object}") return [self._pyd_object(**sample) for sample in self.parsed_info if sample['sample_id']] diff --git a/src/submissions/backend/excel/parsers/results_parsers/pcr_results_parser.py b/src/submissions/backend/excel/parsers/results_parsers/pcr_results_parser.py index 3b4b02e..8a59e2a 100644 --- a/src/submissions/backend/excel/parsers/results_parsers/pcr_results_parser.py +++ b/src/submissions/backend/excel/parsers/results_parsers/pcr_results_parser.py @@ -29,7 +29,7 @@ class PCRInfoParser(DefaultKEYVALUEParser): def to_pydantic(self): # from backend.db.models import Procedure - data = dict(results={key: value for key, value in self.parsed_info}, filepath=self.filepath, + data = dict(results={k:v for k, v in self.parsed_info}, filepath=self.filepath, result_type="PCR") return self._pyd_object(**data, parent=self.procedure) diff --git a/src/submissions/backend/managers/__init__.py b/src/submissions/backend/managers/__init__.py index 28c9ba5..4b30370 100644 --- a/src/submissions/backend/managers/__init__.py +++ b/src/submissions/backend/managers/__init__.py @@ -19,10 +19,10 @@ class DefaultManager(object): match input_object: case str(): self.input_object = Path(input_object) - self.pyd = self.parse() + self.pyd = self.to_pydantic() case Path(): self.input_object = input_object - self.pyd = self.parse() + self.pyd = self.to_pydantic() case x if issubclass(input_object.__class__, PydBaseClass): # logger.debug("Subclass of PydBaseClass") self.pyd = input_object @@ -31,7 +31,7 @@ class DefaultManager(object): self.pyd = input_object.to_pydantic() case _: self.input_object = select_open_file(file_extension="xlsx", obj=get_application_from_parent(parent)) - self.pyd = self.parse() + self.pyd = self.to_pydantic() # logger.debug(f"FName after correction: {input_object}") diff --git a/src/submissions/backend/managers/clientsubmissions.py b/src/submissions/backend/managers/clientsubmissions.py index fecd81d..ecaa508 100644 --- a/src/submissions/backend/managers/clientsubmissions.py +++ b/src/submissions/backend/managers/clientsubmissions.py @@ -39,16 +39,19 @@ class DefaultClientSubmissionManager(DefaultManager): self.submissiontype = submissiontype super().__init__(parent=parent, input_object=input_object) - def parse(self): + def to_pydantic(self): self.info_parser = ClientSubmissionInfoParser(filepath=self.input_object, submissiontype=self.submissiontype) self.sample_parser = ClientSubmissionSampleParser(filepath=self.input_object, submissiontype=self.submissiontype) - self.to_pydantic() + logger.debug(f"Info Parser range dict: {self.info_parser.range_dict}") + self.clientsubmission = self.info_parser.to_pydantic() + + self.clientsubmission.sample = self.sample_parser.to_pydantic() return self.clientsubmission - def to_pydantic(self): - self.clientsubmission = self.info_parser.to_pydantic() - self.clientsubmission.sample = self.sample_parser.to_pydantic() + # def to_pydantic(self): + # self.clientsubmission = self.info_parser.to_pydantic() + # self.clientsubmission.sample = self.sample_parser.to_pydantic() def write(self): workbook: Workbook = load_workbook(BytesIO(self.submissiontype.template_file)) diff --git a/src/submissions/backend/validators/__init__.py b/src/submissions/backend/validators/__init__.py index 4a470d5..5c82a5f 100644 --- a/src/submissions/backend/validators/__init__.py +++ b/src/submissions/backend/validators/__init__.py @@ -61,7 +61,7 @@ class ClientSubmissionNamer(DefaultNamer): def get_subtype_from_preparse(self): from backend.excel.parsers.clientsubmission_parser import ClientSubmissionInfoParser parser = ClientSubmissionInfoParser(self.filepath) - sub_type = next((value for k, value in parser.parsed_info if k == "submissiontype"), None) + sub_type = next((value for k, value in parser.parsed_info.items() if k == "submissiontype"), None) sub_type = SubmissionType.query(name=sub_type) if isinstance(sub_type, list): sub_type = None diff --git a/src/submissions/backend/validators/pydant.py b/src/submissions/backend/validators/pydant.py index a2d333c..713a693 100644 --- a/src/submissions/backend/validators/pydant.py +++ b/src/submissions/backend/validators/pydant.py @@ -1604,6 +1604,20 @@ class PydClientSubmission(PydBaseClass): submitter_plate_id: dict | None = Field(default=dict(value=None, missing=True), validate_default=True) sample: List[PydSample] | None = Field(default=[]) + # @field_validator("submissiontype", mode="before") + # @classmethod + # def enforce_submissiontype(cls, value): + # if isinstance(value, str): + # value = dict(value=value, missing=False) + # return value + + @field_validator("submissiontype", "clientlab", "contact", mode="before") + @classmethod + def enforce_value(cls, value): + if isinstance(value, str): + value = dict(value=value, missing=False) + return value + @field_validator("submitted_date", mode="before") @classmethod def enforce_submitted_date(cls, value): @@ -1659,6 +1673,8 @@ class PydClientSubmission(PydBaseClass): @field_validator("submitted_date") @classmethod def rescue_date(cls, value): + if not value: + value = dict(value=None) try: check = value['value'] is None except TypeError: diff --git a/src/submissions/frontend/widgets/submission_widget.py b/src/submissions/frontend/widgets/submission_widget.py index 9a81f5e..0821193 100644 --- a/src/submissions/frontend/widgets/submission_widget.py +++ b/src/submissions/frontend/widgets/submission_widget.py @@ -145,7 +145,7 @@ class SubmissionFormContainer(QWidget): # self.pydsamples = self.sampleparser.to_pydantic() # logger.debug(f"Samples: {pformat(self.pydclientsubmission.sample)}") self.clientsubmission_manager = DefaultClientSubmissionManager(parent=self, input_object=fname) - self.pydclientsubmission = self.clientsubmission_manager.parse() + self.pydclientsubmission = self.clientsubmission_manager.to_pydantic() checker = SampleChecker(self, "Sample Checker", self.pydclientsubmission.sample) if checker.exec(): # logger.debug(pformat(self.pydclientsubmission.sample))