Debugged upgrades.

2024-05-13 07:44:06 -05:00
parent f30f6403d6
commit 84fac23890
15 changed files with 447 additions and 487 deletions
--- a/src/submissions/backend/excel/parser.py
+++ b/src/submissions/backend/excel/parser.py
@@ -45,7 +45,7 @@ class SheetParser(object):
                raise ValueError("No filepath given.")
        try:
            # self.xl = pd.ExcelFile(filepath)
-            self.xl = load_workbook(filepath, read_only=True, data_only=True)
+            self.xl = load_workbook(filepath, data_only=True)
        except ValueError as e:
            logger.error(f"Incorrect value: {e}")
            raise FileNotFoundError(f"Couldn't parse file {self.filepath}")
@@ -53,6 +53,8 @@ class SheetParser(object):
        # make decision about type of sample we have
        self.sub['submission_type'] = dict(value=RSLNamer.retrieve_submission_type(filename=self.filepath),
                                           missing=True)
+        self.submission_type = SubmissionType.query(name=self.sub['submission_type'])
+        self.sub_object = BasicSubmission.find_polymorphic_subclass(polymorphic_identity=self.submission_type)
        # grab the info map from the submission type in database
        self.parse_info()
        self.import_kit_validation_check()
@@ -67,7 +69,7 @@ class SheetParser(object):
        """
        Pulls basic information from the excel sheet
        """
-        parser = InfoParser(xl=self.xl, submission_type=self.sub['submission_type']['value'])
+        parser = InfoParser(xl=self.xl, submission_type=self.submission_type, sub_object=self.sub_object)
        info = parser.parse_info()
        self.info_map = parser.map
        # exclude_from_info = BasicSubmission.find_polymorphic_subclass(polymorphic_identity=self.sub['submission_type']).exclude_from_info_parser()
@@ -87,21 +89,21 @@ class SheetParser(object):
            extraction_kit (str | None, optional): Relevant extraction kit for reagent map. Defaults to None.
        """
        if extraction_kit == None:
-            extraction_kit = extraction_kit = self.sub['extraction_kit']
+            extraction_kit = self.sub['extraction_kit']
        # logger.debug(f"Parsing reagents for {extraction_kit}")
-        self.sub['reagents'] = ReagentParser(xl=self.xl, submission_type=self.sub['submission_type'],
+        self.sub['reagents'] = ReagentParser(xl=self.xl, submission_type=self.submission_type,
                                             extraction_kit=extraction_kit).parse_reagents()

    def parse_samples(self):
        """
        Pulls sample info from the excel sheet
        """
-        parser = SampleParser(xl=self.xl, submission_type=self.sub['submission_type']['value'])
+        parser = SampleParser(xl=self.xl, submission_type=self.submission_type)
        self.sub['samples'] = parser.reconcile_samples()
        # self.plate_map = parser.plate_map

    def parse_equipment(self):
-        parser = EquipmentParser(xl=self.xl, submission_type=self.sub['submission_type']['value'])
+        parser = EquipmentParser(xl=self.xl, submission_type=self.submission_type)
        self.sub['equipment'] = parser.parse_equipment()

    def import_kit_validation_check(self):
@@ -120,22 +122,13 @@ class SheetParser(object):
            if isinstance(self.sub['extraction_kit'], str):
                self.sub['extraction_kit'] = dict(value=self.sub['extraction_kit'], missing=True)

-    def import_reagent_validation_check(self):
-        """
-        Enforce that only allowed reagents get into the Pydantic Model
-        """
-        kit = KitType.query(name=self.sub['extraction_kit']['value'])
-        allowed_reagents = [item.name for item in kit.get_reagents()]
-        # logger.debug(f"List of reagents for comparison with allowed_reagents: {pformat(self.sub['reagents'])}")
-        self.sub['reagents'] = [reagent for reagent in self.sub['reagents'] if reagent.type in allowed_reagents]
-
    def finalize_parse(self):
        """
        Run custom final validations of data for submission subclasses.
        """
-        finisher = BasicSubmission.find_polymorphic_subclass(
-            polymorphic_identity=self.sub['submission_type']).finalize_parse
-        self.sub = finisher(input_dict=self.sub, xl=self.xl, info_map=self.info_map)
+        # finisher = BasicSubmission.find_polymorphic_subclass(
+        #     polymorphic_identity=self.sub['submission_type']).finalize_parse
+        self.sub = self.sub_object.finalize_parse(input_dict=self.sub, xl=self.xl, info_map=self.info_map)

    def to_pydantic(self) -> PydSubmission:
        """
@@ -163,9 +156,14 @@ class SheetParser(object):

 class InfoParser(object):

-    def __init__(self, xl: Workbook, submission_type: str):
+    def __init__(self, xl: Workbook, submission_type: str|SubmissionType, sub_object: BasicSubmission|None=None):
        logger.info(f"\n\Hello from InfoParser!\n\n")
-        self.submission_type = submission_type
+        if isinstance(submission_type, str):
+            submission_type = SubmissionType.query(name=submission_type)
+        if sub_object is None:
+            sub_object = BasicSubmission.find_polymorphic_subclass(polymorphic_identity=submission_type.name)
+        self.submission_type_obj = submission_type
+        self.sub_object = sub_object
        self.map = self.fetch_submission_info_map()
        self.xl = xl
        logger.debug(f"Info map for InfoParser: {pformat(self.map)}")
@@ -180,16 +178,14 @@ class InfoParser(object):
        Returns:
            dict: Location map of all info for this submission type
        """
-        if isinstance(self.submission_type, str):
-            self.submission_type = dict(value=self.submission_type, missing=True)
+        self.submission_type = dict(value=self.submission_type_obj.name, missing=True)
        logger.debug(f"Looking up submission type: {self.submission_type['value']}")
        # submission_type = SubmissionType.query(name=self.submission_type['value'])
        # info_map = submission_type.info_map
-        self.sub_object: BasicSubmission = \
-            BasicSubmission.find_polymorphic_subclass(polymorphic_identity=self.submission_type['value'])
+        # self.sub_object: BasicSubmission = \
+        #     BasicSubmission.find_polymorphic_subclass(polymorphic_identity=self.submission_type['value'])
        info_map = self.sub_object.construct_info_map("read")
        # Get the parse_info method from the submission type specified
-
        return info_map

    def parse_info(self) -> dict:
@@ -199,8 +195,8 @@ class InfoParser(object):
        Returns:
            dict: key:value of basic info
        """
-        if isinstance(self.submission_type, str):
-            self.submission_type = dict(value=self.submission_type, missing=True)
+        # if isinstance(self.submission_type, str):
+        #     self.submission_type = dict(value=self.submission_type, missing=True)
        dicto = {}
        # exclude_from_generic = BasicSubmission.find_polymorphic_subclass(polymorphic_identity=self.submission_type['value']).get_default_info("parser_ignore")
        # This loop parses generic info
@@ -224,7 +220,13 @@ class InfoParser(object):
                # if check:
                #     relevant[k] = v
                for location in v:
-                    if location['sheet'] == sheet:
+                    try:
+                        check = location['sheet'] == sheet
+                    except TypeError:
+                        logger.warning(f"Location is likely a string, skipping")
+                        dicto[k] = dict(value=location, missing=False)
+                        check = False
+                    if check:
                        new = location
                        new['name'] = k
                        relevant.append(new)
@@ -257,13 +259,18 @@ class InfoParser(object):
                        dicto[item['name']] = dict(value=value, missing=missing)
                    except (KeyError, IndexError):
                        continue
-        return self.sub_object.parse_info(input_dict=dicto, xl=self.xl)
+        return self.sub_object.custom_info_parser(input_dict=dicto, xl=self.xl)


 class ReagentParser(object):

-    def __init__(self, xl: Workbook, submission_type: str, extraction_kit: str):
+    def __init__(self, xl: Workbook, submission_type: str, extraction_kit: str, sub_object:BasicSubmission|None=None):
        logger.debug("\n\nHello from ReagentParser!\n\n")
+        self.submission_type_obj = submission_type
+        self.sub_object = sub_object
+        if isinstance(extraction_kit, dict):
+            extraction_kit = extraction_kit['value']
+        self.kit_object = KitType.query(name=extraction_kit)
        self.map = self.fetch_kit_info_map(extraction_kit=extraction_kit, submission_type=submission_type)
        logger.debug(f"Reagent Parser map: {self.map}")
        self.xl = xl
@@ -279,13 +286,14 @@ class ReagentParser(object):
        Returns:
            dict: locations of reagent info for the kit.
        """
-        if isinstance(extraction_kit, dict):
-            extraction_kit = extraction_kit['value']
-        kit = KitType.query(name=extraction_kit)
+
        if isinstance(submission_type, dict):
            submission_type = submission_type['value']
-        reagent_map = kit.construct_xl_map_for_use(submission_type.title())
-        del reagent_map['info']
+        reagent_map = self.kit_object.construct_xl_map_for_use(submission_type)
+        try:
+            del reagent_map['info']
+        except KeyError:
+            pass
        return reagent_map

    def parse_reagents(self) -> List[PydReagent]:
@@ -348,7 +356,7 @@ class SampleParser(object):
    object to pull data for samples in excel sheet and construct individual sample objects
    """

-    def __init__(self, xl: Workbook, submission_type: str, sample_map: dict | None = None) -> None:
+    def __init__(self, xl: Workbook, submission_type: SubmissionType, sample_map: dict | None = None, sub_object:BasicSubmission|None=None) -> None:
        """
        convert sample sub-dataframe to dictionary of records

@@ -359,7 +367,11 @@ class SampleParser(object):
        logger.debug("\n\nHello from SampleParser!\n\n")
        self.samples = []
        self.xl = xl
-        self.submission_type = submission_type
+        self.submission_type = submission_type.name
+        self.submission_type_obj = submission_type
+        if sub_object is None:
+            sub_object = BasicSubmission.find_polymorphic_subclass(polymorphic_identity=self.submission_type_obj.name)
+        self.sub_object = sub_object
        self.sample_info_map = self.fetch_sample_info_map(submission_type=submission_type, sample_map=sample_map)
        logger.debug(f"sample_info_map: {self.sample_info_map}")
        # self.plate_map = self.construct_plate_map(plate_map_location=sample_info_map['plate_map'])
@@ -385,9 +397,10 @@ class SampleParser(object):
        """
        logger.debug(f"Looking up submission type: {submission_type}")
        # submission_type = SubmissionType.query(name=submission_type)
-        self.sub_object = BasicSubmission.find_polymorphic_subclass(polymorphic_identity=submission_type)
+        # self.sub_object = BasicSubmission.find_polymorphic_subclass(polymorphic_identity=submission_type)
        # self.custom_sub_parser = .parse_samples
-        self.samp_object = BasicSample.find_polymorphic_subclass(polymorphic_identity=f"{submission_type} Sample")
+        self.sample_type = self.sub_object.get_default_info("sample_type")
+        self.samp_object = BasicSample.find_polymorphic_subclass(polymorphic_identity=self.sample_type)
        logger.debug(f"Got sample class: {self.samp_object.__name__}")
        # self.custom_sample_parser = .parse_sample
        # logger.debug(f"info_map: {pformat(se)}")
@@ -398,46 +411,46 @@ class SampleParser(object):
            sample_info_map = sample_map
        return sample_info_map

-    def construct_plate_map(self, plate_map_location: dict) -> pd.DataFrame:
-        """
-        Gets location of samples from plate map grid in excel sheet.
-
-        Args:
-            plate_map_location (dict): sheet name, start/end row/column
-
-        Returns:
-            pd.DataFrame: Plate map grid
-        """
-        logger.debug(f"Plate map location: {plate_map_location}")
-        df = self.xl.parse(plate_map_location['sheet'], header=None, dtype=object)
-        df = df.iloc[plate_map_location['start_row'] - 1:plate_map_location['end_row'],
-             plate_map_location['start_column'] - 1:plate_map_location['end_column']]
-        df = pd.DataFrame(df.values[1:], columns=df.iloc[0])
-        df = df.set_index(df.columns[0])
-        logger.debug(f"Vanilla platemap: {df}")
-        # custom_mapper = BasicSubmission.find_polymorphic_subclass(polymorphic_identity=self.submission_type)
-        df = self.sub_object.custom_platemap(self.xl, df)
-        # logger.debug(f"Custom platemap:\n{df}")
-        return df
-
-    def construct_lookup_table(self, lookup_table_location: dict) -> pd.DataFrame:
-        """
-        Gets table of misc information from excel book
-
-        Args:
-            lookup_table_location (dict): sheet name, start/end row
-
-        Returns:
-            pd.DataFrame: _description_
-        """
-        try:
-            df = self.xl.parse(lookup_table_location['sheet'], header=None, dtype=object)
-        except KeyError:
-            return None
-        df = df.iloc[lookup_table_location['start_row'] - 1:lookup_table_location['end_row']]
-        df = pd.DataFrame(df.values[1:], columns=df.iloc[0])
-        df = df.reset_index(drop=True)
-        return df
+    # def construct_plate_map(self, plate_map_location: dict) -> pd.DataFrame:
+    #     """
+    #     Gets location of samples from plate map grid in excel sheet.
+    #
+    #     Args:
+    #         plate_map_location (dict): sheet name, start/end row/column
+    #
+    #     Returns:
+    #         pd.DataFrame: Plate map grid
+    #     """
+    #     logger.debug(f"Plate map location: {plate_map_location}")
+    #     df = self.xl.parse(plate_map_location['sheet'], header=None, dtype=object)
+    #     df = df.iloc[plate_map_location['start_row'] - 1:plate_map_location['end_row'],
+    #          plate_map_location['start_column'] - 1:plate_map_location['end_column']]
+    #     df = pd.DataFrame(df.values[1:], columns=df.iloc[0])
+    #     df = df.set_index(df.columns[0])
+    #     logger.debug(f"Vanilla platemap: {df}")
+    #     # custom_mapper = BasicSubmission.find_polymorphic_subclass(polymorphic_identity=self.submission_type)
+    #     df = self.sub_object.custom_platemap(self.xl, df)
+    #     # logger.debug(f"Custom platemap:\n{df}")
+    #     return df
+    #
+    # def construct_lookup_table(self, lookup_table_location: dict) -> pd.DataFrame:
+    #     """
+    #     Gets table of misc information from excel book
+    #
+    #     Args:
+    #         lookup_table_location (dict): sheet name, start/end row
+    #
+    #     Returns:
+    #         pd.DataFrame: _description_
+    #     """
+    #     try:
+    #         df = self.xl.parse(lookup_table_location['sheet'], header=None, dtype=object)
+    #     except KeyError:
+    #         return None
+    #     df = df.iloc[lookup_table_location['start_row'] - 1:lookup_table_location['end_row']]
+    #     df = pd.DataFrame(df.values[1:], columns=df.iloc[0])
+    #     df = df.reset_index(drop=True)
+    #     return df

    def parse_plate_map(self):
        """
@@ -471,7 +484,7 @@ class SampleParser(object):
                if check_not_nan(id):
                    if id not in invalids:
                        sample_dict = dict(id=id, row=ii, column=jj)
-                        sample_dict['sample_type'] = f"{self.submission_type} Sample"
+                        sample_dict['sample_type'] = self.sample_type
                        plate_map_samples.append(sample_dict)
                    else:
                        # logger.error(f"Sample cell ({row}, {column}) has invalid value: {id}.")
@@ -524,7 +537,7 @@ class SampleParser(object):
                row_dict[lmap['merge_on_id']] = str(row_dict[lmap['merge_on_id']])
            except KeyError:
                pass
-            row_dict['sample_type'] = f"{self.submission_type} Sample"
+            row_dict['sample_type'] = self.sample_type
            row_dict['submission_rank'] = ii
            try:
                check = check_not_nan(row_dict[lmap['merge_on_id']])
@@ -567,22 +580,22 @@ class SampleParser(object):
            new_samples.append(PydSample(**translated_dict))
        return result, new_samples

-    def grab_plates(self) -> List[str]:
-        """
-        Parse plate names from 
-
-        Returns:
-            List[str]: list of plate names.
-        """
-        plates = []
-        for plate in self.plates:
-            df = self.xl.parse(plate['sheet'], header=None)
-            if isinstance(df.iat[plate['row'] - 1, plate['column'] - 1], str):
-                output = RSLNamer.retrieve_rsl_number(filename=df.iat[plate['row'] - 1, plate['column'] - 1])
-            else:
-                continue
-            plates.append(output)
-        return plates
+    # def grab_plates(self) -> List[str]:
+    #     """
+    #     Parse plate names from
+    #
+    #     Returns:
+    #         List[str]: list of plate names.
+    #     """
+    #     plates = []
+    #     for plate in self.plates:
+    #         df = self.xl.parse(plate['sheet'], header=None)
+    #         if isinstance(df.iat[plate['row'] - 1, plate['column'] - 1], str):
+    #             output = RSLNamer.retrieve_rsl_number(filename=df.iat[plate['row'] - 1, plate['column'] - 1])
+    #         else:
+    #             continue
+    #         plates.append(output)
+    #     return plates

    def reconcile_samples(self):
        # TODO: Move to pydantic validator?
@@ -630,20 +643,24 @@ class SampleParser(object):
                    else:
                        new = psample
                        # samples.append(psample)
-            new['sample_type'] = f"{self.submission_type} Sample"
+            # new['sample_type'] = f"{self.submission_type} Sample"
            try:
                check = new['submitter_id'] is None
            except KeyError:
                check = True
            if check:
                new['submitter_id'] = psample['id']
+            new = self.sub_object.parse_samples(new)
            samples.append(new)
        samples = remove_key_from_list_of_dicts(samples, "id")
        return sorted(samples, key=lambda k: (k['row'], k['column']))

 class EquipmentParser(object):

-    def __init__(self, xl: Workbook, submission_type: str) -> None:
+    def __init__(self, xl: Workbook, submission_type: str|SubmissionType) -> None:
+        if isinstance(submission_type, str):
+            submission_type = SubmissionType.query(name=submission_type)
+
        self.submission_type = submission_type
        self.xl = xl
        self.map = self.fetch_equipment_map()
@@ -655,8 +672,8 @@ class EquipmentParser(object):
        Returns:
            List[dict]: List of locations
        """
-        submission_type = SubmissionType.query(name=self.submission_type)
-        return submission_type.construct_equipment_map()
+        # submission_type = SubmissionType.query(name=self.submission_type)
+        return self.submission_type.construct_equipment_map()

    def get_asset_number(self, input: str) -> str:
        """