Post RSLNamer move, pre-restore

2023-10-16 14:24:33 -05:00
parent 80d77117e1
commit 0a90542e8e
11 changed files with 575 additions and 286 deletions
--- a/src/submissions/backend/excel/parser.py
+++ b/src/submissions/backend/excel/parser.py
@@ -6,20 +6,78 @@ import pprint
 from typing import List
 import pandas as pd
 from pathlib import Path
-from backend.db import models, lookup_kit_types, lookup_submission_type, lookup_samples, get_polymorphic_subclass
+from backend.db import models, lookup_kit_types, lookup_submission_type, lookup_samples
 from backend.pydant import PydSubmission, PydReagent
 import logging
 from collections import OrderedDict
 import re
 from datetime import date
 from dateutil.parser import parse, ParserError
-from tools import check_not_nan, RSLNamer, convert_nans_to_nones, Settings
+from tools import check_not_nan, convert_nans_to_nones, Settings
+# from backend.namer import RSLNamer
 from frontend.custom_widgets.pop_ups import SubmissionTypeSelector, KitSelector

 logger = logging.getLogger(f"submissions.{__name__}")

 row_keys = dict(A=1, B=2, C=3, D=4, E=5, F=6, G=7, H=8)

+class RSLNamer(object):
+    """
+    Object that will enforce proper formatting on RSL plate names.
+    NOTE: Deprecated in favour of the object-based methods in 'submissions.py'
+    """
+    def __init__(self, ctx, instr:str, sub_type:str|None=None):
+        self.ctx = ctx
+        self.submission_type = sub_type
+        self.retrieve_rsl_number(in_str=instr)
+        if self.submission_type != None:
+            # custom_enforcer = get_polymorphic_subclass(BasicSubmission, self.submission_type).enforce_naming_schema
+            parser = getattr(self, f"enforce_{self.submission_type.replace(' ', '_').lower()}")
+            parser()
+            self.parsed_name = self.parsed_name.replace("_", "-")
+        
+    def retrieve_rsl_number(self, in_str:str|Path):
+        """
+        Uses regex to retrieve the plate number and submission type from an input string
+
+        Args:
+            in_str (str | Path): string or file path to be parsed; only the file stem is matched
+        """    
+        if not isinstance(in_str, Path):
+            in_str = Path(in_str)
+        self.out_str = in_str.stem
+        logger.debug(f"Attempting match of {self.out_str}")
+        logger.debug(f"The initial plate name is: {self.out_str}")
+        # regex = re.compile(r"""
+        #         # (?P<wastewater>RSL(?:-|_)?WW(?:-|_)?20\d{2}-?\d{2}-?\d{2}(?:(?:_|-)\d?((?!\d)|R)?\d(?!\d))?)|
+        #         (?P<wastewater>RSL(?:-|_)?WW(?:-|_)?20\d{2}-?\d{2}-?\d{2}(?:(_|-)\d?(\D|$)R?\d?)?)|
+        #         (?P<bacterial_culture>RSL-?\d{2}-?\d{4})|
+        #         (?P<wastewater_artic>(\d{4}-\d{2}-\d{2}(?:-|_)(?:\d_)?artic)|(RSL(?:-|_)?AR(?:-|_)?20\d{2}-?\d{2}-?\d{2}(?:(_|-)\d?(\D|$)R?\d?)?))
+        #         """, flags = re.IGNORECASE | re.VERBOSE)
+        regex = models.BasicSubmission.RSLNamer.construct_regex()
+        m = regex.search(self.out_str)
+        if m != None:
+            self.parsed_name = m.group().upper().strip(".")
+            logger.debug(f"Got parsed submission name: {self.parsed_name}")
+            if self.submission_type == None:
+                try:
+                    self.submission_type = m.lastgroup
+                except AttributeError as e:
+                    logger.critical("No RSL plate number found or submission type found!")
+                    logger.debug(f"The cause of the above error was: {e}")
+                    logger.warning(f"We're going to have to create the submission type from the excel sheet properties...")
+                    if in_str.exists():
+                        my_xl = pd.ExcelFile(in_str)
+                        if my_xl.book.properties.category != None:
+                            categories = [item.strip().title() for item in my_xl.book.properties.category.split(";")]
+                            self.submission_type = categories[0].replace(" ", "_").lower()
+                        else:
+                            raise AttributeError(f"File {in_str.__str__()} has no categories.")
+                    else:
+                        raise FileNotFoundError()
+        # else:
+        #     raise ValueError(f"No parsed name could be created for {self.out_str}.")
+
 class SheetParser(object):
    """
    object to pull and contain data from excel file
@@ -69,12 +127,20 @@ class SheetParser(object):
        else:
            # This code is going to be depreciated once there is full adoption of the client sheets
            # with updated metadata... but how will it work for Artic?
+            
+            # sub = get_polymorphic_subclass()
+            try:
+                logger.debug(f"Attempting to match file name regex")
+                namer = models.BasicSubmission.RSLNamer(ctx=self.ctx, instr=self.filepath)
+                return namer.submission_type
+            except Exception as e:
+                logger.error(f"Unable to find file name regex match")
            logger.debug("Using excel map to find type...")
            try:
                for type in self.ctx.submission_types:
                    # This gets the *first* submission type that matches the sheet names in the workbook 
                    if self.xl.sheet_names == self.ctx.submission_types[type]['excel_map']:
-                        return dict(value=type.title(), parsed=True)
+                        return dict(value=type.title(), parsed=False)
                return "Unknown"
            except Exception as e:
                logger.warning(f"We were unable to parse the submission type due to: {e}")
@@ -119,43 +185,6 @@ class SheetParser(object):
        """        
        self.sample_result, self.sub['samples'] = SampleParser(ctx=self.ctx, xl=self.xl, submission_type=self.sub['submission_type']['value']).parse_samples()

-    # def parse_bacterial_culture(self, input_dict) -> dict:
-    #     """
-    #     Update submission dictionary with type specific information
-
-    #     Args:
-    #         input_dict (dict): Input sample dictionary
-
-    #     Returns:
-    #         dict: Updated sample dictionary
-    #     """        
-    #     return input_dict
-        
-    # def parse_wastewater(self, input_dict) -> dict:
-    #     """
-    #     Update submission dictionary with type specific information
-
-    #     Args:
-    #         input_dict (dict): Input sample dictionary
-
-    #     Returns:
-    #         dict: Updated sample dictionary
-    #     """        
-    #     return input_dict
-
-    # def parse_wastewater_artic(self, input_dict:dict) -> dict:
-    #     """
-    #     Update submission dictionary with type specific information
-
-    #     Args:
-    #         input_dict (dict): Input sample dictionary
-
-    #     Returns:
-    #         dict: Updated sample dictionary
-    #     """        
-    #     return input_dict
-
-
    def import_kit_validation_check(self):
        """
        Enforce that the parser has an extraction kit
@@ -224,7 +253,8 @@ class InfoParser(object):
        submission_type = lookup_submission_type(ctx=self.ctx, name=submission_type['value'])
        info_map = submission_type.info_map
        # Get the parse_info method from the submission type specified
-        self.custom_parser = get_polymorphic_subclass(models.BasicSubmission, submission_type.name).parse_info
+        # self.custom_parser = get_polymorphic_subclass(models.BasicSubmission, submission_type.name).parse_info
+        self.custom_parser = models.BasicSubmission.find_polymorphic_subclass(polymorphic_identity=submission_type.name).parse_info
        return info_map

    def parse_info(self) -> dict:
@@ -359,7 +389,8 @@ class SampleParser(object):
        submission_type = lookup_submission_type(ctx=self.ctx, name=submission_type)
        logger.debug(f"info_map: {pprint.pformat(submission_type.info_map)}")
        sample_info_map = submission_type.info_map['samples']
-        self.custom_parser = get_polymorphic_subclass(models.BasicSubmission, submission_type.name).parse_samples
+        # self.custom_parser = get_polymorphic_subclass(models.BasicSubmission, submission_type.name).parse_samples
+        self.custom_parser = models.BasicSubmission.find_polymorphic_subclass(polymorphic_identity=submission_type.name).parse_samples
        return sample_info_map

    def construct_plate_map(self, plate_map_location:dict) -> pd.DataFrame:
@@ -376,7 +407,8 @@ class SampleParser(object):
        df = df.iloc[plate_map_location['start_row']-1:plate_map_location['end_row'], plate_map_location['start_column']-1:plate_map_location['end_column']]
        df = pd.DataFrame(df.values[1:], columns=df.iloc[0])
        df = df.set_index(df.columns[0])
-        custom_mapper = get_polymorphic_subclass(models.BasicSubmission, self.submission_type)
+        # custom_mapper = get_polymorphic_subclass(models.BasicSubmission, self.submission_type)
+        custom_mapper = models.BasicSubmission.find_polymorphic_subclass(polymorphic_identity=self.submission_type)
        df = custom_mapper.custom_platemap(self.xl, df)
        return df
    
@@ -596,7 +628,7 @@ class SampleParser(object):
        for plate in self.plates:
            df = self.xl.parse(plate['sheet'], header=None)
            if isinstance(df.iat[plate['row']-1, plate['column']-1], str):
-                output = RSLNamer(ctx=self.ctx, instr=df.iat[plate['row']-1, plate['column']-1]).parsed_name
+                output = models.BasicSubmission.RSLNamer(ctx=self.ctx, instr=df.iat[plate['row']-1, plate['column']-1]).parsed_name
            else:
                continue
            plates.append(output)
@@ -631,7 +663,7 @@ class PCRParser(object):
                return
        # self.pcr = OrderedDict()
        self.pcr = {}
-        namer = RSLNamer(ctx=self.ctx, instr=filepath.__str__())
+        namer = models.BasicSubmission.RSLNamer(ctx=self.ctx, instr=filepath.__str__())
        self.plate_num = namer.parsed_name
        self.submission_type = namer.submission_type
        logger.debug(f"Set plate number to {self.plate_num} and type to {self.submission_type}")
@@ -672,7 +704,7 @@ class PCRParser(object):
        self.pcr['imported_by'] = getuser()
        return df

-    def parse_wastewater(self):
+    def parse_Wastewater(self):
        """
        Parse specific to wastewater samples.
        """