Addition of WW artic parsers, large-scale shake-up of parser structure.

2023-06-08 14:43:36 -05:00
parent 1d6823705c
commit a7132cd1b4
14 changed files with 376 additions and 56 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,8 @@
 ## 202306.01
 - Large-scale shake-up of import and scraper functions.
 - Addition of Artic scrapers.
 ## 202305.05
 - Hitpicking now creates source plate map image.
--- a/alembic.ini
+++ b/alembic.ini
@@ -55,9 +55,9 @@ version_path_separator = os  # Use os.pathsep. Default configuration used for ne
 # are written from script.py.mako
 # output_encoding = utf-8
-sqlalchemy.url = sqlite:///L:\Robotics Laboratory Support\Submissions\submissions.db
+; sqlalchemy.url = sqlite:///L:\Robotics Laboratory Support\Submissions\submissions.db
-; sqlalchemy.url = sqlite:///C:\Users\lwark\Documents\Archives\DB_backups\submissions-20230427.db
+sqlalchemy.url = sqlite:///C:\Users\lwark\Documents\Archives\DB_backups\submissions-20230605.db
-; msqlalchemy.url = sqlite:///C:\Users\lwark\Documents\python\submissions\tests\test_assets\submissions_test.db
+; sqlalchemy.url = sqlite:///C:\Users\lwark\Documents\python\submissions\tests\test_assets\submissions_test.db
 [post_write_hooks]
--- a/alembic/versions/8d32abdafe2b_moved_artic_info_to_ww_samples.py
+++ b/alembic/versions/8d32abdafe2b_moved_artic_info_to_ww_samples.py
@@ -0,0 +1,30 @@
 """moved artic info to ww_samples
 Revision ID: 8d32abdafe2b
 Revises: aac569c672de
 Create Date: 2023-06-05 10:10:37.650733
 """
 from alembic import op
 import sqlalchemy as sa
 # revision identifiers, used by Alembic.
 revision = '8d32abdafe2b'
 down_revision = 'aac569c672de'
 branch_labels = None
 depends_on = None
 def upgrade() -> None:
    """Add the nullable ``artic_well_number`` string column to ``_ww_samples``."""
    # ### commands auto generated by Alembic - please adjust! ###
    artic_well_column = sa.Column('artic_well_number', sa.String(length=8), nullable=True)
    # batch mode is used so the table alteration goes through Alembic's batch API
    with op.batch_alter_table('_ww_samples', schema=None) as ww_samples:
        ww_samples.add_column(artic_well_column)
    # ### end Alembic commands ###
 def downgrade() -> None:
    """Remove the ``artic_well_number`` column from ``_ww_samples`` again."""
    # ### commands auto generated by Alembic - please adjust! ###
    with op.batch_alter_table('_ww_samples', schema=None) as ww_samples:
        ww_samples.drop_column('artic_well_number')
    # ### end Alembic commands ###
--- a/alembic/versions/aac569c672de_added_in_artic_information.py
+++ b/alembic/versions/aac569c672de_added_in_artic_information.py
@@ -0,0 +1,63 @@
 """added in artic information
 Revision ID: aac569c672de
 Revises: 64fec6271a50
 Create Date: 2023-06-02 15:14:13.726489
 """
 from alembic import op
 import sqlalchemy as sa
 from sqlalchemy.dialects import sqlite
 # revision identifiers, used by Alembic.
 revision = 'aac569c672de'
 down_revision = '64fec6271a50'
 branch_labels = None
 depends_on = None
 def upgrade() -> None:
    """
    Drop the ``_alembic_tmp__submissions`` table.

    The creation of a dedicated ``_artic_samples`` table is commented out below,
    so dropping the temporary table is the only statement this revision executes.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    # Disabled: a standalone artic sample table is not created by this revision.
    # op.create_table('_artic_samples',
    # sa.Column('id', sa.INTEGER(), nullable=False),
    # sa.Column('well_number', sa.String(length=8), nullable=True),
    # sa.Column('rsl_plate_id', sa.INTEGER(), nullable=True),
    # sa.Column('ww_sample_full_id', sa.String(length=64), nullable=False),
    # sa.Column('lims_sample_id', sa.String(length=64), nullable=False),
    # sa.Column('ct_1', sa.FLOAT(precision=2), nullable=True),
    # sa.Column('ct_2', sa.FLOAT(precision=2), nullable=True),
    # sa.ForeignKeyConstraint(['rsl_plate_id'], ['_submissions.id'], name='fk_WWA_submission_id', ondelete='SET NULL'),
    # sa.PrimaryKeyConstraint('id')
    # )
    # NOTE(review): the `_alembic_tmp_` prefix suggests a leftover from an interrupted batch
    # migration - TODO confirm. Presumably this fails on a database where the table is absent.
    op.drop_table('_alembic_tmp__submissions')
    # ### end Alembic commands ###
 def downgrade() -> None:
    """
    Recreate the ``_alembic_tmp__submissions`` table dropped by ``upgrade``.

    The table is rebuilt with the submission columns, foreign keys to ``_kits``,
    ``_reagents`` and ``_organizations``, and unique constraints on both plate numbers.
    The matching drop of ``_artic_samples`` is commented out.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    # NOTE(review): this restores what looks like a temporary Alembic table - confirm it is needed.
    op.create_table('_alembic_tmp__submissions',
    sa.Column('id', sa.INTEGER(), nullable=False),
    sa.Column('rsl_plate_num', sa.VARCHAR(length=32), nullable=False),
    sa.Column('submitter_plate_num', sa.VARCHAR(length=127), nullable=True),
    sa.Column('submitted_date', sa.TIMESTAMP(), nullable=True),
    sa.Column('submitting_lab_id', sa.INTEGER(), nullable=True),
    sa.Column('sample_count', sa.INTEGER(), nullable=True),
    sa.Column('extraction_kit_id', sa.INTEGER(), nullable=True),
    sa.Column('submission_type', sa.VARCHAR(length=32), nullable=True),
    sa.Column('technician', sa.VARCHAR(length=64), nullable=True),
    sa.Column('reagents_id', sa.VARCHAR(), nullable=True),
    sa.Column('extraction_info', sqlite.JSON(), nullable=True),
    sa.Column('run_cost', sa.FLOAT(), nullable=True),
    sa.Column('uploaded_by', sa.VARCHAR(length=32), nullable=True),
    sa.Column('pcr_info', sqlite.JSON(), nullable=True),
    sa.Column('comment', sqlite.JSON(), nullable=True),
    sa.ForeignKeyConstraint(['extraction_kit_id'], ['_kits.id'], ondelete='SET NULL'),
    sa.ForeignKeyConstraint(['reagents_id'], ['_reagents.id'], ondelete='SET NULL'),
    sa.ForeignKeyConstraint(['submitting_lab_id'], ['_organizations.id'], ondelete='SET NULL'),
    sa.PrimaryKeyConstraint('id'),
    sa.UniqueConstraint('rsl_plate_num'),
    sa.UniqueConstraint('submitter_plate_num')
    )
    # Disabled to mirror the commented-out create_table in upgrade().
    # op.drop_table('_artic_samples')
    # ### end Alembic commands ###
--- a/src/submissions/init.py
+++ b/src/submissions/init.py
@@ -4,7 +4,7 @@ from pathlib import Path
 # Version of the submissions package
 __project__ = "submissions"
-__version__ = "202305.4b"
+__version__ = "202306.1b"
 __author__ = {"name":"Landon Wark", "email":"Landon.Wark@phac-aspc.gc.ca"}
 __copyright__ = "2022-2023, Government of Canada"
@@ -28,3 +28,7 @@ class bcolors:
 # Second, you will have to update the model in backend.db.models.submissions and provide a new polymorph to the BasicSubmission object.
 # The BSO should hold the majority of the general info.
 # You can also update any of the parsers to pull out any custom info you need, like enforcing RSL plate numbers, scraping PCR results, etc.
 # Landon, this is your slightly less past self here. For the most part, Past Landon has not screwed us. I've been able to add in the
 # Wastewater Artic with minimal difficulties, except that the parser of the non-standard, user-generated excel sheets required slightly
 # more work.
--- a/src/submissions/backend/db/functions.py
+++ b/src/submissions/backend/db/functions.py
@@ -21,6 +21,7 @@ import numpy as np
 import yaml
 from pathlib import Path
 logger = logging.getLogger(f"submissions.{__name__}")
 # The below _should_ allow automatic creation of foreign keys in the database
@@ -41,12 +42,19 @@ def store_submission(ctx:dict, base_submission:models.BasicSubmission) -> None|d
    Returns:
        None|dict : object that indicates issue raised for reporting in gui
    """    
-    from tools import format_rsl_number
+    from tools import RSLNamer
    logger.debug(f"Hello from store_submission")
    # Add all samples to sample table
-    base_submission.rsl_plate_num = format_rsl_number(base_submission.rsl_plate_num)
+    typer = RSLNamer(base_submission.rsl_plate_num)
    base_submission.rsl_plate_num = typer.parsed_name
    for sample in base_submission.samples:
        logger.debug(f"Typer: {typer.submission_type}")
        # Suuuuuper hacky way to be sure that the artic doesn't overwrite the ww plate in a ww sample
        # need something more elegant
        if "_artic" not in typer.submission_type:
            sample.rsl_plate = base_submission
        else:
            sample.artic_rsl_plate = base_submission
        logger.debug(f"Attempting to add sample: {sample.to_string()}")
        try:
            ctx['database_session'].add(sample)
@@ -152,7 +160,7 @@ def construct_submission_info(ctx:dict, info_dict:dict) -> models.BasicSubmissio
                # Because of unique constraint, there will be problems with 
                # multiple submissions named 'None', so...
                logger.debug(f"Submitter plate id: {info_dict[item]}")
-                if info_dict[item] == None or info_dict[item] == "None":
+                if info_dict[item] == None or info_dict[item] == "None" or info_dict[item] == "":
                    logger.debug(f"Got None as a submitter plate number, inserting random string to preserve database unique constraint.")
                    info_dict[item] = uuid.uuid4().hex.upper()
                field_value = info_dict[item]
@@ -170,8 +178,6 @@ def construct_submission_info(ctx:dict, info_dict:dict) -> models.BasicSubmissio
        # ceil(instance.sample_count / 8) will get number of columns
        # the cost of a full run multiplied by (that number / 12) is x twelfths the cost of a full run
        logger.debug(f"Calculating costs for procedure...")
        # cols_count = ceil(int(instance.sample_count) / 8)
        # instance.run_cost = instance.extraction_kit.constant_cost + (instance.extraction_kit.mutable_cost * (cols_count / 12))
        instance.calculate_base_cost()
    except (TypeError, AttributeError) as e:
        logger.debug(f"Looks like that kit doesn't have cost breakdown yet due to: {e}, using full plate cost.")
@@ -471,7 +477,7 @@ def create_kit_from_yaml(ctx:dict, exp:dict) -> dict:
    Returns:
        dict: a dictionary containing results of db addition
    """    
-    from tools import check_is_power_user
+    from tools import check_is_power_user, massage_common_reagents
    # Don't want just anyone adding kits
    if not check_is_power_user(ctx=ctx):
        logger.debug(f"{getuser()} does not have permission to add kits.")
@@ -491,6 +497,7 @@ def create_kit_from_yaml(ctx:dict, exp:dict) -> dict:
            # A kit contains multiple reagent types.
            for r in exp[type]['kits'][kt]['reagenttypes']:
                # check if reagent type already exists.
                r = massage_common_reagents(r)
                look_up = ctx['database_session'].query(models.ReagentType).filter(models.ReagentType.name==r).first()
                if look_up == None:
                    rt = models.ReagentType(name=r.replace(" ", "_").lower(), eol_ext=timedelta(30*exp[type]['kits'][kt]['reagenttypes'][r]['eol_ext']), kits=[kit])
@@ -689,7 +696,10 @@ def delete_submission_by_id(ctx:dict, id:int) -> None:
        pass
    sub.reagents = []
    for sample in sub.samples:
        if sample.rsl_plate == sub:
            ctx['database_session'].delete(sample)
        else:
            logger.warning(f"Not deleting sample {sample.ww_sample_full_id} because it belongs to another plate.")
    ctx["database_session"].delete(sub)
    ctx["database_session"].commit()
@@ -706,6 +716,19 @@ def lookup_ww_sample_by_rsl_sample_number(ctx:dict, rsl_number:str) -> models.WW
    """    
    return ctx['database_session'].query(models.WWSample).filter(models.WWSample.rsl_number==rsl_number).first()
 def lookup_ww_sample_by_ww_sample_num(ctx:dict, sample_number:str) -> models.WWSample:
    """
    Fetches the first wastewater sample whose full wastewater id equals the given sample number

    Args:
        ctx (dict): settings passed down from gui
        sample_number (str): sample number assigned by wastewater

    Returns:
        models.WWSample: first wastewater sample returned by the query
    """
    session = ctx['database_session']
    # match on the id assigned by wastewater, not on the rsl number
    matching_samples = session.query(models.WWSample).filter(models.WWSample.ww_sample_full_id==sample_number)
    return matching_samples.first()
 def lookup_ww_sample_by_sub_sample_rsl(ctx:dict, sample_rsl:str, plate_rsl:str) -> models.WWSample:
    """
    Retrieves a wastewater sample from the database by its rsl sample number and parent rsl plate number.
@@ -775,7 +798,6 @@ def lookup_discounts_by_org_and_kit(ctx:dict, kit_id:int, lab_id:int):
        models.Organization.id==lab_id
        )).all()
 def hitpick_plate(submission:models.BasicSubmission, plate_number:int=0) -> list:
    plate_dicto = []
    for sample in submission.samples:
--- a/src/submissions/backend/db/models/init.py
+++ b/src/submissions/backend/db/models/init.py
@@ -10,4 +10,4 @@ from .controls import Control, ControlType
 from .kits import KitType, ReagentType, Reagent, Discount
 from .organizations import Organization, Contact
 from .samples import WWSample, BCSample
-from .submissions import BasicSubmission, BacterialCulture, Wastewater
+from .submissions import BasicSubmission, BacterialCulture, Wastewater, WastewaterArtic
--- a/src/submissions/backend/db/models/samples.py
+++ b/src/submissions/backend/db/models/samples.py
@@ -37,6 +37,8 @@ class WWSample(Base):
    sample_type = Column(String(8))
    pcr_results = Column(JSON)
    elution_well = Column(String(8)) #: location on 96 well plate
    artic_rsl_plate = relationship("WastewaterArtic", back_populates="samples")
    artic_well_number = Column(String(8))
    def to_string(self) -> str:
@@ -131,3 +133,41 @@ class BCSample(Base):
            "well": self.well_number,
            "name": f"{self.sample_id} - ({self.organism})",
        }
 # class ArticSample(Base):
 #     """
 #     base of artic sample
 #     """    
 #     __tablename__ = "_artic_samples"
 #     id = Column(INTEGER, primary_key=True) #: primary key
 #     well_number = Column(String(8)) #: location on parent plate
 #     rsl_plate = relationship("WastewaterArtic", back_populates="samples") #: relationship to parent plate
 #     rsl_plate_id = Column(INTEGER, ForeignKey("_submissions.id", ondelete="SET NULL", name="fk_WWA_submission_id"))
 #     ww_sample_full_id = Column(String(64), nullable=False)
 #     lims_sample_id = Column(String(64), nullable=False)
 #     ct_1 = Column(FLOAT(2)) #: first ct value in column
 #     ct_2 = Column(FLOAT(2)) #: second ct value in column
 #     def to_string(self) -> str:
 #         """
 #         string representing sample object
 #         Returns:
 #             str: string representing location and sample id
 #         """        
 #         return f"{self.well_number}: {self.ww_sample_full_id}"
 #     def to_sub_dict(self) -> dict:
 #         """
 #         gui friendly dictionary
 #         Returns:
 #             dict: well location and name (sample id, organism) NOTE: keys must sync with WWSample to_sub_dict above
 #         """
 #         return {
 #             "well": self.well_number,
 #             "name": self.ww_sample_full_id,
 #         }
--- a/src/submissions/backend/db/models/submissions.py
+++ b/src/submissions/backend/db/models/submissions.py
@@ -161,7 +161,16 @@ class BasicSubmission(Base):
        }
        return output
-    
+    def calculate_base_cost(self):
        try:
            cols_count_96 = ceil(int(self.sample_count) / 8)
        except Exception as e:
            logger.error(f"Column count error: {e}")
        # cols_count_24 = ceil(int(self.sample_count) / 3)
        try:
            self.run_cost = self.extraction_kit.constant_cost + (self.extraction_kit.mutable_cost_column * cols_count_96) + (self.extraction_kit.mutable_cost_sample * int(self.sample_count))
        except Exception as e:
            logger.error(f"Calculation error: {e}")
 # Below are the custom submission types
@@ -185,16 +194,16 @@ class  BacterialCulture(BasicSubmission):
        return output
-    def calculate_base_cost(self):
+    # def calculate_base_cost(self):
-        try:
+    #     try:
-            cols_count_96 = ceil(int(self.sample_count) / 8)
+    #         cols_count_96 = ceil(int(self.sample_count) / 8)
-        except Exception as e:
+    #     except Exception as e:
-            logger.error(f"Column count error: {e}")
+    #         logger.error(f"Column count error: {e}")
-        # cols_count_24 = ceil(int(self.sample_count) / 3)
+    #     # cols_count_24 = ceil(int(self.sample_count) / 3)
-        try:
+    #     try:
-            self.run_cost = self.extraction_kit.constant_cost + (self.extraction_kit.mutable_cost_column * cols_count_96) + (self.extraction_kit.mutable_cost_sample * int(self.sample_count))
+    #         self.run_cost = self.extraction_kit.constant_cost + (self.extraction_kit.mutable_cost_column * cols_count_96) + (self.extraction_kit.mutable_cost_sample * int(self.sample_count))
-        except Exception as e:
+    #     except Exception as e:
-            logger.error(f"Calculation error: {e}")
+    #         logger.error(f"Calculation error: {e}")
 class Wastewater(BasicSubmission):
@@ -220,14 +229,22 @@ class Wastewater(BasicSubmission):
            pass
        return output
-    def calculate_base_cost(self):
+    # def calculate_base_cost(self):
-        try:
+    #     try:
-            cols_count_96 = ceil(int(self.sample_count) / 8) + 1 #: Adding in one column to account for 24 samples + ext negatives
+    #         cols_count_96 = ceil(int(self.sample_count) / 8) + 1 #: Adding in one column to account for 24 samples + ext negatives
-        except Exception as e:
+    #     except Exception as e:
-            logger.error(f"Column count error: {e}")
+    #         logger.error(f"Column count error: {e}")
-        # cols_count_24 = ceil(int(self.sample_count) / 3)
+    #     # cols_count_24 = ceil(int(self.sample_count) / 3)
-        try:
+    #     try:
-            self.run_cost = self.extraction_kit.constant_cost + (self.extraction_kit.mutable_cost_column * cols_count_96) + (self.extraction_kit.mutable_cost_sample * int(self.sample_count))
+    #         self.run_cost = self.extraction_kit.constant_cost + (self.extraction_kit.mutable_cost_column * cols_count_96) + (self.extraction_kit.mutable_cost_sample * int(self.sample_count))
-        except Exception as e:
+    #     except Exception as e:
-            logger.error(f"Calculation error: {e}")
+    #         logger.error(f"Calculation error: {e}")
 class WastewaterArtic(BasicSubmission):
    """
    derivative submission type for artic wastewater

    Mapped as the "wastewater_artic" polymorphic identity of BasicSubmission and
    loaded inline with the base query.
    """    
    # Wastewater samples placed on this artic plate; the reverse side is WWSample.artic_rsl_plate.
    # NOTE(review): no artic-specific foreign key column on the sample table is visible in
    # this change - verify how this relationship is joined.
    samples = relationship("WWSample", back_populates="artic_rsl_plate", uselist=True)
    # Can I use the pcr_info from the wastewater? Because I can't define pcr_info here due to conflicts with that
    __mapper_args__ = {"polymorphic_identity": "wastewater_artic", "polymorphic_load": "inline"}
--- a/src/submissions/backend/excel/parser.py
+++ b/src/submissions/backend/excel/parser.py
@@ -2,18 +2,19 @@
 contains parser object for pulling values from client generated submission sheets.
 '''
 from getpass import getuser
 import math
 from typing import Tuple
 import pandas as pd
 from pathlib import Path
 from backend.db.models import WWSample, BCSample
-# from backend.db import lookup_ww_sample_by_rsl_sample_number
+from backend.db import lookup_ww_sample_by_ww_sample_num
 import logging
 from collections import OrderedDict
 import re
 import numpy as np
 from datetime import date, datetime
 import uuid
-from tools import check_not_nan, RSLNamer
+from tools import check_not_nan, RSLNamer, massage_common_reagents
 logger = logging.getLogger(f"submissions.{__name__}")
@@ -21,20 +22,22 @@ class SheetParser(object):
    """
    object to pull and contain data from excel file
    """
-    def __init__(self, filepath:Path|None = None, **kwargs):
+    def __init__(self, ctx:dict, filepath:Path|None = None):
        """
        Args:
            filepath (Path | None, optional): file path to excel sheet. Defaults to None.
        """
        self.ctx = ctx
        logger.debug(f"Parsing {filepath.__str__()}")
        # set attributes based on kwargs from gui ctx
-        for kwarg in kwargs:
+        # for kwarg in kwargs:
-            setattr(self, f"_{kwarg}", kwargs[kwarg])
+        #     setattr(self, f"_{kwarg}", kwargs[kwarg])
        # self.__dict__.update(kwargs)
        if filepath == None:
            logger.error(f"No filepath given.")
            self.xl = None
        else:
            self.filepath = filepath
            try:
                self.xl = pd.ExcelFile(filepath.__str__())
            except ValueError as e:
@@ -55,8 +58,8 @@ class SheetParser(object):
            str: submission type name
        """        
        try:
-            for type in self._submission_types:
+            for type in self.ctx['submission_types']:
-                if self.xl.sheet_names == self._submission_types[type]['excel_map']:
+                if self.xl.sheet_names == self.ctx['submission_types'][type]['excel_map']:
                    return type.title()
            return "Unknown"
        except Exception as e:
@@ -74,7 +77,7 @@ class SheetParser(object):
    def parse_generic(self, sheet_name:str) -> pd.DataFrame:
        """
-        Pulls information common to all submission types and passes on dataframe
+        Pulls information common to all wastewater/bacterial culture types and passes on dataframe
        Args:
            sheet_name (str): name of excel worksheet to pull from
@@ -107,8 +110,6 @@ class SheetParser(object):
            """            
            for ii, row in df.iterrows():
                # skip positive control
                # if ii == 12:
                #     continue
                logger.debug(f"Running reagent parse for {row[1]} with type {type(row[1])} and value: {row[2]} with type {type(row[2])}")
                if not isinstance(row[2], float) and check_not_nan(row[1]):
                    # must be prefixed with 'lot_' to be recognized by gui
@@ -156,7 +157,7 @@ class SheetParser(object):
        logger.debug(reagent_range)
        parse_reagents(reagent_range)
        # get individual sample info
-        sample_parser = SampleParser(submission_info.iloc[16:112])
+        sample_parser = SampleParser(self.ctx, submission_info.iloc[16:112])
        sample_parse = getattr(sample_parser, f"parse_{self.sub['submission_type'].lower()}_samples")
        logger.debug(f"Parser result: {self.sub}")
        self.sub['samples'] = sample_parse()
@@ -181,6 +182,7 @@ class SheetParser(object):
                    # regex below will remove 80% from 80% ethanol in the Wastewater kit.
                    output_key = re.sub(r"^\d{1,3}%\s?", "", row[0].lower().strip().replace(' ', '_'))
                    output_key = output_key.strip("_")
                    # output_var is the lot number
                    try:
                        output_var = row[5].upper()
                    except AttributeError:
@@ -214,24 +216,97 @@ class SheetParser(object):
        parse_reagents(ext_reagent_range)
        parse_reagents(pcr_reagent_range)
        # parse samples
-        sample_parser = SampleParser(submission_info.iloc[16:])
+        sample_parser = SampleParser(self.ctx, submission_info.iloc[16:])
        sample_parse = getattr(sample_parser, f"parse_{self.sub['submission_type'].lower()}_samples")
        self.sub['samples'] = sample_parse()
        self.sub['csv'] = self.xl.parse("Copy to import file", dtype=object)
    def parse_wastewater_artic(self) -> None:
        """
        pulls info specific to wastewater_artic submission type

        Reads the "First Strand" and "ArticV4 Biomek" worksheets and fills self.sub with
        plate details, reagent lots (keys prefixed with 'lot_') and the list of samples.
        """
        def parse_reagents(df:pd.DataFrame) -> None:
            """
            Stores one 'lot_<reagent>' entry in self.sub per usable row of the reagent range.

            Args:
                df (pd.DataFrame): rows holding reagent name, lot and expiry in columns 0, 1 and 2
            """
            logger.debug(df)
            for ii, row in df.iterrows():
                if check_not_nan(row[0]):
                    try:
                        # lower-case, underscore-separated name with any "(...)" text removed
                        output_key = re.sub(r"\(.+?\)", "", row[0].lower().strip().replace(' ', '_'))
                    except AttributeError:
                        # reagent name is not a string, skip the row
                        continue
                    output_key = output_key.strip("_")
                    output_key = massage_common_reagents(output_key)
                    # output_var is the lot number
                    try:
                        output_var = row[1].upper()
                    except AttributeError:
                        logger.debug(f"Couldn't upperize {row[1]}, must be a number")
                        output_var = row[1]
                    logger.debug(f"Output variable is {output_var}")
                    logger.debug(f"Expiry date for imported reagent: {row[2]}")
                    if check_not_nan(row[2]):
                        try:
                            expiry = row[2].date()
                        except AttributeError as e:
                            # no .date() method, so try an ISO formatted string next
                            # NOTE(review): strptime yields a datetime while the other branches yield a date
                            try:
                                expiry = datetime.strptime(row[2], "%Y-%m-%d")
                            except TypeError as e:
                                # not a string either: value is used as a day count from 1900-01-01
                                # (presumably an Excel serial date - TODO confirm the -2 offset)
                                expiry = datetime.fromordinal(datetime(1900, 1, 1).toordinal() + row[2] - 2)
                            except ValueError as e:
                                # string in another format: the whole reagent row is dropped
                                continue
                    else:
                        logger.debug(f"Date: {row[2]}")
                        # no expiry given, fall back to today
                        expiry = date.today()
                    self.sub[f"lot_{output_key}"] = {'lot':output_var, 'exp':expiry}
                else:
                    continue
        def massage_samples(df:pd.DataFrame) -> list[dict]:
            """
            Flattens the plate map grid into one record per occupied well.

            Args:
                df (pd.DataFrame): plate map with row labels in the first column and column labels in the first row

            Returns:
                list[dict]: records with keys sample_name, well and artic_plate
            """
            # first column becomes the row labels, first row becomes the column labels
            df.set_index(df.columns[0], inplace=True)
            df.columns = df.iloc[0]
            logger.debug(f"df to massage\n: {df}")
            return_list = []
            # NOTE(review): only the row label (ii.name) of the sliced frame is used; the cells are
            # read from the full df for every column label, and int(c) raises on a non-numeric label.
            for _, ii in df.iloc[1:,1:].iterrows():
                for c in df.columns.to_list():
                    logger.debug(f"Checking {ii.name}{c}")
                    # keep wells that are neither blank nor marked "EMPTY"; "(...)" suffix is cut from the name
                    if check_not_nan(df.loc[ii.name, int(c)]) and df.loc[ii.name, int(c)] != "EMPTY":
                        return_list.append(dict(sample_name=re.sub(r"\s?\(.*\)", "", df.loc[ii.name, int(c)]), \
                                                well=f"{ii.name}{c}",
                                                artic_plate=self.sub['rsl_plate_num']))
            logger.debug(f"massaged sample list for {self.sub['rsl_plate_num']}: {return_list}")
            return return_list
        submission_info = self.xl.parse("First Strand", dtype=object)
        biomek_info = self.xl.parse("ArticV4 Biomek", dtype=object)
        # reagent tables sit at the bottom of each sheet, fully blank rows are discarded
        sub_reagent_range = submission_info.iloc[56:, 1:4].dropna(how='all')
        biomek_reagent_range = biomek_info.iloc[60:, 0:3].dropna(how='all')
        # this parser sets no submitter plate number
        self.sub['submitter_plate_num'] = ""
        # plate name is derived from the file path
        self.sub['rsl_plate_num'] =  RSLNamer(self.filepath.__str__()).parsed_name
        self.sub['submitted_date'] = submission_info.iloc[0][2]
        # lab and kit are fixed values for this submission type
        self.sub['submitting_lab'] = "Enterics Wastewater Genomics"
        self.sub['sample_count'] = submission_info.iloc[4][6]
        self.sub['extraction_kit'] = "ArticV4.1"
        self.sub['technician'] = f"MM: {biomek_info.iloc[2][1]}, Bio: {biomek_info.iloc[3][1]}"
        parse_reagents(sub_reagent_range)
        parse_reagents(biomek_reagent_range)
        # plate map is taken from rows 22-30 of the biomek sheet
        samples = massage_samples(biomek_info.iloc[22:31, 0:])
        sample_parser = SampleParser(self.ctx, pd.DataFrame.from_records(samples))
        # dispatch to the SampleParser method named after the submission type
        sample_parse = getattr(sample_parser, f"parse_{self.sub['submission_type'].lower()}_samples")
        self.sub['samples'] = sample_parse()
 class SampleParser(object):
    """
    object to pull data for samples in excel sheet and construct individual sample objects
    """
-    def __init__(self, df:pd.DataFrame) -> None:
+    def __init__(self, ctx:dict, df:pd.DataFrame) -> None:
        """
        convert sample sub-dataframe to dictionary of records
        Args:
            df (pd.DataFrame): input sample dataframe
        """        
        self.ctx = ctx
        self.samples = df.to_dict("records")
@@ -296,6 +371,29 @@ class SampleParser(object):
            new_list.append(new)
        return new_list
    def parse_wastewater_artic_samples(self) -> list[WWSample]:
        """
        The artic samples are the wastewater samples that are to be sequenced
        So we will need to lookup existing ww samples and append Artic well # and plate relation

        Samples without a match in the database are logged as errors and left out of the result.

        Returns:
            list[WWSample]: list of wastewater samples to be updated
        """        
        new_list = []
        for sample in self.samples:
            sample_name = sample['sample_name']
            # look the sample up with autoflush switched off on the session for the duration of the query
            with self.ctx['database_session'].no_autoflush:
                instance = lookup_ww_sample_by_ww_sample_num(ctx=self.ctx, sample_number=sample_name)
            logger.debug(f"Checking: {sample_name}")
            # identity check: a missing sample comes back as None
            if instance is None:
                logger.error(f"Unable to find match for: {sample_name}")
                continue
            logger.debug(f"Got instance: {instance.ww_sample_full_id}")
            # only the artic well is set here, the existing record is otherwise untouched
            instance.artic_well_number = sample['well']
            new_list.append(instance)
        return new_list
 class PCRParser(object):
    """
--- a/src/submissions/frontend/custom_widgets/sub_details.py
+++ b/src/submissions/frontend/custom_widgets/sub_details.py
@@ -423,7 +423,7 @@ class SubmissionComment(QDialog):
    def add_comment(self):
        commenter = getuser()
        comment = self.txt_editor.toPlainText()
-        dt = datetime.strftime(datetime.now(), "%Y-%m-d %H:%M:%S")
+        dt = datetime.strftime(datetime.now(), "%Y-%m-%d %H:%M:%S")
        full_comment = {"name":commenter, "time": dt, "text": comment}
        logger.debug(f"Full comment: {full_comment}")
        sub = lookup_submission_by_rsl_num(ctx = self.ctx, rsl_num=self.rsl)
--- a/src/submissions/frontend/main_window_functions.py
+++ b/src/submissions/frontend/main_window_functions.py
@@ -53,7 +53,7 @@ def import_submission_function(obj:QMainWindow) -> Tuple[QMainWindow, dict|None]
        return obj, result
    # create sheetparser using excel sheet and context from gui
    try:
-        prsr = SheetParser(fname, **obj.ctx)
+        prsr = SheetParser(ctx=obj.ctx, filepath=fname)
    except PermissionError:
        logger.error(f"Couldn't get permission to access file: {fname}")
        return
--- a/src/submissions/templates/submission_details.html
+++ b/src/submissions/templates/submission_details.html
@@ -94,7 +94,7 @@
        {% endfor %}</p>
        {% endif %}
        {% if sub['platemap'] %}
-        <h3><u>>Plate map:</u></h3>
+        <h3><u>Plate map:</u></h3>
        <img height="300px" width="650px" src="data:image/jpeg;base64,{{ sub['platemap'] | safe }}">
        {% endif %}
    </body>
--- a/src/submissions/tools/init.py
+++ b/src/submissions/tools/init.py
@@ -83,7 +83,10 @@ def check_kit_integrity(sub:BasicSubmission|KitType, reagenttypes:list|None=None
        case BasicSubmission():
            ext_kit_rtypes = [reagenttype.name for reagenttype in sub.extraction_kit.reagent_types]
            # Overwrite function parameter reagenttypes
            try:
                reagenttypes = [reagent.type.name for reagent in sub.reagents]
            except AttributeError as e:
                logger.error(f"Problem parsing reagents: {[f'{reagent.lot}, {reagent.type}' for reagent in sub.reagents]}")
        case KitType():
            ext_kit_rtypes = [reagenttype.name for reagenttype in sub.reagent_types]
    logger.debug(f"Kit reagents: {ext_kit_rtypes}")
@@ -191,9 +194,12 @@ class RSLNamer(object):
            self.submission_type = None
            return
        logger.debug(f"Attempting match of {in_str}")
        print(f"The initial plate name is: {in_str}")
        regex = re.compile(r"""
-            (?P<wastewater>RSL(?:-|_)?WW(?:-|_)?20\d{2}-?\d{2}-?\d{2}(?:(?:_|-)\d?(?!\d)R?\d(?!\d))?)|
+            # (?P<wastewater>RSL(?:-|_)?WW(?:-|_)?20\d{2}-?\d{2}-?\d{2}(?:(?:_|-)\d?((?!\d)|R)?\d(?!\d))?)|
-            (?P<bacterial_culture>RSL-?\d{2}-?\d{4})
+            (?P<wastewater>RSL(?:-|_)?WW(?:-|_)?20\d{2}-?\d{2}-?\d{2}(?:(_|-)\d?(\D|$)R?\d?)?)|
            (?P<bacterial_culture>RSL-?\d{2}-?\d{4})|
            (?P<wastewater_artic>(\d{4}-\d{2}-\d{2}_(?:\d_)?artic)|(RSL(?:-|_)?AR(?:-|_)?20\d{2}-?\d{2}-?\d{2}(?:(_|-)\d?(\D|$)R?\d?)?))
            """, flags = re.IGNORECASE | re.VERBOSE)
        m = regex.search(in_str)
        try:
@@ -212,6 +218,25 @@ class RSLNamer(object):
        self.parsed_name = self.parsed_name.replace("RSLWW", "RSL-WW")
        self.parsed_name = re.sub(r"WW(\d{4})", r"WW-\1", self.parsed_name, flags=re.IGNORECASE)
        self.parsed_name = re.sub(r"(\d{4})-(\d{2})-(\d{2})", r"\1\2\3", self.parsed_name)
        print(f"Coming out of the preliminary parsing, the plate name is {self.parsed_name}")
        try:
            plate_number = re.search(r"(?:(-|_)\d)(?!\d)", self.parsed_name).group().strip("_").strip("-")
            print(f"Plate number is: {plate_number}")
        except AttributeError as e:
            plate_number = "1"
        # self.parsed_name = re.sub(r"(\d{8})(-|_\d)?(R\d)?", fr"\1-{plate_number}\3", self.parsed_name)
        self.parsed_name = re.sub(r"(\d{8})(-|_)?\d?(R\d?)?", rf"\1-{plate_number}\3", self.parsed_name)
        print(f"After addition of plate number the plate name is: {self.parsed_name}")
        try:
            repeat = re.search(r"-\dR(?P<repeat>\d)?", self.parsed_name).groupdict()['repeat']
            if repeat == None:
                repeat = "1"
        except AttributeError as e:
            repeat = ""
        self.parsed_name = re.sub(r"(-\dR)\d?", rf"\1 {repeat}", self.parsed_name).replace(" ", "")
    def enforce_bacterial_culture(self):
        """
@@ -220,3 +245,19 @@ class RSLNamer(object):
        self.parsed_name = re.sub(r"RSL(\d{2})", r"RSL-\1", self.parsed_name, flags=re.IGNORECASE)
        self.parsed_name = re.sub(r"RSL-(\d{2})(\d{4})", r"RSL-\1-\2", self.parsed_name, flags=re.IGNORECASE)
    def enforce_wastewater_artic(self):
        """
        Uses regex to enforce proper formatting of wastewater artic plate names.

        Rewrites self.parsed_name in place: the dashed date is collapsed and given a single
        "RSL-AR-" prefix, and a trailing "_<n>_ARTIC" / "_ARTIC" is replaced by "-<n>",
        where the plate number defaults to 1.
        """
        # The optional leading group swallows a prefix that is already there, so a name
        # given as RSL-AR-YYYY-MM-DD does not end up with the prefix twice.
        self.parsed_name = re.sub(r"(?:RSL[-_]?AR[-_]?)?(\d{4})-(\d{2})-(\d{2})", r"RSL-AR-\1\2\3", self.parsed_name, flags=re.IGNORECASE)
        # A plate number is a single digit between underscores; requiring the digit means
        # a bare "__" falls back to the default instead of failing on int("").
        plate_match = re.search(r"_(\d)_", self.parsed_name)
        plate_number = int(plate_match.group(1)) if plate_match else 1
        self.parsed_name = re.sub(r"(_\d)?_ARTIC", f"-{plate_number}", self.parsed_name)
 def massage_common_reagents(reagent_name:str) -> str:
    """
    Normalizes reagent names that are written differently from sheet to sheet.

    Any name that ends in "water" or mentions "H2O" (in any letter case) becomes
    "molecular_grade_water"; micro signs are replaced with a plain "u".

    Args:
        reagent_name (str): reagent name as scraped from a sheet or kit definition

    Returns:
        str: normalized reagent name
    """
    logger.debug(f"Attempting to massage {reagent_name}")
    # Compare on a lower-cased copy: parser keys arrive already lower-cased, so an
    # upper-case-only "H2O" test would never match them.
    lowered = reagent_name.lower()
    if lowered.endswith("water") or "h2o" in lowered:
        reagent_name = "molecular_grade_water"
    # The micro sign (U+00B5) and the Greek letter mu (U+03BC) look identical; handle both.
    for micro in ("µ", "\u00b5", "\u03bc"):
        reagent_name = reagent_name.replace(micro, "u")
    return reagent_name