Better flexibility with parsers pulling methods from database objects.

This commit is contained in:
Landon Wark
2023-10-17 15:16:34 -05:00
parent 0a90542e8e
commit 39b94405e5
14 changed files with 296 additions and 610 deletions

View File

@@ -1,6 +1,6 @@
## 202310.03 ## 202310.03
- Replaced RSLNamer class with Submission object specific class methods. - Better flexibility with parsers pulling methods from database objects.
## 202310.02 ## 202310.02

View File

@@ -1,6 +1,6 @@
- [ ] Convert Pydantic models to Submission models? - [ ] Validate form data using pydantic.
- [x] Move RSLNamer into Submission database object. - [x] Rebuild RSLNamer and fix circular imports
- Having second thoughts about some of this. Move into parser module? - Should be used when coming in to parser and when leaving form. NO OTHER PLACES.
- [x] Change 'check_is_power_user' to decorator. - [x] Change 'check_is_power_user' to decorator.
- [x] Drag and drop files into submission form area? - [x] Drag and drop files into submission form area?
- [ ] Get info for controls into their sample hitpicks. - [ ] Get info for controls into their sample hitpicks.

View File

@@ -1,3 +1,3 @@
''' '''
Contains database, pydantic and excel operations. Contains database, validators and excel operations.
''' '''

View File

@@ -1,12 +1,9 @@
''' '''
Used to construct models from input dictionaries. Used to construct models from input dictionaries.
''' '''
from getpass import getuser
from tools import Settings, check_regex_match, check_authorization, massage_common_reagents from tools import Settings, check_regex_match, check_authorization, massage_common_reagents
from .. import models from .. import models
# from .misc import RSLNamer
# from backend.namer import RSLNamer
# from .misc import get_polymorphic_subclass
from .lookups import * from .lookups import *
import logging import logging
from datetime import date, timedelta from datetime import date, timedelta
@@ -73,10 +70,10 @@ def construct_submission_info(ctx:Settings, info_dict:dict) -> Tuple[models.Basi
instance = None instance = None
msg = "A proper RSL plate number is required." msg = "A proper RSL plate number is required."
return instance, {'code': 2, 'message': "A proper RSL plate number is required."} return instance, {'code': 2, 'message': "A proper RSL plate number is required."}
else: # else:
# enforce conventions on the rsl plate number from the form # # enforce conventions on the rsl plate number from the form
# info_dict['rsl_plate_num'] = RSLNamer(ctx=ctx, instr=info_dict["rsl_plate_num"]).parsed_name # # info_dict['rsl_plate_num'] = RSLNamer(ctx=ctx, instr=info_dict["rsl_plate_num"]).parsed_name
info_dict['rsl_plate_num'] = model.RSLNamer(ctx=ctx, instr=info_dict["rsl_plate_num"], sub_type=info_dict['submission_type']).parsed_name # info_dict['rsl_plate_num'] = RSLNamer(ctx=ctx, instr=info_dict["rsl_plate_num"], sub_type=info_dict['submission_type']).parsed_name
# check database for existing object # check database for existing object
instance = lookup_submissions(ctx=ctx, rsl_number=info_dict['rsl_plate_num']) instance = lookup_submissions(ctx=ctx, rsl_number=info_dict['rsl_plate_num'])
# get model based on submission type converted above # get model based on submission type converted above

View File

@@ -155,10 +155,13 @@ def lookup_submissions(ctx:Settings,
chronologic:bool=False, limit:int=0, chronologic:bool=False, limit:int=0,
**kwargs **kwargs
) -> models.BasicSubmission | List[models.BasicSubmission]: ) -> models.BasicSubmission | List[models.BasicSubmission]:
if rsl_number == None: if submission_type == None:
model = models.BasicSubmission.find_subclasses(ctx=ctx, attrs=kwargs) model = models.BasicSubmission.find_subclasses(ctx=ctx, attrs=kwargs)
else: else:
model = models.BasicSubmission.find_subclasses(ctx=ctx, rsl_number=rsl_number) if isinstance(submission_type, models.SubmissionType):
model = models.BasicSubmission.find_subclasses(ctx=ctx, submission_type=submission_type.name)
else:
model = models.BasicSubmission.find_subclasses(ctx=ctx, submission_type=submission_type)
query = setup_lookup(ctx=ctx, locals=locals()).query(model) query = setup_lookup(ctx=ctx, locals=locals()).query(model)
# by submission type # by submission type
match submission_type: match submission_type:
@@ -211,14 +214,6 @@ def lookup_submissions(ctx:Settings,
# by rsl number (returns only a single value) # by rsl number (returns only a single value)
match rsl_number: match rsl_number:
case str(): case str():
namer = model.RSLNamer(ctx=ctx, instr=rsl_number)
logger.debug(f"Looking up BasicSubmission with rsl number: {rsl_number}")
try:
rsl_number = namer.parsed_name
logger.debug(f"Got {rsl_number} from {model}.")
except AttributeError as e:
logger.error(f"No parsed name found, returning None.")
return None
# query = query.filter(models.BasicSubmission.rsl_plate_num==rsl_number) # query = query.filter(models.BasicSubmission.rsl_plate_num==rsl_number)
query = query.filter(model.rsl_plate_num==rsl_number) query = query.filter(model.rsl_plate_num==rsl_number)
logger.debug(f"At this point the query gets: {query.all()}") logger.debug(f"At this point the query gets: {query.all()}")

View File

@@ -298,68 +298,21 @@ class BasicSubmission(Base):
""" """
return input_excel return input_excel
class _RSLNamer(object): @classmethod
def enforce_name(cls, ctx:Settings, instr:str) -> str:
alias = None logger.debug(f"Hello from {cls.__mapper_args__['polymorphic_identity']} Enforcer!")
return instr
def __init__(self, ctx:Settings, instr:str|Path, sub_type:str|None=None, parent=None):
if parent != None:
logger.debug(f"Hello from {parent.__mapper_args__['polymorphic_identity']} Namer!")
self.ctx = ctx
self.submission_type = sub_type
self.retrieve_rsl_number(instr=instr)
try:
ncls = [item for item in self.__class__.__subclasses__() if item.alias == self.submission_type][0]
enforcer = ncls.enforce_name
enforcer(self=self, parent=parent)
except IndexError:
enforcer = self.enforce_name
enforcer(parent=parent)
def retrieve_rsl_number(self, instr:str|Path):
"""
Uses regex to retrieve the plate number and submission type from an input string
Args:
instr (str | Path): string or path to be parsed
"""
if not isinstance(instr, Path):
instr = Path(instr)
self.out_str = instr.stem
logger.debug(f"Attempting match of {self.out_str}")
logger.debug(f"The initial plate name is: {self.out_str}")
regex = self.construct_regex()
m = regex.search(self.out_str)
if m != None:
self.parsed_name = m.group().upper().strip(".")
logger.debug(f"Got parsed submission name: {self.parsed_name}")
if self.submission_type == None:
try:
self.submission_type = m.lastgroup.replace("_", " ")
except AttributeError as e:
self.submission_type = None
def enforce_name(self, parent):
if parent != None:
logger.debug(f"Hello from {parent.__mapper_args__['polymorphic_identity']} Enforcer!")
self.parsed_name = self.parsed_name
@classmethod
def construct_regex(cls):
rstring = rf'{"|".join([item.get_regex() for item in cls.__subclasses__()])}'
regex = re.compile(rstring, flags = re.IGNORECASE | re.VERBOSE)
return regex
@classmethod @classmethod
def RSLNamer(cls, ctx:Settings, instr:str, sub_type:str|None=None): def construct_regex(cls):
return cls._RSLNamer(parent=cls, ctx=ctx, instr=instr, sub_type=sub_type) rstring = rf'{"|".join([item.get_regex() for item in cls.__subclasses__()])}'
regex = re.compile(rstring, flags = re.IGNORECASE | re.VERBOSE)
return regex
@classmethod @classmethod
def find_subclasses(cls, ctx:Settings, attrs:dict|None=None, rsl_number:str|None=None): def find_subclasses(cls, ctx:Settings, attrs:dict|None=None, submission_type:str|None=None):
if rsl_number != None: if submission_type != None:
namer = cls._RSLNamer(ctx=ctx, instr=rsl_number) return cls.find_polymorphic_subclass(submission_type)
return cls.find_polymorphic_subclass(namer.submission_type)
if len(attrs) == 0 or attrs == None: if len(attrs) == 0 or attrs == None:
return cls return cls
if any([not hasattr(cls, attr) for attr in attrs]): if any([not hasattr(cls, attr) for attr in attrs]):
@@ -386,6 +339,11 @@ class BasicSubmission(Base):
logger.error(f"Could not get polymorph {polymorphic_identity} of {cls} due to {e}") logger.error(f"Could not get polymorph {polymorphic_identity} of {cls} due to {e}")
return cls return cls
@classmethod
def parse_pcr(cls, xl:pd.DataFrame, rsl_number:str) -> list:
logger.debug(f"Hello from {cls.__mapper_args__['polymorphic_identity']} PCR parser!")
return []
# Below are the custom submission types # Below are the custom submission types
class BacterialCulture(BasicSubmission): class BacterialCulture(BasicSubmission):
@@ -450,58 +408,49 @@ class BacterialCulture(BasicSubmission):
input_excel["Sample List"].cell(row=15, column=2, value=getuser()[0:2].upper()) input_excel["Sample List"].cell(row=15, column=2, value=getuser()[0:2].upper())
return input_excel return input_excel
class _RSLNamer(BasicSubmission._RSLNamer): @classmethod
def enforce_name(cls, ctx:Settings, instr:str) -> str:
outstr = super().enforce_name(ctx=ctx, instr=instr)
def construct(ctx) -> str:
"""
DEPRECATED due to slowness. Search for the largest rsl number and increment by 1
alias = "Bacterial Culture" Returns:
str: new RSL number
"""
logger.debug(f"Attempting to construct RSL number from scratch...")
# directory = Path(self.ctx['directory_path']).joinpath("Bacteria")
directory = Path(ctx.directory_path).joinpath("Bacteria")
year = str(datetime.now().year)[-2:]
if directory.exists():
logger.debug(f"Year: {year}")
relevant_rsls = []
all_xlsx = [item.stem for item in directory.rglob("*.xlsx") if bool(re.search(r"RSL-\d{2}-\d{4}", item.stem)) and year in item.stem[4:6]]
logger.debug(f"All rsls: {all_xlsx}")
for item in all_xlsx:
try:
relevant_rsls.append(re.match(r"RSL-\d{2}-\d{4}", item).group(0))
except Exception as e:
logger.error(f"Regex error: {e}")
continue
logger.debug(f"Initial xlsx: {relevant_rsls}")
max_number = max([int(item[-4:]) for item in relevant_rsls])
logger.debug(f"The largest sample number is: {max_number}")
return f"RSL-{year}-{str(max_number+1).zfill(4)}"
else:
# raise FileNotFoundError(f"Unable to locate the directory: {directory.__str__()}")
return f"RSL-{year}-0000"
try:
outstr = re.sub(r"RSL(\d{2})", r"RSL-\1", outstr, flags=re.IGNORECASE)
except (AttributeError, TypeError) as e:
outstr = construct(ctx=ctx)
# year = datetime.now().year
# self.parsed_name = f"RSL-{str(year)[-2:]}-0000"
return re.sub(r"RSL-(\d{2})(\d{4})", r"RSL-\1-\2", outstr, flags=re.IGNORECASE)
@classmethod @classmethod
def construct_regex(cls): def get_regex(cls):
rstring = rf'{cls.get_regex()}' return "(?P<Bacterial_Culture>RSL-?\\d{2}-?\\d{4})"
regex = re.compile(rstring, flags = re.IGNORECASE | re.VERBOSE)
return regex
def enforce_name(self, parent):
# super().enforce_name(parent)
def construct(ctx) -> str:
"""
DEPRECATED due to slowness. Search for the largest rsl number and increment by 1
Returns:
str: new RSL number
"""
logger.debug(f"Attempting to construct RSL number from scratch...")
# directory = Path(self.ctx['directory_path']).joinpath("Bacteria")
directory = Path(ctx.directory_path).joinpath("Bacteria")
year = str(datetime.now().year)[-2:]
if directory.exists():
logger.debug(f"Year: {year}")
relevant_rsls = []
all_xlsx = [item.stem for item in directory.rglob("*.xlsx") if bool(re.search(r"RSL-\d{2}-\d{4}", item.stem)) and year in item.stem[4:6]]
logger.debug(f"All rsls: {all_xlsx}")
for item in all_xlsx:
try:
relevant_rsls.append(re.match(r"RSL-\d{2}-\d{4}", item).group(0))
except Exception as e:
logger.error(f"Regex error: {e}")
continue
logger.debug(f"Initial xlsx: {relevant_rsls}")
max_number = max([int(item[-4:]) for item in relevant_rsls])
logger.debug(f"The largest sample number is: {max_number}")
return f"RSL-{year}-{str(max_number+1).zfill(4)}"
else:
# raise FileNotFoundError(f"Unable to locate the directory: {directory.__str__()}")
return f"RSL-{year}-0000"
try:
self.parsed_name = re.sub(r"RSL(\d{2})", r"RSL-\1", self.parsed_name, flags=re.IGNORECASE)
except AttributeError as e:
self.parsed_name = construct(ctx=self.ctx)
# year = datetime.now().year
# self.parsed_name = f"RSL-{str(year)[-2:]}-0000"
self.parsed_name = re.sub(r"RSL-(\d{2})(\d{4})", r"RSL-\1-\2", self.parsed_name, flags=re.IGNORECASE)
@classmethod
def get_regex(cls):
return "(?P<Bacterial_Culture>RSL-?\\d{2}-?\\d{4})"
class Wastewater(BasicSubmission): class Wastewater(BasicSubmission):
""" """
@@ -543,49 +492,78 @@ class Wastewater(BasicSubmission):
input_dict['csv'] = xl.parse("Copy to import file") input_dict['csv'] = xl.parse("Copy to import file")
return input_dict return input_dict
class _RSLNamer(BasicSubmission._RSLNamer): @classmethod
def parse_pcr(cls, xl: pd.ExcelFile, rsl_number:str) -> list:
alias = "Wastewater" """
Parse specific to wastewater samples.
@classmethod """
def construct_regex(cls): samples = super().parse_pcr(xl=xl, rsl_number=rsl_number)
rstring = rf'{cls.get_regex()}' df = xl.parse(sheet_name="Results", dtype=object).fillna("")
regex = re.compile(rstring, flags = re.IGNORECASE | re.VERBOSE) column_names = ["Well", "Well Position", "Omit","Sample","Target","Task"," Reporter","Quencher","Amp Status","Amp Score","Curve Quality","Result Quality Issues","Cq","Cq Confidence","Cq Mean","Cq SD","Auto Threshold","Threshold", "Auto Baseline", "Baseline Start", "Baseline End"]
return regex samples_df = df.iloc[23:][0:]
logger.debug(f"Dataframe of PCR results:\n\t{samples_df}")
def enforce_name(self, parent): samples_df.columns = column_names
# super().enforce_name(parent) logger.debug(f"Samples columns: {samples_df.columns}")
def construct(): well_call_df = xl.parse(sheet_name="Well Call").iloc[24:][0:].iloc[:,-1:]
today = datetime.now() try:
return f"RSL-WW-{today.year}{str(today.month).zfill(2)}{str(today.day).zfill(2)}" samples_df['Assessment'] = well_call_df.values
except ValueError:
logger.error("Well call number doesn't match sample number")
logger.debug(f"Well call df: {well_call_df}")
for ii, row in samples_df.iterrows():
try: try:
self.parsed_name = re.sub(r"PCR(-|_)", "", self.parsed_name) sample_obj = [sample for sample in samples if sample['sample'] == row[3]][0]
except AttributeError as e: except IndexError:
logger.error(f"Problem using regex: {e}") sample_obj = dict(
self.parsed_name = construct() sample = row['Sample'],
self.parsed_name = self.parsed_name.replace("RSLWW", "RSL-WW") plate_rsl = rsl_number,
self.parsed_name = re.sub(r"WW(\d{4})", r"WW-\1", self.parsed_name, flags=re.IGNORECASE) )
self.parsed_name = re.sub(r"(\d{4})-(\d{2})-(\d{2})", r"\1\2\3", self.parsed_name) logger.debug(f"Got sample obj: {sample_obj}")
logger.debug(f"Coming out of the preliminary parsing, the plate name is {self.parsed_name}") if isinstance(row['Cq'], float):
sample_obj[f"ct_{row['Target'].lower()}"] = row['Cq']
else:
sample_obj[f"ct_{row['Target'].lower()}"] = 0.0
try: try:
plate_number = re.search(r"(?:(-|_)\d)(?!\d)", self.parsed_name).group().strip("_").strip("-") sample_obj[f"{row['Target'].lower()}_status"] = row['Assessment']
logger.debug(f"Plate number is: {plate_number}") except KeyError:
except AttributeError as e: logger.error(f"No assessment for {sample_obj['sample']}")
plate_number = "1" samples.append(sample_obj)
# self.parsed_name = re.sub(r"(\d{8})(-|_\d)?(R\d)?", fr"\1-{plate_number}\3", self.parsed_name) return samples
self.parsed_name = re.sub(r"(\d{8})(-|_)?\d?(R\d?)?", rf"\1-{plate_number}\3", self.parsed_name)
logger.debug(f"After addition of plate number the plate name is: {self.parsed_name}")
try:
repeat = re.search(r"-\dR(?P<repeat>\d)?", self.parsed_name).groupdict()['repeat']
if repeat == None:
repeat = "1"
except AttributeError as e:
repeat = ""
self.parsed_name = re.sub(r"(-\dR)\d?", rf"\1 {repeat}", self.parsed_name).replace(" ", "")
@classmethod @classmethod
def get_regex(cls): def enforce_name(cls, ctx:Settings, instr:str) -> str:
return "(?P<Wastewater>RSL(?:-|_)?WW(?:-|_)?20\d{2}-?\d{2}-?\d{2}(?:(_|-)\d?(\D|$)R?\d?)?)" outstr = super().enforce_name(ctx=ctx, instr=instr)
def construct():
today = datetime.now()
return f"RSL-WW-{today.year}{str(today.month).zfill(2)}{str(today.day).zfill(2)}"
try:
outstr = re.sub(r"PCR(-|_)", "", outstr)
except AttributeError as e:
logger.error(f"Problem using regex: {e}")
outstr = construct()
outstr = outstr.replace("RSLWW", "RSL-WW")
outstr = re.sub(r"WW(\d{4})", r"WW-\1", outstr, flags=re.IGNORECASE)
outstr = re.sub(r"(\d{4})-(\d{2})-(\d{2})", r"\1\2\3", outstr)
logger.debug(f"Coming out of the preliminary parsing, the plate name is {outstr}")
try:
plate_number = re.search(r"(?:(-|_)\d)(?!\d)", outstr).group().strip("_").strip("-")
logger.debug(f"Plate number is: {plate_number}")
except AttributeError as e:
plate_number = "1"
# self.parsed_name = re.sub(r"(\d{8})(-|_\d)?(R\d)?", fr"\1-{plate_number}\3", self.parsed_name)
outstr = re.sub(r"(\d{8})(-|_)?\d?(R\d?)?", rf"\1-{plate_number}\3", outstr)
logger.debug(f"After addition of plate number the plate name is: {outstr}")
try:
repeat = re.search(r"-\dR(?P<repeat>\d)?", outstr).groupdict()['repeat']
if repeat == None:
repeat = "1"
except AttributeError as e:
repeat = ""
return re.sub(r"(-\dR)\d?", rf"\1 {repeat}", outstr).replace(" ", "")
@classmethod
def get_regex(cls):
return "(?P<Wastewater>RSL(?:-|_)?WW(?:-|_)?20\d{2}-?\d{2}-?\d{2}(?:(_|-)\d?(\D|$)R?\d?)?)"
class WastewaterArtic(BasicSubmission): class WastewaterArtic(BasicSubmission):
""" """
@@ -630,34 +608,25 @@ class WastewaterArtic(BasicSubmission):
input_dict['submitter_id'] = re.sub(r"\s\(.+\)$", "", str(input_dict['submitter_id'])).strip() input_dict['submitter_id'] = re.sub(r"\s\(.+\)$", "", str(input_dict['submitter_id'])).strip()
return input_dict return input_dict
class _RSLNamer(BasicSubmission._RSLNamer): @classmethod
def enforce_name(cls, ctx:Settings, instr:str) -> str:
outstr = super().enforce_name(ctx=ctx, instr=instr)
def construct():
today = datetime.now()
return f"RSL-AR-{today.year}{str(today.month).zfill(2)}{str(today.day).zfill(2)}"
try:
outstr = re.sub(r"(\d{4})-(\d{2})-(\d{2})", r"RSL-AR-\1\2\3", outstr, flags=re.IGNORECASE)
except AttributeError:
outstr = construct()
try:
plate_number = int(re.search(r"_|-\d?_", outstr).group().strip("_").strip("-"))
except (AttributeError, ValueError) as e:
plate_number = 1
return re.sub(r"(_|-\d)?_ARTIC", f"-{plate_number}", outstr)
alias = "Wastewater Artic" @classmethod
def get_regex(cls):
@classmethod return "(?P<Wastewater_Artic>(\\d{4}-\\d{2}-\\d{2}(?:-|_)(?:\\d_)?artic)|(RSL(?:-|_)?AR(?:-|_)?20\\d{2}-?\\d{2}-?\\d{2}(?:(_|-)\\d?(\\D|$)R?\\d?)?))"
def construct_regex(cls):
rstring = rf'{cls.get_regex()}'
regex = re.compile(rstring, flags = re.IGNORECASE | re.VERBOSE)
return regex
def enforce_name(self, parent):
# super().enforce_name(parent)
def construct():
today = datetime.now()
return f"RSL-AR-{today.year}{str(today.month).zfill(2)}{str(today.day).zfill(2)}"
try:
self.parsed_name = re.sub(r"(\d{4})-(\d{2})-(\d{2})", r"RSL-AR-\1\2\3", self.parsed_name, flags=re.IGNORECASE)
except AttributeError:
self.parsed_name = construct()
try:
plate_number = int(re.search(r"_|-\d?_", self.parsed_name).group().strip("_").strip("-"))
except (AttributeError, ValueError) as e:
plate_number = 1
self.parsed_name = re.sub(r"(_|-\d)?_ARTIC", f"-{plate_number}", self.parsed_name)
@classmethod
def get_regex(cls):
return "(?P<Wastewater_Artic>(\\d{4}-\\d{2}-\\d{2}(?:-|_)(?:\\d_)?artic)|(RSL(?:-|_)?AR(?:-|_)?20\\d{2}-?\\d{2}-?\\d{2}(?:(_|-)\\d?(\\D|$)R?\\d?)?))"
class BasicSample(Base): class BasicSample(Base):
""" """
@@ -677,7 +646,7 @@ class BasicSample(Base):
) )
__mapper_args__ = { __mapper_args__ = {
"polymorphic_identity": "basic_sample", "polymorphic_identity": "Basic Sample",
# "polymorphic_on": sample_type, # "polymorphic_on": sample_type,
"polymorphic_on": case( "polymorphic_on": case(
[ [
@@ -685,7 +654,7 @@ class BasicSample(Base):
(sample_type == "Wastewater Artic Sample", "Wastewater Sample"), (sample_type == "Wastewater Artic Sample", "Wastewater Sample"),
(sample_type == "Bacterial Culture Sample", "Bacterial Culture Sample"), (sample_type == "Bacterial Culture Sample", "Bacterial Culture Sample"),
], ],
else_="basic_sample" else_="Basic Sample"
), ),
"with_polymorphic": "*", "with_polymorphic": "*",
} }
@@ -863,7 +832,6 @@ class WastewaterSample(BasicSample):
except IndexError: except IndexError:
return None return None
class BacterialCultureSample(BasicSample): class BacterialCultureSample(BasicSample):
""" """
base of bacterial culture sample base of bacterial culture sample

View File

@@ -7,77 +7,19 @@ from typing import List
import pandas as pd import pandas as pd
from pathlib import Path from pathlib import Path
from backend.db import models, lookup_kit_types, lookup_submission_type, lookup_samples from backend.db import models, lookup_kit_types, lookup_submission_type, lookup_samples
from backend.pydant import PydSubmission, PydReagent from backend.validators import PydSheetSubmission, PydSheetReagent, RSLNamer
import logging import logging
from collections import OrderedDict from collections import OrderedDict
import re import re
from datetime import date from datetime import date
from dateutil.parser import parse, ParserError from dateutil.parser import parse, ParserError
from tools import check_not_nan, convert_nans_to_nones, Settings from tools import check_not_nan, convert_nans_to_nones, Settings
# from backend.namer import RSLNamer from frontend.custom_widgets.pop_ups import KitSelector
from frontend.custom_widgets.pop_ups import SubmissionTypeSelector, KitSelector
logger = logging.getLogger(f"submissions.{__name__}") logger = logging.getLogger(f"submissions.{__name__}")
row_keys = dict(A=1, B=2, C=3, D=4, E=5, F=6, G=7, H=8) row_keys = dict(A=1, B=2, C=3, D=4, E=5, F=6, G=7, H=8)
class RSLNamer(object):
"""
Object that will enforce proper formatting on RSL plate names.
NOTE: Deprecated in favour of object based methods in 'submissions.py'
"""
def __init__(self, ctx, instr:str, sub_type:str|None=None):
self.ctx = ctx
self.submission_type = sub_type
self.retrieve_rsl_number(in_str=instr)
if self.submission_type != None:
# custom_enforcer = get_polymorphic_subclass(BasicSubmission, self.submission_type).enforce_naming_schema
parser = getattr(self, f"enforce_{self.submission_type.replace(' ', '_').lower()}")
parser()
self.parsed_name = self.parsed_name.replace("_", "-")
def retrieve_rsl_number(self, in_str:str|Path):
"""
Uses regex to retrieve the plate number and submission type from an input string
Args:
in_str (str): string to be parsed
"""
if not isinstance(in_str, Path):
in_str = Path(in_str)
self.out_str = in_str.stem
logger.debug(f"Attempting match of {self.out_str}")
logger.debug(f"The initial plate name is: {self.out_str}")
# regex = re.compile(r"""
# # (?P<wastewater>RSL(?:-|_)?WW(?:-|_)?20\d{2}-?\d{2}-?\d{2}(?:(?:_|-)\d?((?!\d)|R)?\d(?!\d))?)|
# (?P<wastewater>RSL(?:-|_)?WW(?:-|_)?20\d{2}-?\d{2}-?\d{2}(?:(_|-)\d?(\D|$)R?\d?)?)|
# (?P<bacterial_culture>RSL-?\d{2}-?\d{4})|
# (?P<wastewater_artic>(\d{4}-\d{2}-\d{2}(?:-|_)(?:\d_)?artic)|(RSL(?:-|_)?AR(?:-|_)?20\d{2}-?\d{2}-?\d{2}(?:(_|-)\d?(\D|$)R?\d?)?))
# """, flags = re.IGNORECASE | re.VERBOSE)
regex = models.BasicSubmission.RSLNamer.construct_regex()
m = regex.search(self.out_str)
if m != None:
self.parsed_name = m.group().upper().strip(".")
logger.debug(f"Got parsed submission name: {self.parsed_name}")
if self.submission_type == None:
try:
self.submission_type = m.lastgroup
except AttributeError as e:
logger.critical("No RSL plate number found or submission type found!")
logger.debug(f"The cause of the above error was: {e}")
logger.warning(f"We're going to have to create the submission type from the excel sheet properties...")
if in_str.exists():
my_xl = pd.ExcelFile(in_str)
if my_xl.book.properties.category != None:
categories = [item.strip().title() for item in my_xl.book.properties.category.split(";")]
self.submission_type = categories[0].replace(" ", "_").lower()
else:
raise AttributeError(f"File {in_str.__str__()} has no categories.")
else:
raise FileNotFoundError()
# else:
# raise ValueError(f"No parsed name could be created for {self.out_str}.")
class SheetParser(object): class SheetParser(object):
""" """
object to pull and contain data from excel file object to pull and contain data from excel file
@@ -90,20 +32,22 @@ class SheetParser(object):
""" """
self.ctx = ctx self.ctx = ctx
logger.debug(f"Parsing {filepath.__str__()}") logger.debug(f"Parsing {filepath.__str__()}")
if filepath == None: match filepath:
logger.error(f"No filepath given.") case Path():
self.xl = None self.filepath = filepath
else: case str():
self.filepath = filepath self.filepath = Path(filepath)
# Open excel file case _:
try: logger.error(f"No filepath given.")
self.xl = pd.ExcelFile(filepath) raise ValueError("No filepath given.")
except ValueError as e: try:
logger.error(f"Incorrect value: {e}") self.xl = pd.ExcelFile(filepath)
self.xl = None except ValueError as e:
logger.error(f"Incorrect value: {e}")
raise FileNotFoundError(f"Couldn't parse file {self.filepath}")
self.sub = OrderedDict() self.sub = OrderedDict()
# make decision about type of sample we have # make decision about type of sample we have
self.sub['submission_type'] = self.type_decider() self.sub['submission_type'] = dict(value=RSLNamer.retrieve_submission_type(ctx=self.ctx, instr=self.filepath), parsed=False)
# # grab the info map from the submission type in database # # grab the info map from the submission type in database
self.parse_info() self.parse_info()
self.import_kit_validation_check() self.import_kit_validation_check()
@@ -111,57 +55,11 @@ class SheetParser(object):
self.import_reagent_validation_check() self.import_reagent_validation_check()
self.parse_samples() self.parse_samples()
def type_decider(self) -> str:
"""
makes decisions about submission type based on structure of excel file
Returns:
str: submission type name
"""
# Check metadata for category, return first category
if self.xl.book.properties.category != None:
logger.debug("Using file properties to find type...")
categories = [item.strip().replace("_", " ").title() for item in self.xl.book.properties.category.split(";")]
return dict(value=categories[0], parsed=False)
else:
# This code is going to be deprecated once there is full adoption of the client sheets
# with updated metadata... but how will it work for Artic?
# sub = get_polymorphic_subclass()
try:
logger.debug(f"Attempting to match file name regex")
namer = models.BasicSubmission.RSLNamer(ctx=self.ctx, instr=self.filepath)
return namer.submission_type
except Exception as e:
logger.error(f"Unable to find file name regex match")
logger.debug("Using excel map to find type...")
try:
for type in self.ctx.submission_types:
# This gets the *first* submission type that matches the sheet names in the workbook
if self.xl.sheet_names == self.ctx.submission_types[type]['excel_map']:
return dict(value=type.title(), parsed=False)
return "Unknown"
except Exception as e:
logger.warning(f"We were unable to parse the submission type due to: {e}")
# return "Unknown"
dlg = SubmissionTypeSelector(ctx=self.ctx, title="Select Submission Type", message="We were unable to find the submission type from the excel metadata. Please select from below.")
if dlg.exec():
return dict(value=dlg.getValues(), parsed=False)
else:
logger.warning(f"Last attempt at getting submission was rejected.")
raise ValueError("Submission Type needed.")
def parse_info(self): def parse_info(self):
""" """
Pulls basic information from the excel sheet Pulls basic information from the excel sheet
""" """
info = InfoParser(ctx=self.ctx, xl=self.xl, submission_type=self.sub['submission_type']['value']).parse_info() info = InfoParser(ctx=self.ctx, xl=self.xl, submission_type=self.sub['submission_type']['value']).parse_info()
# parser_query = f"parse_{self.sub['submission_type']['value'].replace(' ', '_').lower()}"
# custom_parser = getattr(self, parser_query)
# except AttributeError:
# logger.error(f"Couldn't find submission parser: {parser_query}")
for k,v in info.items(): for k,v in info.items():
match k: match k:
case "sample": case "sample":
@@ -215,7 +113,7 @@ class SheetParser(object):
logger.debug(f"List of reagents for comparison with allowed_reagents: {pprint.pformat(self.sub['reagents'])}") logger.debug(f"List of reagents for comparison with allowed_reagents: {pprint.pformat(self.sub['reagents'])}")
self.sub['reagents'] = [reagent for reagent in self.sub['reagents'] if reagent['value'].type in allowed_reagents] self.sub['reagents'] = [reagent for reagent in self.sub['reagents'] if reagent['value'].type in allowed_reagents]
def to_pydantic(self) -> PydSubmission: def to_pydantic(self) -> PydSheetSubmission:
""" """
Generates a pydantic model of scraped data for validation Generates a pydantic model of scraped data for validation
@@ -223,7 +121,7 @@ class SheetParser(object):
PydSubmission: output pydantic model PydSubmission: output pydantic model
""" """
logger.debug(f"Submission dictionary coming into 'to_pydantic':\n{pprint.pformat(self.sub)}") logger.debug(f"Submission dictionary coming into 'to_pydantic':\n{pprint.pformat(self.sub)}")
psm = PydSubmission(ctx=self.ctx, filepath=self.filepath, **self.sub) psm = PydSheetSubmission(ctx=self.ctx, filepath=self.filepath, **self.sub)
delattr(psm, "filepath") delattr(psm, "filepath")
return psm return psm
@@ -249,11 +147,9 @@ class InfoParser(object):
if isinstance(submission_type, str): if isinstance(submission_type, str):
submission_type = dict(value=submission_type, parsed=False) submission_type = dict(value=submission_type, parsed=False)
logger.debug(f"Looking up submission type: {submission_type['value']}") logger.debug(f"Looking up submission type: {submission_type['value']}")
# submission_type = lookup_submissiontype_by_name(ctx=self.ctx, type_name=submission_type['value'])
submission_type = lookup_submission_type(ctx=self.ctx, name=submission_type['value']) submission_type = lookup_submission_type(ctx=self.ctx, name=submission_type['value'])
info_map = submission_type.info_map info_map = submission_type.info_map
# Get the parse_info method from the submission type specified # Get the parse_info method from the submission type specified
# self.custom_parser = get_polymorphic_subclass(models.BasicSubmission, submission_type.name).parse_info
self.custom_parser = models.BasicSubmission.find_polymorphic_subclass(polymorphic_identity=submission_type.name).parse_info self.custom_parser = models.BasicSubmission.find_polymorphic_subclass(polymorphic_identity=submission_type.name).parse_info
return info_map return info_map
@@ -301,8 +197,6 @@ class InfoParser(object):
check = False check = False
return self.custom_parser(input_dict=dicto, xl=self.xl) return self.custom_parser(input_dict=dicto, xl=self.xl)
class ReagentParser(object): class ReagentParser(object):
def __init__(self, ctx:Settings, xl:pd.ExcelFile, submission_type:str, extraction_kit:str): def __init__(self, ctx:Settings, xl:pd.ExcelFile, submission_type:str, extraction_kit:str):
@@ -335,7 +229,7 @@ class ReagentParser(object):
lot = df.iat[relevant[item]['lot']['row']-1, relevant[item]['lot']['column']-1] lot = df.iat[relevant[item]['lot']['row']-1, relevant[item]['lot']['column']-1]
expiry = df.iat[relevant[item]['expiry']['row']-1, relevant[item]['expiry']['column']-1] expiry = df.iat[relevant[item]['expiry']['row']-1, relevant[item]['expiry']['column']-1]
except (KeyError, IndexError): except (KeyError, IndexError):
listo.append(dict(value=PydReagent(type=item.strip(), lot=None, exp=None, name=None), parsed=False)) listo.append(dict(value=PydSheetReagent(type=item.strip(), lot=None, exp=None, name=None), parsed=False))
continue continue
if check_not_nan(lot): if check_not_nan(lot):
parsed = True parsed = True
@@ -343,7 +237,7 @@ class ReagentParser(object):
parsed = False parsed = False
logger.debug(f"Got lot for {item}-{name}: {lot} as {type(lot)}") logger.debug(f"Got lot for {item}-{name}: {lot} as {type(lot)}")
lot = str(lot) lot = str(lot)
listo.append(dict(value=PydReagent(type=item.strip(), lot=lot, exp=expiry, name=name), parsed=parsed)) listo.append(dict(value=PydSheetReagent(type=item.strip(), lot=lot, exp=expiry, name=name), parsed=parsed))
logger.debug(f"Returning listo: {listo}") logger.debug(f"Returning listo: {listo}")
return listo return listo
@@ -516,12 +410,7 @@ class SampleParser(object):
except KeyError: except KeyError:
translated_dict[k] = convert_nans_to_nones(v) translated_dict[k] = convert_nans_to_nones(v)
translated_dict['sample_type'] = f"{self.submission_type} Sample" translated_dict['sample_type'] = f"{self.submission_type} Sample"
# parser_query = f"parse_{translated_dict['sample_type'].replace(' ', '_').lower()}"
# try:
# custom_parser = getattr(self, parser_query)
translated_dict = self.custom_parser(translated_dict) translated_dict = self.custom_parser(translated_dict)
# except AttributeError:
# logger.error(f"Couldn't get custom parser: {parser_query}")
if generate: if generate:
new_samples.append(self.generate_sample_object(translated_dict)) new_samples.append(self.generate_sample_object(translated_dict))
else: else:
@@ -558,65 +447,6 @@ class SampleParser(object):
logger.debug(f"Sample {instance.submitter_id} already exists, will run update.") logger.debug(f"Sample {instance.submitter_id} already exists, will run update.")
return dict(sample=instance, row=input_dict['row'], column=input_dict['column']) return dict(sample=instance, row=input_dict['row'], column=input_dict['column'])
# def parse_bacterial_culture_sample(self, input_dict:dict) -> dict:
# """
# Update sample dictionary with bacterial culture specific information
# Args:
# input_dict (dict): Input sample dictionary
# Returns:
# dict: Updated sample dictionary
# """
# logger.debug("Called bacterial culture sample parser")
# return input_dict
# def parse_wastewater_sample(self, input_dict:dict) -> dict:
# """
# Update sample dictionary with wastewater specific information
# Args:
# input_dict (dict): Input sample dictionary
# Returns:
# dict: Updated sample dictionary
# """
# logger.debug(f"Called wastewater sample parser")
# return input_dict
# def parse_wastewater_artic_sample(self, input_dict:dict) -> dict:
# """
# Update sample dictionary with artic specific information
# Args:
# input_dict (dict): Input sample dictionary
# Returns:
# dict: Updated sample dictionary
# """
# logger.debug("Called wastewater artic sample parser")
# input_dict['sample_type'] = "Wastewater Sample"
# # Because generate_sample_object needs the submitter_id and the artic has the "({origin well})"
# # at the end, this has to be done here. No moving to sqlalchemy object :(
# input_dict['submitter_id'] = re.sub(r"\s\(.+\)$", "", str(input_dict['submitter_id'])).strip()
# return input_dict
# def parse_first_strand_sample(self, input_dict:dict) -> dict:
# """
# Update sample dictionary with first strand specific information
# Args:
# input_dict (dict): Input sample dictionary
# Returns:
# dict: Updated sample dictionary
# """
# logger.debug("Called first strand sample parser")
# input_dict['well'] = re.search(r"\s\((.*)\)$", input_dict['submitter_id']).groups()[0]
# input_dict['submitter_id'] = re.sub(r"\s\(.*\)$", "", str(input_dict['submitter_id'])).strip()
# return input_dict
def grab_plates(self) -> List[str]: def grab_plates(self) -> List[str]:
""" """
Parse plate names from Parse plate names from
@@ -628,7 +458,7 @@ class SampleParser(object):
for plate in self.plates: for plate in self.plates:
df = self.xl.parse(plate['sheet'], header=None) df = self.xl.parse(plate['sheet'], header=None)
if isinstance(df.iat[plate['row']-1, plate['column']-1], str): if isinstance(df.iat[plate['row']-1, plate['column']-1], str):
output = models.BasicSubmission.RSLNamer(ctx=self.ctx, instr=df.iat[plate['row']-1, plate['column']-1]).parsed_name output = RSLNamer.retrieve_rsl_number(ctx=self.ctx, instr=df.iat[plate['row']-1, plate['column']-1])
else: else:
continue continue
plates.append(output) plates.append(output)
@@ -637,7 +467,6 @@ class SampleParser(object):
class PCRParser(object): class PCRParser(object):
""" """
Object to pull data from Design and Analysis PCR export file. Object to pull data from Design and Analysis PCR export file.
TODO: Generify this object.
""" """
def __init__(self, ctx:dict, filepath:Path|None = None) -> None: def __init__(self, ctx:dict, filepath:Path|None = None) -> None:
""" """
@@ -662,15 +491,13 @@ class PCRParser(object):
logger.error(f"Couldn't get permissions for {filepath.__str__()}. Operation might have been cancelled.") logger.error(f"Couldn't get permissions for {filepath.__str__()}. Operation might have been cancelled.")
return return
# self.pcr = OrderedDict() # self.pcr = OrderedDict()
self.pcr = {} self.parse_general(sheet_name="Results")
namer = models.BasicSubmission.RSLNamer(ctx=self.ctx, instr=filepath.__str__()) namer = RSLNamer(ctx=self.ctx, instr=filepath.__str__())
self.plate_num = namer.parsed_name self.plate_num = namer.parsed_name
self.submission_type = namer.submission_type self.submission_type = namer.submission_type
logger.debug(f"Set plate number to {self.plate_num} and type to {self.submission_type}") logger.debug(f"Set plate number to {self.plate_num} and type to {self.submission_type}")
self.samples = [] parser = models.BasicSubmission.find_polymorphic_subclass(self.submission_type)
parser = getattr(self, f"parse_{self.submission_type}") self.samples = parser.parse_pcr(xl=self.xl, rsl_number=self.plate_num)
parser()
def parse_general(self, sheet_name:str): def parse_general(self, sheet_name:str):
""" """
@@ -679,6 +506,7 @@ class PCRParser(object):
Args: Args:
sheet_name (str): Name of sheet in excel workbook that holds info. sheet_name (str): Name of sheet in excel workbook that holds info.
""" """
self.pcr = {}
df = self.xl.parse(sheet_name=sheet_name, dtype=object).fillna("") df = self.xl.parse(sheet_name=sheet_name, dtype=object).fillna("")
self.pcr['comment'] = df.iloc[0][1] self.pcr['comment'] = df.iloc[0][1]
self.pcr['operator'] = df.iloc[1][1] self.pcr['operator'] = df.iloc[1][1]
@@ -702,42 +530,5 @@ class PCRParser(object):
self.pcr['plugin'] = df.iloc[19][1] self.pcr['plugin'] = df.iloc[19][1]
self.pcr['exported_on'] = df.iloc[20][1] self.pcr['exported_on'] = df.iloc[20][1]
self.pcr['imported_by'] = getuser() self.pcr['imported_by'] = getuser()
return df
def parse_Wastewater(self):
"""
Parse specific to wastewater samples.
"""
df = self.parse_general(sheet_name="Results")
column_names = ["Well", "Well Position", "Omit","Sample","Target","Task"," Reporter","Quencher","Amp Status","Amp Score","Curve Quality","Result Quality Issues","Cq","Cq Confidence","Cq Mean","Cq SD","Auto Threshold","Threshold", "Auto Baseline", "Baseline Start", "Baseline End"]
self.samples_df = df.iloc[23:][0:]
logger.debug(f"Dataframe of PCR results:\n\t{self.samples_df}")
self.samples_df.columns = column_names
logger.debug(f"Samples columns: {self.samples_df.columns}")
well_call_df = self.xl.parse(sheet_name="Well Call").iloc[24:][0:].iloc[:,-1:]
try:
self.samples_df['Assessment'] = well_call_df.values
except ValueError:
logger.error("Well call number doesn't match sample number")
logger.debug(f"Well call df: {well_call_df}")
for ii, row in self.samples_df.iterrows():
try:
sample_obj = [sample for sample in self.samples if sample['sample'] == row[3]][0]
except IndexError:
sample_obj = dict(
sample = row['Sample'],
plate_rsl = self.plate_num,
)
logger.debug(f"Got sample obj: {sample_obj}")
if isinstance(row['Cq'], float):
sample_obj[f"ct_{row['Target'].lower()}"] = row['Cq']
else:
sample_obj[f"ct_{row['Target'].lower()}"] = 0.0
try:
sample_obj[f"{row['Target'].lower()}_status"] = row['Assessment']
except KeyError:
logger.error(f"No assessment for {sample_obj['sample']}")
self.samples.append(sample_obj)

View File

@@ -218,7 +218,5 @@ def drop_reruns_from_df(ctx:dict, df: DataFrame) -> DataFrame:
df = df.drop(df[df.name == first_run].index) df = df.drop(df[df.name == first_run].index)
return df return df
def make_hitpicks(input:list) -> DataFrame: def make_hitpicks(input:list) -> DataFrame:
return DataFrame.from_records(input) return DataFrame.from_records(input)

View File

@@ -0,0 +1,92 @@
import logging, re
from pathlib import Path
from openpyxl import load_workbook
from backend.db.models import BasicSubmission
from tools import Settings
logger = logging.getLogger(f"submissions.{__name__}")
class RSLNamer(object):
"""
Object that will enforce proper formatting on RSL plate names.
NOTE: Depreciated in favour of object based methods in 'submissions.py'
"""
def __init__(self, ctx, instr:str, sub_type:str|None=None):
self.ctx = ctx
self.submission_type = sub_type
if self.submission_type == None:
self.submission_type = self.retrieve_submission_type(ctx=self.ctx, instr=instr)
print(self.submission_type)
if self.submission_type != None:
enforcer = BasicSubmission.find_polymorphic_subclass(polymorphic_identity=self.submission_type)
self.parsed_name = self.retrieve_rsl_number(instr=instr, regex=enforcer.get_regex())
self.parsed_name = enforcer.enforce_name(ctx=ctx, instr=self.parsed_name)
@classmethod
def retrieve_submission_type(cls, ctx:Settings, instr:str|Path) -> str:
match instr:
case Path():
logger.debug(f"Using path method.")
if instr.exists():
wb = load_workbook(instr)
try:
submission_type = [item.strip().title() for item in wb.properties.category.split(";")][0]
except AttributeError:
try:
for type in ctx.submission_types:
# This gets the *first* submission type that matches the sheet names in the workbook
if wb.sheetnames == ctx.submission_types[type]['excel_map']:
submission_type = type.title()
except:
submission_type = cls.retrieve_submission_type(ctx=ctx, instr=instr.stem.__str__())
case str():
regex = BasicSubmission.construct_regex()
logger.debug(f"Using string method.")
m = regex.search(instr)
try:
submission_type = m.lastgroup
except AttributeError as e:
logger.critical("No RSL plate number found or submission type found!")
case _:
submission_type = None
if submission_type == None:
from frontend.custom_widgets import SubmissionTypeSelector
dlg = SubmissionTypeSelector(ctx, title="Couldn't parse submission type.", message="Please select submission type from list below.")
if dlg.exec():
submission_type = dlg.parse_form()
submission_type = submission_type.replace("_", " ")
return submission_type
@classmethod
def retrieve_rsl_number(cls, instr:str|Path, regex:str|None=None):
"""
Uses regex to retrieve the plate number and submission type from an input string
Args:
in_str (str): string to be parsed
"""
if regex == None:
regex = BasicSubmission.construct_regex()
else:
regex = re.compile(rf'{regex}', re.IGNORECASE | re.VERBOSE)
match instr:
case Path():
m = regex.search(instr.stem)
case str():
logger.debug(f"Using string method.")
m = regex.search(instr)
case _:
pass
if m != None:
try:
parsed_name = m.group().upper().strip(".")
except:
parsed_name = None
else:
parsed_name = None
logger.debug(f"Got parsed submission name: {parsed_name}")
return parsed_name
from .pydant import *

View File

@@ -7,19 +7,16 @@ from datetime import date, datetime
from dateutil.parser import parse from dateutil.parser import parse
from dateutil.parser._parser import ParserError from dateutil.parser._parser import ParserError
from typing import List, Any from typing import List, Any
# from backend.namer import RSLNamer from . import RSLNamer
from pathlib import Path from pathlib import Path
import re import re
import logging import logging
from tools import check_not_nan, convert_nans_to_nones, Settings from tools import check_not_nan, convert_nans_to_nones, Settings
from backend.db.functions import lookup_submissions from backend.db.functions import lookup_submissions
from backend.db.models import BasicSubmission
logger = logging.getLogger(f"submissions.{__name__}") logger = logging.getLogger(f"submissions.{__name__}")
class PydReagent(BaseModel): class PydSheetReagent(BaseModel):
type: str|None type: str|None
lot: str|None lot: str|None
exp: date|None exp: date|None
@@ -73,9 +70,7 @@ class PydReagent(BaseModel):
else: else:
return values.data['type'] return values.data['type']
class PydSheetSubmission(BaseModel, extra='allow'):
class PydSubmission(BaseModel, extra='allow'):
ctx: Settings ctx: Settings
filepath: Path filepath: Path
submission_type: dict|None submission_type: dict|None
@@ -91,7 +86,6 @@ class PydSubmission(BaseModel, extra='allow'):
reagents: List[dict] = [] reagents: List[dict] = []
samples: List[Any] samples: List[Any]
@field_validator("submitter_plate_num") @field_validator("submitter_plate_num")
@classmethod @classmethod
def enforce_with_uuid(cls, value): def enforce_with_uuid(cls, value):
@@ -153,10 +147,10 @@ class PydSubmission(BaseModel, extra='allow'):
else: else:
logger.warning(f"Submission number {value} already exists in DB, attempting salvage with filepath") logger.warning(f"Submission number {value} already exists in DB, attempting salvage with filepath")
# output = RSLNamer(ctx=values.data['ctx'], instr=values.data['filepath'].__str__(), sub_type=sub_type).parsed_name # output = RSLNamer(ctx=values.data['ctx'], instr=values.data['filepath'].__str__(), sub_type=sub_type).parsed_name
output = BasicSubmission.RSLNamer(ctx=values.data['ctx'], instr=values.data['filepath'].__str__(), sub_type=sub_type).parsed_name output = RSLNamer(ctx=values.data['ctx'], instr=values.data['filepath'].__str__(), sub_type=sub_type).parsed_name
return dict(value=output, parsed=False) return dict(value=output, parsed=False)
else: else:
output = BasicSubmission.RSLNamer(ctx=values.data['ctx'], instr=values.data['filepath'].__str__(), sub_type=sub_type).parsed_name output = RSLNamer(ctx=values.data['ctx'], instr=values.data['filepath'].__str__(), sub_type=sub_type).parsed_name
return dict(value=output, parsed=False) return dict(value=output, parsed=False)
@field_validator("technician", mode="before") @field_validator("technician", mode="before")
@@ -206,8 +200,10 @@ class PydSubmission(BaseModel, extra='allow'):
if check_not_nan(value['value']): if check_not_nan(value['value']):
value = value['value'].title() value = value['value'].title()
return dict(value=value, parsed=True) return dict(value=value, parsed=True)
# else:
# return dict(value="RSL Name not found.")
else: else:
return dict(value=BasicSubmission.RSLNamer(ctx=values.data['ctx'], instr=values.data['filepath'].__str__()).submission_type.title(), parsed=False) return dict(value=RSLNamer(ctx=values.data['ctx'], instr=values.data['filepath'].__str__()).submission_type.title(), parsed=False)
@field_validator("submission_category") @field_validator("submission_category")
@classmethod @classmethod
@@ -215,4 +211,3 @@ class PydSubmission(BaseModel, extra='allow'):
if value['value'] not in ["Research", "Diagnostic", "Surveillance"]: if value['value'] not in ["Research", "Diagnostic", "Surveillance"]:
value['value'] = values.data['submission_type']['value'] value['value'] = values.data['submission_type']['value']
return value return value

View File

@@ -17,11 +17,11 @@ from backend.db.functions import construct_kit_from_yaml, \
lookup_reagent_types, lookup_reagents, lookup_submission_type, lookup_reagenttype_kittype_association, \ lookup_reagent_types, lookup_reagents, lookup_submission_type, lookup_reagenttype_kittype_association, \
lookup_submissions lookup_submissions
from backend.db.models import SubmissionTypeKitTypeAssociation from backend.db.models import SubmissionTypeKitTypeAssociation
from sqlalchemy import FLOAT, INTEGER, String from sqlalchemy import FLOAT, INTEGER
import logging import logging
import numpy as np import numpy as np
from .pop_ups import AlertPop from .pop_ups import AlertPop
from backend.pydant import PydReagent from backend.validators import PydSheetReagent
from typing import Tuple from typing import Tuple
logger = logging.getLogger(f"submissions.{__name__}") logger = logging.getLogger(f"submissions.{__name__}")
@@ -386,11 +386,11 @@ class ControlsDatePicker(QWidget):
class ImportReagent(QComboBox): class ImportReagent(QComboBox):
def __init__(self, ctx:Settings, reagent:dict|PydReagent, extraction_kit:str): def __init__(self, ctx:Settings, reagent:dict|PydSheetReagent, extraction_kit:str):
super().__init__() super().__init__()
self.setEditable(True) self.setEditable(True)
if isinstance(reagent, dict): if isinstance(reagent, dict):
reagent = PydReagent(**reagent) reagent = PydSheetReagent(**reagent)
# Ensure that all reagenttypes have a name that matches the items in the excel parser # Ensure that all reagenttypes have a name that matches the items in the excel parser
query_var = reagent.type query_var = reagent.type
logger.debug(f"Import Reagent is looking at: {reagent.lot} for {query_var}") logger.debug(f"Import Reagent is looking at: {reagent.lot} for {query_var}")

View File

@@ -96,5 +96,5 @@ class SubmissionTypeSelector(QDialog):
self.layout.addWidget(self.buttonBox) self.layout.addWidget(self.buttonBox)
self.setLayout(self.layout) self.setLayout(self.layout)
def getValues(self): def parse_form(self):
return self.widget.currentText() return self.widget.currentText()

View File

@@ -27,7 +27,6 @@ from backend.db.functions import (
construct_submission_info, lookup_reagents, construct_kit_from_yaml, construct_org_from_yaml, get_control_subtypes, construct_submission_info, lookup_reagents, construct_kit_from_yaml, construct_org_from_yaml, get_control_subtypes,
update_subsampassoc_with_pcr, check_kit_integrity, update_last_used, lookup_organizations, lookup_kit_types, update_subsampassoc_with_pcr, check_kit_integrity, update_last_used, lookup_organizations, lookup_kit_types,
lookup_submissions, lookup_controls, lookup_samples, lookup_submission_sample_association, store_object, lookup_submission_type, lookup_submissions, lookup_controls, lookup_samples, lookup_submission_sample_association, store_object, lookup_submission_type,
get_polymorphic_subclass
) )
from backend.excel.parser import SheetParser, PCRParser, SampleParser from backend.excel.parser import SheetParser, PCRParser, SampleParser
from backend.excel.reports import make_report_html, make_report_xlsx, convert_data_list_to_df from backend.excel.reports import make_report_html, make_report_xlsx, convert_data_list_to_df
@@ -56,9 +55,7 @@ def import_submission_function(obj:QMainWindow, fname:Path|None=None) -> Tuple[Q
logger.debug(obj.ctx) logger.debug(obj.ctx)
# initialize samples # initialize samples
obj.samples = [] obj.samples = []
obj.missing_info = [] obj.missing_info = []
# set file dialog # set file dialog
if isinstance(fname, bool) or fname == None: if isinstance(fname, bool) or fname == None:
fname = select_open_file(obj, file_extension="xlsx") fname = select_open_file(obj, file_extension="xlsx")

View File

@@ -134,153 +134,6 @@ def massage_common_reagents(reagent_name:str):
reagent_name = reagent_name.replace("µ", "u") reagent_name = reagent_name.replace("µ", "u")
return reagent_name return reagent_name
# class RSLNamer(object):
# """
# Object that will enforce proper formatting on RSL plate names.
# NOTE: Depreciated in favour of object based methods in 'submissions.py'
# """
# def __init__(self, ctx, instr:str, sub_type:str|None=None):
# self.ctx = ctx
# self.submission_type = sub_type
# self.retrieve_rsl_number(in_str=instr)
# if self.submission_type != None:
# # custom_enforcer = get_polymorphic_subclass(BasicSubmission, self.submission_type).enforce_naming_schema
# parser = getattr(self, f"enforce_{self.submission_type.replace(' ', '_').lower()}")
# parser()
# self.parsed_name = self.parsed_name.replace("_", "-")
# def retrieve_rsl_number(self, in_str:str|Path):
# """
# Uses regex to retrieve the plate number and submission type from an input string
# Args:
# in_str (str): string to be parsed
# """
# if not isinstance(in_str, Path):
# in_str = Path(in_str)
# self.out_str = in_str.stem
# logger.debug(f"Attempting match of {self.out_str}")
# logger.debug(f"The initial plate name is: {self.out_str}")
# regex = re.compile(r"""
# # (?P<wastewater>RSL(?:-|_)?WW(?:-|_)?20\d{2}-?\d{2}-?\d{2}(?:(?:_|-)\d?((?!\d)|R)?\d(?!\d))?)|
# (?P<wastewater>RSL(?:-|_)?WW(?:-|_)?20\d{2}-?\d{2}-?\d{2}(?:(_|-)\d?(\D|$)R?\d?)?)|
# (?P<bacterial_culture>RSL-?\d{2}-?\d{4})|
# (?P<wastewater_artic>(\d{4}-\d{2}-\d{2}(?:-|_)(?:\d_)?artic)|(RSL(?:-|_)?AR(?:-|_)?20\d{2}-?\d{2}-?\d{2}(?:(_|-)\d?(\D|$)R?\d?)?))
# """, flags = re.IGNORECASE | re.VERBOSE)
# m = regex.search(self.out_str)
# if m != None:
# self.parsed_name = m.group().upper().strip(".")
# logger.debug(f"Got parsed submission name: {self.parsed_name}")
# if self.submission_type == None:
# try:
# self.submission_type = m.lastgroup
# except AttributeError as e:
# logger.critical("No RSL plate number found or submission type found!")
# logger.debug(f"The cause of the above error was: {e}")
# logger.warning(f"We're going to have to create the submission type from the excel sheet properties...")
# if in_str.exists():
# my_xl = pd.ExcelFile(in_str)
# if my_xl.book.properties.category != None:
# categories = [item.strip().title() for item in my_xl.book.properties.category.split(";")]
# self.submission_type = categories[0].replace(" ", "_").lower()
# else:
# raise AttributeError(f"File {in_str.__str__()} has no categories.")
# else:
# raise FileNotFoundError()
# # else:
# # raise ValueError(f"No parsed name could be created for {self.out_str}.")
# def enforce_wastewater(self):
# """
# Uses regex to enforce proper formatting of wastewater samples
# """
# def construct():
# today = datetime.now()
# return f"RSL-WW-{today.year}{str(today.month).zfill(2)}{str(today.day).zfill(2)}-1"
# try:
# self.parsed_name = re.sub(r"PCR(-|_)", "", self.parsed_name)
# except AttributeError as e:
# logger.error(f"Problem using regex: {e}")
# self.parsed_name = construct()
# self.parsed_name = self.parsed_name.replace("RSLWW", "RSL-WW")
# self.parsed_name = re.sub(r"WW(\d{4})", r"WW-\1", self.parsed_name, flags=re.IGNORECASE)
# self.parsed_name = re.sub(r"(\d{4})-(\d{2})-(\d{2})", r"\1\2\3", self.parsed_name)
# logger.debug(f"Coming out of the preliminary parsing, the plate name is {self.parsed_name}")
# try:
# plate_number = re.search(r"(?:(-|_)\d)(?!\d)", self.parsed_name).group().strip("_").strip("-")
# logger.debug(f"Plate number is: {plate_number}")
# except AttributeError as e:
# plate_number = "1"
# # self.parsed_name = re.sub(r"(\d{8})(-|_\d)?(R\d)?", fr"\1-{plate_number}\3", self.parsed_name)
# self.parsed_name = re.sub(r"(\d{8})(-|_)?\d?(R\d?)?", rf"\1-{plate_number}\3", self.parsed_name)
# logger.debug(f"After addition of plate number the plate name is: {self.parsed_name}")
# try:
# repeat = re.search(r"-\dR(?P<repeat>\d)?", self.parsed_name).groupdict()['repeat']
# if repeat == None:
# repeat = "1"
# except AttributeError as e:
# repeat = ""
# self.parsed_name = re.sub(r"(-\dR)\d?", rf"\1 {repeat}", self.parsed_name).replace(" ", "")
# def enforce_bacterial_culture(self):
# """
# Uses regex to enforce proper formatting of bacterial culture samples
# """
# def construct(ctx) -> str:
# """
# DEPRECIATED due to slowness. Search for the largest rsl number and increment by 1
# Returns:
# str: new RSL number
# """
# logger.debug(f"Attempting to construct RSL number from scratch...")
# # directory = Path(self.ctx['directory_path']).joinpath("Bacteria")
# directory = Path(ctx.directory_path).joinpath("Bacteria")
# year = str(datetime.now().year)[-2:]
# if directory.exists():
# logger.debug(f"Year: {year}")
# relevant_rsls = []
# all_xlsx = [item.stem for item in directory.rglob("*.xlsx") if bool(re.search(r"RSL-\d{2}-\d{4}", item.stem)) and year in item.stem[4:6]]
# logger.debug(f"All rsls: {all_xlsx}")
# for item in all_xlsx:
# try:
# relevant_rsls.append(re.match(r"RSL-\d{2}-\d{4}", item).group(0))
# except Exception as e:
# logger.error(f"Regex error: {e}")
# continue
# logger.debug(f"Initial xlsx: {relevant_rsls}")
# max_number = max([int(item[-4:]) for item in relevant_rsls])
# logger.debug(f"The largest sample number is: {max_number}")
# return f"RSL-{year}-{str(max_number+1).zfill(4)}"
# else:
# # raise FileNotFoundError(f"Unable to locate the directory: {directory.__str__()}")
# return f"RSL-{year}-0000"
# try:
# self.parsed_name = re.sub(r"RSL(\d{2})", r"RSL-\1", self.parsed_name, flags=re.IGNORECASE)
# except AttributeError as e:
# self.parsed_name = construct(ctx=self.ctx)
# # year = datetime.now().year
# # self.parsed_name = f"RSL-{str(year)[-2:]}-0000"
# self.parsed_name = re.sub(r"RSL-(\d{2})(\d{4})", r"RSL-\1-\2", self.parsed_name, flags=re.IGNORECASE)
# def enforce_wastewater_artic(self):
# """
# Uses regex to enforce proper formatting of wastewater samples
# """
# def construct():
# today = datetime.now()
# return f"RSL-AR-{today.year}{str(today.month).zfill(2)}{str(today.day).zfill(2)}"
# try:
# self.parsed_name = re.sub(r"(\d{4})-(\d{2})-(\d{2})", r"RSL-AR-\1\2\3", self.parsed_name, flags=re.IGNORECASE)
# except AttributeError:
# self.parsed_name = construct()
# try:
# plate_number = int(re.search(r"_|-\d?_", self.parsed_name).group().strip("_").strip("-"))
# except (AttributeError, ValueError) as e:
# plate_number = 1
# self.parsed_name = re.sub(r"(_|-\d)?_ARTIC", f"-{plate_number}", self.parsed_name)
class GroupWriteRotatingFileHandler(handlers.RotatingFileHandler): class GroupWriteRotatingFileHandler(handlers.RotatingFileHandler):
def doRollover(self): def doRollover(self):