Post RSLNamer move, pre-restore

This commit is contained in:
Landon Wark
2023-10-16 14:24:33 -05:00
parent 80d77117e1
commit 0a90542e8e
11 changed files with 575 additions and 286 deletions

View File

@@ -3,6 +3,7 @@ Models for the main submission types.
'''
from getpass import getuser
import math
from pprint import pformat
from . import Base
from sqlalchemy import Column, String, TIMESTAMP, INTEGER, ForeignKey, Table, JSON, FLOAT, case
from sqlalchemy.orm import relationship, validates
@@ -17,7 +18,9 @@ from dateutil.parser import parse
import re
import pandas as pd
from openpyxl import Workbook
from tools import check_not_nan, row_map
from tools import check_not_nan, row_map, Settings
from pathlib import Path
from datetime import datetime
logger = logging.getLogger(f"submissions.{__name__}")
@@ -61,7 +64,7 @@ class BasicSubmission(Base):
# Allows for subclassing into ex. BacterialCulture, Wastewater, etc.
__mapper_args__ = {
"polymorphic_identity": "basic_submission",
"polymorphic_identity": "Basic Submission",
"polymorphic_on": submission_type_name,
"with_polymorphic": "*",
}
@@ -295,18 +298,93 @@ class BasicSubmission(Base):
"""
return input_excel
class _RSLNamer(object):
    """
    Pulls a submission (plate) name out of a filename or string and normalizes it.

    Subclasses set `alias` to the submission type they handle and provide
    `get_regex` and `enforce_name`; this base class combines the subclass
    patterns and dispatches to the matching subclass's enforcer.
    """

    # Submission type handled by a subclass; None on this generic base.
    alias = None

    def __init__(self, ctx:Settings, instr:str|Path, sub_type:str|None=None, parent=None):
        """
        Args:
            ctx (Settings): settings object, stored on the instance as `ctx`
            instr (str | Path): string or path whose stem is searched for a plate name
            sub_type (str | None, optional): submission type; when None it is taken from the regex match. Defaults to None.
            parent (optional): owning model, only used for the debug log line. Defaults to None.
        """
        if parent is not None:
            logger.debug(f"Hello from {parent.__mapper_args__['polymorphic_identity']} Namer!")
        self.ctx = ctx
        self.submission_type = sub_type
        self.retrieve_rsl_number(instr=instr)
        # Only the lookup decides whether to fall back; an IndexError raised inside
        # an enforcer now propagates instead of silently re-running the base enforcer.
        ncls = next((item for item in self.__class__.__subclasses__() if item.alias == self.submission_type), None)
        if ncls is None:
            self.enforce_name(parent=parent)
        else:
            # Called through the class because self is not an instance of ncls.
            ncls.enforce_name(self=self, parent=parent)

    def retrieve_rsl_number(self, instr:str|Path):
        """
        Uses regex to retrieve the plate number and submission type from an input string

        Sets `out_str` to the stem of the input. `parsed_name` is only set when
        the regex matches; it is left unset otherwise.

        Args:
            instr (str | Path): string to be parsed
        """
        if not isinstance(instr, Path):
            instr = Path(instr)
        self.out_str = instr.stem
        logger.debug(f"Attempting match of {self.out_str}")
        logger.debug(f"The initial plate name is: {self.out_str}")
        regex = self.construct_regex()
        m = regex.search(self.out_str)
        if m is not None:
            self.parsed_name = m.group().upper().strip(".")
            logger.debug(f"Got parsed submission name: {self.parsed_name}")
        if self.submission_type is None:
            try:
                # The named group that matched doubles as the submission type.
                self.submission_type = m.lastgroup.replace("_", " ")
            except AttributeError:
                # No match at all, or the match had no named group.
                self.submission_type = None

    def enforce_name(self, parent):
        """
        Default enforcer: leaves the parsed name as it is.

        Args:
            parent: owning model, only used for the debug log line.

        Raises:
            AttributeError: if `parsed_name` was never set (no regex match).
        """
        if parent is not None:
            logger.debug(f"Hello from {parent.__mapper_args__['polymorphic_identity']} Enforcer!")
        self.parsed_name = self.parsed_name

    @classmethod
    def construct_regex(cls):
        """
        Joins the patterns of all direct subclasses into one alternation.

        Returns:
            re.Pattern: compiled with IGNORECASE and VERBOSE
        """
        rstring = "|".join(item.get_regex() for item in cls.__subclasses__())
        return re.compile(rstring, flags=re.IGNORECASE | re.VERBOSE)
@classmethod
def enforce_naming_schema(cls, input_str:str) -> str:
"""
Used to ensure proper custom naming of submission.
This implementation applies no custom naming and returns the input unchanged.
Args:
input_str (str): name parsed by default parser
Returns:
str: custom parser output (here identical to input_str).
"""
return input_str
@classmethod
def RSLNamer(cls, ctx:Settings, instr:str, sub_type:str|None=None):
    """
    Builds this model's `_RSLNamer` for the given input.

    Declared as a classmethod because the first parameter is the class, which
    is also handed to the namer as `parent`; without the decorator a call on
    the class itself fails with a missing positional argument.

    Args:
        ctx (Settings): settings object passed through to the namer
        instr (str): string to be parsed for a plate name
        sub_type (str | None, optional): submission type, if already known. Defaults to None.

    Returns:
        _RSLNamer: namer constructed from the input
    """
    return cls._RSLNamer(parent=cls, ctx=ctx, instr=instr, sub_type=sub_type)
@classmethod
def find_subclasses(cls, ctx:Settings, attrs:dict|None=None, rsl_number:str|None=None):
    """
    Picks the model class to use, either from a plate name or from a set of attribute names.

    Args:
        ctx (Settings): settings object passed through to the namer
        attrs (dict | None, optional): attribute names the model must have. Defaults to None.
        rsl_number (str | None, optional): plate name; when given, the submission type parsed from it decides the model. Defaults to None.

    Raises:
        AttributeError: if neither this class nor any direct subclass has every attribute in attrs

    Returns:
        this class or one of its subclasses
    """
    if rsl_number is not None:
        namer = cls._RSLNamer(ctx=ctx, instr=rsl_number)
        return cls.find_polymorphic_subclass(namer.submission_type)
    # Truthiness covers both None and empty; calling len() first raised
    # TypeError for the default attrs=None.
    if not attrs:
        return cls
    if all(hasattr(cls, attr) for attr in attrs):
        model = cls
    else:
        # looks for first model that has all included kwargs
        model = next((subclass for subclass in cls.__subclasses__() if all(hasattr(subclass, attr) for attr in attrs)), None)
        if model is None:
            raise AttributeError(f"Couldn't find existing class/subclass of {cls} with all attributes:\n{pformat(attrs)}")
    logger.debug(f"Using model: {model}")
    return model
@classmethod
def find_polymorphic_subclass(cls, polymorphic_identity:str|None=None):
    """
    Returns the direct subclass registered under the given polymorphic identity.

    Args:
        polymorphic_identity (str | None, optional): identity to look up; a dict is unwrapped through its 'value' key. Defaults to None.

    Returns:
        the matching subclass, or this class when no identity is given or the lookup fails
    """
    if isinstance(polymorphic_identity, dict):
        polymorphic_identity = polymorphic_identity['value']
    if polymorphic_identity is None:
        return cls
    try:
        candidates = [
            sub for sub in cls.__subclasses__()
            if sub.__mapper_args__['polymorphic_identity'] == polymorphic_identity
        ]
        return candidates[0]
    except Exception as e:
        # Best effort: any lookup failure is logged and this class is used instead.
        logger.error(f"Could not get polymorph {polymorphic_identity} of {cls} due to {e}")
    return cls
# Below are the custom submission types
@@ -372,6 +450,59 @@ class BacterialCulture(BasicSubmission):
input_excel["Sample List"].cell(row=15, column=2, value=getuser()[0:2].upper())
return input_excel
class _RSLNamer(BasicSubmission._RSLNamer):
    """
    Namer for Bacterial Culture plates; normalizes names to the RSL-YY-NNNN form.
    """

    alias = "Bacterial Culture"

    @classmethod
    def construct_regex(cls):
        """
        Returns:
            re.Pattern: this class's own pattern, compiled with IGNORECASE and VERBOSE
        """
        return re.compile(cls.get_regex(), flags=re.IGNORECASE | re.VERBOSE)

    def enforce_name(self, parent):
        """
        Inserts the dashes into the parsed name, constructing a new name when none was parsed.

        Args:
            parent: unused here; kept for a uniform enforcer signature.
        """
        def construct(ctx) -> str:
            """
            DEPRECATED due to slowness. Search for the largest rsl number and increment by 1

            Returns:
                str: new RSL number
            """
            logger.debug(f"Attempting to construct RSL number from scratch...")
            directory = Path(ctx.directory_path).joinpath("Bacteria")
            year = str(datetime.now().year)[-2:]
            if not directory.exists():
                return f"RSL-{year}-0000"
            logger.debug(f"Year: {year}")
            all_xlsx = [item.stem for item in directory.rglob("*.xlsx") if bool(re.search(r"RSL-\d{2}-\d{4}", item.stem)) and year in item.stem[4:6]]
            logger.debug(f"All rsls: {all_xlsx}")
            relevant_rsls = []
            for item in all_xlsx:
                try:
                    relevant_rsls.append(re.match(r"RSL-\d{2}-\d{4}", item).group(0))
                except AttributeError as e:
                    # Pattern was found somewhere in the stem but not at its start.
                    logger.error(f"Regex error: {e}")
            logger.debug(f"Initial xlsx: {relevant_rsls}")
            # default=-1 makes an empty directory yield 0000, the same as a
            # missing directory, instead of raising ValueError from max().
            max_number = max((int(item[-4:]) for item in relevant_rsls), default=-1)
            logger.debug(f"The largest sample number is: {max_number}")
            return f"RSL-{year}-{str(max_number+1).zfill(4)}"

        try:
            self.parsed_name = re.sub(r"RSL(\d{2})", r"RSL-\1", self.parsed_name, flags=re.IGNORECASE)
        except AttributeError:
            # parsed_name was never set because nothing matched.
            self.parsed_name = construct(ctx=self.ctx)
        self.parsed_name = re.sub(r"RSL-(\d{2})(\d{4})", r"RSL-\1-\2", self.parsed_name, flags=re.IGNORECASE)

    @classmethod
    def get_regex(cls):
        """
        Returns:
            str: pattern with a named group whose name maps to this class's alias
        """
        return "(?P<Bacterial_Culture>RSL-?\\d{2}-?\\d{4})"
class Wastewater(BasicSubmission):
"""
derivative submission type from BasicSubmission
@@ -411,6 +542,50 @@ class Wastewater(BasicSubmission):
if xl != None:
input_dict['csv'] = xl.parse("Copy to import file")
return input_dict
class _RSLNamer(BasicSubmission._RSLNamer):
    """
    Namer for Wastewater plates; normalizes names to RSL-WW-YYYYMMDD-<plate>[R<repeat>].
    """

    alias = "Wastewater"

    @classmethod
    def construct_regex(cls):
        """
        Returns:
            re.Pattern: this class's own pattern, compiled with IGNORECASE and VERBOSE
        """
        return re.compile(cls.get_regex(), flags=re.IGNORECASE | re.VERBOSE)

    def enforce_name(self, parent):
        """
        Normalizes the parsed name, adding a plate number and a repeat number where needed.

        Args:
            parent: unused here; kept for a uniform enforcer signature.
        """
        def construct():
            # Fallback name built from today's date.
            today = datetime.now()
            return f"RSL-WW-{today.year}{str(today.month).zfill(2)}{str(today.day).zfill(2)}"

        try:
            self.parsed_name = re.sub(r"PCR(-|_)", "", self.parsed_name)
        except AttributeError as e:
            # parsed_name was never set because nothing matched.
            logger.error(f"Problem using regex: {e}")
            self.parsed_name = construct()
        self.parsed_name = self.parsed_name.replace("RSLWW", "RSL-WW")
        self.parsed_name = re.sub(r"WW(\d{4})", r"WW-\1", self.parsed_name, flags=re.IGNORECASE)
        # Collapse a dashed date into YYYYMMDD.
        self.parsed_name = re.sub(r"(\d{4})-(\d{2})-(\d{2})", r"\1\2\3", self.parsed_name)
        logger.debug(f"Coming out of the preliminary parsing, the plate name is {self.parsed_name}")
        try:
            # A separator followed by exactly one digit is taken as the plate number.
            plate_number = re.search(r"(?:(-|_)\d)(?!\d)", self.parsed_name).group().strip("_").strip("-")
            logger.debug(f"Plate number is: {plate_number}")
        except AttributeError:
            plate_number = "1"
        self.parsed_name = re.sub(r"(\d{8})(-|_)?\d?(R\d?)?", rf"\1-{plate_number}\3", self.parsed_name)
        logger.debug(f"After addition of plate number the plate name is: {self.parsed_name}")
        try:
            repeat = re.search(r"-\dR(?P<repeat>\d)?", self.parsed_name).groupdict()['repeat']
            if repeat is None:
                # An 'R' with no digit counts as the first repeat.
                repeat = "1"
        except AttributeError:
            repeat = ""
        # The space keeps the backreference from merging with the repeat digit;
        # it is removed again (with any other spaces) right after.
        self.parsed_name = re.sub(r"(-\dR)\d?", rf"\1 {repeat}", self.parsed_name).replace(" ", "")

    @classmethod
    def get_regex(cls):
        """
        Returns:
            str: pattern with a named group whose name maps to this class's alias
        """
        # Raw string: \d and \D are invalid escapes in a normal string literal.
        return r"(?P<Wastewater>RSL(?:-|_)?WW(?:-|_)?20\d{2}-?\d{2}-?\d{2}(?:(_|-)\d?(\D|$)R?\d?)?)"
class WastewaterArtic(BasicSubmission):
"""
@@ -454,6 +629,35 @@ class WastewaterArtic(BasicSubmission):
# at the end, this has to be done here. No moving to sqlalchemy object :(
input_dict['submitter_id'] = re.sub(r"\s\(.+\)$", "", str(input_dict['submitter_id'])).strip()
return input_dict
class _RSLNamer(BasicSubmission._RSLNamer):
    """
    Namer for Wastewater Artic plates; normalizes names to RSL-AR-YYYYMMDD-<plate>.
    """

    alias = "Wastewater Artic"

    @classmethod
    def construct_regex(cls):
        """
        Returns:
            re.Pattern: this class's own pattern, compiled with IGNORECASE and VERBOSE
        """
        return re.compile(cls.get_regex(), flags=re.IGNORECASE | re.VERBOSE)

    def enforce_name(self, parent):
        """
        Rewrites the parsed name with the RSL-AR prefix and a plate number.

        Args:
            parent: unused here; kept for a uniform enforcer signature.
        """
        def construct():
            # Fallback name built from today's date.
            today = datetime.now()
            return f"RSL-AR-{today.year}{str(today.month).zfill(2)}{str(today.day).zfill(2)}"

        try:
            # An existing RSL-AR prefix is consumed by the match so that a name
            # which already carries it is not prefixed a second time.
            self.parsed_name = re.sub(r"(?:RSL(?:-|_)?AR(?:-|_)?)?(\d{4})-(\d{2})-(\d{2})", r"RSL-AR-\1\2\3", self.parsed_name, flags=re.IGNORECASE)
        except AttributeError:
            # parsed_name was never set because nothing matched.
            self.parsed_name = construct()
        try:
            # Separator alternatives are grouped so '_1_' is read like '-1_';
            # ungrouped, a lone '_' matched first and the digit was lost.
            plate_number = int(re.search(r"(?:_|-)\d?_", self.parsed_name).group().strip("_").strip("-"))
        except (AttributeError, ValueError):
            plate_number = 1
        # Same grouping here so an underscore-separated plate digit is replaced
        # along with the suffix rather than left behind.
        self.parsed_name = re.sub(r"(?:(?:_|-)\d)?_ARTIC", f"-{plate_number}", self.parsed_name)

    @classmethod
    def get_regex(cls):
        """
        Returns:
            str: pattern with a named group whose name maps to this class's alias
        """
        return "(?P<Wastewater_Artic>(\\d{4}-\\d{2}-\\d{2}(?:-|_)(?:\\d_)?artic)|(RSL(?:-|_)?AR(?:-|_)?20\\d{2}-?\\d{2}-?\\d{2}(?:(_|-)\\d?(\\D|$)R?\\d?)?))"
class BasicSample(Base):
"""
@@ -542,6 +746,34 @@ class BasicSample(Base):
"""
return dict(name=self.submitter_id[:10], positive=False, tooltip=tooltip_text)
@classmethod
def find_subclasses(cls, ctx:Settings, attrs:dict|None=None, rsl_number:str|None=None):
    """
    Picks the model class that has every attribute named in attrs.

    Args:
        ctx (Settings): not used by this implementation
        attrs (dict | None, optional): attribute names the model must have. Defaults to None.
        rsl_number (str | None, optional): not used by this implementation. Defaults to None.

    Raises:
        AttributeError: if neither this class nor any direct subclass has every attribute in attrs

    Returns:
        this class or one of its subclasses
    """
    # Truthiness covers both None and empty; calling len() first raised
    # TypeError for the default attrs=None.
    if not attrs:
        return cls
    if all(hasattr(cls, attr) for attr in attrs):
        model = cls
    else:
        # looks for first model that has all included kwargs
        model = next((subclass for subclass in cls.__subclasses__() if all(hasattr(subclass, attr) for attr in attrs)), None)
        if model is None:
            raise AttributeError(f"Couldn't find existing class/subclass of {cls} with all attributes:\n{pformat(attrs)}")
    logger.debug(f"Using model: {model}")
    return model
@classmethod
def find_polymorphic_subclass(cls, polymorphic_identity:str|None=None):
    """
    Returns the direct subclass registered under the given polymorphic identity.

    Args:
        polymorphic_identity (str | None, optional): identity to look up; a dict is unwrapped through its 'value' key. Defaults to None.

    Returns:
        the matching subclass, or this class when no identity is given or the lookup fails
    """
    if isinstance(polymorphic_identity, dict):
        polymorphic_identity = polymorphic_identity['value']
    if polymorphic_identity is None:
        return cls
    try:
        candidates = [
            sub for sub in cls.__subclasses__()
            if sub.__mapper_args__['polymorphic_identity'] == polymorphic_identity
        ]
        return candidates[0]
    except Exception as e:
        # Best effort: any lookup failure is logged and this class is used instead.
        logger.error(f"Could not get polymorph {polymorphic_identity} of {cls} due to {e}")
    return cls
class WastewaterSample(BasicSample):
"""
Derivative wastewater sample