Post RSLNamer move, pre-restore

This commit is contained in:
Landon Wark
2023-10-16 14:24:33 -05:00
parent 80d77117e1
commit 0a90542e8e
11 changed files with 575 additions and 286 deletions

View File

@@ -2,8 +2,11 @@
Used to construct models from input dictionaries.
'''
from getpass import getuser
from tools import Settings, RSLNamer, check_regex_match, check_authorization, massage_common_reagents
from tools import Settings, check_regex_match, check_authorization, massage_common_reagents
from .. import models
# from .misc import RSLNamer
# from backend.namer import RSLNamer
# from .misc import get_polymorphic_subclass
from .lookups import *
import logging
from datetime import date, timedelta
@@ -62,7 +65,9 @@ def construct_submission_info(ctx:Settings, info_dict:dict) -> Tuple[models.Basi
models.BasicSubmission: Constructed submission object
"""
# convert submission type into model name
query = info_dict['submission_type'].replace(" ", "")
# model = get_polymorphic_subclass(polymorphic_identity=info_dict['submission_type'])
model = models.BasicSubmission.find_polymorphic_subclass(polymorphic_identity=info_dict['submission_type'])
logger.debug(f"We've got the model: {type(model)}")
# Ensure an rsl plate number exists for the plate
if not check_regex_match("^RSL", info_dict["rsl_plate_num"]):
instance = None
@@ -70,13 +75,13 @@ def construct_submission_info(ctx:Settings, info_dict:dict) -> Tuple[models.Basi
return instance, {'code': 2, 'message': "A proper RSL plate number is required."}
else:
# enforce conventions on the rsl plate number from the form
info_dict['rsl_plate_num'] = RSLNamer(ctx=ctx, instr=info_dict["rsl_plate_num"]).parsed_name
# info_dict['rsl_plate_num'] = RSLNamer(ctx=ctx, instr=info_dict["rsl_plate_num"]).parsed_name
info_dict['rsl_plate_num'] = model.RSLNamer(ctx=ctx, instr=info_dict["rsl_plate_num"], sub_type=info_dict['submission_type']).parsed_name
# check database for existing object
instance = lookup_submissions(ctx=ctx, rsl_number=info_dict['rsl_plate_num'])
# get model based on submission type converted above
logger.debug(f"Looking at models for submission type: {query}")
model = getattr(models, query)
logger.debug(f"We've got the model: {type(model)}")
# logger.debug(f"Looking at models for submission type: {query}")
# if query return nothing, ie doesn't already exist in db
if instance == None:
instance = model()
@@ -218,10 +223,8 @@ def construct_kit_from_yaml(ctx:Settings, kit_dict:dict) -> dict:
kit.kit_submissiontype_associations.append(kt_st_assoc)
# A kit contains multiple reagent types.
for r in kit_dict['reagent_types']:
# check if reagent type already exists.
logger.debug(f"Constructing reagent type: {r}")
rtname = massage_common_reagents(r['rtname'])
# look_up = ctx.database_session.query(models.ReagentType).filter(models.ReagentType.name==rtname).first()
look_up = lookup_reagent_types(name=rtname)
if look_up == None:
rt = models.ReagentType(name=rtname.strip(), eol_ext=timedelta(30*r['eol']))
@@ -237,6 +240,7 @@ def construct_kit_from_yaml(ctx:Settings, kit_dict:dict) -> dict:
store_object(ctx=ctx, object=kit)
return {'code':0, 'message':'Kit has been added', 'status': 'information'}
@check_authorization
def construct_org_from_yaml(ctx:Settings, org:dict) -> dict:
"""
Create and store a new organization based on a .yml file
@@ -248,11 +252,11 @@ def construct_org_from_yaml(ctx:Settings, org:dict) -> dict:
Returns:
dict: dictionary containing results of db addition
"""
from tools import check_is_power_user
# Don't want just anyone adding in clients
if not check_is_power_user(ctx=ctx):
logger.debug(f"{getuser()} does not have permission to add kits.")
return {'code':1, 'message':"This user does not have permission to add organizations."}
# from tools import check_is_power_user
# # Don't want just anyone adding in clients
# if not check_is_power_user(ctx=ctx):
# logger.debug(f"{getuser()} does not have permission to add kits.")
# return {'code':1, 'message':"This user does not have permission to add organizations."}
# the yml can contain multiple clients
for client in org:
cli_org = models.Organization(name=client.replace(" ", "_").lower(), cost_centre=org[client]['cost centre'])

View File

@@ -1,5 +1,6 @@
from .. import models
from tools import Settings, RSLNamer
from tools import Settings
# from backend.namer import RSLNamer
from typing import List
import logging
from datetime import date, datetime
@@ -8,7 +9,6 @@ from sqlalchemy.orm.query import Query
from sqlalchemy import and_, JSON
from sqlalchemy.orm import Session
logger = logging.getLogger(f"submissions.{__name__}")
def query_return(query:Query, limit:int=0):
@@ -155,7 +155,10 @@ def lookup_submissions(ctx:Settings,
chronologic:bool=False, limit:int=0,
**kwargs
) -> models.BasicSubmission | List[models.BasicSubmission]:
model = models.find_subclasses(parent=models.BasicSubmission, attrs=kwargs)
if rsl_number == None:
model = models.BasicSubmission.find_subclasses(ctx=ctx, attrs=kwargs)
else:
model = models.BasicSubmission.find_subclasses(ctx=ctx, rsl_number=rsl_number)
query = setup_lookup(ctx=ctx, locals=locals()).query(model)
# by submission type
match submission_type:
@@ -208,14 +211,17 @@ def lookup_submissions(ctx:Settings,
# by rsl number (returns only a single value)
match rsl_number:
case str():
namer = model.RSLNamer(ctx=ctx, instr=rsl_number)
logger.debug(f"Looking up BasicSubmission with rsl number: {rsl_number}")
try:
rsl_number = RSLNamer(ctx=ctx, instr=rsl_number).parsed_name
rsl_number = namer.parsed_name
logger.debug(f"Got {rsl_number} from {model}.")
except AttributeError as e:
logger.error(f"No parsed name found, returning None.")
return None
# query = query.filter(models.BasicSubmission.rsl_plate_num==rsl_number)
query = query.filter(model.rsl_plate_num==rsl_number)
logger.debug(f"At this point the query gets: {query.all()}")
limit = 1
case _:
pass
@@ -242,6 +248,7 @@ def lookup_submissions(ctx:Settings,
if chronologic:
# query.order_by(models.BasicSubmission.submitted_date)
query.order_by(model.submitted_date)
logger.debug(f"At the end of the search, the query gets: {query.all()}")
return query_return(query=query, limit=limit)
def lookup_submission_type(ctx:Settings,
@@ -367,7 +374,8 @@ def lookup_samples(ctx:Settings,
**kwargs
) -> models.BasicSample|models.WastewaterSample|List[models.BasicSample]:
logger.debug(f"Length of kwargs: {len(kwargs)}")
model = models.find_subclasses(parent=models.BasicSample, attrs=kwargs)
# model = models.find_subclasses(parent=models.BasicSample, attrs=kwargs)
model = models.BasicSample.find_subclasses(ctx=ctx, attrs=kwargs)
query = setup_lookup(ctx=ctx, locals=locals()).query(model)
match submitter_id:
case str():

View File

@@ -12,6 +12,9 @@ from . import store_object
from sqlalchemy.exc import OperationalError as AlcOperationalError, IntegrityError as AlcIntegrityError
from sqlite3 import OperationalError as SQLOperationalError, IntegrityError as SQLIntegrityError
from pprint import pformat
import logging
logger = logging.getLogger(f"submissions.{__name__}")
def submissions_to_df(ctx:Settings, submission_type:str|None=None, limit:int=0) -> pd.DataFrame:
"""
@@ -236,25 +239,26 @@ def update_subsampassoc_with_pcr(ctx:Settings, submission:models.BasicSubmission
result = store_object(ctx=ctx, object=assoc)
return result
def get_polymorphic_subclass(base:object, polymorphic_identity:str|None=None):
"""
Retrieves any subclasses of given base class whose polymorphic identity matches the string input.
# def get_polymorphic_subclass(base:object|models.BasicSubmission=models.BasicSubmission, polymorphic_identity:str|None=None):
# """
# Retrieves any subclasses of given base class whose polymorphic identity matches the string input.
# NOTE: Deprecated in favour of class based finders in 'submissions.py'
Args:
base (object): Base (parent) class
polymorphic_identity (str | None): Name of subclass of interest. (Defaults to None)
# Args:
# base (object): Base (parent) class
# polymorphic_identity (str | None): Name of subclass of interest. (Defaults to None)
Returns:
_type_: Subclass, or parent class on
"""
if isinstance(polymorphic_identity, dict):
polymorphic_identity = polymorphic_identity['value']
if polymorphic_identity == None:
return base
else:
try:
return [item for item in base.__subclasses__() if item.__mapper_args__['polymorphic_identity']==polymorphic_identity][0]
except Exception as e:
logger.error(f"Could not get polymorph {polymorphic_identity} of {base} due to {e}")
return base
# Returns:
# _type_: Subclass, or parent class on
# """
# if isinstance(polymorphic_identity, dict):
# polymorphic_identity = polymorphic_identity['value']
# if polymorphic_identity == None:
# return base
# else:
# try:
# return [item for item in base.__subclasses__() if item.__mapper_args__['polymorphic_identity']==polymorphic_identity][0]
# except Exception as e:
# logger.error(f"Could not get polymorph {polymorphic_identity} of {base} due to {e}")
# return base

View File

@@ -11,7 +11,7 @@ metadata = Base.metadata
logger = logging.getLogger(f"submissions.{__name__}")
def find_subclasses(parent:Any, attrs:dict) -> Any:
def find_subclasses(parent:Any, attrs:dict|None=None, rsl_number:str|None=None) -> Any:
"""
Finds subclasses of a parent that does contain all
attributes if the parent does not.
@@ -26,7 +26,7 @@ def find_subclasses(parent:Any, attrs:dict) -> Any:
Returns:
_type_: Parent or subclass.
"""
if len(attrs) == 0:
if len(attrs) == 0 or attrs == None:
return parent
if any([not hasattr(parent, attr) for attr in attrs]):
# looks for first model that has all included kwargs

View File

@@ -3,6 +3,7 @@ Models for the main submission types.
'''
from getpass import getuser
import math
from pprint import pformat
from . import Base
from sqlalchemy import Column, String, TIMESTAMP, INTEGER, ForeignKey, Table, JSON, FLOAT, case
from sqlalchemy.orm import relationship, validates
@@ -17,7 +18,9 @@ from dateutil.parser import parse
import re
import pandas as pd
from openpyxl import Workbook
from tools import check_not_nan, row_map
from tools import check_not_nan, row_map, Settings
from pathlib import Path
from datetime import datetime
logger = logging.getLogger(f"submissions.{__name__}")
@@ -61,7 +64,7 @@ class BasicSubmission(Base):
# Allows for subclassing into ex. BacterialCulture, Wastewater, etc.
__mapper_args__ = {
"polymorphic_identity": "basic_submission",
"polymorphic_identity": "Basic Submission",
"polymorphic_on": submission_type_name,
"with_polymorphic": "*",
}
@@ -295,18 +298,93 @@ class BasicSubmission(Base):
"""
return input_excel
class _RSLNamer(object):
    """
    Pulls an RSL plate number out of a string or file path and normalises it.

    Direct subclasses supply `alias`, `get_regex` and `enforce_name`. When this
    class is instantiated, the enforcer of the direct subclass whose `alias`
    equals the submission type is applied to the instance.

    Instance attributes:
        ctx: the settings object passed to the constructor.
        submission_type: `sub_type` if given, otherwise the name of the regex group
            that matched (underscores replaced with spaces), otherwise None.
        out_str: stem of the input string/path.
        parsed_name: the plate number. Left unset when nothing matched and no
            enforcer constructed one, so reading it raises AttributeError.
    """
    # Submission type handled by this namer; the generic base handles none.
    alias = None

    def __init__(self, ctx:Settings, instr:str|Path, sub_type:str|None=None, parent=None):
        """
        Parse `instr` and apply the naming rules of the relevant submission type.

        Args:
            ctx (Settings): settings object, stored on the instance
            instr (str | Path): string or path holding the plate name
            sub_type (str | None, optional): submission type, if already known. Defaults to None.
            parent (optional): model class that requested the namer; used for debug logging only. Defaults to None.
        """
        if parent is not None:
            logger.debug(f"Hello from {parent.__mapper_args__['polymorphic_identity']} Namer!")
        self.ctx = ctx
        self.submission_type = sub_type
        self.retrieve_rsl_number(instr=instr)
        # Only the subclass lookup is guarded, so an IndexError raised inside an
        # enforcer is no longer mistaken for "no namer registered for this type".
        try:
            ncls = [item for item in self.__class__.__subclasses__() if item.alias == self.submission_type][0]
        except IndexError:
            # No dedicated namer (or this already is one): use the instance's own enforcer.
            self.enforce_name(parent=parent)
        else:
            # Borrow the subclass's enforcer but run it against this instance.
            ncls.enforce_name(self=self, parent=parent)

    def retrieve_rsl_number(self, instr:str|Path):
        """
        Uses regex to retrieve the plate number and submission type from an input string

        Args:
            instr (str | Path): string or path to be parsed; only the stem is searched
        """
        if not isinstance(instr, Path):
            instr = Path(instr)
        self.out_str = instr.stem
        logger.debug(f"Attempting match of {self.out_str}")
        logger.debug(f"The initial plate name is: {self.out_str}")
        regex = self.construct_regex()
        m = regex.search(self.out_str)
        if m is None:
            # Nothing recognisable: parsed_name stays unset for the enforcer/caller to deal with.
            return
        self.parsed_name = m.group().upper().strip(".")
        logger.debug(f"Got parsed submission name: {self.parsed_name}")
        if self.submission_type is None:
            try:
                # Named groups look like 'Bacterial_Culture'; aliases use spaces.
                self.submission_type = m.lastgroup.replace("_", " ")
            except AttributeError:
                # lastgroup is None when the match did not come from a named group.
                self.submission_type = None

    def enforce_name(self, parent):
        """
        Default enforcer: applies no naming convention.

        parsed_name is left exactly as retrieve_rsl_number left it. If nothing was
        parsed the attribute remains unset rather than raising from the constructor,
        so callers can detect the failure by catching AttributeError on access.

        Args:
            parent: model class that requested the namer; used for debug logging only.
        """
        if parent is not None:
            logger.debug(f"Hello from {parent.__mapper_args__['polymorphic_identity']} Enforcer!")

    @classmethod
    def construct_regex(cls):
        """
        Build one case-insensitive, verbose pattern from the regexes of all direct subclasses.

        Returns:
            re.Pattern: alternation of every subclass's `get_regex()` output.
        """
        rstring = "|".join(item.get_regex() for item in cls.__subclasses__())
        return re.compile(rstring, flags=re.IGNORECASE | re.VERBOSE)
@classmethod
def enforce_naming_schema(cls, input_str:str) -> str:
    """
    Hook for applying submission-specific naming rules to a parsed name.

    This implementation applies no rules and hands the name back untouched
    (presumably more specific classes override it; not visible here).

    Args:
        input_str (str): name produced by the default parser

    Returns:
        str: the same name, unmodified
    """
    unchanged = input_str
    return unchanged
@classmethod
def RSLNamer(cls, ctx:Settings, instr:str, sub_type:str|None=None):
    """
    Build this model's nested `_RSLNamer` for the given input.

    Declared as a classmethod because it is invoked on the model class itself
    (e.g. `model.RSLNamer(ctx=..., instr=...)`); without the decorator that call
    fails for want of the `cls` argument.

    Args:
        ctx (Settings): settings object passed through to the namer
        instr (str): string holding the plate name
        sub_type (str | None, optional): submission type, if already known. Defaults to None.

    Returns:
        _RSLNamer: namer instance created with this class as its `parent`
    """
    return cls._RSLNamer(parent=cls, ctx=ctx, instr=instr, sub_type=sub_type)
@classmethod
def find_subclasses(cls, ctx:Settings, attrs:dict|None=None, rsl_number:str|None=None):
    """
    Pick the model class to use, either from a plate number or from required attributes.

    Args:
        ctx (Settings): settings object, passed to the namer when rsl_number is given
        attrs (dict | None, optional): attribute names the model must have. Defaults to None.
        rsl_number (str | None, optional): plate number used to infer the submission type. Defaults to None.

    Raises:
        AttributeError: if neither this class nor any direct subclass has every attribute in attrs

    Returns:
        This class, or the first direct subclass that fits.
    """
    if rsl_number is not None:
        # Let the namer work out the submission type, then map it to a subclass.
        namer = cls._RSLNamer(ctx=ctx, instr=rsl_number)
        return cls.find_polymorphic_subclass(namer.submission_type)
    # Truthiness covers both None (the default) and an empty dict; the previous
    # `len(attrs) == 0 or attrs == None` raised TypeError on None.
    if not attrs:
        return cls
    if all(hasattr(cls, attr) for attr in attrs):
        model = cls
    else:
        # looks for first model that has all included kwargs
        try:
            model = next(subclass for subclass in cls.__subclasses__() if all(hasattr(subclass, attr) for attr in attrs))
        except StopIteration:
            raise AttributeError(f"Couldn't find existing class/subclass of {cls} with all attributes:\n{pformat(attrs)}") from None
    logger.debug(f"Using model: {model}")
    return model
@classmethod
def find_polymorphic_subclass(cls, polymorphic_identity:str|None=None):
if isinstance(polymorphic_identity, dict):
polymorphic_identity = polymorphic_identity['value']
if polymorphic_identity == None:
return cls
else:
try:
return [item for item in cls.__subclasses__() if item.__mapper_args__['polymorphic_identity']==polymorphic_identity][0]
except Exception as e:
logger.error(f"Could not get polymorph {polymorphic_identity} of {cls} due to {e}")
return cls
# Below are the custom submission types
@@ -372,6 +450,59 @@ class BacterialCulture(BasicSubmission):
input_excel["Sample List"].cell(row=15, column=2, value=getuser()[0:2].upper())
return input_excel
class _RSLNamer(BasicSubmission._RSLNamer):
    """
    Namer for Bacterial Culture plates; normalises names to the form RSL-YY-NNNN.
    """
    alias = "Bacterial Culture"

    @classmethod
    def construct_regex(cls):
        """
        Compile this namer's own pattern only (case-insensitive, verbose).

        Returns:
            re.Pattern: compiled form of get_regex()
        """
        return re.compile(cls.get_regex(), flags=re.IGNORECASE | re.VERBOSE)

    def enforce_name(self, parent):
        """
        Insert the hyphens the bacterial naming scheme requires, constructing a
        fresh number first if no name was parsed.

        Args:
            parent: unused here; kept so all enforcers share one signature.
        """
        # super().enforce_name(parent)
        def construct(ctx) -> str:
            """
            DEPRECATED due to slowness. Search for the largest rsl number and increment by 1

            Returns:
                str: new RSL number
            """
            logger.debug(f"Attempting to construct RSL number from scratch...")
            directory = Path(ctx.directory_path).joinpath("Bacteria")
            year = str(datetime.now().year)[-2:]
            if not directory.exists():
                return f"RSL-{year}-0000"
            logger.debug(f"Year: {year}")
            # Workbooks whose stem contains an RSL number and carries this year in characters 4-5.
            all_xlsx = [item.stem for item in directory.rglob("*.xlsx") if bool(re.search(r"RSL-\d{2}-\d{4}", item.stem)) and year in item.stem[4:6]]
            logger.debug(f"All rsls: {all_xlsx}")
            relevant_rsls = []
            for item in all_xlsx:
                try:
                    relevant_rsls.append(re.match(r"RSL-\d{2}-\d{4}", item).group(0))
                except Exception as e:
                    # The number was found by search() but is not at the start of the stem.
                    logger.error(f"Regex error: {e}")
                    continue
            logger.debug(f"Initial xlsx: {relevant_rsls}")
            if not relevant_rsls:
                # max() on an empty list raises ValueError; with nothing to increment,
                # use the same placeholder as for a missing directory.
                return f"RSL-{year}-0000"
            max_number = max(int(item[-4:]) for item in relevant_rsls)
            logger.debug(f"The largest sample number is: {max_number}")
            return f"RSL-{year}-{str(max_number+1).zfill(4)}"
        try:
            # 'RSL23...' -> 'RSL-23...'
            self.parsed_name = re.sub(r"RSL(\d{2})", r"RSL-\1", self.parsed_name, flags=re.IGNORECASE)
        except AttributeError:
            # parsed_name was never set because the input did not match.
            self.parsed_name = construct(ctx=self.ctx)
        # 'RSL-230001' -> 'RSL-23-0001'
        self.parsed_name = re.sub(r"RSL-(\d{2})(\d{4})", r"RSL-\1-\2", self.parsed_name, flags=re.IGNORECASE)

    @classmethod
    def get_regex(cls):
        """
        Returns:
            str: pattern for bacterial plate names, in a group named 'Bacterial_Culture'
        """
        return r"(?P<Bacterial_Culture>RSL-?\d{2}-?\d{4})"
class Wastewater(BasicSubmission):
"""
derivative submission type from BasicSubmission
@@ -411,6 +542,50 @@ class Wastewater(BasicSubmission):
if xl != None:
input_dict['csv'] = xl.parse("Copy to import file")
return input_dict
class _RSLNamer(BasicSubmission._RSLNamer):
    """
    Namer for Wastewater plates; normalises names to RSL-WW-YYYYMMDD-<plate>[R<repeat>].
    """
    alias = "Wastewater"

    @classmethod
    def construct_regex(cls):
        """
        Compile this namer's own pattern only (case-insensitive, verbose).

        Returns:
            re.Pattern: compiled form of get_regex()
        """
        rstring = rf'{cls.get_regex()}'
        regex = re.compile(rstring, flags = re.IGNORECASE | re.VERBOSE)
        return regex

    def enforce_name(self, parent):
        """
        Rewrite parsed_name into the wastewater naming scheme, constructing a
        date-based name first if nothing was parsed.

        Args:
            parent: unused here; kept so all enforcers share one signature.
        """
        # super().enforce_name(parent)
        def construct():
            # Fallback name built from today's date.
            today = datetime.now()
            return f"RSL-WW-{today.year}{str(today.month).zfill(2)}{str(today.day).zfill(2)}"
        try:
            # Drop a 'PCR-' / 'PCR_' marker.
            self.parsed_name = re.sub(r"PCR(-|_)", "", self.parsed_name)
        except AttributeError as e:
            # parsed_name was never set because the input did not match.
            logger.error(f"Problem using regex: {e}")
            self.parsed_name = construct()
        # Make sure hyphens separate 'RSL', 'WW' and the date.
        self.parsed_name = self.parsed_name.replace("RSLWW", "RSL-WW")
        self.parsed_name = re.sub(r"WW(\d{4})", r"WW-\1", self.parsed_name, flags=re.IGNORECASE)
        # Collapse a dashed date (YYYY-MM-DD) into YYYYMMDD.
        self.parsed_name = re.sub(r"(\d{4})-(\d{2})-(\d{2})", r"\1\2\3", self.parsed_name)
        logger.debug(f"Coming out of the preliminary parsing, the plate name is {self.parsed_name}")
        try:
            # A separator followed by a lone digit is taken as the plate number.
            plate_number = re.search(r"(?:(-|_)\d)(?!\d)", self.parsed_name).group().strip("_").strip("-")
            logger.debug(f"Plate number is: {plate_number}")
        except AttributeError as e:
            # No plate number present: default to plate 1.
            plate_number = "1"
        # self.parsed_name = re.sub(r"(\d{8})(-|_\d)?(R\d)?", fr"\1-{plate_number}\3", self.parsed_name)
        # Re-emit date, hyphen, plate number, then any repeat marker that followed.
        self.parsed_name = re.sub(r"(\d{8})(-|_)?\d?(R\d?)?", rf"\1-{plate_number}\3", self.parsed_name)
        logger.debug(f"After addition of plate number the plate name is: {self.parsed_name}")
        try:
            repeat = re.search(r"-\dR(?P<repeat>\d)?", self.parsed_name).groupdict()['repeat']
            if repeat == None:
                # An 'R' with no digit counts as the first repeat.
                repeat = "1"
        except AttributeError as e:
            # No repeat marker at all.
            repeat = ""
        # The space keeps '\1' apart from the digit that follows; it is removed straight after.
        self.parsed_name = re.sub(r"(-\dR)\d?", rf"\1 {repeat}", self.parsed_name).replace(" ", "")

    @classmethod
    def get_regex(cls):
        """
        Returns:
            str: pattern for wastewater plate names, in a group named 'Wastewater'
        """
        # Raw string: the same pattern text, without relying on invalid escape sequences.
        return r"(?P<Wastewater>RSL(?:-|_)?WW(?:-|_)?20\d{2}-?\d{2}-?\d{2}(?:(_|-)\d?(\D|$)R?\d?)?)"
class WastewaterArtic(BasicSubmission):
"""
@@ -454,6 +629,35 @@ class WastewaterArtic(BasicSubmission):
# at the end, this has to be done here. No moving to sqlalchemy object :(
input_dict['submitter_id'] = re.sub(r"\s\(.+\)$", "", str(input_dict['submitter_id'])).strip()
return input_dict
class _RSLNamer(BasicSubmission._RSLNamer):
    """
    Namer for Wastewater Artic plates; produces names of the form RSL-AR-YYYYMMDD-<plate>.
    """
    alias = "Wastewater Artic"

    @classmethod
    def construct_regex(cls):
        """
        Compile this namer's own pattern only (case-insensitive, verbose).

        Returns:
            re.Pattern: compiled form of get_regex()
        """
        pattern = rf'{cls.get_regex()}'
        return re.compile(pattern, flags = re.IGNORECASE | re.VERBOSE)

    def enforce_name(self, parent):
        """
        Rewrite parsed_name into the artic naming scheme, constructing a
        date-based name first if nothing was parsed.

        Args:
            parent: unused here; kept so all enforcers share one signature.
        """
        # super().enforce_name(parent)
        def construct():
            # Fallback name built from today's date.
            today = datetime.now()
            return f"RSL-AR-{today.year}{str(today.month).zfill(2)}{str(today.day).zfill(2)}"
        try:
            current_name = self.parsed_name
        except AttributeError:
            # Nothing was parsed from the input.
            self.parsed_name = construct()
        else:
            # Turn a dashed date into the 'RSL-AR-YYYYMMDD' prefix.
            self.parsed_name = re.sub(r"(\d{4})-(\d{2})-(\d{2})", r"RSL-AR-\1\2\3", current_name, flags=re.IGNORECASE)
        separator = re.search(r"_|-\d?_", self.parsed_name)
        try:
            plate_number = int(separator.group().strip("_").strip("-"))
        except (AttributeError, ValueError):
            # No separator found, or it carried no digit: default to plate 1.
            plate_number = 1
        # Swap the trailing '[-N]_ARTIC' for '-<plate number>'.
        self.parsed_name = re.sub(r"(_|-\d)?_ARTIC", f"-{plate_number}", self.parsed_name)

    @classmethod
    def get_regex(cls):
        """
        Returns:
            str: pattern for artic plate names, in a group named 'Wastewater_Artic'
        """
        return "(?P<Wastewater_Artic>(\\d{4}-\\d{2}-\\d{2}(?:-|_)(?:\\d_)?artic)|(RSL(?:-|_)?AR(?:-|_)?20\\d{2}-?\\d{2}-?\\d{2}(?:(_|-)\\d?(\\D|$)R?\\d?)?))"
class BasicSample(Base):
"""
@@ -542,6 +746,34 @@ class BasicSample(Base):
"""
return dict(name=self.submitter_id[:10], positive=False, tooltip=tooltip_text)
@classmethod
def find_subclasses(cls, ctx:Settings, attrs:dict|None=None, rsl_number:str|None=None):
    """
    Pick the sample class that has every requested attribute.

    Args:
        ctx (Settings): settings object; not used by this method
        attrs (dict | None, optional): attribute names the model must have. Defaults to None.
        rsl_number (str | None, optional): not used by this method. Defaults to None.

    Raises:
        AttributeError: if neither this class nor any direct subclass has every attribute in attrs

    Returns:
        This class, or the first direct subclass that has all the attributes.
    """
    # Truthiness covers both None (the default) and an empty dict; the previous
    # `len(attrs) == 0 or attrs == None` raised TypeError on None.
    if not attrs:
        return cls
    if all(hasattr(cls, attr) for attr in attrs):
        model = cls
    else:
        # looks for first model that has all included kwargs
        try:
            model = next(subclass for subclass in cls.__subclasses__() if all(hasattr(subclass, attr) for attr in attrs))
        except StopIteration:
            raise AttributeError(f"Couldn't find existing class/subclass of {cls} with all attributes:\n{pformat(attrs)}") from None
    logger.debug(f"Using model: {model}")
    return model
@classmethod
def find_polymorphic_subclass(cls, polymorphic_identity:str|None=None):
if isinstance(polymorphic_identity, dict):
polymorphic_identity = polymorphic_identity['value']
if polymorphic_identity == None:
return cls
else:
try:
return [item for item in cls.__subclasses__() if item.__mapper_args__['polymorphic_identity']==polymorphic_identity][0]
except Exception as e:
logger.error(f"Could not get polymorph {polymorphic_identity} of {cls} due to {e}")
return cls
class WastewaterSample(BasicSample):
"""
Derivative wastewater sample