Files
Submissions-App/src/submissions/backend/validators/__init__.py
2025-09-12 10:14:53 -05:00

272 lines
10 KiB
Python

"""
Contains all validators
"""
import logging, re
import sys
from pathlib import Path
from openpyxl import load_workbook
from backend.db.models import Run, SubmissionType
from tools import jinja_template_loading
from jinja2 import Template
from dateutil.parser import parse
from datetime import datetime
logger = logging.getLogger(f"submissions.{__name__}")
class DefaultNamer(object):
def __init__(self, filepath: str | Path, **kwargs):
if isinstance(filepath, str):
filepath = Path(filepath)
try:
assert filepath.exists()
except AssertionError:
raise FileNotFoundError(f"File {filepath} does not exist.")
self.filepath = filepath
class ClientSubmissionNamer(DefaultNamer):
    """
    Namer for client submission files; works out which SubmissionType a file belongs to.
    """

    def __init__(self, filepath: str | Path, submissiontype: str | SubmissionType | None = None,
                 data: dict | None = None, **kwargs):
        """
        Args:
            filepath (str | Path): Location of the submission file (must exist).
            submissiontype (str | SubmissionType | None): Known type or its name. If falsy, it is
                determined from the file via retrieve_submissiontype.
            data (dict | None): Currently unused.
            **kwargs: Currently unused.
        """
        super().__init__(filepath=filepath)
        if not submissiontype:
            submissiontype = self.retrieve_submissiontype(filepath=self.filepath)
        if isinstance(submissiontype, str):
            submissiontype = SubmissionType.query(name=submissiontype)
        # NOTE: Keep the resolved type on the instance; previously it was computed and discarded.
        self.submissiontype = submissiontype

    def retrieve_submissiontype(self, filepath: str | Path):
        """
        Tries progressively less reliable ways of determining the submission type.

        Args:
            filepath (str | Path): Not read; every attempt works from self.filepath.

        Returns:
            Result of the first attempt that yields a truthy value, otherwise the result of
            SubmissionType.query(name="Default").
        """
        # NOTE: Attempt 1, get from form properties:
        sub_type = self.get_subtype_from_properties()
        if not sub_type:
            # NOTE: Attempt 2, get by opening file and using default parser
            logger.warning(f"Getting submissiontype from file properties failed, falling back on preparse.\nDepending on excel structure this might yield an incorrect submissiontype")
            sub_type = self.get_subtype_from_preparse()
        if not sub_type:
            # NOTE: Attempt 3, match the file path against the submission type regex
            logger.warning(f"Getting submissiontype from preparse failed, falling back on filename regex.\nDepending on file name this might yield an incorrect submissiontype")
            sub_type = self.get_subtype_from_regex()
        if not sub_type:
            # NOTE: Last resort
            logger.warning(f"Getting submissiontype from regex failed, using default submissiontype.")
            sub_type = SubmissionType.query(name="Default")
        return sub_type

    def get_subtype_from_regex(self) -> SubmissionType | None:
        """
        Matches the file path against SubmissionType.regex and looks up the type named by the
        last matched group.

        Returns:
            SubmissionType | None: Matching type, or None if nothing matched.
        """
        m = SubmissionType.regex.search(str(self.filepath))
        try:
            # NOTE(review): Regex group names cannot hold spaces; RSLNamer.retrieve_submission_type
            #  swaps underscores for spaces before using the name, so the same is done here - confirm.
            sub_type = m.lastgroup.replace("_", " ")
            sub_type = SubmissionType.query(name=sub_type)
        except AttributeError as e:
            # NOTE: m is None (no match) or the match had no named group.
            sub_type = None
            logger.critical(f"No procedure type found or procedure type found!: {e}")
        # NOTE: Same convention as the other get_subtype_* methods: a list result counts as failure.
        if isinstance(sub_type, list):
            sub_type = None
        return sub_type

    def get_subtype_from_preparse(self) -> SubmissionType | None:
        """
        Parses the file with ClientSubmissionInfoParser and looks up the type named under the
        "submissiontype" key of its parsed_info.

        Returns:
            SubmissionType | None: Matching type, or None if the lookup returned a list.
        """
        # NOTE: Imported at call time (presumably to avoid a circular import - TODO confirm).
        from backend.excel.parsers.clientsubmission_parser import ClientSubmissionInfoParser
        parser = ClientSubmissionInfoParser(self.filepath)
        sub_type = next((value for k, value in parser.parsed_info.items() if k == "submissiontype"), None)
        sub_type = SubmissionType.query(name=sub_type)
        if isinstance(sub_type, list):
            sub_type = None
        return sub_type

    def get_subtype_from_properties(self) -> SubmissionType | None:
        """
        Reads the workbook's "category" document property and looks up the type named by its
        first ';'-separated entry.

        Returns:
            SubmissionType | None: Matching type, or None if no category is set or the lookup
            returned a list.
        """
        wb = load_workbook(self.filepath)
        category = wb.properties.category
        if not category:
            # NOTE: Category is unset; return None so the caller moves on to its next attempt
            #  instead of dying on None.split().
            return None
        # NOTE: Gets first category in the metadata.
        sub_type = category.split(";")[0].strip().title()
        sub_type = SubmissionType.query(name=sub_type)
        if isinstance(sub_type, list):
            sub_type = None
        return sub_type
class RSLNamer(object):
"""
Object that will enforce proper formatting on RSL plate names.
"""
def __init__(self, filename: str, submission_type: str | None = None, data: dict | None = None):
# NOTE: Preferred method is path retrieval, but might also need validation for just string.
filename = Path(filename) if Path(filename).exists() else filename
self.submission_type = submission_type
if not self.submission_type:
self.submission_type = self.retrieve_submission_type(filename=filename)
if self.submission_type:
self.sub_object = SubmissionType.query(name=self.submission_type['name'], limit=1)
self.parsed_name = self.retrieve_rsl_number(filename=filename, regex=self.sub_object.get_regex(
submission_type=self.submission_type))
logger.info(f"Parsed name: {self.parsed_name}")
@classmethod
def retrieve_submission_type(cls, filename: str | Path) -> str:
"""
Gets procedure type from excel file properties or sheet names or regex pattern match or user input
Args:
filename (str | Path): filename
Raises:
TypeError: Raised if unsupported variable type for filename given.
Returns:
str: parsed procedure type
"""
def st_from_path(filepath: Path) -> str:
"""
Sub def to get proceduretype from a file path
Args:
filepath ():
Returns:
"""
if filepath.exists():
wb = load_workbook(filepath)
try:
# NOTE: Gets first category in the metadata.
categories = wb.properties.category.split(";")
submission_type = next(item.strip().title() for item in categories)
except (StopIteration, AttributeError):
sts = {item.name: item.template_file_sheets for item in SubmissionType.query() if
item.template_file}
try:
submission_type = next(k.title() for k, v in sts.items() if wb.sheetnames == v)
except StopIteration:
# NOTE: On failure recurse using filepath as string for string method
submission_type = cls.retrieve_submission_type(filename=filepath.stem.__str__())
else:
submission_type = cls.retrieve_submission_type(filename=filepath.stem.__str__())
return submission_type
def st_from_str(file_name: str) -> str:
if file_name.startswith("tmp"):
return "Bacterial Culture"
regex = SubmissionType.regex
m = regex.search(file_name)
try:
sub_type = m.lastgroup
except AttributeError as e:
sub_type = None
logger.critical(f"No procedure type found or procedure type found!: {e}")
return sub_type
match filename:
case Path():
submission_type = st_from_path(filepath=filename)
case str():
submission_type = st_from_str(file_name=filename)
case _:
raise TypeError(f"Unsupported filename type: {type(filename)}.")
try:
check = submission_type is None
except UnboundLocalError:
check = True
if check:
if "pytest" in sys.modules:
raise ValueError("Submission Type came back as None.")
from frontend.widgets import ObjectSelector
dlg = ObjectSelector(title="Couldn't parse procedure type.",
message="Please select procedure type from list below.",
obj_type=SubmissionType)
if dlg.exec():
submission_type = dlg.parse_form()
submission_type = submission_type.replace("_", " ")
return submission_type
@classmethod
def retrieve_rsl_number(cls, filename: str | Path, regex: re.Pattern | None = None):
"""
Uses regex to retrieve the plate number and procedure type from an input string
Args:
regex (str): string to construct pattern
filename (str): string to be parsed
"""
if regex is None:
regex = BasicRun.regex
match filename:
case Path():
m = regex.search(filename.stem)
case str():
m = regex.search(filename)
case _:
m = None
if m is not None:
try:
parsed_name = m.group().upper().strip(".")
except AttributeError:
parsed_name = None
else:
parsed_name = None
return parsed_name
@classmethod
def construct_new_plate_name(cls, data: dict) -> str:
"""
Make a brand-new plate name from procedure data.
Args:
data (dict): incoming procedure data
Returns:
str: Output filename
"""
if "submitted_date" in data.keys():
if isinstance(data['submitted_date'], dict):
if data['submitted_date']['value'] is not None:
today = data['submitted_date']['value']
else:
today = datetime.now()
else:
today = data['submitted_date']
else:
try:
today = re.search(r"\d{4}(_|-)?\d{2}(_|-)?\d{2}", data['name'])
today = parse(today.group())
except (AttributeError, KeyError):
today = datetime.now()
if isinstance(today, str):
today = datetime.strptime(today, "%Y-%m-%d")
previous = Run.query(start_date=today, end_date=today, submissiontype=data['submissiontype'])
plate_number = len(previous) + 1
return f"RSL-{data['abbreviation']}-{today.year}{str(today.month).zfill(2)}{str(today.day).zfill(2)}-{plate_number}"
@classmethod
def construct_export_name(cls, template: Template, **kwargs) -> str:
"""
Make export file name from jinja template. (currently unused)
Args:
template (jinja2.Template): Template stored in BasicRun
Returns:
str: output file name.
"""
environment = jinja_template_loading()
template = environment.from_string(template)
return template.render(**kwargs)
def calculate_repeat(self) -> str:
"""
Determines what repeat number this plate is.
Returns:
str: Repeat number.
"""
regex = re.compile(r"-\d(?P<repeat>R\d)")
m = regex.search(self.parsed_name)
if m is not None:
return m.group("repeat")
else:
return ""
from .pydant import (
PydRun, PydContact, PydClientLab, PydSample, PydReagent, PydReagentRole, PydEquipment, PydEquipmentRole, PydTips,
PydProcess, PydElastic, PydClientSubmission, PydProcedure, PydResults, PydReagentLot
)