Files
Submissions-App/src/submissions/backend/excel/parsers/clientsubmission_parser.py
2025-09-04 14:35:44 -05:00

170 lines
6.1 KiB
Python

"""
Module for clientsubmission parsing
"""
from __future__ import annotations
import logging
from pathlib import Path
from string import ascii_lowercase
from typing import Generator, TYPE_CHECKING
from openpyxl.reader.excel import load_workbook
from tools import row_keys
from . import DefaultKEYVALUEParser, DefaultTABLEParser
if TYPE_CHECKING:
from backend.db.models import SubmissionType
logger = logging.getLogger(f"submissions.{__name__}")
class SubmissionTyperMixin(object):
    """
    Mixin of classmethods that work out the SubmissionType of an import file.

    retrieve_submissiontype tries three strategies in order: the workbook
    metadata, a default parse of the file, and a regex on the file name.
    """

    @staticmethod
    def _first_category(category: "str | None") -> "str | None":
        """
        Extracts the first ';'-separated category as a title-cased name.

        Args:
            category (str | None): Raw category string from the workbook metadata.

        Returns:
            str | None: The title-cased first category, or None if it is missing or blank.
        """
        if not category:
            return None
        first = category.split(";", 1)[0].strip()
        return first.title() or None

    @classmethod
    def retrieve_submissiontype(cls, filepath: Path) -> "SubmissionType":
        """
        Gets the submission type from a file.

        Args:
            filepath (Path): The import file

        Returns:
            SubmissionType: The determined submissiontype (None if every attempt failed)
        """
        # NOTE: Attempt 1, get from form properties:
        sub_type = cls.get_subtype_from_properties(filepath=filepath)
        if sub_type:
            return sub_type
        # NOTE: Attempt 2, get by opening file and using default parser
        logger.warning(
            "Getting submissiontype from file properties failed, falling back on preparse.\nDepending on excel structure this might yield an incorrect submissiontype")
        sub_type = cls.get_subtype_from_preparse(filepath=filepath)
        if sub_type:
            return sub_type
        # NOTE: Attempt 3, get from the file name
        logger.warning(
            "Getting submissiontype from preparse failed, falling back on filename regex.\nDepending on excel structure this might yield an incorrect submissiontype")
        return cls.get_subtype_from_regex(filepath=filepath)

    @classmethod
    def get_subtype_from_regex(cls, filepath: Path) -> "SubmissionType":
        """
        Uses regex of the file name to determine submissiontype

        Args:
            filepath (Path): The import file

        Returns:
            SubmissionType: The determined submissiontype (None if nothing matched)
        """
        from backend.db.models import SubmissionType
        match = SubmissionType.regex.search(str(filepath))
        # NOTE: lastgroup is also None when the match did not come from a named group.
        type_name = match.lastgroup if match is not None else None
        if type_name is None:
            logger.critical(f"No submission type or procedure type found!: no pattern matched {filepath}")
            # NOTE: Bail out rather than querying with name=None, which cannot identify a type.
            return None
        sub_type = SubmissionType.query(name=type_name, limit=1)
        return sub_type or None

    @classmethod
    def get_subtype_from_preparse(cls, filepath: Path) -> "SubmissionType":
        """
        Performs a default parse of the file in an attempt to find the submission type.

        Args:
            filepath (Path): The import file

        Returns:
            SubmissionType: The determined submissiontype (None if it could not be found)
        """
        from backend.db.models import SubmissionType
        default_type = SubmissionType.query(name="Default")
        if not default_type or isinstance(default_type, list):
            # NOTE: ClientSubmissionInfoParser.__init__ calls retrieve_submissiontype when handed a
            # falsy submissiontype, which would lead straight back here and recurse.
            logger.error("No 'Default' submissiontype available, skipping preparse.")
            return None
        parser = ClientSubmissionInfoParser(filepath=filepath, submissiontype=default_type)
        # NOTE: Read the parent class's key/value pairs directly. ClientSubmissionInfoParser.parsed_info
        # overwrites the parsed submission type value with the parser's own type ("Default" here),
        # which would hide what the file actually says.
        raw_info = dict(super(ClientSubmissionInfoParser, parser).parsed_info)
        sub_type = next((raw_info[key] for key in ("submissiontype", "submission_type") if key in raw_info), None)
        if isinstance(sub_type, dict):
            sub_type = sub_type.get('value')
        if not sub_type:
            return None
        sub_type = SubmissionType.query(name=str(sub_type).strip().title())
        # NOTE(review): query appears to return a list when the name does not resolve to a single
        # type - confirm against SubmissionType.query.
        if isinstance(sub_type, list):
            return None
        return sub_type

    @classmethod
    def get_subtype_from_properties(cls, filepath: Path) -> "SubmissionType":
        """
        Attempts to get submission type from the xl metadata.

        Args:
            filepath (Path): The import file

        Returns:
            SubmissionType: The determined submissiontype (None if the metadata holds no usable category)
        """
        from backend.db.models import SubmissionType
        # NOTE: read_only avoids loading every worksheet just to read the metadata;
        # a read-only workbook keeps the file open until close() is called.
        wb = load_workbook(filepath, read_only=True)
        try:
            category = wb.properties.category
        finally:
            wb.close()
        # NOTE: Gets first category in the metadata (category is None when the property is unset).
        type_name = cls._first_category(category)
        if type_name is None:
            return None
        sub_type = SubmissionType.query(name=type_name)
        if isinstance(sub_type, list):
            return None
        return sub_type
class ClientSubmissionInfoParser(DefaultKEYVALUEParser, SubmissionTyperMixin):
    """
    Object for retrieving submitter info from "Client Info" sheet
    """

    pyd_name = "PydClientSubmission"

    def __init__(self, filepath: Path | str, submissiontype: "SubmissionType | None" = None, *args, **kwargs):
        """
        Args:
            filepath (Path | str): The import file
            submissiontype (SubmissionType | None, optional): Known submission type. When falsy it is
                determined from the file via retrieve_submissiontype. Defaults to None.
            *args: Accepted but not forwarded to the parent parser.
            **kwargs: Forwarded to the parent parser.
        """
        if not submissiontype:
            submissiontype = self.retrieve_submissiontype(filepath=filepath)
        self.submissiontype = submissiontype
        # NOTE: Logged after resolution so the message shows the type actually in use.
        logger.debug(f"Set submission type: {self.submissiontype}")
        super().__init__(filepath=filepath, sheet="Client Info", start_row=1, **kwargs)

    @property
    def parsed_info(self) -> dict:
        """
        Parent key/value pairs as a dict, with 'client_lab' and 'submission_type' also exposed
        as 'clientlab' and 'submissiontype'.

        Returns:
            dict: Parsed info. When the submission type entry is a dict, its 'value' is replaced
                with the title-cased name of this parser's submissiontype.
        """
        output = dict(super().parsed_info)
        try:
            output['clientlab'] = output['client_lab']
        except KeyError:
            pass
        try:
            entry = output['submission_type']
        except KeyError:
            return output
        # NOTE: Same object under both keys, so the override below is visible through either key.
        output['submissiontype'] = entry
        type_name = getattr(self.submissiontype, "name", None)
        if type_name is None:
            # NOTE: retrieve_submissiontype can come back empty; keep the parsed value rather than crash.
            logger.warning("No submission type resolved, leaving parsed submission type untouched.")
        elif isinstance(entry, dict):
            entry['value'] = type_name.title()
        return output
class ClientSubmissionSampleParser(DefaultTABLEParser, SubmissionTyperMixin):
    """
    Object for retrieving submitter samples from the import file

    NOTE(review): this was described as reading the "sample list" sheet, but the parent parser
    is pointed at the "Client Info" sheet - confirm which is intended.
    """

    pyd_name = "PydSample"

    def __init__(self, filepath: Path | str, submissiontype: "SubmissionType | None" = None, start_row: int = 1, *args,
                 **kwargs):
        """
        Args:
            filepath (Path | str): The import file
            submissiontype (SubmissionType | None, optional): Known submission type. When falsy it is
                determined from the file via retrieve_submissiontype. Defaults to None.
            start_row (int, optional): Row passed to the parent parser as its start row. Defaults to 1.
            *args: Accepted but not forwarded to the parent parser.
            **kwargs: Forwarded to the parent parser.
        """
        if not submissiontype:
            submissiontype = self.retrieve_submissiontype(filepath=filepath)
        self.submissiontype = submissiontype
        super().__init__(filepath=filepath, sheet="Client Info", start_row=start_row, **kwargs)

    @staticmethod
    def _convert_row(row):
        """
        Translates a single row letter (a-h, any case) through row_keys.

        Args:
            row: The parsed row value.

        Returns:
            The row_keys entry for the letter, or the value unchanged if it is not a
            single a-h letter or row_keys has no entry for it.
        """
        if not isinstance(row, str) or len(row) != 1 or row.lower() not in ascii_lowercase[0:8]:
            return row
        # NOTE: The letter check is case-insensitive, so try the upper-case key too
        # (presumably row_keys is keyed by upper-case letters - TODO confirm).
        for key in (row, row.upper()):
            try:
                return row_keys[key]
            except KeyError:
                continue
        return row

    @property
    def parsed_info(self) -> Generator[dict, None, None]:
        """
        Yields each sample from the parent parser with its row letter converted and a rank added.

        Returns:
            Generator[dict, None, None]: Samples with 'submission_rank' set to their 1-based position.
        """
        for rank, sample in enumerate(super().parsed_info, start=1):
            if "row" in sample:
                sample["row"] = self._convert_row(sample["row"])
            sample['submission_rank'] = rank
            yield sample

    def to_pydantic(self):
        """
        Builds a pydantic object for every parsed sample that has a truthy 'sample_id'.

        Returns:
            list: One self._pyd_object instance per kept sample.
        """
        return [self._pyd_object(**sample) for sample in self.parsed_info if sample.get('sample_id')]