Updated parser functions to include identifiers.
This commit is contained in:
@@ -8,6 +8,7 @@ from openpyxl import load_workbook
|
||||
from pandas import DataFrame
|
||||
from backend.validators import pydant
|
||||
from backend.db.models import Procedure
|
||||
from dataclasses import dataclass
|
||||
|
||||
logger = logging.getLogger(f"submissions.{__name__}")
|
||||
|
||||
@@ -16,16 +17,34 @@ class DefaultParser(object):
|
||||
def __repr__(self):
    """
    Build a short debugging label for this parser.

    Returns:
        str: The concrete class name followed by the stem of ``self.filepath`` in angle brackets.
    """
    class_name = self.__class__.__name__
    file_stem = self.filepath.stem
    return f"{class_name}<{file_stem}>"
|
||||
|
||||
def __new__(cls, *args, **kwargs):
    """
    Create the parser instance after checking that the target file exists.

    The filepath is taken from the ``filepath`` keyword when present, otherwise from the
    first positional argument, so both ``Parser(filepath=x)`` and ``Parser(x)`` work.

    Args:
        *args (): Positional constructor arguments; the first one is used as the filepath
            when no ``filepath`` keyword is given.
        **kwargs (): Keyword constructor arguments; ``filepath`` (Path|str) is read from here first.

    Returns:
        The new instance with ``instance.filepath`` set to a ``Path``.

    Raises:
        TypeError: If no filepath was supplied at all.
        FileNotFoundError: If the filepath does not exist on disk.
    """
    if "filepath" in kwargs:
        filepath = kwargs["filepath"]
    elif args:
        filepath = args[0]
    else:
        raise TypeError(f"{cls.__name__} requires a 'filepath' argument.")
    if not isinstance(filepath, Path):
        filepath = Path(filepath)
    # NOTE: Explicit check instead of `assert`, which is stripped when Python runs with -O.
    if not filepath.exists():
        raise FileNotFoundError(f"File {filepath} does not exist.")
    instance = super().__new__(cls)
    instance.filepath = filepath
    return instance
|
||||
|
||||
|
||||
def __init__(self, filepath: Path | str, procedure: Procedure|None=None, range_dict: dict | None = None, *args, **kwargs):
|
||||
"""
|
||||
|
||||
Args:
|
||||
filepath (Path|str): Must be given as a kwarg. eg. filepath=X
|
||||
procedure ():
|
||||
range_dict ():
|
||||
*args ():
|
||||
**kwargs ():
|
||||
"""
|
||||
self.procedure = procedure
|
||||
try:
|
||||
self._pyd_object = getattr(pydant, f"Pyd{self.__class__.__name__.replace('Parser', '')}")
|
||||
except AttributeError:
|
||||
self._pyd_object = pydant.PydResults
|
||||
if isinstance(filepath, str):
|
||||
self.filepath = Path(filepath)
|
||||
else:
|
||||
self.filepath = filepath
|
||||
self.workbook = load_workbook(self.filepath, data_only=True)
|
||||
if not range_dict:
|
||||
self.range_dict = self.__class__.default_range_dict
|
||||
|
||||
@@ -2,15 +2,69 @@
|
||||
|
||||
"""
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from string import ascii_lowercase
|
||||
from typing import Generator
|
||||
|
||||
from openpyxl.reader.excel import load_workbook
|
||||
|
||||
from tools import row_keys
|
||||
from backend.db.models import SubmissionType
|
||||
from . import DefaultKEYVALUEParser, DefaultTABLEParser
|
||||
|
||||
logger = logging.getLogger(f"submissions.{__name__}")
|
||||
|
||||
|
||||
class ClientSubmissionParser(DefaultKEYVALUEParser):
|
||||
class SubmissionTyperMixin(object):
    """
    Mixin that works out which SubmissionType an Excel submission file belongs to.

    retrieve_submissiontype tries three strategies in order: the workbook's "category"
    document property, a pre-parse of the sheet contents, and a regex search of the file path.
    """

    @classmethod
    def retrieve_submissiontype(cls, filepath: Path):
        """
        Determine the submission type of a file, falling back through three strategies.

        Args:
            filepath (Path): Excel file to inspect.

        Returns:
            The first truthy result from the properties, preparse or regex strategy;
            whatever the regex strategy returned (possibly None) if the first two fail.
        """
        # NOTE: Attempt 1, get from form properties:
        sub_type = cls.get_subtype_from_properties(filepath=filepath)
        if sub_type:
            return sub_type
        # NOTE: Attempt 2, get by opening file and using default parser
        logger.warning(
            f"Getting submissiontype from file properties failed, falling back on preparse.\nDepending on excel structure this might yield an incorrect submissiontype")
        sub_type = cls.get_subtype_from_preparse(filepath=filepath)
        if sub_type:
            return sub_type
        # NOTE: Attempt 3, get from the filename.
        logger.warning(
            f"Getting submissiontype from preparse failed, falling back on filename regex.\nDepending on excel structure this might yield an incorrect submissiontype")
        return cls.get_subtype_from_regex(filepath=filepath)

    @classmethod
    def _query_submissiontype(cls, name):
        """
        Look up a SubmissionType by name, returning None unless a single object comes back.

        Args:
            name (): Name to query for; None or an empty string short-circuits to None.

        Returns:
            The result of ``SubmissionType.query(name=name)``, or None if that result is a list.
        """
        if name is None or name == "":
            return None
        result = SubmissionType.query(name=name)
        # NOTE(review): a list result is rejected; presumably it means the name did not resolve
        # to exactly one record - confirm against SubmissionType.query.
        if isinstance(result, list):
            return None
        return result

    @classmethod
    def get_subtype_from_regex(cls, filepath: Path):
        """
        Search the file path with ``SubmissionType.regex``.

        Args:
            filepath (Path): Excel file whose path is searched.

        Returns:
            str|None: Name of the last matched named group, or None when the regex does not match.
        """
        # NOTE(review): this returns the regex group name (a str), while the other two strategies
        # return the query result - confirm callers can handle both.
        match = SubmissionType.regex.search(str(filepath))
        if match is None:
            logger.critical(f"No submission type found in filename: {filepath}")
            return None
        return match.lastgroup

    @classmethod
    def get_subtype_from_preparse(cls, filepath: Path):
        """
        Parse the file with ClientSubmissionParser's default ranges and read its "submissiontype" entry.

        Args:
            filepath (Path): Excel file to pre-parse.

        Returns:
            The queried SubmissionType, or None if the sheet has no usable "submissiontype" value.
        """
        # NOTE: ClientSubmissionParser.__init__ calls retrieve_submissiontype, which would land
        # straight back in this method for the same file. Build the instance by hand and run only
        # the base initialiser so the class' default_range_dict is used and nothing recurses.
        parser = ClientSubmissionParser.__new__(ClientSubmissionParser, filepath=filepath)
        parser.submissiontype = None
        DefaultKEYVALUEParser.__init__(parser, filepath=filepath)
        name = next((value for k, value in parser.parsed_info if k == "submissiontype"), None)
        return cls._query_submissiontype(name)

    @classmethod
    def get_subtype_from_properties(cls, filepath: Path):
        """
        Read the submission type from the workbook's "category" document property.

        Args:
            filepath (Path): Excel file to open.

        Returns:
            The queried SubmissionType, or None if the workbook has no category set.
        """
        wb = load_workbook(filepath)
        category = wb.properties.category
        # NOTE: category is None when the workbook has no category set; that is a failed
        # attempt for this strategy, not an error.
        if not category:
            return None
        # NOTE: Gets first category in the metadata.
        first_category = category.split(";")[0].strip().title()
        return cls._query_submissiontype(first_category)
|
||||
|
||||
|
||||
class ClientSubmissionParser(DefaultKEYVALUEParser, SubmissionTyperMixin):
|
||||
"""
|
||||
Object for retrieving submitter info from "sample list" sheet
|
||||
"""
|
||||
@@ -23,11 +77,16 @@ class ClientSubmissionParser(DefaultKEYVALUEParser):
|
||||
sheet="Sample List"
|
||||
)]
|
||||
|
||||
def __init__(self, filepath: Path | str, *args, **kwargs):
|
||||
self.submissiontype = self.retrieve_submissiontype(filepath=filepath)
|
||||
if "range_dict" not in kwargs:
|
||||
kwargs['range_dict'] = self.submissiontype.info_map
|
||||
super().__init__(filepath=filepath, **kwargs)
|
||||
|
||||
|
||||
class SampleParser(DefaultTABLEParser):
|
||||
class ClientSampleParser(DefaultTABLEParser, SubmissionTyperMixin):
|
||||
"""
|
||||
Object for retrieving submitter info from "sample list" sheet
|
||||
Object for retrieving submitter samples from "sample list" sheet
|
||||
"""
|
||||
|
||||
default_range_dict = [dict(
|
||||
@@ -36,6 +95,12 @@ class SampleParser(DefaultTABLEParser):
|
||||
sheet="Sample List"
|
||||
)]
|
||||
|
||||
def __init__(self, filepath: Path | str, *args, **kwargs):
|
||||
self.submissiontype = self.retrieve_submissiontype(filepath=filepath)
|
||||
if "range_dict" not in kwargs:
|
||||
kwargs['range_dict'] = self.submissiontype.sample_map
|
||||
super().__init__(filepath=filepath, **kwargs)
|
||||
|
||||
@property
|
||||
def parsed_info(self) -> Generator[dict, None, None]:
|
||||
output = super().parsed_info
|
||||
|
||||
Reference in New Issue
Block a user