Submissions-App/src/submissions/backend/excel/parsers/clientsubmission_parser.py

"""

"""
from __future__ import annotations
import logging
from pathlib import Path
from string import ascii_lowercase
from typing import Generator

from openpyxl.reader.excel import load_workbook

from tools import row_keys
# from backend.db.models import SubmissionType
from . import DefaultKEYVALUEParser, DefaultTABLEParser
from backend.managers import procedures as procedure_managers

logger = logging.getLogger(f"submissions.{__name__}")


class SubmissionTyperMixin(object):
    """
    Mixin that works out which submission type an excel workbook belongs to.

    Three strategies are tried in order until one yields a truthy result:
    the workbook's "category" property, a preparse of the default info range,
    and finally a regex applied to the file path.
    """

    @classmethod
    def retrieve_submissiontype(cls, filepath: Path):
        """
        Determine the submission type for a workbook.

        Args:
            filepath (Path): Location of the excel workbook.

        Returns:
            The first truthy result of the three lookups, or the (falsy) result of
            the regex lookup if every attempt fails.
        """
        # NOTE: Attempt 1, get from form properties:
        sub_type = cls.get_subtype_from_properties(filepath=filepath)
        if not sub_type:
            # NOTE: Attempt 2, get by opening file and using default parser
            logger.warning(
                f"Getting submissiontype from file properties failed, falling back on preparse.\nDepending on excel structure this might yield an incorrect submissiontype")
            sub_type = cls.get_subtype_from_preparse(filepath=filepath)
        if not sub_type:
            # NOTE: Attempt 3, get from the filename.
            logger.warning(
                f"Getting submissiontype from preparse failed, falling back on filename regex.\nDepending on excel structure this might yield an incorrect submissiontype")
            sub_type = cls.get_subtype_from_regex(filepath=filepath)
        return sub_type

    @classmethod
    def get_subtype_from_regex(cls, filepath: Path):
        """
        Match the file path against SubmissionType.regex.

        Args:
            filepath (Path): Location of the excel workbook.

        Returns:
            Name of the last matched named group, or None if the regex did not match.
        """
        from backend.db.models import SubmissionType
        regex = SubmissionType.regex
        m = regex.search(str(filepath))
        try:
            sub_type = m.lastgroup
        except AttributeError as e:
            # NOTE: search() returned None, i.e. nothing in the path matched.
            sub_type = None
            logger.critical(f"No procedure type found or procedure type found!: {e}")
        # NOTE(review): this returns a group name (str) while the other two lookups return
        # whatever SubmissionType.query gives back - confirm callers can handle both.
        return sub_type

    @classmethod
    def get_subtype_from_preparse(cls, filepath: Path):
        """
        Read the "submissiontype" entry from the workbook's default info range.

        Args:
            filepath (Path): Location of the excel workbook.

        Returns:
            Result of SubmissionType.query for the name found, or None if no name was
            found or the query returned a list.
        """
        from backend.db.models import SubmissionType
        # NOTE: Use the plain key/value parser. ClientSubmissionInfoParser.__init__ itself calls
        # retrieve_submissiontype, so instantiating it from this fallback recursed without end.
        # NOTE(review): assumes DefaultKEYVALUEParser accepts the default_range_dict layout as
        # range_dict - confirm against the base parser.
        parser = DefaultKEYVALUEParser(
            filepath=filepath,
            range_dict=ClientSubmissionInfoParser.default_range_dict
        )
        name = next((value for k, value in parser.parsed_info if k == "submissiontype"), None)
        if not name:
            return None
        sub_type = SubmissionType.query(name=name)
        if isinstance(sub_type, list):
            # NOTE: A list means the query did not resolve to a single record.
            sub_type = None
        return sub_type

    @classmethod
    def get_subtype_from_properties(cls, filepath: Path):
        """
        Read the submission type from the workbook's "category" document property.

        Args:
            filepath (Path): Location of the excel workbook.

        Returns:
            Result of SubmissionType.query for the first category, or None if the
            workbook has no category or the query returned a list.
        """
        from backend.db.models import SubmissionType
        wb = load_workbook(filepath)
        category = wb.properties.category
        if not category:
            # NOTE: category is None when the property was never set; nothing to look up.
            return None
        # NOTE: Gets first category in the metadata.
        name = category.split(";")[0].strip().title()
        if not name:
            return None
        sub_type = SubmissionType.query(name=name)
        if isinstance(sub_type, list):
            # NOTE: A list means the query did not resolve to a single record.
            sub_type = None
        return sub_type


class ClientSubmissionInfoParser(DefaultKEYVALUEParser, SubmissionTyperMixin):
    """
    Key/value parser for the submitter info block of the "Sample List" sheet.
    """

    default_range_dict = [
        {
            "start_row": 2,
            "end_row": 18,
            "key_column": 1,
            "value_column": 2,
            "sheet": "Sample List",
        }
    ]

    def __init__(self, filepath: Path | str, *args, **kwargs):
        """
        Detect the submission type, set up the key/value parser and, for every
        associated procedure type that has its own sheet, offer to add a run.

        Args:
            filepath (Path | str): Location of the excel workbook.
            *args: Accepted but not used.
            **kwargs: Passed to the parent parser; "range_dict" defaults to the
                submission type's info_map.
        """
        from frontend.widgets.pop_ups import QuestionAsker
        self.submissiontype = self.retrieve_submissiontype(filepath=filepath)
        if "range_dict" not in kwargs:
            kwargs['range_dict'] = self.submissiontype.info_map
        super().__init__(filepath=filepath, **kwargs)
        procedure_names = [proceduretype.name for proceduretype in self.submissiontype.proceduretype]
        for procedure_name in procedure_names:
            if procedure_name not in self.workbook.sheetnames:
                continue
            # TODO: check if run with name already exists
            prompt = QuestionAsker(
                title="Add Run?",
                message="We've detected a sheet corresponding to an associated procedure type.\nWould you like to add a new run?"
            )
            # NOTE(review): if QuestionAsker is a Qt dialog, `accepted` may be a signal object
            # (always truthy) rather than the user's answer - confirm.
            if not prompt.accepted:
                continue
            # NOTE: recruit parser.
            try:
                manager_class = getattr(procedure_managers, procedure_name)
            except AttributeError:
                # NOTE: No manager named after this procedure type, use the default.
                manager_class = procedure_managers.DefaultManager
            # NOTE(review): self.manager is only set on this path, so it does not exist
            # when no sheet matched or the prompt was declined.
            self.manager = manager_class(proceduretype=procedure_name)


class ClientSubmissionSampleParser(DefaultTABLEParser, SubmissionTyperMixin):
    """
    Table parser for the submitter samples of the "Sample List" sheet.
    """

    default_range_dict = [
        {
            "header_row": 19,
            "end_row": 115,
            "sheet": "Sample List",
        }
    ]

    def __init__(self, filepath: Path | str, *args, **kwargs):
        """
        Detect the submission type and set up the table parser.

        Args:
            filepath (Path | str): Location of the excel workbook.
            *args: Accepted but not used.
            **kwargs: Passed to the parent parser; "range_dict" defaults to the
                submission type's sample_map.
        """
        self.submissiontype = self.retrieve_submissiontype(filepath=filepath)
        if "range_dict" not in kwargs:
            kwargs['range_dict'] = self.submissiontype.sample_map
        super().__init__(filepath=filepath, **kwargs)

    @property
    def parsed_info(self) -> Generator[dict, None, None]:
        """
        Yield the parent parser's samples with "row" translated through row_keys
        where possible and a 1-based "submission_rank" added.

        Yields:
            dict: One sample per parsed entry, in the order the parent produced them.
        """
        row_letters = ascii_lowercase[0:8]
        for rank, sample in enumerate(super().parsed_info, start=1):
            logger.debug(f"Parsed info sample: {sample}")
            row = sample["row"]
            if isinstance(row, str) and row.lower() in row_letters:
                try:
                    translated_row = row_keys[row]
                except KeyError:
                    # NOTE: No entry in row_keys for this value, leave the row as parsed.
                    pass
                else:
                    sample["row"] = translated_row
            sample['submission_rank'] = rank
            yield sample

    def to_pydantic(self):
        """
        Build a pydantic object for every parsed sample that has a truthy "sample_id".

        Returns:
            list: One self._pyd_object instance per retained sample.
        """
        samples_with_id = (sample for sample in self.parsed_info if sample['sample_id'])
        return [self._pyd_object(**sample) for sample in samples_with_id]