Improved scraping of gel info for Artic.

2024-08-29 11:07:54 -05:00
parent 2afb57a6cc
commit 482bfa5728
7 changed files with 101 additions and 22 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,11 @@
+## 202408.05
+
+- Improved scraping for gel info of Artic submissions.
+
+## 202408.04
+
+- Fixed false error thrown when tips are added both in the Excel sheet and from the app.
+
 ## 202408.03

 - Fixed issue backing up database file.
--- a/README.md
+++ b/README.md
@@ -8,7 +8,7 @@
 *should fit 90% of usage cases*

 1. Ensure a properly formatted Submission Excel form has been filled out. 
-    a. The program can fill in reagent fields and some other information automatically, but should be checked for accuracy afterward.
+   1. The program can fill in reagent fields and some other information automatically, but should be checked for accuracy afterward.
 2. Click on 'File' in the menu bar, followed by 'Import Submission' and use the file dialog to locate the form.
   1.  The Excel file may also be dragged into the grey area on the left hand side of the screen from Windows File Explorer. If so, skip step 3. 
 3. Click 'Ok'.
@@ -68,7 +68,7 @@ This is meant to import .xlsx files created from the Design & Analysis Software
 1. Click on 'Reports' -> 'Make Report' in the menu bar.
 2. Select the start date and the end date you want for the report. Click 'ok'.
 3. Use the file dialog to select a location to save the report.
-	a. Both an Excel sheet and a pdf should be generated containing summary information for submissions made by each client lab.
+	1. Both an Excel sheet and a pdf should be generated containing summary information for submissions made by each client lab.

 ## Exporting a run as an Excel file:

@@ -96,9 +96,9 @@ This is meant to import .xlsx files created from the Design & Analysis Software
 4. For each reagent type in the kit click the "Add Reagent Type" button.
 5. Fill in the name of the reagent type. Alternatively select from already existing types in the drop-down.
 6. Fill in the reagent location in the Excel submission sheet.
-	a. For example if the reagent name is in a sheet called "Reagent Info" in row 12, column 1, type "Reagent Info" in the "Excel Location Sheet Name" field. 
-	b. Set 12 in the "Name Row" and 1 in the "Name Column".
-	c. Repeat 6b for the Lot and the Expiry row and columns.
+	1. For example if the reagent name is in a sheet called "Reagent Info" in row 12, column 1, type "Reagent Info" in the "Excel Location Sheet Name" field. 
+	2. Set 12 in the "Name Row" and 1 in the "Name Column".
+	3. Repeat step 2 for the Lot and the Expiry rows and columns.
 7. Click the "Submit" button at the top.

 ## Linking Extraction Logs:
--- a/src/submissions/backend/db/models/kits.py
+++ b/src/submissions/backend/db/models/kits.py
@@ -1494,10 +1494,11 @@ class SubmissionEquipmentAssociation(BaseClass):

    @classmethod
    @setup_lookup
-    def query(cls, equipment_id:int, submission_id:int, role:str, limit:int=0, **kwargs) -> Any | List[Any]:
+    def query(cls, equipment_id:int, submission_id:int, role:str|None=None, limit:int=0, **kwargs) -> Any | List[Any]:
        query: Query = cls.__database_session__.query(cls)
        query = query.filter(cls.equipment_id==equipment_id)
        query = query.filter(cls.submission_id==submission_id)
+        if role is not None:
            query = query.filter(cls.role==role)
        return cls.execute_query(query=query, limit=limit, **kwargs)

@@ -1763,3 +1764,13 @@ class SubmissionTipsAssociation(BaseClass):
            dict: Values of this object
        """
        return dict(role=self.role_name, name=self.tips.name, lot=self.tips.lot)
+
+    @classmethod
+    @setup_lookup
+    def query(cls, tip_id: int, role: str, submission_id: int|None=None, limit: int = 0, **kwargs) -> Any | List[Any]:
+        query: Query = cls.__database_session__.query(cls)
+        query = query.filter(cls.tip_id == tip_id)
+        if submission_id is not None:
+            query = query.filter(cls.submission_id == submission_id)
+        query = query.filter(cls.role_name == role)
+        return cls.execute_query(query=query, limit=limit, **kwargs)
--- a/src/submissions/backend/db/models/submissions.py
+++ b/src/submissions/backend/db/models/submissions.py
@@ -9,7 +9,7 @@ from copy import deepcopy
 from getpass import getuser
 import logging, uuid, tempfile, re, yaml, base64
 from zipfile import ZipFile
-from tempfile import TemporaryDirectory
+from tempfile import TemporaryDirectory, TemporaryFile
 from operator import itemgetter
 from pprint import pformat
 from . import BaseClass, Reagent, SubmissionType, KitType, Organization, Contact, Tips
@@ -33,6 +33,7 @@ from jinja2.exceptions import TemplateNotFound
 from jinja2 import Template
 from docxtpl import InlineImage
 from docx.shared import Inches
+from PIL import Image

 logger = logging.getLogger(f"submissions.{__name__}")

@@ -469,7 +470,7 @@ class BasicSubmission(BaseClass):
                    'equipment', 'gel_info', 'gel_image', 'dna_core_submission_number', 'gel_controls',
                    'source_plates', 'pcr_technician', 'ext_technician', 'artic_technician', 'cost_centre',
                    'signed_by', 'artic_date', 'gel_barcode', 'gel_date', 'ngs_date', 'contact_phone', 'contact',
-                    'tips']
+                    'tips', 'gel_image_path']
        for item in excluded:
            try:
                df = df.drop(item, axis=1)
@@ -1187,8 +1188,10 @@ class BasicSubmission(BaseClass):
                    # logger.debug("We have tips in this equipment")
                    for tips in equip.tips:
                        tassoc = tips.to_sql(submission=self)
+                        if tassoc not in self.submission_tips_associations:
                            tassoc.save()
-
+                        else:
+                            logger.error(f"Tips already found in submission, skipping.")
        else:
            pass

@@ -1638,13 +1641,30 @@ class WastewaterArtic(BasicSubmission):
            dict: Updated sample dictionary
        """
        from backend.validators import RSLNamer
+        from openpyxl_image_loader.sheet_image_loader import SheetImageLoader
+
+        def scrape_image(wb: Workbook, info_dict: dict) -> Image or None:
+            ws = wb[info_dict['sheet']]
+            img_loader = SheetImageLoader(ws)
+            for ii in range(info_dict['start_row'], info_dict['end_row'] + 1):
+                logger.debug(f"Checking row: {ii}")
+                for jj in range(info_dict['start_column'], info_dict['end_column'] + 1):
+                    cell_str = f"{row_map[jj]}{ii}"
+                    if img_loader.image_in(cell_str):
+                        return img_loader.get(cell_str)
+            return None
+
        input_dict = super().custom_info_parser(input_dict)
-        egel_section = custom_fields['egel_results']
+        logger.debug(f"Custom fields: {custom_fields}")
+        egel_section = custom_fields['egel_controls']
        ws = xl[egel_section['sheet']]
-        data = [ws.cell(row=ii, column=jj) for jj in range(egel_section['start_column'], egel_section['end_column']+1) for
-                ii in range(egel_section['start_row'], egel_section['end_row']+1)]
+        # NOTE: Here we should be scraping the control results.
+        data = [ws.cell(row=ii, column=jj) for jj in range(egel_section['start_column'], egel_section['end_column'] + 1)
+                for
+                ii in range(egel_section['start_row'], egel_section['end_row'] + 1)]
        data = [cell for cell in data if cell.value is not None and "NTC" in cell.value]
        # logger.debug(f"Got gel control map: {data}")
+        # logger.debug(f"Checking against row_map: {row_map}")
        input_dict['gel_controls'] = [
            dict(sample_id=cell.value, location=f"{row_map[cell.row - 9]}{str(cell.column - 14).zfill(2)}") for cell in
            data]
@@ -1662,6 +1682,35 @@ class WastewaterArtic(BasicSubmission):
            else:
                datum['plate'] = RSLNamer(filename=datum['plate'], sub_type="Wastewater").parsed_name
        input_dict['source_plates'] = data
+        egel_info_section = custom_fields['egel_info']
+        ws = xl[egel_info_section['sheet']]
+        data = []
+        for ii in range(egel_info_section['start_row'], egel_info_section['end_row'] + 1):
+            datum = dict(
+                name=ws.cell(row=ii, column=egel_info_section['start_column'] - 3).value,
+                values=[]
+            )
+            for jj in range(egel_info_section['start_column'], egel_info_section['end_column'] + 1):
+                d = dict(
+                    name=ws.cell(row=egel_info_section['start_row'] - 1, column=jj).value,
+                    value=ws.cell(row=ii, column=jj).value
+                )
+                if d['value'] is not None:
+                    datum['values'].append(d)
+            data.append(datum)
+        input_dict['gel_info'] = data
+        logger.debug(f"Wastewater Artic custom info:\n\n{pformat(input_dict)}")
+        egel_image_section = custom_fields['image_range']
+        img: Image = scrape_image(wb=xl, info_dict=egel_image_section)
+        if img is not None:
+            tmp = Path(TemporaryFile().name).with_suffix(".jpg")
+            img.save(tmp.__str__())
+            with ZipFile(cls.__directory_path__.joinpath("submission_imgs.zip"), 'a') as zipf:
+                # NOTE: Add the temporary image file to the zip archive under the plate's name.
+                # In append mode a colliding name is not replaced: zipfile emits a
+                # "Duplicate name" warning and adds a second entry with the same name.
+                zipf.write(tmp.__str__(), f"{input_dict['rsl_plate_num']['value']}.jpg")
+            input_dict['gel_image'] = f"{input_dict['rsl_plate_num']['value']}.jpg"
        return input_dict

    @classmethod
@@ -1887,13 +1936,13 @@ class WastewaterArtic(BasicSubmission):
            logger.warning(f"No source plate info found.")
        # NOTE: check for gel information
        if check_key_or_attr(key='gel_info', interest=info, check_none=True):
-            egel_section = custom_fields['egel_results']
+            egel_section = custom_fields['egel_info']
            # logger.debug(f"Gel info check passed.")
            # NOTE: print json field gel results to Egel results
            worksheet = input_excel[egel_section['sheet']]
            # TODO: Move all this into a separate function?
-            start_row = egel_section['start_row']
-            start_column = egel_section['start_column']
+            start_row = egel_section['start_row'] - 1
+            start_column = egel_section['start_column'] - 3
            for row, ki in enumerate(info['gel_info']['value'], start=1):
                # logger.debug(f"ki: {ki}")
                # logger.debug(f"vi: {vi}")
--- a/src/submissions/backend/validators/init.py
+++ b/src/submissions/backend/validators/init.py
@@ -75,7 +75,7 @@ class RSLNamer(object):
                try:
                    submission_type = m.lastgroup
                except AttributeError as e:
-                    logger.critical("No RSL plate number found or submission type found!")
+                    logger.critical(f"No RSL plate number found or submission type found!: {e}")
            case _:
                submission_type = None
        try:
@@ -180,6 +180,14 @@ class RSLNamer(object):
        template = environment.from_string(template)
        return template.render(**kwargs)

+    def calculate_repeat(self):
+        regex = re.compile(r"-\d(?P<repeat>R\d)")
+        m = regex.search(self.parsed_name)
+        if m is not None:
+            return m.group("repeat")
+        else:
+            return ""
+

 from .pydant import PydSubmission, PydKit, PydContact, PydOrganization, PydSample, PydReagent, PydReagentRole, \
    PydEquipment, PydEquipmentRole, PydTips
--- a/src/submissions/backend/validators/pydant.py
+++ b/src/submissions/backend/validators/pydant.py
@@ -296,6 +296,8 @@ class PydTips(BaseModel):
            SubmissionTipsAssociation: Association between queried tips and submission
        """
        tips = Tips.query(name=self.name, lot=self.lot, limit=1)
+        assoc = SubmissionTipsAssociation.query(tip_id=tips.id, submission_id=submission.id, role=self.role, limit=1)
+        if assoc is None:
            assoc = SubmissionTipsAssociation(submission=submission, tips=tips, role_name=self.role)
        return assoc

@@ -640,6 +642,7 @@ class PydSubmission(BaseModel, extra='allow'):
        # this could also be done with default_factory
        self.submission_object = BasicSubmission.find_polymorphic_subclass(
            polymorphic_identity=self.submission_type['value'])
+        self.namer = RSLNamer(self.rsl_plate_num['value'])

    def set_attribute(self, key: str, value):
        """
@@ -853,7 +856,7 @@ class PydSubmission(BaseModel, extra='allow'):
        """
        template = self.submission_object.filename_template()
        # logger.debug(f"Using template string: {template}")
-        render = RSLNamer.construct_export_name(template=template, **self.improved_dict(dictionaries=False)).replace(
+        render = self.namer.construct_export_name(template=template, **self.improved_dict(dictionaries=False)).replace(
            "/", "")
        # logger.debug(f"Template rendered as: {render}")
        return render
--- a/src/submissions/frontend/widgets/submission_table.py
+++ b/src/submissions/frontend/widgets/submission_table.py
@@ -89,7 +89,7 @@ class SubmissionsSheet(QTableView):
        self.data = BasicSubmission.submissions_to_df()
        try:
            self.data['Id'] = self.data['Id'].apply(str)
-            self.data['Id'] = self.data['Id'].str.zfill(3)
+            self.data['Id'] = self.data['Id'].str.zfill(4)
        except KeyError as e:
            logger.error(f"Could not alter id to string due to {e}")
        proxyModel = QSortFilterProxyModel()