From 482bfa572803050308102e8eec5aa4f52cb8f0c9 Mon Sep 17 00:00:00 2001 From: lwark Date: Thu, 29 Aug 2024 11:07:54 -0500 Subject: [PATCH] Improved scraping of gel info for Artic. --- CHANGELOG.md | 8 +++ README.md | 12 ++-- src/submissions/backend/db/models/kits.py | 15 +++- .../backend/db/models/submissions.py | 69 ++++++++++++++++--- .../backend/validators/__init__.py | 10 ++- src/submissions/backend/validators/pydant.py | 7 +- .../frontend/widgets/submission_table.py | 2 +- 7 files changed, 101 insertions(+), 22 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 723b212..2140784 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,11 @@ +## 202408.05 + +- Improved scraping for gel info of Artic submissions. + +## 202408.04 + +- Fixed false error throw when tips added in xl and from app. + ## 202408.03 - Fixed issue backing up database file. diff --git a/README.md b/README.md index 6956aa3..b001cf7 100644 --- a/README.md +++ b/README.md @@ -8,14 +8,14 @@ *should fit 90% of usage cases* 1. Ensure a properly formatted Submission Excel form has been filled out. - a. The program can fill in reagent fields and some other information automatically, but should be checked for accuracy afterward. + 1. The program can fill in reagent fields and some other information automatically, but should be checked for accuracy afterward. 2. Click on 'File' in the menu bar, followed by 'Import Submission' and use the file dialog to locate the form. 1. The Excel file may also be dragged into the grey area on the left hand side of the screen from Windows File Explorer. If so, skip step 3. 3. Click 'Ok'. 4. Most of the fields in the form should be automatically filled in from the form area to the left of the screen. 5. You may need to maximize the app to ensure you can see all the info. 6. Any fields that are not automatically filled in can be filled in manually from the drop-down menus. - 1. Any reagent lots not found in the drop-downs can be typed in manually. + 1. 
Any reagent lots not found in the drop-downs can be typed in manually. 7. Once you are certain all the information is correct, click 'Submit' at the bottom of the form. 8. Add in any new reagents the app doesn't have in the database. 9. Once the new run shows up at the bottom of the Submissions, everything is fine. @@ -68,7 +68,7 @@ This is meant to import .xlsx files created from the Design & Analysis Software 1. Click on 'Reports' -> 'Make Report' in the menu bar. 2. Select the start date and the end date you want for the report. Click 'ok'. 3. Use the file dialog to select a location to save the report. - a. Both an Excel sheet and a pdf should be generated containing summary information for submissions made by each client lab. + 1. Both an Excel sheet and a pdf should be generated containing summary information for submissions made by each client lab. ## Exporting a run as an Excel file: @@ -96,9 +96,9 @@ This is meant to import .xlsx files created from the Design & Analysis Software 4. For each reagent type in the kit click the "Add Reagent Type" button. 5. Fill in the name of the reagent type. Alternatively select from already existing types in the drop-down. 6. Fill in the reagent location in the Excel submission sheet. - a. For example if the reagent name is in a sheet called "Reagent Info" in row 12, column 1, type "Reagent Info" in the "Excel Location Sheet Name" field. - b. Set 12 in the "Name Row" and 1 in the "Name Column". - c. Repeat 6b for the Lot and the Expiry row and columns. + 1. For example if the reagent name is in a sheet called "Reagent Info" in row 12, column 1, type "Reagent Info" in the "Excel Location Sheet Name" field. + 2. Set 12 in the "Name Row" and 1 in the "Name Column". + 3. Repeat step 6.2 for the Lot and the Expiry row and columns. 7. Click the "Submit" button at the top. 
## Linking Extraction Logs: diff --git a/src/submissions/backend/db/models/kits.py b/src/submissions/backend/db/models/kits.py index f5a9f9a..c1fe6bc 100644 --- a/src/submissions/backend/db/models/kits.py +++ b/src/submissions/backend/db/models/kits.py @@ -1494,11 +1494,12 @@ class SubmissionEquipmentAssociation(BaseClass): @classmethod @setup_lookup - def query(cls, equipment_id:int, submission_id:int, role:str, limit:int=0, **kwargs) -> Any | List[Any]: + def query(cls, equipment_id:int, submission_id:int, role:str|None=None, limit:int=0, **kwargs) -> Any | List[Any]: query: Query = cls.__database_session__.query(cls) query = query.filter(cls.equipment_id==equipment_id) query = query.filter(cls.submission_id==submission_id) - query = query.filter(cls.role==role) + if role is not None: + query = query.filter(cls.role==role) return cls.execute_query(query=query, limit=limit, **kwargs) @@ -1763,3 +1764,13 @@ class SubmissionTipsAssociation(BaseClass): dict: Values of this object """ return dict(role=self.role_name, name=self.tips.name, lot=self.tips.lot) + + @classmethod + @setup_lookup + def query(cls, tip_id: int, role: str, submission_id: int|None=None, limit: int = 0, **kwargs) -> Any | List[Any]: + query: Query = cls.__database_session__.query(cls) + query = query.filter(cls.tip_id == tip_id) + if submission_id is not None: + query = query.filter(cls.submission_id == submission_id) + query = query.filter(cls.role_name == role) + return cls.execute_query(query=query, limit=limit, **kwargs) diff --git a/src/submissions/backend/db/models/submissions.py b/src/submissions/backend/db/models/submissions.py index 7229bbb..2c8c22b 100644 --- a/src/submissions/backend/db/models/submissions.py +++ b/src/submissions/backend/db/models/submissions.py @@ -9,7 +9,7 @@ from copy import deepcopy from getpass import getuser import logging, uuid, tempfile, re, yaml, base64 from zipfile import ZipFile -from tempfile import TemporaryDirectory +from tempfile import 
TemporaryDirectory, TemporaryFile from operator import itemgetter from pprint import pformat from . import BaseClass, Reagent, SubmissionType, KitType, Organization, Contact, Tips @@ -33,6 +33,7 @@ from jinja2.exceptions import TemplateNotFound from jinja2 import Template from docxtpl import InlineImage from docx.shared import Inches +from PIL import Image logger = logging.getLogger(f"submissions.{__name__}") @@ -469,7 +470,7 @@ class BasicSubmission(BaseClass): 'equipment', 'gel_info', 'gel_image', 'dna_core_submission_number', 'gel_controls', 'source_plates', 'pcr_technician', 'ext_technician', 'artic_technician', 'cost_centre', 'signed_by', 'artic_date', 'gel_barcode', 'gel_date', 'ngs_date', 'contact_phone', 'contact', - 'tips'] + 'tips', 'gel_image_path'] for item in excluded: try: df = df.drop(item, axis=1) @@ -1187,8 +1188,10 @@ class BasicSubmission(BaseClass): # logger.debug("We have tips in this equipment") for tips in equip.tips: tassoc = tips.to_sql(submission=self) - tassoc.save() - + if tassoc not in self.submission_tips_associations: + tassoc.save() + else: + logger.error(f"Tips already found in submission, skipping.") else: pass @@ -1638,13 +1641,30 @@ class WastewaterArtic(BasicSubmission): dict: Updated sample dictionary """ from backend.validators import RSLNamer + from openpyxl_image_loader.sheet_image_loader import SheetImageLoader + + def scrape_image(wb: Workbook, info_dict: dict) -> Image or None: + ws = wb[info_dict['sheet']] + img_loader = SheetImageLoader(ws) + for ii in range(info_dict['start_row'], info_dict['end_row'] + 1): + logger.debug(f"Checking row: {ii}") + for jj in range(info_dict['start_column'], info_dict['end_column'] + 1): + cell_str = f"{row_map[jj]}{ii}" + if img_loader.image_in(cell_str): + return img_loader.get(cell_str) + return None + input_dict = super().custom_info_parser(input_dict) - egel_section = custom_fields['egel_results'] + logger.debug(f"Custom fields: {custom_fields}") + egel_section = 
custom_fields['egel_controls'] ws = xl[egel_section['sheet']] - data = [ws.cell(row=ii, column=jj) for jj in range(egel_section['start_column'], egel_section['end_column']+1) for - ii in range(egel_section['start_row'], egel_section['end_row']+1)] + # NOTE: Here we should be scraping the control results. + data = [ws.cell(row=ii, column=jj) for jj in range(egel_section['start_column'], egel_section['end_column'] + 1) + for + ii in range(egel_section['start_row'], egel_section['end_row'] + 1)] data = [cell for cell in data if cell.value is not None and "NTC" in cell.value] # logger.debug(f"Got gel control map: {data}") + # logger.debug(f"Checking against row_map: {row_map}") input_dict['gel_controls'] = [ dict(sample_id=cell.value, location=f"{row_map[cell.row - 9]}{str(cell.column - 14).zfill(2)}") for cell in data] @@ -1662,6 +1682,35 @@ class WastewaterArtic(BasicSubmission): else: datum['plate'] = RSLNamer(filename=datum['plate'], sub_type="Wastewater").parsed_name input_dict['source_plates'] = data + egel_info_section = custom_fields['egel_info'] + ws = xl[egel_info_section['sheet']] + data = [] + for ii in range(egel_info_section['start_row'], egel_info_section['end_row'] + 1): + datum = dict( + name=ws.cell(row=ii, column=egel_info_section['start_column'] - 3).value, + values=[] + ) + for jj in range(egel_info_section['start_column'], egel_info_section['end_column'] + 1): + d = dict( + name=ws.cell(row=egel_info_section['start_row'] - 1, column=jj).value, + value=ws.cell(row=ii, column=jj).value + ) + if d['value'] is not None: + datum['values'].append(d) + data.append(datum) + input_dict['gel_info'] = data + logger.debug(f"Wastewater Artic custom info:\n\n{pformat(input_dict)}") + egel_image_section = custom_fields['image_range'] + img: Image = scrape_image(wb=xl, info_dict=egel_image_section) + if img is not None: + tmp = Path(TemporaryFile().name).with_suffix(".jpg") + img.save(tmp.__str__()) + with 
ZipFile(cls.__directory_path__.joinpath("submission_imgs.zip"), 'a') as zipf: + # NOTE: Add a file located at the source_path to the destination within the zip + # file. It will overwrite existing files if the names collide, but it + # will give a warning + zipf.write(tmp.__str__(), f"{input_dict['rsl_plate_num']['value']}.jpg") + input_dict['gel_image'] = f"{input_dict['rsl_plate_num']['value']}.jpg" return input_dict @classmethod @@ -1887,13 +1936,13 @@ class WastewaterArtic(BasicSubmission): logger.warning(f"No source plate info found.") # NOTE: check for gel information if check_key_or_attr(key='gel_info', interest=info, check_none=True): - egel_section = custom_fields['egel_results'] + egel_section = custom_fields['egel_info'] # logger.debug(f"Gel info check passed.") # NOTE: print json field gel results to Egel results worksheet = input_excel[egel_section['sheet']] # TODO: Move all this into a seperate function? - start_row = egel_section['start_row'] - start_column = egel_section['start_column'] + start_row = egel_section['start_row'] - 1 + start_column = egel_section['start_column'] - 3 for row, ki in enumerate(info['gel_info']['value'], start=1): # logger.debug(f"ki: {ki}") # logger.debug(f"vi: {vi}") diff --git a/src/submissions/backend/validators/__init__.py b/src/submissions/backend/validators/__init__.py index eeccb75..0bdad30 100644 --- a/src/submissions/backend/validators/__init__.py +++ b/src/submissions/backend/validators/__init__.py @@ -75,7 +75,7 @@ class RSLNamer(object): try: submission_type = m.lastgroup except AttributeError as e: - logger.critical("No RSL plate number found or submission type found!") + logger.critical(f"No RSL plate number found or submission type found!: {e}") case _: submission_type = None try: @@ -180,6 +180,14 @@ class RSLNamer(object): template = environment.from_string(template) return template.render(**kwargs) + def calculate_repeat(self): + regex = re.compile(r"-\d(?P<repeat>R\d)") + m = regex.search(self.parsed_name) + if 
m is not None: + return m.group("repeat") + else: + return "" + from .pydant import PydSubmission, PydKit, PydContact, PydOrganization, PydSample, PydReagent, PydReagentRole, \ PydEquipment, PydEquipmentRole, PydTips diff --git a/src/submissions/backend/validators/pydant.py b/src/submissions/backend/validators/pydant.py index b48a05b..ded54b6 100644 --- a/src/submissions/backend/validators/pydant.py +++ b/src/submissions/backend/validators/pydant.py @@ -296,7 +296,9 @@ class PydTips(BaseModel): SubmissionTipsAssociation: Association between queried tips and submission """ tips = Tips.query(name=self.name, lot=self.lot, limit=1) - assoc = SubmissionTipsAssociation(submission=submission, tips=tips, role_name=self.role) + assoc = SubmissionTipsAssociation.query(tip_id=tips.id, submission_id=submission.id, role=self.role, limit=1) + if assoc is None: + assoc = SubmissionTipsAssociation(submission=submission, tips=tips, role_name=self.role) return assoc @@ -640,6 +642,7 @@ class PydSubmission(BaseModel, extra='allow'): # this could also be done with default_factory self.submission_object = BasicSubmission.find_polymorphic_subclass( polymorphic_identity=self.submission_type['value']) + self.namer = RSLNamer(self.rsl_plate_num['value']) def set_attribute(self, key: str, value): """ @@ -853,7 +856,7 @@ class PydSubmission(BaseModel, extra='allow'): """ template = self.submission_object.filename_template() # logger.debug(f"Using template string: {template}") - render = RSLNamer.construct_export_name(template=template, **self.improved_dict(dictionaries=False)).replace( + render = self.namer.construct_export_name(template=template, **self.improved_dict(dictionaries=False)).replace( "/", "") # logger.debug(f"Template rendered as: {render}") return render diff --git a/src/submissions/frontend/widgets/submission_table.py b/src/submissions/frontend/widgets/submission_table.py index fa0601b..ed6a868 100644 --- a/src/submissions/frontend/widgets/submission_table.py +++ 
b/src/submissions/frontend/widgets/submission_table.py @@ -89,7 +89,7 @@ class SubmissionsSheet(QTableView): self.data = BasicSubmission.submissions_to_df() try: self.data['Id'] = self.data['Id'].apply(str) - self.data['Id'] = self.data['Id'].str.zfill(3) + self.data['Id'] = self.data['Id'].str.zfill(4) except KeyError as e: logger.error(f"Could not alter id to string due to {e}") proxyModel = QSortFilterProxyModel()