Improved scraping of gel info for Artic.

This commit is contained in:
lwark
2024-08-29 11:07:54 -05:00
parent 2afb57a6cc
commit 482bfa5728
7 changed files with 101 additions and 22 deletions

View File

@@ -1,3 +1,11 @@
## 202408.05
- Improved scraping for gel info of Artic submissions.
## 202408.04
- Fixed false error thrown when tips are added both in the Excel sheet and from the app.
## 202408.03
- Fixed issue backing up database file.

View File

@@ -8,7 +8,7 @@
*should fit 90% of usage cases*
1. Ensure a properly formatted Submission Excel form has been filled out.
a. The program can fill in reagent fields and some other information automatically, but should be checked for accuracy afterward.
1. The program can fill in reagent fields and some other information automatically, but should be checked for accuracy afterward.
2. Click on 'File' in the menu bar, followed by 'Import Submission' and use the file dialog to locate the form.
1. The Excel file may also be dragged into the grey area on the left hand side of the screen from Windows File Explorer. If so, skip step 3.
3. Click 'Ok'.
@@ -68,7 +68,7 @@ This is meant to import .xlsx files created from the Design & Analysis Software
1. Click on 'Reports' -> 'Make Report' in the menu bar.
2. Select the start date and the end date you want for the report. Click 'ok'.
3. Use the file dialog to select a location to save the report.
a. Both an Excel sheet and a pdf should be generated containing summary information for submissions made by each client lab.
1. Both an Excel sheet and a pdf should be generated containing summary information for submissions made by each client lab.
## Exporting a run as an Excel file:
@@ -96,9 +96,9 @@ This is meant to import .xlsx files created from the Design & Analysis Software
4. For each reagent type in the kit click the "Add Reagent Type" button.
5. Fill in the name of the reagent type. Alternatively select from already existing types in the drop-down.
6. Fill in the reagent location in the Excel submission sheet.
a. For example if the reagent name is in a sheet called "Reagent Info" in row 12, column 1, type "Reagent Info" in the "Excel Location Sheet Name" field.
b. Set 12 in the "Name Row" and 1 in the "Name Column".
c. Repeat 6b for the Lot and the Expiry row and columns.
1. For example if the reagent name is in a sheet called "Reagent Info" in row 12, column 1, type "Reagent Info" in the "Excel Location Sheet Name" field.
2. Set 12 in the "Name Row" and 1 in the "Name Column".
3. Repeat the previous step for the Lot and the Expiry rows and columns.
7. Click the "Submit" button at the top.
## Linking Extraction Logs:

View File

@@ -1494,10 +1494,11 @@ class SubmissionEquipmentAssociation(BaseClass):
@classmethod
@setup_lookup
def query(cls, equipment_id:int, submission_id:int, role:str, limit:int=0, **kwargs) -> Any | List[Any]:
def query(cls, equipment_id:int, submission_id:int, role:str|None=None, limit:int=0, **kwargs) -> Any | List[Any]:
query: Query = cls.__database_session__.query(cls)
query = query.filter(cls.equipment_id==equipment_id)
query = query.filter(cls.submission_id==submission_id)
if role is not None:
query = query.filter(cls.role==role)
return cls.execute_query(query=query, limit=limit, **kwargs)
@@ -1763,3 +1764,13 @@ class SubmissionTipsAssociation(BaseClass):
dict: Values of this object
"""
return dict(role=self.role_name, name=self.tips.name, lot=self.tips.lot)
@classmethod
@setup_lookup
def query(cls, tip_id: int, role: str, submission_id: int|None=None, limit: int = 0, **kwargs) -> Any | List[Any]:
    """
    Lookup tips/submission associations by tips id and role, optionally narrowed to one submission.

    Args:
        tip_id (int): Value matched against the tip_id column.
        role (str): Value matched against the role_name column.
        submission_id (int | None, optional): When not None, the submission_id column must match as well. Defaults to None.
        limit (int, optional): Handed through to execute_query. Defaults to 0.

    Returns:
        Any | List[Any]: Whatever execute_query produces for the assembled query.
    """
    # NOTE: All criteria are gathered first and applied in a single filter call;
    #       multiple criteria passed to filter are combined with AND.
    criteria = [cls.tip_id == tip_id]
    if submission_id is not None:
        criteria.append(cls.submission_id == submission_id)
    criteria.append(cls.role_name == role)
    lookup: Query = cls.__database_session__.query(cls).filter(*criteria)
    return cls.execute_query(query=lookup, limit=limit, **kwargs)

View File

@@ -9,7 +9,7 @@ from copy import deepcopy
from getpass import getuser
import logging, uuid, tempfile, re, yaml, base64
from zipfile import ZipFile
from tempfile import TemporaryDirectory
from tempfile import TemporaryDirectory, TemporaryFile
from operator import itemgetter
from pprint import pformat
from . import BaseClass, Reagent, SubmissionType, KitType, Organization, Contact, Tips
@@ -33,6 +33,7 @@ from jinja2.exceptions import TemplateNotFound
from jinja2 import Template
from docxtpl import InlineImage
from docx.shared import Inches
from PIL import Image
logger = logging.getLogger(f"submissions.{__name__}")
@@ -469,7 +470,7 @@ class BasicSubmission(BaseClass):
'equipment', 'gel_info', 'gel_image', 'dna_core_submission_number', 'gel_controls',
'source_plates', 'pcr_technician', 'ext_technician', 'artic_technician', 'cost_centre',
'signed_by', 'artic_date', 'gel_barcode', 'gel_date', 'ngs_date', 'contact_phone', 'contact',
'tips']
'tips', 'gel_image_path']
for item in excluded:
try:
df = df.drop(item, axis=1)
@@ -1187,8 +1188,10 @@ class BasicSubmission(BaseClass):
# logger.debug("We have tips in this equipment")
for tips in equip.tips:
tassoc = tips.to_sql(submission=self)
if tassoc not in self.submission_tips_associations:
tassoc.save()
else:
logger.error(f"Tips already found in submission, skipping.")
else:
pass
@@ -1638,13 +1641,30 @@ class WastewaterArtic(BasicSubmission):
dict: Updated sample dictionary
"""
from backend.validators import RSLNamer
from openpyxl_image_loader.sheet_image_loader import SheetImageLoader
def scrape_image(wb: Workbook, info_dict: dict) -> Image.Image | None:
    """
    Find the first image attached to a cell inside a rectangular range of a worksheet.

    Args:
        wb (Workbook): Workbook holding the sheet named by info_dict['sheet'].
        info_dict (dict): Range description with keys 'sheet', 'start_row', 'end_row',
            'start_column' and 'end_column' (all bounds inclusive).

    Returns:
        Image.Image | None: The first image found while scanning row by row, left to right
            (presumably a PIL image, as returned by SheetImageLoader.get), or None when no
            cell in the range holds an image.
    """
    ws = wb[info_dict['sheet']]
    img_loader = SheetImageLoader(ws)
    for ii in range(info_dict['start_row'], info_dict['end_row'] + 1):
        logger.debug(f"Checking row: {ii}")
        for jj in range(info_dict['start_column'], info_dict['end_column'] + 1):
            # NOTE: row_map is taken from the enclosing scope; it is indexed with the column
            #       number to build the cell reference string (e.g. "A1").
            cell_str = f"{row_map[jj]}{ii}"
            if img_loader.image_in(cell_str):
                return img_loader.get(cell_str)
    return None
input_dict = super().custom_info_parser(input_dict)
egel_section = custom_fields['egel_results']
logger.debug(f"Custom fields: {custom_fields}")
egel_section = custom_fields['egel_controls']
ws = xl[egel_section['sheet']]
data = [ws.cell(row=ii, column=jj) for jj in range(egel_section['start_column'], egel_section['end_column']+1) for
ii in range(egel_section['start_row'], egel_section['end_row']+1)]
# NOTE: Here we should be scraping the control results.
data = [ws.cell(row=ii, column=jj) for jj in range(egel_section['start_column'], egel_section['end_column'] + 1)
for
ii in range(egel_section['start_row'], egel_section['end_row'] + 1)]
data = [cell for cell in data if cell.value is not None and "NTC" in cell.value]
# logger.debug(f"Got gel control map: {data}")
# logger.debug(f"Checking against row_map: {row_map}")
input_dict['gel_controls'] = [
dict(sample_id=cell.value, location=f"{row_map[cell.row - 9]}{str(cell.column - 14).zfill(2)}") for cell in
data]
@@ -1662,6 +1682,35 @@ class WastewaterArtic(BasicSubmission):
else:
datum['plate'] = RSLNamer(filename=datum['plate'], sub_type="Wastewater").parsed_name
input_dict['source_plates'] = data
egel_info_section = custom_fields['egel_info']
ws = xl[egel_info_section['sheet']]
data = []
for ii in range(egel_info_section['start_row'], egel_info_section['end_row'] + 1):
datum = dict(
name=ws.cell(row=ii, column=egel_info_section['start_column'] - 3).value,
values=[]
)
for jj in range(egel_info_section['start_column'], egel_info_section['end_column'] + 1):
d = dict(
name=ws.cell(row=egel_info_section['start_row'] - 1, column=jj).value,
value=ws.cell(row=ii, column=jj).value
)
if d['value'] is not None:
datum['values'].append(d)
data.append(datum)
input_dict['gel_info'] = data
logger.debug(f"Wastewater Artic custom info:\n\n{pformat(input_dict)}")
egel_image_section = custom_fields['image_range']
img: Image = scrape_image(wb=xl, info_dict=egel_image_section)
if img is not None:
tmp = Path(TemporaryFile().name).with_suffix(".jpg")
img.save(tmp.__str__())
with ZipFile(cls.__directory_path__.joinpath("submission_imgs.zip"), 'a') as zipf:
# NOTE: Add a file located at the source_path to the destination within the zip
# file. It will overwrite existing files if the names collide, but it
# will give a warning
zipf.write(tmp.__str__(), f"{input_dict['rsl_plate_num']['value']}.jpg")
input_dict['gel_image'] = f"{input_dict['rsl_plate_num']['value']}.jpg"
return input_dict
@classmethod
@@ -1887,13 +1936,13 @@ class WastewaterArtic(BasicSubmission):
logger.warning(f"No source plate info found.")
# NOTE: check for gel information
if check_key_or_attr(key='gel_info', interest=info, check_none=True):
egel_section = custom_fields['egel_results']
egel_section = custom_fields['egel_info']
# logger.debug(f"Gel info check passed.")
# NOTE: print json field gel results to Egel results
worksheet = input_excel[egel_section['sheet']]
# TODO: Move all this into a separate function?
start_row = egel_section['start_row']
start_column = egel_section['start_column']
start_row = egel_section['start_row'] - 1
start_column = egel_section['start_column'] - 3
for row, ki in enumerate(info['gel_info']['value'], start=1):
# logger.debug(f"ki: {ki}")
# logger.debug(f"vi: {vi}")

View File

@@ -75,7 +75,7 @@ class RSLNamer(object):
try:
submission_type = m.lastgroup
except AttributeError as e:
logger.critical("No RSL plate number found or submission type found!")
logger.critical(f"No RSL plate number found or submission type found!: {e}")
case _:
submission_type = None
try:
@@ -180,6 +180,14 @@ class RSLNamer(object):
template = environment.from_string(template)
return template.render(**kwargs)
def calculate_repeat(self):
    """
    Pull the repeat marker out of the parsed plate name.

    Returns:
        str: The "R" plus single digit that directly follows a "-<digit>" in
            self.parsed_name (e.g. "R1"), or an empty string when there is none.
    """
    found = re.search(r"-\d(?P<repeat>R\d)", self.parsed_name)
    return "" if found is None else found.group("repeat")
from .pydant import PydSubmission, PydKit, PydContact, PydOrganization, PydSample, PydReagent, PydReagentRole, \
PydEquipment, PydEquipmentRole, PydTips

View File

@@ -296,6 +296,8 @@ class PydTips(BaseModel):
SubmissionTipsAssociation: Association between queried tips and submission
"""
tips = Tips.query(name=self.name, lot=self.lot, limit=1)
assoc = SubmissionTipsAssociation.query(tip_id=tips.id, submission_id=submission.id, role=self.role, limit=1)
if assoc is None:
assoc = SubmissionTipsAssociation(submission=submission, tips=tips, role_name=self.role)
return assoc
@@ -640,6 +642,7 @@ class PydSubmission(BaseModel, extra='allow'):
# this could also be done with default_factory
self.submission_object = BasicSubmission.find_polymorphic_subclass(
polymorphic_identity=self.submission_type['value'])
self.namer = RSLNamer(self.rsl_plate_num['value'])
def set_attribute(self, key: str, value):
"""
@@ -853,7 +856,7 @@ class PydSubmission(BaseModel, extra='allow'):
"""
template = self.submission_object.filename_template()
# logger.debug(f"Using template string: {template}")
render = RSLNamer.construct_export_name(template=template, **self.improved_dict(dictionaries=False)).replace(
render = self.namer.construct_export_name(template=template, **self.improved_dict(dictionaries=False)).replace(
"/", "")
# logger.debug(f"Template rendered as: {render}")
return render

View File

@@ -89,7 +89,7 @@ class SubmissionsSheet(QTableView):
self.data = BasicSubmission.submissions_to_df()
try:
self.data['Id'] = self.data['Id'].apply(str)
self.data['Id'] = self.data['Id'].str.zfill(3)
self.data['Id'] = self.data['Id'].str.zfill(4)
except KeyError as e:
logger.error(f"Could not alter id to string due to {e}")
proxyModel = QSortFilterProxyModel()