Improved scraping of gel info for Artic.

This commit is contained in:
lwark
2024-08-29 11:07:54 -05:00
parent 2afb57a6cc
commit 482bfa5728
7 changed files with 101 additions and 22 deletions

View File

@@ -1,3 +1,11 @@
## 202408.05
- Improved scraping for gel info of Artic submissions.
## 202408.04
- Fixed false error throw when tips added in xl and from app.
## 202408.03 ## 202408.03
- Fixed issue backing up database file. - Fixed issue backing up database file.

View File

@@ -8,14 +8,14 @@
*should fit 90% of usage cases* *should fit 90% of usage cases*
1. Ensure a properly formatted Submission Excel form has been filled out. 1. Ensure a properly formatted Submission Excel form has been filled out.
a. The program can fill in reagent fields and some other information automatically, but should be checked for accuracy afterward. 1. The program can fill in reagent fields and some other information automatically, but should be checked for accuracy afterward.
2. Click on 'File' in the menu bar, followed by 'Import Submission' and use the file dialog to locate the form. 2. Click on 'File' in the menu bar, followed by 'Import Submission' and use the file dialog to locate the form.
1. The Excel file may also be dragged into the grey area on the left hand side of the screen from Windows File Explorer. If so, skip step 3. 1. The Excel file may also be dragged into the grey area on the left hand side of the screen from Windows File Explorer. If so, skip step 3.
3. Click 'Ok'. 3. Click 'Ok'.
4. Most of the fields in the form should be automatically filled in from the form area to the left of the screen. 4. Most of the fields in the form should be automatically filled in from the form area to the left of the screen.
5. You may need to maximize the app to ensure you can see all the info. 5. You may need to maximize the app to ensure you can see all the info.
6. Any fields that are not automatically filled in can be filled in manually from the drop-down menus. 6. Any fields that are not automatically filled in can be filled in manually from the drop-down menus.
1. Any reagent lots not found in the drop-downs can be typed in manually. 1. Any reagent lots not found in the drop-downs can be typed in manually.
7. Once you are certain all the information is correct, click 'Submit' at the bottom of the form. 7. Once you are certain all the information is correct, click 'Submit' at the bottom of the form.
8. Add in any new reagents the app doesn't have in the database. 8. Add in any new reagents the app doesn't have in the database.
9. Once the new run shows up at the bottom of the Submissions, everything is fine. 9. Once the new run shows up at the bottom of the Submissions, everything is fine.
@@ -68,7 +68,7 @@ This is meant to import .xlsx files created from the Design & Analysis Software
1. Click on 'Reports' -> 'Make Report' in the menu bar. 1. Click on 'Reports' -> 'Make Report' in the menu bar.
2. Select the start date and the end date you want for the report. Click 'ok'. 2. Select the start date and the end date you want for the report. Click 'ok'.
3. Use the file dialog to select a location to save the report. 3. Use the file dialog to select a location to save the report.
a. Both an Excel sheet and a pdf should be generated containing summary information for submissions made by each client lab. 1. Both an Excel sheet and a pdf should be generated containing summary information for submissions made by each client lab.
## Exporting a run as an Excel file: ## Exporting a run as an Excel file:
@@ -96,9 +96,9 @@ This is meant to import .xlsx files created from the Design & Analysis Software
4. For each reagent type in the kit click the "Add Reagent Type" button. 4. For each reagent type in the kit click the "Add Reagent Type" button.
5. Fill in the name of the reagent type. Alternatively select from already existing types in the drop-down. 5. Fill in the name of the reagent type. Alternatively select from already existing types in the drop-down.
6. Fill in the reagent location in the Excel submission sheet. 6. Fill in the reagent location in the Excel submission sheet.
a. For example if the reagent name is in a sheet called "Reagent Info" in row 12, column 1, type "Reagent Info" in the "Excel Location Sheet Name" field. 1. For example if the reagent name is in a sheet called "Reagent Info" in row 12, column 1, type "Reagent Info" in the "Excel Location Sheet Name" field.
b. Set 12 in the "Name Row" and 1 in the "Name Column". 2. Set 12 in the "Name Row" and 1 in the "Name Column".
c. Repeat 6b for the Lot and the Expiry row and columns. 3. Repeat the previous sub-step for the Lot and the Expiry row and columns.
7. Click the "Submit" button at the top. 7. Click the "Submit" button at the top.
## Linking Extraction Logs: ## Linking Extraction Logs:

View File

@@ -1494,11 +1494,12 @@ class SubmissionEquipmentAssociation(BaseClass):
@classmethod @classmethod
@setup_lookup @setup_lookup
def query(cls, equipment_id:int, submission_id:int, role:str, limit:int=0, **kwargs) -> Any | List[Any]: def query(cls, equipment_id:int, submission_id:int, role:str|None=None, limit:int=0, **kwargs) -> Any | List[Any]:
query: Query = cls.__database_session__.query(cls) query: Query = cls.__database_session__.query(cls)
query = query.filter(cls.equipment_id==equipment_id) query = query.filter(cls.equipment_id==equipment_id)
query = query.filter(cls.submission_id==submission_id) query = query.filter(cls.submission_id==submission_id)
query = query.filter(cls.role==role) if role is not None:
query = query.filter(cls.role==role)
return cls.execute_query(query=query, limit=limit, **kwargs) return cls.execute_query(query=query, limit=limit, **kwargs)
@@ -1763,3 +1764,13 @@ class SubmissionTipsAssociation(BaseClass):
dict: Values of this object dict: Values of this object
""" """
return dict(role=self.role_name, name=self.tips.name, lot=self.tips.lot) return dict(role=self.role_name, name=self.tips.name, lot=self.tips.lot)
@classmethod
@setup_lookup
def query(cls, tip_id: int, role: str, submission_id: int|None=None, limit: int = 0, **kwargs) -> Any | List[Any]:
    """
    Look up tips associations by tip, role and (optionally) submission.

    Args:
        tip_id (int): Value matched against this class's tip_id column.
        role (str): Value matched against this class's role_name column.
        submission_id (int | None, optional): When given, results are also restricted to this submission_id. Defaults to None (no submission filter).
        limit (int, optional): Passed through to execute_query. Defaults to 0.

    Returns:
        Any | List[Any]: Whatever execute_query produces for the assembled query.
    """
    # NOTE: The tip filter is always applied, so it is chained directly onto the base query.
    lookup: Query = cls.__database_session__.query(cls).filter(cls.tip_id == tip_id)
    # NOTE: The submission filter is optional; skipping it searches across all submissions.
    if submission_id is not None:
        lookup = lookup.filter(cls.submission_id == submission_id)
    lookup = lookup.filter(cls.role_name == role)
    return cls.execute_query(query=lookup, limit=limit, **kwargs)

View File

@@ -9,7 +9,7 @@ from copy import deepcopy
from getpass import getuser from getpass import getuser
import logging, uuid, tempfile, re, yaml, base64 import logging, uuid, tempfile, re, yaml, base64
from zipfile import ZipFile from zipfile import ZipFile
from tempfile import TemporaryDirectory from tempfile import TemporaryDirectory, TemporaryFile
from operator import itemgetter from operator import itemgetter
from pprint import pformat from pprint import pformat
from . import BaseClass, Reagent, SubmissionType, KitType, Organization, Contact, Tips from . import BaseClass, Reagent, SubmissionType, KitType, Organization, Contact, Tips
@@ -33,6 +33,7 @@ from jinja2.exceptions import TemplateNotFound
from jinja2 import Template from jinja2 import Template
from docxtpl import InlineImage from docxtpl import InlineImage
from docx.shared import Inches from docx.shared import Inches
from PIL import Image
logger = logging.getLogger(f"submissions.{__name__}") logger = logging.getLogger(f"submissions.{__name__}")
@@ -469,7 +470,7 @@ class BasicSubmission(BaseClass):
'equipment', 'gel_info', 'gel_image', 'dna_core_submission_number', 'gel_controls', 'equipment', 'gel_info', 'gel_image', 'dna_core_submission_number', 'gel_controls',
'source_plates', 'pcr_technician', 'ext_technician', 'artic_technician', 'cost_centre', 'source_plates', 'pcr_technician', 'ext_technician', 'artic_technician', 'cost_centre',
'signed_by', 'artic_date', 'gel_barcode', 'gel_date', 'ngs_date', 'contact_phone', 'contact', 'signed_by', 'artic_date', 'gel_barcode', 'gel_date', 'ngs_date', 'contact_phone', 'contact',
'tips'] 'tips', 'gel_image_path']
for item in excluded: for item in excluded:
try: try:
df = df.drop(item, axis=1) df = df.drop(item, axis=1)
@@ -1187,8 +1188,10 @@ class BasicSubmission(BaseClass):
# logger.debug("We have tips in this equipment") # logger.debug("We have tips in this equipment")
for tips in equip.tips: for tips in equip.tips:
tassoc = tips.to_sql(submission=self) tassoc = tips.to_sql(submission=self)
tassoc.save() if tassoc not in self.submission_tips_associations:
tassoc.save()
else:
logger.error(f"Tips already found in submission, skipping.")
else: else:
pass pass
@@ -1638,13 +1641,30 @@ class WastewaterArtic(BasicSubmission):
dict: Updated sample dictionary dict: Updated sample dictionary
""" """
from backend.validators import RSLNamer from backend.validators import RSLNamer
from openpyxl_image_loader.sheet_image_loader import SheetImageLoader
def scrape_image(wb: Workbook, info_dict: dict) -> "Image.Image | None":
    """
    Return the first image found inside a rectangular cell range of a worksheet.

    The range is walked row by row, and left to right within each row; the
    search stops at the first cell the image loader reports as holding an image.

    Args:
        wb (Workbook): Workbook containing the sheet named by info_dict['sheet'].
        info_dict (dict): Range description with keys 'sheet', 'start_row', 'end_row', 'start_column' and 'end_column'. Both row and column bounds are inclusive.

    Returns:
        Image.Image | None: The object returned by SheetImageLoader.get for the first matching cell, or None when no cell in the range holds an image.
    """
    # NOTE: The annotation is quoted because `Image` here is the PIL module, so the
    #       previous `Image or None` simply evaluated to that module.
    ws = wb[info_dict['sheet']]
    img_loader = SheetImageLoader(ws)
    for ii in range(info_dict['start_row'], info_dict['end_row'] + 1):
        logger.debug(f"Checking row: {ii}")
        for jj in range(info_dict['start_column'], info_dict['end_column'] + 1):
            # NOTE(review): row_map comes from the enclosing scope and is used here to turn a
            #               column index into a letter - presumably it covers every column in
            #               the configured range; confirm against the image_range settings.
            cell_str = f"{row_map[jj]}{ii}"
            if img_loader.image_in(cell_str):
                return img_loader.get(cell_str)
    return None
input_dict = super().custom_info_parser(input_dict) input_dict = super().custom_info_parser(input_dict)
egel_section = custom_fields['egel_results'] logger.debug(f"Custom fields: {custom_fields}")
egel_section = custom_fields['egel_controls']
ws = xl[egel_section['sheet']] ws = xl[egel_section['sheet']]
data = [ws.cell(row=ii, column=jj) for jj in range(egel_section['start_column'], egel_section['end_column']+1) for # NOTE: Here we should be scraping the control results.
ii in range(egel_section['start_row'], egel_section['end_row']+1)] data = [ws.cell(row=ii, column=jj) for jj in range(egel_section['start_column'], egel_section['end_column'] + 1)
for
ii in range(egel_section['start_row'], egel_section['end_row'] + 1)]
data = [cell for cell in data if cell.value is not None and "NTC" in cell.value] data = [cell for cell in data if cell.value is not None and "NTC" in cell.value]
# logger.debug(f"Got gel control map: {data}") # logger.debug(f"Got gel control map: {data}")
# logger.debug(f"Checking against row_map: {row_map}")
input_dict['gel_controls'] = [ input_dict['gel_controls'] = [
dict(sample_id=cell.value, location=f"{row_map[cell.row - 9]}{str(cell.column - 14).zfill(2)}") for cell in dict(sample_id=cell.value, location=f"{row_map[cell.row - 9]}{str(cell.column - 14).zfill(2)}") for cell in
data] data]
@@ -1662,6 +1682,35 @@ class WastewaterArtic(BasicSubmission):
else: else:
datum['plate'] = RSLNamer(filename=datum['plate'], sub_type="Wastewater").parsed_name datum['plate'] = RSLNamer(filename=datum['plate'], sub_type="Wastewater").parsed_name
input_dict['source_plates'] = data input_dict['source_plates'] = data
egel_info_section = custom_fields['egel_info']
ws = xl[egel_info_section['sheet']]
data = []
for ii in range(egel_info_section['start_row'], egel_info_section['end_row'] + 1):
datum = dict(
name=ws.cell(row=ii, column=egel_info_section['start_column'] - 3).value,
values=[]
)
for jj in range(egel_info_section['start_column'], egel_info_section['end_column'] + 1):
d = dict(
name=ws.cell(row=egel_info_section['start_row'] - 1, column=jj).value,
value=ws.cell(row=ii, column=jj).value
)
if d['value'] is not None:
datum['values'].append(d)
data.append(datum)
input_dict['gel_info'] = data
logger.debug(f"Wastewater Artic custom info:\n\n{pformat(input_dict)}")
egel_image_section = custom_fields['image_range']
img: Image = scrape_image(wb=xl, info_dict=egel_image_section)
if img is not None:
tmp = Path(TemporaryFile().name).with_suffix(".jpg")
img.save(tmp.__str__())
with ZipFile(cls.__directory_path__.joinpath("submission_imgs.zip"), 'a') as zipf:
# NOTE: Add a file located at the source_path to the destination within the zip
# file. It will overwrite existing files if the names collide, but it
# will give a warning
zipf.write(tmp.__str__(), f"{input_dict['rsl_plate_num']['value']}.jpg")
input_dict['gel_image'] = f"{input_dict['rsl_plate_num']['value']}.jpg"
return input_dict return input_dict
@classmethod @classmethod
@@ -1887,13 +1936,13 @@ class WastewaterArtic(BasicSubmission):
logger.warning(f"No source plate info found.") logger.warning(f"No source plate info found.")
# NOTE: check for gel information # NOTE: check for gel information
if check_key_or_attr(key='gel_info', interest=info, check_none=True): if check_key_or_attr(key='gel_info', interest=info, check_none=True):
egel_section = custom_fields['egel_results'] egel_section = custom_fields['egel_info']
# logger.debug(f"Gel info check passed.") # logger.debug(f"Gel info check passed.")
# NOTE: print json field gel results to Egel results # NOTE: print json field gel results to Egel results
worksheet = input_excel[egel_section['sheet']] worksheet = input_excel[egel_section['sheet']]
# TODO: Move all this into a separate function? # TODO: Move all this into a separate function?
start_row = egel_section['start_row'] start_row = egel_section['start_row'] - 1
start_column = egel_section['start_column'] start_column = egel_section['start_column'] - 3
for row, ki in enumerate(info['gel_info']['value'], start=1): for row, ki in enumerate(info['gel_info']['value'], start=1):
# logger.debug(f"ki: {ki}") # logger.debug(f"ki: {ki}")
# logger.debug(f"vi: {vi}") # logger.debug(f"vi: {vi}")

View File

@@ -75,7 +75,7 @@ class RSLNamer(object):
try: try:
submission_type = m.lastgroup submission_type = m.lastgroup
except AttributeError as e: except AttributeError as e:
logger.critical("No RSL plate number found or submission type found!") logger.critical(f"No RSL plate number found or submission type found!: {e}")
case _: case _:
submission_type = None submission_type = None
try: try:
@@ -180,6 +180,14 @@ class RSLNamer(object):
template = environment.from_string(template) template = environment.from_string(template)
return template.render(**kwargs) return template.render(**kwargs)
def calculate_repeat(self):
    """
    Extract the repeat marker from this namer's parsed plate name.

    Searches self.parsed_name for a hyphen followed by a single digit and then
    an "R" with a single digit (e.g. the "R2" in "...-1R2").

    Returns:
        str: The "R<digit>" marker when the pattern is found, otherwise an empty string.
    """
    found = re.search(r"-\d(?P<repeat>R\d)", self.parsed_name)
    # NOTE: No match means the plate name carries no repeat marker.
    return "" if found is None else found.group("repeat")
from .pydant import PydSubmission, PydKit, PydContact, PydOrganization, PydSample, PydReagent, PydReagentRole, \ from .pydant import PydSubmission, PydKit, PydContact, PydOrganization, PydSample, PydReagent, PydReagentRole, \
PydEquipment, PydEquipmentRole, PydTips PydEquipment, PydEquipmentRole, PydTips

View File

@@ -296,7 +296,9 @@ class PydTips(BaseModel):
SubmissionTipsAssociation: Association between queried tips and submission SubmissionTipsAssociation: Association between queried tips and submission
""" """
tips = Tips.query(name=self.name, lot=self.lot, limit=1) tips = Tips.query(name=self.name, lot=self.lot, limit=1)
assoc = SubmissionTipsAssociation(submission=submission, tips=tips, role_name=self.role) assoc = SubmissionTipsAssociation.query(tip_id=tips.id, submission_id=submission.id, role=self.role, limit=1)
if assoc is None:
assoc = SubmissionTipsAssociation(submission=submission, tips=tips, role_name=self.role)
return assoc return assoc
@@ -640,6 +642,7 @@ class PydSubmission(BaseModel, extra='allow'):
# this could also be done with default_factory # this could also be done with default_factory
self.submission_object = BasicSubmission.find_polymorphic_subclass( self.submission_object = BasicSubmission.find_polymorphic_subclass(
polymorphic_identity=self.submission_type['value']) polymorphic_identity=self.submission_type['value'])
self.namer = RSLNamer(self.rsl_plate_num['value'])
def set_attribute(self, key: str, value): def set_attribute(self, key: str, value):
""" """
@@ -853,7 +856,7 @@ class PydSubmission(BaseModel, extra='allow'):
""" """
template = self.submission_object.filename_template() template = self.submission_object.filename_template()
# logger.debug(f"Using template string: {template}") # logger.debug(f"Using template string: {template}")
render = RSLNamer.construct_export_name(template=template, **self.improved_dict(dictionaries=False)).replace( render = self.namer.construct_export_name(template=template, **self.improved_dict(dictionaries=False)).replace(
"/", "") "/", "")
# logger.debug(f"Template rendered as: {render}") # logger.debug(f"Template rendered as: {render}")
return render return render

View File

@@ -89,7 +89,7 @@ class SubmissionsSheet(QTableView):
self.data = BasicSubmission.submissions_to_df() self.data = BasicSubmission.submissions_to_df()
try: try:
self.data['Id'] = self.data['Id'].apply(str) self.data['Id'] = self.data['Id'].apply(str)
self.data['Id'] = self.data['Id'].str.zfill(3) self.data['Id'] = self.data['Id'].str.zfill(4)
except KeyError as e: except KeyError as e:
logger.error(f"Could not alter id to string due to {e}") logger.error(f"Could not alter id to string due to {e}")
proxyModel = QSortFilterProxyModel() proxyModel = QSortFilterProxyModel()