Improved scraping of gel info for Artic.
This commit is contained in:
@@ -1,3 +1,11 @@
|
|||||||
|
## 202408.05
|
||||||
|
|
||||||
|
- Improved scraping for gel info of Artic submissions.
|
||||||
|
|
||||||
|
## 202408.04
|
||||||
|
|
||||||
|
- Fixed a false error being thrown when tips are added both in the Excel sheet and from the app.
|
||||||
|
|
||||||
## 202408.03
|
## 202408.03
|
||||||
|
|
||||||
- Fixed issue backing up database file.
|
- Fixed issue backing up database file.
|
||||||
|
|||||||
12
README.md
12
README.md
@@ -8,14 +8,14 @@
|
|||||||
*should fit 90% of usage cases*
|
*should fit 90% of usage cases*
|
||||||
|
|
||||||
1. Ensure a properly formatted Submission Excel form has been filled out.
|
1. Ensure a properly formatted Submission Excel form has been filled out.
|
||||||
a. The program can fill in reagent fields and some other information automatically, but should be checked for accuracy afterward.
|
1. The program can fill in reagent fields and some other information automatically, but should be checked for accuracy afterward.
|
||||||
2. Click on 'File' in the menu bar, followed by 'Import Submission' and use the file dialog to locate the form.
|
2. Click on 'File' in the menu bar, followed by 'Import Submission' and use the file dialog to locate the form.
|
||||||
1. The Excel file may also be dragged into the grey area on the left hand side of the screen from Windows File Explorer. If so, skip step 3.
|
1. The Excel file may also be dragged into the grey area on the left hand side of the screen from Windows File Explorer. If so, skip step 3.
|
||||||
3. Click 'Ok'.
|
3. Click 'Ok'.
|
||||||
4. Most of the fields in the form should be automatically filled in from the form area to the left of the screen.
|
4. Most of the fields in the form should be automatically filled in from the form area to the left of the screen.
|
||||||
5. You may need to maximize the app to ensure you can see all the info.
|
5. You may need to maximize the app to ensure you can see all the info.
|
||||||
6. Any fields that are not automatically filled in can be filled in manually from the drop-down menus.
|
6. Any fields that are not automatically filled in can be filled in manually from the drop-down menus.
|
||||||
1. Any reagent lots not found in the drop-downs can be typed in manually.
|
1. Any reagent lots not found in the drop-downs can be typed in manually.
|
||||||
7. Once you are certain all the information is correct, click 'Submit' at the bottom of the form.
|
7. Once you are certain all the information is correct, click 'Submit' at the bottom of the form.
|
||||||
8. Add in any new reagents the app doesn't have in the database.
|
8. Add in any new reagents the app doesn't have in the database.
|
||||||
9. Once the new run shows up at the bottom of the Submissions, everything is fine.
|
9. Once the new run shows up at the bottom of the Submissions, everything is fine.
|
||||||
@@ -68,7 +68,7 @@ This is meant to import .xlsx files created from the Design & Analysis Software
|
|||||||
1. Click on 'Reports' -> 'Make Report' in the menu bar.
|
1. Click on 'Reports' -> 'Make Report' in the menu bar.
|
||||||
2. Select the start date and the end date you want for the report. Click 'ok'.
|
2. Select the start date and the end date you want for the report. Click 'ok'.
|
||||||
3. Use the file dialog to select a location to save the report.
|
3. Use the file dialog to select a location to save the report.
|
||||||
a. Both an Excel sheet and a pdf should be generated containing summary information for submissions made by each client lab.
|
1. Both an Excel sheet and a pdf should be generated containing summary information for submissions made by each client lab.
|
||||||
|
|
||||||
## Exporting a run as an Excel file:
|
## Exporting a run as an Excel file:
|
||||||
|
|
||||||
@@ -96,9 +96,9 @@ This is meant to import .xlsx files created from the Design & Analysis Software
|
|||||||
4. For each reagent type in the kit click the "Add Reagent Type" button.
|
4. For each reagent type in the kit click the "Add Reagent Type" button.
|
||||||
5. Fill in the name of the reagent type. Alternatively select from already existing types in the drop-down.
|
5. Fill in the name of the reagent type. Alternatively select from already existing types in the drop-down.
|
||||||
6. Fill in the reagent location in the Excel submission sheet.
|
6. Fill in the reagent location in the Excel submission sheet.
|
||||||
a. For example if the reagent name is in a sheet called "Reagent Info" in row 12, column 1, type "Reagent Info" in the "Excel Location Sheet Name" field.
|
1. For example if the reagent name is in a sheet called "Reagent Info" in row 12, column 1, type "Reagent Info" in the "Excel Location Sheet Name" field.
|
||||||
b. Set 12 in the "Name Row" and 1 in the "Name Column".
|
2. Set 12 in the "Name Row" and 1 in the "Name Column".
|
||||||
c. Repeat 6b for the Lot and the Expiry row and columns.
|
3. Repeat the previous sub-step for the Lot and the Expiry rows and columns.
|
||||||
7. Click the "Submit" button at the top.
|
7. Click the "Submit" button at the top.
|
||||||
|
|
||||||
## Linking Extraction Logs:
|
## Linking Extraction Logs:
|
||||||
|
|||||||
@@ -1494,11 +1494,12 @@ class SubmissionEquipmentAssociation(BaseClass):
|
|||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@setup_lookup
|
@setup_lookup
|
||||||
def query(cls, equipment_id:int, submission_id:int, role:str, limit:int=0, **kwargs) -> Any | List[Any]:
|
def query(cls, equipment_id:int, submission_id:int, role:str|None=None, limit:int=0, **kwargs) -> Any | List[Any]:
|
||||||
query: Query = cls.__database_session__.query(cls)
|
query: Query = cls.__database_session__.query(cls)
|
||||||
query = query.filter(cls.equipment_id==equipment_id)
|
query = query.filter(cls.equipment_id==equipment_id)
|
||||||
query = query.filter(cls.submission_id==submission_id)
|
query = query.filter(cls.submission_id==submission_id)
|
||||||
query = query.filter(cls.role==role)
|
if role is not None:
|
||||||
|
query = query.filter(cls.role==role)
|
||||||
return cls.execute_query(query=query, limit=limit, **kwargs)
|
return cls.execute_query(query=query, limit=limit, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
@@ -1763,3 +1764,13 @@ class SubmissionTipsAssociation(BaseClass):
|
|||||||
dict: Values of this object
|
dict: Values of this object
|
||||||
"""
|
"""
|
||||||
return dict(role=self.role_name, name=self.tips.name, lot=self.tips.lot)
|
return dict(role=self.role_name, name=self.tips.name, lot=self.tips.lot)
|
||||||
|
|
||||||
|
@classmethod
@setup_lookup
def query(cls, tip_id: int, role: str, submission_id: int|None=None, limit: int = 0, **kwargs) -> Any | List[Any]:
    """
    Lookup associations between tips and submissions.

    Args:
        tip_id (int): Value matched against this class's tip_id column.
        role (str): Value matched against this class's role_name column.
        submission_id (int | None, optional): Value matched against the submission_id column. Not filtered on when None. Defaults to None.
        limit (int, optional): Passed through to execute_query. Defaults to 0.

    Returns:
        Any | List[Any]: Whatever cls.execute_query returns for the constructed query.
    """
    lookup: Query = cls.__database_session__.query(cls)
    # NOTE: Criteria are collected in the order they are applied: tip, (optional) submission, role.
    criteria = [cls.tip_id == tip_id]
    if submission_id is not None:
        criteria.append(cls.submission_id == submission_id)
    criteria.append(cls.role_name == role)
    for criterion in criteria:
        lookup = lookup.filter(criterion)
    return cls.execute_query(query=lookup, limit=limit, **kwargs)
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ from copy import deepcopy
|
|||||||
from getpass import getuser
|
from getpass import getuser
|
||||||
import logging, uuid, tempfile, re, yaml, base64
|
import logging, uuid, tempfile, re, yaml, base64
|
||||||
from zipfile import ZipFile
|
from zipfile import ZipFile
|
||||||
from tempfile import TemporaryDirectory
|
from tempfile import TemporaryDirectory, TemporaryFile
|
||||||
from operator import itemgetter
|
from operator import itemgetter
|
||||||
from pprint import pformat
|
from pprint import pformat
|
||||||
from . import BaseClass, Reagent, SubmissionType, KitType, Organization, Contact, Tips
|
from . import BaseClass, Reagent, SubmissionType, KitType, Organization, Contact, Tips
|
||||||
@@ -33,6 +33,7 @@ from jinja2.exceptions import TemplateNotFound
|
|||||||
from jinja2 import Template
|
from jinja2 import Template
|
||||||
from docxtpl import InlineImage
|
from docxtpl import InlineImage
|
||||||
from docx.shared import Inches
|
from docx.shared import Inches
|
||||||
|
from PIL import Image
|
||||||
|
|
||||||
logger = logging.getLogger(f"submissions.{__name__}")
|
logger = logging.getLogger(f"submissions.{__name__}")
|
||||||
|
|
||||||
@@ -469,7 +470,7 @@ class BasicSubmission(BaseClass):
|
|||||||
'equipment', 'gel_info', 'gel_image', 'dna_core_submission_number', 'gel_controls',
|
'equipment', 'gel_info', 'gel_image', 'dna_core_submission_number', 'gel_controls',
|
||||||
'source_plates', 'pcr_technician', 'ext_technician', 'artic_technician', 'cost_centre',
|
'source_plates', 'pcr_technician', 'ext_technician', 'artic_technician', 'cost_centre',
|
||||||
'signed_by', 'artic_date', 'gel_barcode', 'gel_date', 'ngs_date', 'contact_phone', 'contact',
|
'signed_by', 'artic_date', 'gel_barcode', 'gel_date', 'ngs_date', 'contact_phone', 'contact',
|
||||||
'tips']
|
'tips', 'gel_image_path']
|
||||||
for item in excluded:
|
for item in excluded:
|
||||||
try:
|
try:
|
||||||
df = df.drop(item, axis=1)
|
df = df.drop(item, axis=1)
|
||||||
@@ -1187,8 +1188,10 @@ class BasicSubmission(BaseClass):
|
|||||||
# logger.debug("We have tips in this equipment")
|
# logger.debug("We have tips in this equipment")
|
||||||
for tips in equip.tips:
|
for tips in equip.tips:
|
||||||
tassoc = tips.to_sql(submission=self)
|
tassoc = tips.to_sql(submission=self)
|
||||||
tassoc.save()
|
if tassoc not in self.submission_tips_associations:
|
||||||
|
tassoc.save()
|
||||||
|
else:
|
||||||
|
logger.error(f"Tips already found in submission, skipping.")
|
||||||
else:
|
else:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@@ -1638,13 +1641,30 @@ class WastewaterArtic(BasicSubmission):
|
|||||||
dict: Updated sample dictionary
|
dict: Updated sample dictionary
|
||||||
"""
|
"""
|
||||||
from backend.validators import RSLNamer
|
from backend.validators import RSLNamer
|
||||||
|
from openpyxl_image_loader.sheet_image_loader import SheetImageLoader
|
||||||
|
|
||||||
|
def scrape_image(wb: Workbook, info_dict: dict) -> Image.Image | None:
    """
    Finds the first image anchored to a cell within a range of a worksheet.

    Args:
        wb (Workbook): Workbook containing the sheet to be searched.
        info_dict (dict): Search range; read keys are 'sheet', 'start_row', 'end_row', 'start_column' and 'end_column' (end values are inclusive).

    Returns:
        Image.Image | None: Image returned by the sheet image loader for the first matching cell, or None if no cell in the range holds an image.
    """
    ws = wb[info_dict['sheet']]
    img_loader = SheetImageLoader(ws)
    # NOTE: Search goes row by row, left to right, and stops at the first hit.
    for ii in range(info_dict['start_row'], info_dict['end_row'] + 1):
        logger.debug(f"Checking row: {ii}")
        for jj in range(info_dict['start_column'], info_dict['end_column'] + 1):
            # NOTE(review): row_map is used here to turn a column number into a letter;
            # presumably it maps 1 -> "A", 2 -> "B", ... - confirm it covers the full column range.
            cell_str = f"{row_map[jj]}{ii}"
            if img_loader.image_in(cell_str):
                return img_loader.get(cell_str)
    return None
|
||||||
|
|
||||||
input_dict = super().custom_info_parser(input_dict)
|
input_dict = super().custom_info_parser(input_dict)
|
||||||
egel_section = custom_fields['egel_results']
|
logger.debug(f"Custom fields: {custom_fields}")
|
||||||
|
egel_section = custom_fields['egel_controls']
|
||||||
ws = xl[egel_section['sheet']]
|
ws = xl[egel_section['sheet']]
|
||||||
data = [ws.cell(row=ii, column=jj) for jj in range(egel_section['start_column'], egel_section['end_column']+1) for
|
# NOTE: Here we should be scraping the control results.
|
||||||
ii in range(egel_section['start_row'], egel_section['end_row']+1)]
|
data = [ws.cell(row=ii, column=jj) for jj in range(egel_section['start_column'], egel_section['end_column'] + 1)
|
||||||
|
for
|
||||||
|
ii in range(egel_section['start_row'], egel_section['end_row'] + 1)]
|
||||||
data = [cell for cell in data if cell.value is not None and "NTC" in cell.value]
|
data = [cell for cell in data if cell.value is not None and "NTC" in cell.value]
|
||||||
# logger.debug(f"Got gel control map: {data}")
|
# logger.debug(f"Got gel control map: {data}")
|
||||||
|
# logger.debug(f"Checking against row_map: {row_map}")
|
||||||
input_dict['gel_controls'] = [
|
input_dict['gel_controls'] = [
|
||||||
dict(sample_id=cell.value, location=f"{row_map[cell.row - 9]}{str(cell.column - 14).zfill(2)}") for cell in
|
dict(sample_id=cell.value, location=f"{row_map[cell.row - 9]}{str(cell.column - 14).zfill(2)}") for cell in
|
||||||
data]
|
data]
|
||||||
@@ -1662,6 +1682,35 @@ class WastewaterArtic(BasicSubmission):
|
|||||||
else:
|
else:
|
||||||
datum['plate'] = RSLNamer(filename=datum['plate'], sub_type="Wastewater").parsed_name
|
datum['plate'] = RSLNamer(filename=datum['plate'], sub_type="Wastewater").parsed_name
|
||||||
input_dict['source_plates'] = data
|
input_dict['source_plates'] = data
|
||||||
|
egel_info_section = custom_fields['egel_info']
|
||||||
|
ws = xl[egel_info_section['sheet']]
|
||||||
|
data = []
|
||||||
|
for ii in range(egel_info_section['start_row'], egel_info_section['end_row'] + 1):
|
||||||
|
datum = dict(
|
||||||
|
name=ws.cell(row=ii, column=egel_info_section['start_column'] - 3).value,
|
||||||
|
values=[]
|
||||||
|
)
|
||||||
|
for jj in range(egel_info_section['start_column'], egel_info_section['end_column'] + 1):
|
||||||
|
d = dict(
|
||||||
|
name=ws.cell(row=egel_info_section['start_row'] - 1, column=jj).value,
|
||||||
|
value=ws.cell(row=ii, column=jj).value
|
||||||
|
)
|
||||||
|
if d['value'] is not None:
|
||||||
|
datum['values'].append(d)
|
||||||
|
data.append(datum)
|
||||||
|
input_dict['gel_info'] = data
|
||||||
|
logger.debug(f"Wastewater Artic custom info:\n\n{pformat(input_dict)}")
|
||||||
|
egel_image_section = custom_fields['image_range']
|
||||||
|
img: Image = scrape_image(wb=xl, info_dict=egel_image_section)
|
||||||
|
if img is not None:
|
||||||
|
tmp = Path(TemporaryFile().name).with_suffix(".jpg")
|
||||||
|
img.save(tmp.__str__())
|
||||||
|
with ZipFile(cls.__directory_path__.joinpath("submission_imgs.zip"), 'a') as zipf:
|
||||||
|
# NOTE: Add a file located at the source_path to the destination within the zip
|
||||||
|
# file. It will overwrite existing files if the names collide, but it
|
||||||
|
# will give a warning
|
||||||
|
zipf.write(tmp.__str__(), f"{input_dict['rsl_plate_num']['value']}.jpg")
|
||||||
|
input_dict['gel_image'] = f"{input_dict['rsl_plate_num']['value']}.jpg"
|
||||||
return input_dict
|
return input_dict
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@@ -1887,13 +1936,13 @@ class WastewaterArtic(BasicSubmission):
|
|||||||
logger.warning(f"No source plate info found.")
|
logger.warning(f"No source plate info found.")
|
||||||
# NOTE: check for gel information
|
# NOTE: check for gel information
|
||||||
if check_key_or_attr(key='gel_info', interest=info, check_none=True):
|
if check_key_or_attr(key='gel_info', interest=info, check_none=True):
|
||||||
egel_section = custom_fields['egel_results']
|
egel_section = custom_fields['egel_info']
|
||||||
# logger.debug(f"Gel info check passed.")
|
# logger.debug(f"Gel info check passed.")
|
||||||
# NOTE: print json field gel results to Egel results
|
# NOTE: print json field gel results to Egel results
|
||||||
worksheet = input_excel[egel_section['sheet']]
|
worksheet = input_excel[egel_section['sheet']]
|
||||||
# TODO: Move all this into a seperate function?
|
# TODO: Move all this into a seperate function?
|
||||||
start_row = egel_section['start_row']
|
start_row = egel_section['start_row'] - 1
|
||||||
start_column = egel_section['start_column']
|
start_column = egel_section['start_column'] - 3
|
||||||
for row, ki in enumerate(info['gel_info']['value'], start=1):
|
for row, ki in enumerate(info['gel_info']['value'], start=1):
|
||||||
# logger.debug(f"ki: {ki}")
|
# logger.debug(f"ki: {ki}")
|
||||||
# logger.debug(f"vi: {vi}")
|
# logger.debug(f"vi: {vi}")
|
||||||
|
|||||||
@@ -75,7 +75,7 @@ class RSLNamer(object):
|
|||||||
try:
|
try:
|
||||||
submission_type = m.lastgroup
|
submission_type = m.lastgroup
|
||||||
except AttributeError as e:
|
except AttributeError as e:
|
||||||
logger.critical("No RSL plate number found or submission type found!")
|
logger.critical(f"No RSL plate number found or submission type found!: {e}")
|
||||||
case _:
|
case _:
|
||||||
submission_type = None
|
submission_type = None
|
||||||
try:
|
try:
|
||||||
@@ -180,6 +180,14 @@ class RSLNamer(object):
|
|||||||
template = environment.from_string(template)
|
template = environment.from_string(template)
|
||||||
return template.render(**kwargs)
|
return template.render(**kwargs)
|
||||||
|
|
||||||
|
def calculate_repeat(self) -> str:
    """
    Extracts the repeat marker (e.g. "R1") from this object's parsed name.

    Returns:
        str: The "R<digit>" token immediately following "-<digit>" in self.parsed_name,
        or an empty string if no such token exists or no parsed name is available.
    """
    parsed_name = getattr(self, "parsed_name", None)
    # NOTE: re.search raises TypeError on a non-string, so a missing name is treated as "no repeat".
    if not isinstance(parsed_name, str):
        return ""
    m = re.search(r"-\d(?P<repeat>R\d)", parsed_name)
    return m.group("repeat") if m is not None else ""
|
||||||
|
|
||||||
|
|
||||||
from .pydant import PydSubmission, PydKit, PydContact, PydOrganization, PydSample, PydReagent, PydReagentRole, \
|
from .pydant import PydSubmission, PydKit, PydContact, PydOrganization, PydSample, PydReagent, PydReagentRole, \
|
||||||
PydEquipment, PydEquipmentRole, PydTips
|
PydEquipment, PydEquipmentRole, PydTips
|
||||||
|
|||||||
@@ -296,7 +296,9 @@ class PydTips(BaseModel):
|
|||||||
SubmissionTipsAssociation: Association between queried tips and submission
|
SubmissionTipsAssociation: Association between queried tips and submission
|
||||||
"""
|
"""
|
||||||
tips = Tips.query(name=self.name, lot=self.lot, limit=1)
|
tips = Tips.query(name=self.name, lot=self.lot, limit=1)
|
||||||
assoc = SubmissionTipsAssociation(submission=submission, tips=tips, role_name=self.role)
|
assoc = SubmissionTipsAssociation.query(tip_id=tips.id, submission_id=submission.id, role=self.role, limit=1)
|
||||||
|
if assoc is None:
|
||||||
|
assoc = SubmissionTipsAssociation(submission=submission, tips=tips, role_name=self.role)
|
||||||
return assoc
|
return assoc
|
||||||
|
|
||||||
|
|
||||||
@@ -640,6 +642,7 @@ class PydSubmission(BaseModel, extra='allow'):
|
|||||||
# this could also be done with default_factory
|
# this could also be done with default_factory
|
||||||
self.submission_object = BasicSubmission.find_polymorphic_subclass(
|
self.submission_object = BasicSubmission.find_polymorphic_subclass(
|
||||||
polymorphic_identity=self.submission_type['value'])
|
polymorphic_identity=self.submission_type['value'])
|
||||||
|
self.namer = RSLNamer(self.rsl_plate_num['value'])
|
||||||
|
|
||||||
def set_attribute(self, key: str, value):
|
def set_attribute(self, key: str, value):
|
||||||
"""
|
"""
|
||||||
@@ -853,7 +856,7 @@ class PydSubmission(BaseModel, extra='allow'):
|
|||||||
"""
|
"""
|
||||||
template = self.submission_object.filename_template()
|
template = self.submission_object.filename_template()
|
||||||
# logger.debug(f"Using template string: {template}")
|
# logger.debug(f"Using template string: {template}")
|
||||||
render = RSLNamer.construct_export_name(template=template, **self.improved_dict(dictionaries=False)).replace(
|
render = self.namer.construct_export_name(template=template, **self.improved_dict(dictionaries=False)).replace(
|
||||||
"/", "")
|
"/", "")
|
||||||
# logger.debug(f"Template rendered as: {render}")
|
# logger.debug(f"Template rendered as: {render}")
|
||||||
return render
|
return render
|
||||||
|
|||||||
@@ -89,7 +89,7 @@ class SubmissionsSheet(QTableView):
|
|||||||
self.data = BasicSubmission.submissions_to_df()
|
self.data = BasicSubmission.submissions_to_df()
|
||||||
try:
|
try:
|
||||||
self.data['Id'] = self.data['Id'].apply(str)
|
self.data['Id'] = self.data['Id'].apply(str)
|
||||||
self.data['Id'] = self.data['Id'].str.zfill(3)
|
self.data['Id'] = self.data['Id'].str.zfill(4)
|
||||||
except KeyError as e:
|
except KeyError as e:
|
||||||
logger.error(f"Could not alter id to string due to {e}")
|
logger.error(f"Could not alter id to string due to {e}")
|
||||||
proxyModel = QSortFilterProxyModel()
|
proxyModel = QSortFilterProxyModel()
|
||||||
|
|||||||
Reference in New Issue
Block a user