Database updates, scraping samples from Excel sheets

This commit is contained in:
Landon Wark
2023-01-19 11:22:57 -06:00
parent e763e7273d
commit d17ee5862d
10 changed files with 206 additions and 53 deletions

View File

@@ -1,8 +1,8 @@
"""initial commit
Revision ID: 4cba0c1ffe03
Revision ID: 03da9270e51f
Revises:
Create Date: 2023-01-18 08:59:34.382715
Create Date: 2023-01-19 09:01:03.022482
"""
from alembic import op
@@ -10,7 +10,7 @@ import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = '4cba0c1ffe03'
revision = '03da9270e51f'
down_revision = None
branch_labels = None
depends_on = None
@@ -50,22 +50,6 @@ def upgrade() -> None:
sa.ForeignKeyConstraint(['kit_id'], ['_kits.id'], name='fk_RT_kits_id', ondelete='SET NULL', use_alter=True),
sa.PrimaryKeyConstraint('id')
)
op.create_table('_ww_samples',
sa.Column('id', sa.INTEGER(), nullable=False),
sa.Column('ww_processing_num', sa.String(length=64), nullable=True),
sa.Column('ww_sample_full_id', sa.String(length=64), nullable=True),
sa.Column('rsl_number', sa.String(length=64), nullable=True),
sa.Column('collection_date', sa.TIMESTAMP(), nullable=True),
sa.Column('testing_type', sa.String(length=64), nullable=True),
sa.Column('site_status', sa.String(length=64), nullable=True),
sa.Column('notes', sa.String(length=2000), nullable=True),
sa.Column('ct_n1', sa.FLOAT(precision=2), nullable=True),
sa.Column('ct_n2', sa.FLOAT(precision=2), nullable=True),
sa.Column('seq_submitted', sa.BOOLEAN(), nullable=True),
sa.Column('ww_seq_run_id', sa.String(length=64), nullable=True),
sa.Column('sample_type', sa.String(length=8), nullable=True),
sa.PrimaryKeyConstraint('id')
)
op.create_table('_control_samples',
sa.Column('id', sa.INTEGER(), nullable=False),
sa.Column('parent_id', sa.String(), nullable=True),
@@ -119,35 +103,63 @@ def upgrade() -> None:
sa.Column('technician', sa.String(length=64), nullable=True),
sa.Column('reagents_id', sa.String(), nullable=True),
sa.Column('control_id', sa.INTEGER(), nullable=True),
sa.Column('sample_id', sa.String(), nullable=True),
sa.ForeignKeyConstraint(['control_id'], ['_control_samples.id'], name='fk_BC_control_id', ondelete='SET NULL'),
sa.ForeignKeyConstraint(['extraction_kit_id'], ['_kits.id'], ondelete='SET NULL'),
sa.ForeignKeyConstraint(['extraction_kit_id'], ['_kits.id'], name='fk_BS_extkit_id', ondelete='SET NULL'),
sa.ForeignKeyConstraint(['reagents_id'], ['_reagents.id'], name='fk_BS_reagents_id', ondelete='SET NULL'),
sa.ForeignKeyConstraint(['sample_id'], ['_ww_samples.id'], name='fk_WW_sample_id', ondelete='SET NULL'),
sa.ForeignKeyConstraint(['submitting_lab_id'], ['_organizations.id'], ondelete='SET NULL'),
sa.ForeignKeyConstraint(['submitting_lab_id'], ['_organizations.id'], name='fk_BS_sublab_id', ondelete='SET NULL'),
sa.PrimaryKeyConstraint('id'),
sa.UniqueConstraint('rsl_plate_num'),
sa.UniqueConstraint('submitter_plate_num')
)
op.create_table('_bc_samples',
sa.Column('id', sa.INTEGER(), nullable=False),
sa.Column('well_number', sa.String(length=8), nullable=True),
sa.Column('sample_id', sa.String(length=64), nullable=False),
sa.Column('organism', sa.String(length=64), nullable=True),
sa.Column('concentration', sa.String(length=16), nullable=True),
sa.Column('rsl_plate_id', sa.INTEGER(), nullable=True),
sa.ForeignKeyConstraint(['rsl_plate_id'], ['_submissions.id'], name='fk_BCS_sample_id', ondelete='SET NULL'),
sa.PrimaryKeyConstraint('id')
)
op.create_table('_reagents_submissions',
sa.Column('reagent_id', sa.INTEGER(), nullable=True),
sa.Column('submission_id', sa.INTEGER(), nullable=True),
sa.ForeignKeyConstraint(['reagent_id'], ['_reagents.id'], ),
sa.ForeignKeyConstraint(['submission_id'], ['_submissions.id'], )
)
op.create_table('_ww_samples',
sa.Column('id', sa.INTEGER(), nullable=False),
sa.Column('ww_processing_num', sa.String(length=64), nullable=True),
sa.Column('ww_sample_full_id', sa.String(length=64), nullable=False),
sa.Column('rsl_number', sa.String(length=64), nullable=True),
sa.Column('rsl_plate_id', sa.INTEGER(), nullable=True),
sa.Column('collection_date', sa.TIMESTAMP(), nullable=True),
sa.Column('testing_type', sa.String(length=64), nullable=True),
sa.Column('site_status', sa.String(length=64), nullable=True),
sa.Column('notes', sa.String(length=2000), nullable=True),
sa.Column('ct_n1', sa.FLOAT(precision=2), nullable=True),
sa.Column('ct_n2', sa.FLOAT(precision=2), nullable=True),
sa.Column('seq_submitted', sa.BOOLEAN(), nullable=True),
sa.Column('ww_seq_run_id', sa.String(length=64), nullable=True),
sa.Column('sample_type', sa.String(length=8), nullable=True),
sa.Column('well_number', sa.String(length=8), nullable=True),
sa.ForeignKeyConstraint(['rsl_plate_id'], ['_submissions.id'], name='fk_WWS_sample_id', ondelete='SET NULL'),
sa.PrimaryKeyConstraint('id')
)
# ### end Alembic commands ###
def downgrade() -> None:
    """Drop the tables created by ``upgrade()``.

    Each table is dropped exactly once. ``_ww_samples``, ``_reagents_submissions``
    and ``_bc_samples`` all carry foreign keys to ``_submissions``, so they are
    dropped before it.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_table('_ww_samples')
    op.drop_table('_reagents_submissions')
    op.drop_table('_bc_samples')
    op.drop_table('_submissions')
    op.drop_table('_orgs_contacts')
    op.drop_table('_reagentstypes_kittypes')
    op.drop_table('_reagents')
    op.drop_table('_organizations')
    op.drop_table('_control_samples')
    # '_ww_samples' was previously dropped a second time here; a repeated
    # drop_table on an already-dropped table aborts the downgrade.
    op.drop_table('_reagent_types')
    op.drop_table('_kits')
    op.drop_table('_control_types')

View File

@@ -16,6 +16,12 @@ def get_kits_by_use( ctx:dict, kittype_str:str|None) -> list:
def store_submission(ctx:dict, base_submission:models.BasicSubmission) -> None:
for sample in base_submission.samples:
sample.rsl_plate = base_submission
try:
ctx['database_session'].add(sample)
except IntegrityError:
continue
ctx['database_session'].add(base_submission)
try:
ctx['database_session'].commit()
@@ -53,6 +59,11 @@ def construct_submission_info(ctx:dict, info_dict:dict) -> models.BasicSubmissio
# Because of unique constraint, the submitter plate number cannot be None, so...
if info_dict[item] == None:
info_dict[item] = uuid.uuid4().hex.upper()
field_value = info_dict[item]
# case "samples":
# for sample in info_dict[item]:
# instance.samples.append(sample)
# continue
case _:
field_value = info_dict[item]
try:
@@ -60,6 +71,7 @@ def construct_submission_info(ctx:dict, info_dict:dict) -> models.BasicSubmissio
except AttributeError:
print(f"Could not set attribute: {item} to {info_dict[item]}")
continue
# print(instance.__dict__)
return instance
# looked_up = []
# for reagent in reagents:
@@ -185,6 +197,10 @@ def create_kit_from_yaml(ctx:dict, exp:dict) -> None:
ctx (dict): Context dictionary passed down from frontend
exp (dict): Experiment dictionary created from yaml file
"""
try:
exp['password'].decode()
except (UnicodeDecodeError, AttributeError):
exp['password'] = exp['password'].encode()
if base64.b64encode(exp['password']) != b'cnNsX3N1Ym1pNTVpb25z':
print(f"Not the correct password.")
return

View File

@@ -8,4 +8,4 @@ from .controls import Control, ControlType
from .kits import KitType, ReagentType, Reagent
from .submissions import BasicSubmission, BacterialCulture, Wastewater
from .organizations import Organization, Contact
from .samples import Sample
from .samples import WWSample, BCSample

View File

@@ -3,15 +3,16 @@ from sqlalchemy import Column, String, TIMESTAMP, text, JSON, INTEGER, ForeignKe
from sqlalchemy.orm import relationship, relationships
class Sample(Base):
class WWSample(Base):
__tablename__ = "_ww_samples"
id = Column(INTEGER, primary_key=True) #: primary key
ww_processing_num = Column(String(64))
ww_sample_full_id = Column(String(64))
ww_sample_full_id = Column(String(64), nullable=False)
rsl_number = Column(String(64))
rsl_plate = relationship("Wastewater", back_populates="samples")
rsl_plate_id = Column(INTEGER, ForeignKey("_submissions.id", ondelete="SET NULL", name="fk_WWS_sample_id"))
collection_date = Column(TIMESTAMP) #: Date submission received
testing_type = Column(String(64))
site_status = Column(String(64))
@@ -21,7 +22,35 @@ class Sample(Base):
seq_submitted = Column(BOOLEAN())
ww_seq_run_id = Column(String(64))
sample_type = Column(String(8))
well_number = Column(String(8))
def to_string(self):
return f"{self.well_number}: {self.ww_sample_full_id}"
def to_sub_dict(self):
return {
"well": self.well_number,
"name": self.ww_sample_full_id,
}
class BCSample(Base):
    """ORM row for one bacterial-culture sample.

    Stored in ``_bc_samples`` and tied to its plate through ``rsl_plate_id``
    (foreign key to ``_submissions.id``) and the ``rsl_plate`` relationship.
    """

    __tablename__ = "_bc_samples"

    id = Column(INTEGER, primary_key=True) #: primary key
    well_number = Column(String(8))
    sample_id = Column(String(64), nullable=False)
    organism = Column(String(64))
    concentration = Column(String(16))
    rsl_plate_id = Column(
        INTEGER,
        ForeignKey("_submissions.id", ondelete="SET NULL", name="fk_BCS_sample_id"),
    )
    rsl_plate = relationship("BacterialCulture", back_populates="samples")

    def to_string(self):
        """Return a one-line ``"<well>: <sample id> - <organism>"`` label."""
        description = f"{self.sample_id} - {self.organism}"
        return f"{self.well_number}: {description}"

    def to_sub_dict(self):
        """Return the ``well`` / ``name`` mapping for this sample."""
        display_name = f"{self.sample_id} - ({self.organism})"
        return dict(well=self.well_number, name=display_name)

View File

@@ -15,10 +15,10 @@ class BasicSubmission(Base):
submitter_plate_num = Column(String(127), unique=True) #: The number given to the submission by the submitting lab
submitted_date = Column(TIMESTAMP) #: Date submission received
submitting_lab = relationship("Organization", back_populates="submissions") #: client
submitting_lab_id = Column(INTEGER, ForeignKey("_organizations.id", ondelete="SET NULL"))
submitting_lab_id = Column(INTEGER, ForeignKey("_organizations.id", ondelete="SET NULL", name="fk_BS_sublab_id"))
sample_count = Column(INTEGER) #: Number of samples in the submission
extraction_kit = relationship("KitType", back_populates="submissions") #: The extraction kit used
extraction_kit_id = Column(INTEGER, ForeignKey("_kits.id", ondelete="SET NULL"))
extraction_kit_id = Column(INTEGER, ForeignKey("_kits.id", ondelete="SET NULL", name="fk_BS_extkit_id"))
submission_type = Column(String(32))
technician = Column(String(64))
# Move this into custom types?
@@ -94,10 +94,12 @@ class BasicSubmission(Base):
class BacterialCulture(BasicSubmission):
    """Bacterial-culture submission: a BasicSubmission plus a control and its samples."""

    control = relationship("Control", back_populates="submissions") #: A control sample added to submission
    control_id = Column(
        INTEGER,
        ForeignKey("_control_samples.id", ondelete="SET NULL", name="fk_BC_control_id"),
    )
    # List of BCSample rows; the other side of the link is BCSample.rsl_plate.
    samples = relationship("BCSample", back_populates="rsl_plate", uselist=True)
    __mapper_args__ = dict(polymorphic_identity="bacterial_culture", polymorphic_load="inline")
class Wastewater(BasicSubmission):
    """Wastewater submission: a BasicSubmission plus its list of WWSample rows."""

    # The plate/sample link is carried by WWSample.rsl_plate_id. The former
    # `sample_id` column (a String foreign key to the INTEGER `_ww_samples.id`)
    # and the relationship to the renamed "Sample" model are intentionally gone:
    # keeping a foreign key on this side as well would give the mapper two
    # opposite FK paths between the same pair of tables.
    samples = relationship("WWSample", back_populates="rsl_plate", uselist=True)
    __mapper_args__ = {"polymorphic_identity": "wastewater", "polymorphic_load": "inline"}

View File

@@ -1,9 +1,12 @@
import pandas as pd
from pathlib import Path
from datetime import datetime
from backend.db.models.samples import WWSample, BCSample
import logging
from collections import OrderedDict
import re
import numpy as np
from datetime import date
import uuid
logger = logging.getLogger(f"submissions.{__name__}")
@@ -21,8 +24,8 @@ class SheetParser(object):
self.xl = None
self.sub = OrderedDict()
self.sub['submission_type'] = self._type_decider()
parse = getattr(self, f"_parse_{self.sub['submission_type'].lower()}")
parse()
parse_sub = getattr(self, f"_parse_{self.sub['submission_type'].lower()}")
parse_sub()
def _type_decider(self):
try:
@@ -46,6 +49,7 @@ class SheetParser(object):
self.sub['submitting_lab'] = submission_info.iloc[0][3]
self.sub['sample_count'] = str(submission_info.iloc[2][3])
self.sub['extraction_kit'] = submission_info.iloc[3][3]
return submission_info
@@ -71,6 +75,9 @@ class SheetParser(object):
self.sub['lot_ethanol'] = submission_info.iloc[10][6]
self.sub['lot_positive_control'] = submission_info.iloc[103][1]
self.sub['lot_plate'] = submission_info.iloc[12][6]
sample_parser = SampleParser(submission_info.iloc[15:111])
sample_parse = getattr(sample_parser, f"parse_{self.sub['submission_type'].lower()}_samples")
self.sub['samples'] = sample_parse()
def _parse_wastewater(self):
@@ -102,6 +109,9 @@ class SheetParser(object):
self.sub['lot_pre_mix_2'] = qprc_info.iloc[2][14]
self.sub['lot_positive_control'] = qprc_info.iloc[3][14]
self.sub['lot_ddh2o'] = qprc_info.iloc[4][14]
sample_parser = SampleParser(submission_info.iloc[16:40])
sample_parse = getattr(sample_parser, f"parse_{self.sub['submission_type'].lower()}_samples")
self.sub['samples'] = sample_parse()
# tech = str(submission_info.iloc[11][1])
# if tech == "nan":
# tech = "Unknown"
@@ -120,3 +130,59 @@ class SheetParser(object):
# self.sub['lot_ethanol'] = submission_info.iloc[10][6]
# self.sub['lot_positive_control'] = None #submission_info.iloc[103][1]
# self.sub['lot_plate'] = submission_info.iloc[12][6]
class SampleParser(object):
    """Convert the sample rows of a submission sheet into sample model objects."""

    def __init__(self, df:pd.DataFrame) -> None:
        """
        Args:
            df (pd.DataFrame): Slice of the submission sheet containing the sample rows.
        """
        # One dict per row, keyed by the dataframe's column labels.
        self.samples = df.to_dict("records")

    @staticmethod
    def _is_missing(value) -> bool:
        """Report whether a spreadsheet cell holds no usable value.

        Unlike ``np.isnan`` guarded by ``except TypeError``, this also recognises
        ``None`` and ``NaT`` as empty instead of letting them through as data.

        Args:
            value: Raw cell value taken from a row dict.

        Returns:
            bool: True for NaN, NaT or None; False for anything else.
        """
        try:
            return bool(pd.isna(value))
        except (TypeError, ValueError):
            # Non-scalar cell content: pd.isna returned an array with no single
            # truth value. Treat it as present, as the original check did.
            return False

    def parse_bacterial_culture_samples(self) -> "list[BCSample]":
        """Build a BCSample for every row that has a sample id.

        Returns:
            list[BCSample]: Samples in sheet order; rows with an empty sample id are skipped.
        """
        new_list = []
        for sample in self.samples:
            sample_id = sample['Unnamed: 1']
            logger.debug(f"Sample object: {sample_id} = {type(sample_id)}")
            if self._is_missing(sample_id):
                # Unused well on the plate map.
                continue
            new = BCSample()
            new.well_number = sample['This section to be filled in completely by submittor']
            new.sample_id = sample_id
            new.organism = sample['Unnamed: 2']
            new.concentration = sample['Unnamed: 3']
            new_list.append(new)
        return new_list

    def parse_wastewater_samples(self) -> "list[WWSample]":
        """Build a WWSample for every row, filling in defaults for empty cells.

        Returns:
            list[WWSample]: One sample per row, in sheet order.
        """
        new_list = []
        for sample in self.samples:
            new = WWSample()
            new.ww_processing_num = sample['Unnamed: 2']
            full_id = sample['Unnamed: 3']
            # An empty full id is replaced by a random placeholder so the
            # attribute is never left blank.
            new.ww_sample_full_id = uuid.uuid4().hex.upper() if self._is_missing(full_id) else full_id
            new.rsl_number = sample['Unnamed: 9']
            collected = sample['Unnamed: 5']
            # An empty collection date falls back to today's date.
            new.collection_date = date.today() if self._is_missing(collected) else collected
            new.testing_type = sample['Unnamed: 6']
            new.site_status = sample['Unnamed: 7']
            new.notes = str(sample['Unnamed: 8'])
            new.well_number = sample['Unnamed: 1']
            new_list.append(new)
        return new_list

View File

@@ -90,6 +90,7 @@ class App(QMainWindow):
def importSubmission(self):
logger.debug(self.ctx)
self.samples = []
home_dir = str(Path(self.ctx["directory_path"]))
fname = Path(QFileDialog.getOpenFileName(self, 'Open file', home_dir)[0])
logger.debug(f"Attempting to parse file: {fname}")
@@ -107,27 +108,31 @@ class App(QMainWindow):
(?P<extraction_kit>^extraction_kit$) |
(?P<submitted_date>^submitted_date$) |
(?P<submitting_lab>)^submitting_lab$ |
(?P<samples>)^samples$ |
(?P<reagent>^lot_.*$)
""", re.VERBOSE)
for item in prsr.sub:
logger.debug(f"Item: {item}")
self.table_widget.formlayout.addWidget(QLabel(item.replace("_", " ").title()))
try:
mo = variable_parser.fullmatch(item).lastgroup
except AttributeError:
mo = "other"
print(f"Mo: {mo}")
match mo:
case 'submitting_lab':
self.table_widget.formlayout.addWidget(QLabel(item.replace("_", " ").title()))
print(f"{item}: {prsr.sub[item]}")
add_widget = QComboBox()
labs = [item.__str__() for item in lookup_all_orgs(ctx=self.ctx)]
try:
labs = difflib.get_close_matches(prsr.sub[item], labs, len(labs), 0)
except TypeError:
except (TypeError, ValueError):
pass
add_widget.addItems(labs)
case 'extraction_kit':
self.table_widget.formlayout.addWidget(QLabel(item.replace("_", " ").title()))
if prsr.sub[item] == 'nan':
msg = QMessageBox()
# msg.setIcon(QMessageBox.critical)
@@ -143,10 +148,12 @@ class App(QMainWindow):
else:
add_widget.addItems(['bacterial_culture'])
case 'submitted_date':
self.table_widget.formlayout.addWidget(QLabel(item.replace("_", " ").title()))
add_widget = QDateEdit(calendarPopup=True)
# add_widget.setDateTime(QDateTime.date(prsr.sub[item]))
add_widget.setDate(prsr.sub[item])
case 'reagent':
self.table_widget.formlayout.addWidget(QLabel(item.replace("_", " ").title()))
add_widget = QComboBox()
add_widget.setEditable(True)
# Ensure that all reagenttypes have a name that matches the items in the excel parser
@@ -169,7 +176,12 @@ class App(QMainWindow):
relevant_reagents.insert(0, str(prsr.sub[item]))
logger.debug(f"Relevant reagents: {relevant_reagents}")
add_widget.addItems(relevant_reagents)
# TODO: make samples not appear in frame.
case 'samples':
print(f"{item}: {prsr.sub[item]}")
self.samples = prsr.sub[item]
case _:
self.table_widget.formlayout.addWidget(QLabel(item.replace("_", " ").title()))
add_widget = QLineEdit()
add_widget.setText(str(prsr.sub[item]).replace("_", " "))
self.table_widget.formlayout.addWidget(add_widget)
@@ -215,6 +227,7 @@ class App(QMainWindow):
if wanted_reagent != None:
parsed_reagents.append(wanted_reagent)
logger.debug(info)
info['samples'] = self.samples
base_submission = construct_submission_info(ctx=self.ctx, info_dict=info)
for reagent in parsed_reagents:
base_submission.reagents.append(reagent)

View File

@@ -4,7 +4,7 @@ from PyQt6.QtWidgets import (
QDialogButtonBox, QDateEdit, QTableView,
QTextEdit, QSizePolicy, QWidget,
QGridLayout, QPushButton, QSpinBox,
QScrollBar
QScrollBar, QScrollArea
)
from PyQt6.QtCore import Qt, QDate, QAbstractTableModel
from PyQt6.QtGui import QFontMetrics
@@ -135,6 +135,7 @@ class SubmissionsSheet(QTableView):
# print(index)
value=index.sibling(index.row(),0).data()
dlg = SubmissionDetails(ctx=self.ctx, id=value)
# dlg.show()
if dlg.exec():
pass
@@ -146,32 +147,42 @@ class SubmissionDetails(QDialog):
super().__init__()
self.setWindowTitle("Submission Details")
interior = QScrollArea()
interior.setParent(self)
data = lookup_submission_by_id(ctx=ctx, id=id)
base_dict = data.to_dict()
base_dict['reagents'] = [item.to_sub_dict() for item in data.reagents]
base_dict['samples'] = [item.to_sub_dict() for item in data.samples]
template = env.get_template("submission_details.txt")
text = template.render(sub=base_dict)
txt_field = QTextEdit(self)
txt_field.setReadOnly(True)
txt_field.document().setPlainText(text)
txt_editor = QTextEdit(self)
font = txt_field.document().defaultFont()
txt_editor.setReadOnly(True)
txt_editor.document().setPlainText(text)
font = txt_editor.document().defaultFont()
fontMetrics = QFontMetrics(font)
textSize = fontMetrics.size(0, txt_field.toPlainText())
textSize = fontMetrics.size(0, txt_editor.toPlainText())
w = textSize.width() + 10
h = textSize.height() + 10
txt_field.setMinimumSize(w, h)
txt_field.setMaximumSize(w, h)
txt_field.resize(w, h)
txt_editor.setMinimumSize(w, h)
txt_editor.setMaximumSize(w, h)
txt_editor.resize(w, h)
interior.resize(w,900)
# txt_field.setSizePolicy(QSizePolicy.Policy.Preferred, QSizePolicy.Policy.MinimumExpanding)
QBtn = QDialogButtonBox.StandardButton.Ok
# QBtn = QDialogButtonBox.StandardButton.Ok
# self.buttonBox = QDialogButtonBox(QBtn)
# self.buttonBox.accepted.connect(self.accept)
txt_field.setText(text)
txt_editor.setText(text)
# txt_editor.verticalScrollBar()
interior.setWidget(txt_editor)
self.layout = QVBoxLayout()
self.layout.addWidget(txt_field)
self.setFixedSize(w, 900)
# self.layout.addWidget(txt_editor)
# self.layout.addStretch()
self.layout.addWidget(interior)
class ReportDatePicker(QDialog):

View File

@@ -1,9 +1,13 @@
{# Plain-text submission summary: scalar fields first, then the reagents and samples lists. -#}
{% for key, value in sub.items() if key != 'reagents' and key != 'samples' %}
{{ key }}: {{ value }}
{% endfor %}
Reagents:
{% for item in sub['reagents'] %}
{{ item['type'] }}: {{ item['lot'] }} (EXP: {{ item['expiry'] }})
{% endfor %}
Samples:
{% for item in sub['samples'] %}
{{ item['well'] }}: {{ item['name'] }}
{% endfor %}