Database updates, scraping samples from Excel sheets

This commit is contained in:
Landon Wark
2023-01-19 11:22:57 -06:00
parent e763e7273d
commit d17ee5862d
10 changed files with 206 additions and 53 deletions

View File

@@ -16,6 +16,12 @@ def get_kits_by_use( ctx:dict, kittype_str:str|None) -> list:
def store_submission(ctx:dict, base_submission:models.BasicSubmission) -> None:
for sample in base_submission.samples:
sample.rsl_plate = base_submission
try:
ctx['database_session'].add(sample)
except IntegrityError:
continue
ctx['database_session'].add(base_submission)
try:
ctx['database_session'].commit()
@@ -53,6 +59,11 @@ def construct_submission_info(ctx:dict, info_dict:dict) -> models.BasicSubmissio
# Because of unique constraint, the submitter plate number cannot be None, so...
if info_dict[item] == None:
info_dict[item] = uuid.uuid4().hex.upper()
field_value = info_dict[item]
# case "samples":
# for sample in info_dict[item]:
# instance.samples.append(sample)
# continue
case _:
field_value = info_dict[item]
try:
@@ -60,6 +71,7 @@ def construct_submission_info(ctx:dict, info_dict:dict) -> models.BasicSubmissio
except AttributeError:
print(f"Could not set attribute: {item} to {info_dict[item]}")
continue
# print(instance.__dict__)
return instance
# looked_up = []
# for reagent in reagents:
@@ -184,7 +196,11 @@ def create_kit_from_yaml(ctx:dict, exp:dict) -> None:
Args:
ctx (dict): Context dictionary passed down from frontend
exp (dict): Experiment dictionary created from yaml file
"""
"""
try:
exp['password'].decode()
except (UnicodeDecodeError, AttributeError):
exp['password'] = exp['password'].encode()
if base64.b64encode(exp['password']) != b'cnNsX3N1Ym1pNTVpb25z':
print(f"Not the correct password.")
return

View File

@@ -8,4 +8,4 @@ from .controls import Control, ControlType
from .kits import KitType, ReagentType, Reagent
from .submissions import BasicSubmission, BacterialCulture, Wastewater
from .organizations import Organization, Contact
from .samples import Sample
from .samples import WWSample, BCSample

View File

@@ -3,15 +3,16 @@ from sqlalchemy import Column, String, TIMESTAMP, text, JSON, INTEGER, ForeignKe
from sqlalchemy.orm import relationship, relationships
class Sample(Base):
class WWSample(Base):
__tablename__ = "_ww_samples"
id = Column(INTEGER, primary_key=True) #: primary key
ww_processing_num = Column(String(64))
ww_sample_full_id = Column(String(64))
ww_sample_full_id = Column(String(64), nullable=False)
rsl_number = Column(String(64))
rsl_plate = relationship("Wastewater", back_populates="samples")
rsl_plate_id = Column(INTEGER, ForeignKey("_submissions.id", ondelete="SET NULL", name="fk_WWS_sample_id"))
collection_date = Column(TIMESTAMP) #: Date submission received
testing_type = Column(String(64))
site_status = Column(String(64))
@@ -21,7 +22,35 @@ class Sample(Base):
seq_submitted = Column(BOOLEAN())
ww_seq_run_id = Column(String(64))
sample_type = Column(String(8))
well_number = Column(String(8))
def to_string(self):
return f"{self.well_number}: {self.ww_sample_full_id}"
def to_sub_dict(self):
return {
"well": self.well_number,
"name": self.ww_sample_full_id,
}
class BCSample(Base):
    """
    A single bacterial culture sample, stored in the "_bc_samples" table.

    Each sample points back at the submission it belongs to through
    rsl_plate_id / rsl_plate.
    """

    __tablename__ = "_bc_samples"

    # NOTE: keep the column declarations in this order; declaration order is
    # the order the columns get in the table.
    id = Column(INTEGER, primary_key=True) #: primary key
    well_number = Column(String(8)) #: well of the sample, as entered on the sheet
    sample_id = Column(String(64), nullable=False) #: required sample identifier
    organism = Column(String(64))
    concentration = Column(String(16)) #: kept as text, not as a number
    # Foreign key to the owning submission; set to NULL if that submission is deleted.
    rsl_plate_id = Column(INTEGER, ForeignKey("_submissions.id", ondelete="SET NULL", name="fk_BCS_sample_id"))
    # Other side of BacterialCulture.samples
    rsl_plate = relationship("BacterialCulture", back_populates="samples")

    def to_string(self):
        """
        Create a one line, human readable summary of the sample.

        Returns:
            str: "<well>: <sample id> - <organism>"
        """
        summary = f"{self.well_number}: {self.sample_id} - {self.organism}"
        return summary

    def to_sub_dict(self):
        """
        Create the dictionary representation used when listing samples of a submission.

        Returns:
            dict: "well" holds the well number, "name" holds sample id and organism
        """
        return dict(
            well=self.well_number,
            name=f"{self.sample_id} - ({self.organism})",
        )

View File

@@ -15,10 +15,10 @@ class BasicSubmission(Base):
submitter_plate_num = Column(String(127), unique=True) #: The number given to the submission by the submitting lab
submitted_date = Column(TIMESTAMP) #: Date submission received
submitting_lab = relationship("Organization", back_populates="submissions") #: client
submitting_lab_id = Column(INTEGER, ForeignKey("_organizations.id", ondelete="SET NULL"))
submitting_lab_id = Column(INTEGER, ForeignKey("_organizations.id", ondelete="SET NULL", name="fk_BS_sublab_id"))
sample_count = Column(INTEGER) #: Number of samples in the submission
extraction_kit = relationship("KitType", back_populates="submissions") #: The extraction kit used
extraction_kit_id = Column(INTEGER, ForeignKey("_kits.id", ondelete="SET NULL"))
extraction_kit_id = Column(INTEGER, ForeignKey("_kits.id", ondelete="SET NULL", name="fk_BS_extkit_id"))
submission_type = Column(String(32))
technician = Column(String(64))
# Move this into custom types?
@@ -94,10 +94,12 @@ class BasicSubmission(Base):
class BacterialCulture(BasicSubmission):
    """
    Submission subclass mapped under the "bacterial_culture" polymorphic identity.
    """
    control = relationship("Control", back_populates="submissions") #: A control sample added to submission
    control_id = Column(INTEGER, ForeignKey("_control_samples.id", ondelete="SET NULL", name="fk_BC_control_id"))
    # List of BCSample objects on this plate. No sample foreign key column is
    # declared on this side of the relationship.
    samples = relationship("BCSample", back_populates="rsl_plate", uselist=True)
    __mapper_args__ = dict(polymorphic_identity="bacterial_culture", polymorphic_load="inline")
class Wastewater(BasicSubmission):
samples = relationship("Sample", back_populates="rsl_plate")
sample_id = Column(String, ForeignKey("_ww_samples.id", ondelete="SET NULL", name="fk_WW_sample_id"))
samples = relationship("WWSample", back_populates="rsl_plate", uselist=True)
# ww_sample_id = Column(String, ForeignKey("_ww_samples.id", ondelete="SET NULL", name="fk_WW_sample_id"))
__mapper_args__ = {"polymorphic_identity": "wastewater", "polymorphic_load": "inline"}

View File

@@ -1,9 +1,12 @@
import pandas as pd
from pathlib import Path
from datetime import datetime
from backend.db.models.samples import WWSample, BCSample
import logging
from collections import OrderedDict
import re
import numpy as np
from datetime import date
import uuid
logger = logging.getLogger(f"submissions.{__name__}")
@@ -21,8 +24,8 @@ class SheetParser(object):
self.xl = None
self.sub = OrderedDict()
self.sub['submission_type'] = self._type_decider()
parse = getattr(self, f"_parse_{self.sub['submission_type'].lower()}")
parse()
parse_sub = getattr(self, f"_parse_{self.sub['submission_type'].lower()}")
parse_sub()
def _type_decider(self):
try:
@@ -46,6 +49,7 @@ class SheetParser(object):
self.sub['submitting_lab'] = submission_info.iloc[0][3]
self.sub['sample_count'] = str(submission_info.iloc[2][3])
self.sub['extraction_kit'] = submission_info.iloc[3][3]
return submission_info
@@ -71,7 +75,10 @@ class SheetParser(object):
self.sub['lot_ethanol'] = submission_info.iloc[10][6]
self.sub['lot_positive_control'] = submission_info.iloc[103][1]
self.sub['lot_plate'] = submission_info.iloc[12][6]
sample_parser = SampleParser(submission_info.iloc[15:111])
sample_parse = getattr(sample_parser, f"parse_{self.sub['submission_type'].lower()}_samples")
self.sub['samples'] = sample_parse()
def _parse_wastewater(self):
# submission_info = self.xl.parse("WW Submissions (ENTER HERE)")
@@ -102,6 +109,9 @@ class SheetParser(object):
self.sub['lot_pre_mix_2'] = qprc_info.iloc[2][14]
self.sub['lot_positive_control'] = qprc_info.iloc[3][14]
self.sub['lot_ddh2o'] = qprc_info.iloc[4][14]
sample_parser = SampleParser(submission_info.iloc[16:40])
sample_parse = getattr(sample_parser, f"parse_{self.sub['submission_type'].lower()}_samples")
self.sub['samples'] = sample_parse()
# tech = str(submission_info.iloc[11][1])
# if tech == "nan":
# tech = "Unknown"
@@ -119,4 +129,60 @@ class SheetParser(object):
# self.sub['lot_isopropanol'] = submission_info.iloc[9][6]
# self.sub['lot_ethanol'] = submission_info.iloc[10][6]
# self.sub['lot_positive_control'] = None #submission_info.iloc[103][1]
# self.sub['lot_plate'] = submission_info.iloc[12][6]
# self.sub['lot_plate'] = submission_info.iloc[12][6]
class SampleParser(object):
    """
    Converts the sample rows of a submission sheet into sample model objects.

    The rows are stored as a list of dictionaries (one per dataframe row,
    keyed by column label) and converted on demand by the parse_*_samples
    methods. The caller picks the method by submission type.
    """

    def __init__(self, df:pd.DataFrame) -> None:
        """
        Args:
            df (pd.DataFrame): Slice of the sheet holding only the sample rows
        """
        self.samples = df.to_dict("records")

    @staticmethod
    def _is_blank(value) -> bool:
        """
        Check whether a cell read from the sheet is empty.

        pd.isna is used instead of np.isnan because np.isnan raises TypeError
        for None and pd.NaT, which made empty date cells look filled in.

        Args:
            value: Scalar cell value taken from a row dictionary

        Returns:
            bool: True for NaN, None and NaT, False for everything else
        """
        try:
            return bool(pd.isna(value))
        except (TypeError, ValueError):
            # Non-scalar values (e.g. a list) have no single truth value;
            # they are certainly not an empty cell.
            return False

    def parse_bacterial_culture_samples(self) -> "list[BCSample]":
        """
        Build a BCSample for every row that has a sample id.

        Rows without a sample id are dropped; BCSample.sample_id is not
        nullable, so such rows could not be stored anyway.

        Returns:
            list[BCSample]: Samples in sheet order
        """
        new_list = []
        for sample in self.samples:
            sample_id = sample['Unnamed: 1']
            logger.debug("Sample object: %s = %s", sample_id, type(sample_id))
            if self._is_blank(sample_id):
                continue
            new = BCSample()
            # The first column carries the sheet's instruction banner as its label.
            new.well_number = sample['This section to be filled in completely by submittor']
            new.sample_id = sample_id
            new.organism = sample['Unnamed: 2']
            new.concentration = sample['Unnamed: 3']
            new_list.append(new)
        return new_list

    def parse_wastewater_samples(self) -> "list[WWSample]":
        """
        Build a WWSample for every row, filling in defaults for empty cells.

        Every row is kept. An empty full sample id is replaced with a random
        uppercase hex id and an empty collection date with today's date.

        Returns:
            list[WWSample]: Samples in sheet order
        """
        new_list = []
        for sample in self.samples:
            new = WWSample()
            new.ww_processing_num = sample['Unnamed: 2']
            full_id = sample['Unnamed: 3']
            if self._is_blank(full_id):
                # ww_sample_full_id is not nullable, so invent a placeholder id.
                full_id = uuid.uuid4().hex.upper()
            new.ww_sample_full_id = full_id
            new.rsl_number = sample['Unnamed: 9']
            collected = sample['Unnamed: 5']
            if self._is_blank(collected):
                collected = date.today()
            new.collection_date = collected
            new.testing_type = sample['Unnamed: 6']
            new.site_status = sample['Unnamed: 7']
            # NOTE(review): an empty notes cell is stored as the text "nan" - confirm this is wanted.
            new.notes = str(sample['Unnamed: 8'])
            new.well_number = sample['Unnamed: 1']
            new_list.append(new)
        return new_list