diff --git a/CHANGELOG.md b/CHANGELOG.md index c31fbda..c4ca625 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## 202308.03 + +- Large restructure of database to allow better relationships between kits/reagenttypes & submissions/samples. + ## 202307.04 - Large scale refactor to clean up code. diff --git a/TODO.md b/TODO.md index 4233afd..647b4c1 100644 --- a/TODO.md +++ b/TODO.md @@ -1,13 +1,20 @@ +- [ ] Clean up & document code... again. + - Including paring down the logging.debugs +- [ ] Fix Tests... again. - [ ] Rebuild database -- [ ] Fix Wastewater/Artic double submission problem -- [X] Fix tests. -- [X] Reorganize wastewater artic parser. -- [ ] Streamline addition of new kits by moving as much into DB as possible. -- [X] Large scale refactor (2023-07-24). +- [ ] Provide more generic names for reagenttypes in kits and move specific names to reagents. + - ex. Instead of "omega_e-z_96_disruptor_plate_c_plus" in reagent types, have "omega_plate" and have "omega_e-z_96_disruptor_plate_c_plus" in reagent name. + - Maybe rename to "ReagentRoles"? + - If I'm doing this, since the forms have a different layout for each submission type I should rewrite the parser to use the locations given in database... Which I should do anyway +- [x] Fix Wastewater/Artic double submission problem +- [x] Fix tests. +- [x] Reorganize wastewater artic parser. +- [x] Streamline addition of new kits by moving as much into DB as possible. +- [x] Large scale refactor (2023-07-24). - [x] Make plate details from html, same as export. - [x] Put in SN controls I guess. - [x] Code clean-up and refactor (2023-07). -- [X] Migrate context settings to pydantic-settings model. +- [x] Migrate context settings to pydantic-settings model. - [x] Insert column into reagent type to indicate if reagent is required for kit. - Needed to keep interchangeable bead plates from being forced into forms. - [x] Migrate the parser.sub dictionary to pydantic models. diff --git a/alembic.ini b/alembic.ini index a4160e5..7de27cb 100644 --- a/alembic.ini +++ b/alembic.ini @@ -56,7 +56,7 @@ version_path_separator = os # Use os.pathsep. Default configuration used for ne # output_encoding = utf-8 ; sqlalchemy.url = sqlite:///L:\Robotics Laboratory Support\Submissions\submissions.db -sqlalchemy.url = sqlite:///C:\Users\lwark\Documents\Archives\DB_backups\submissions-20230726.db +sqlalchemy.url = sqlite:///C:\Users\lwark\Documents\Archives\Submissions_app_backups\DB_backups\submissions-new.db ; sqlalchemy.url = sqlite:///C:\Users\lwark\Documents\python\submissions\tests\test_assets\submissions_test.db diff --git a/alembic/versions/06e2c8dc4889_database_rebuild.py b/alembic/versions/b879020f2a91_rebuild_database.py similarity index 77% rename from alembic/versions/06e2c8dc4889_database_rebuild.py rename to alembic/versions/b879020f2a91_rebuild_database.py index 6f3473d..1a60c55 100644 --- a/alembic/versions/06e2c8dc4889_database_rebuild.py +++ b/alembic/versions/b879020f2a91_rebuild_database.py @@ -1,8 +1,8 @@ -"""database_rebuild +"""rebuild database -Revision ID: 06e2c8dc4889 +Revision ID: b879020f2a91 Revises: -Create Date: 2023-07-26 14:08:18.809998 +Create Date: 2023-08-02 09:16:12.792995 """ from alembic import op @@ -10,7 +10,7 @@ import sqlalchemy as sa # revision identifiers, used by Alembic. 
-revision = '06e2c8dc4889' +revision = 'b879020f2a91' down_revision = None branch_labels = None depends_on = None @@ -23,8 +23,6 @@ def upgrade() -> None: sa.Column('name', sa.String(length=64), nullable=True), sa.Column('email', sa.String(length=64), nullable=True), sa.Column('phone', sa.String(length=32), nullable=True), - sa.Column('organization_id', sa.INTEGER(), nullable=True), - sa.ForeignKeyConstraint(['organization_id'], ['_organizations.id'], name='fk_contact_org_id', ondelete='SET NULL'), sa.PrimaryKeyConstraint('id') ) op.create_table('_control_types', @@ -42,8 +40,6 @@ def upgrade() -> None: sa.Column('mutable_cost_column', sa.FLOAT(precision=2), nullable=True), sa.Column('mutable_cost_sample', sa.FLOAT(precision=2), nullable=True), sa.Column('constant_cost', sa.FLOAT(precision=2), nullable=True), - sa.Column('reagent_types_id', sa.INTEGER(), nullable=True), - sa.ForeignKeyConstraint(['reagent_types_id'], ['_reagent_types.id'], name='fk_KT_reagentstype_id', ondelete='SET NULL', use_alter=True), sa.PrimaryKeyConstraint('id'), sa.UniqueConstraint('name') ) @@ -51,20 +47,38 @@ def upgrade() -> None: sa.Column('id', sa.INTEGER(), nullable=False), sa.Column('name', sa.String(length=64), nullable=True), sa.Column('cost_centre', sa.String(), nullable=True), - sa.Column('contact_ids', sa.INTEGER(), nullable=True), - sa.ForeignKeyConstraint(['contact_ids'], ['_contacts.id'], name='fk_org_contact_id', ondelete='SET NULL'), sa.PrimaryKeyConstraint('id') ) op.create_table('_reagent_types', sa.Column('id', sa.INTEGER(), nullable=False), sa.Column('name', sa.String(length=64), nullable=True), - sa.Column('kit_id', sa.INTEGER(), nullable=True), sa.Column('eol_ext', sa.Interval(), nullable=True), - sa.Column('required', sa.INTEGER(), server_default='1', nullable=True), sa.Column('last_used', sa.String(length=32), nullable=True), - sa.ForeignKeyConstraint(['kit_id'], ['_kits.id'], name='fk_RT_kits_id', ondelete='SET NULL', use_alter=True), sa.PrimaryKeyConstraint('id') ) + op.create_table('_samples', + sa.Column('id', sa.INTEGER(), nullable=False), + sa.Column('submitter_id', sa.String(length=64), nullable=False), + sa.Column('sample_type', sa.String(length=32), nullable=True), + sa.Column('ww_processing_num', sa.String(length=64), nullable=True), + sa.Column('rsl_number', sa.String(length=64), nullable=True), + sa.Column('collection_date', sa.TIMESTAMP(), nullable=True), + sa.Column('testing_type', sa.String(length=64), nullable=True), + sa.Column('site_status', sa.String(length=64), nullable=True), + sa.Column('notes', sa.String(length=2000), nullable=True), + sa.Column('ct_n1', sa.FLOAT(precision=2), nullable=True), + sa.Column('ct_n2', sa.FLOAT(precision=2), nullable=True), + sa.Column('n1_status', sa.String(length=32), nullable=True), + sa.Column('n2_status', sa.String(length=32), nullable=True), + sa.Column('seq_submitted', sa.BOOLEAN(), nullable=True), + sa.Column('ww_seq_run_id', sa.String(length=64), nullable=True), + sa.Column('pcr_results', sa.JSON(), nullable=True), + sa.Column('well_24', sa.String(length=8), nullable=True), + sa.Column('organism', sa.String(length=64), nullable=True), + sa.Column('concentration', sa.String(length=16), nullable=True), + sa.PrimaryKeyConstraint('id'), + sa.UniqueConstraint('submitter_id') + ) op.create_table('_discounts', sa.Column('id', sa.INTEGER(), nullable=False), sa.Column('kit_id', sa.INTEGER(), nullable=True), @@ -90,11 +104,14 @@ def upgrade() -> None: sa.ForeignKeyConstraint(['type_id'], ['_reagent_types.id'], name='fk_reagent_type_id', 
ondelete='SET NULL'), sa.PrimaryKeyConstraint('id') ) - op.create_table('_reagentstypes_kittypes', - sa.Column('reagent_types_id', sa.INTEGER(), nullable=True), - sa.Column('kits_id', sa.INTEGER(), nullable=True), + op.create_table('_reagenttypes_kittypes', + sa.Column('reagent_types_id', sa.INTEGER(), nullable=False), + sa.Column('kits_id', sa.INTEGER(), nullable=False), + sa.Column('uses', sa.JSON(), nullable=True), + sa.Column('required', sa.INTEGER(), nullable=True), sa.ForeignKeyConstraint(['kits_id'], ['_kits.id'], ), - sa.ForeignKeyConstraint(['reagent_types_id'], ['_reagent_types.id'], ) + sa.ForeignKeyConstraint(['reagent_types_id'], ['_reagent_types.id'], ), + sa.PrimaryKeyConstraint('reagent_types_id', 'kits_id') ) op.create_table('_submissions', sa.Column('id', sa.INTEGER(), nullable=False), @@ -119,17 +136,6 @@ def upgrade() -> None: sa.UniqueConstraint('rsl_plate_num'), sa.UniqueConstraint('submitter_plate_num') ) - op.create_table('_bc_samples', - sa.Column('id', sa.INTEGER(), nullable=False), - sa.Column('well_number', sa.String(length=8), nullable=True), - sa.Column('sample_id', sa.String(length=64), nullable=False), - sa.Column('organism', sa.String(length=64), nullable=True), - sa.Column('concentration', sa.String(length=16), nullable=True), - sa.Column('rsl_plate_id', sa.INTEGER(), nullable=True), - sa.ForeignKeyConstraint(['rsl_plate_id'], ['_submissions.id'], name='fk_BCS_sample_id', ondelete='SET NULL'), - sa.PrimaryKeyConstraint('id'), - sa.UniqueConstraint('sample_id') - ) op.create_table('_control_samples', sa.Column('id', sa.INTEGER(), nullable=False), sa.Column('parent_id', sa.String(), nullable=True), @@ -153,45 +159,29 @@ def upgrade() -> None: sa.ForeignKeyConstraint(['reagent_id'], ['_reagents.id'], ), sa.ForeignKeyConstraint(['submission_id'], ['_submissions.id'], ) ) - op.create_table('_ww_samples', - sa.Column('id', sa.INTEGER(), nullable=False), - sa.Column('ww_processing_num', sa.String(length=64), nullable=True), - sa.Column('ww_sample_full_id', sa.String(length=64), nullable=False), - sa.Column('rsl_number', sa.String(length=64), nullable=True), - sa.Column('rsl_plate_id', sa.INTEGER(), nullable=True), - sa.Column('collection_date', sa.TIMESTAMP(), nullable=True), - sa.Column('well_number', sa.String(length=8), nullable=True), - sa.Column('testing_type', sa.String(length=64), nullable=True), - sa.Column('site_status', sa.String(length=64), nullable=True), - sa.Column('notes', sa.String(length=2000), nullable=True), - sa.Column('ct_n1', sa.FLOAT(precision=2), nullable=True), - sa.Column('ct_n2', sa.FLOAT(precision=2), nullable=True), - sa.Column('n1_status', sa.String(length=32), nullable=True), - sa.Column('n2_status', sa.String(length=32), nullable=True), - sa.Column('seq_submitted', sa.BOOLEAN(), nullable=True), - sa.Column('ww_seq_run_id', sa.String(length=64), nullable=True), - sa.Column('sample_type', sa.String(length=8), nullable=True), - sa.Column('pcr_results', sa.JSON(), nullable=True), - sa.Column('well_24', sa.String(length=8), nullable=True), - sa.Column('artic_well_number', sa.String(length=8), nullable=True), - sa.ForeignKeyConstraint(['rsl_plate_id'], ['_submissions.id'], name='fk_WWS_submission_id', ondelete='SET NULL'), - sa.PrimaryKeyConstraint('id'), - sa.UniqueConstraint('ww_sample_full_id') + op.create_table('_submission_sample', + sa.Column('sample_id', sa.INTEGER(), nullable=False), + sa.Column('submission_id', sa.INTEGER(), nullable=False), + sa.Column('row', sa.INTEGER(), nullable=True), + sa.Column('column', sa.INTEGER(), 
nullable=True), + sa.ForeignKeyConstraint(['sample_id'], ['_samples.id'], ), + sa.ForeignKeyConstraint(['submission_id'], ['_submissions.id'], ), + sa.PrimaryKeyConstraint('sample_id', 'submission_id') ) # ### end Alembic commands ### def downgrade() -> None: # ### commands auto generated by Alembic - please adjust! ### - op.drop_table('_ww_samples') + op.drop_table('_submission_sample') op.drop_table('_reagents_submissions') op.drop_table('_control_samples') - op.drop_table('_bc_samples') op.drop_table('_submissions') - op.drop_table('_reagentstypes_kittypes') + op.drop_table('_reagenttypes_kittypes') op.drop_table('_reagents') op.drop_table('_orgs_contacts') op.drop_table('_discounts') + op.drop_table('_samples') op.drop_table('_reagent_types') op.drop_table('_organizations') op.drop_table('_kits') diff --git a/alembic/versions/da94eca9d381_polymorpherizing_associations.py b/alembic/versions/da94eca9d381_polymorpherizing_associations.py new file mode 100644 index 0000000..5450e58 --- /dev/null +++ b/alembic/versions/da94eca9d381_polymorpherizing_associations.py @@ -0,0 +1,56 @@ +"""polymorpherizing associations + +Revision ID: da94eca9d381 +Revises: b879020f2a91 +Create Date: 2023-08-03 13:30:34.056316 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import sqlite + +# revision identifiers, used by Alembic. +revision = 'da94eca9d381' +down_revision = 'b879020f2a91' +branch_labels = None +depends_on = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + with op.batch_alter_table('_samples', schema=None) as batch_op: + batch_op.drop_column('ct_n2') + batch_op.drop_column('n1_status') + batch_op.drop_column('pcr_results') + batch_op.drop_column('n2_status') + batch_op.drop_column('ct_n1') + + with op.batch_alter_table('_submission_sample', schema=None) as batch_op: + batch_op.add_column(sa.Column('base_sub_type', sa.String(), nullable=True)) + batch_op.add_column(sa.Column('ct_n1', sa.FLOAT(precision=2), nullable=True)) + batch_op.add_column(sa.Column('ct_n2', sa.FLOAT(precision=2), nullable=True)) + batch_op.add_column(sa.Column('n1_status', sa.String(length=32), nullable=True)) + batch_op.add_column(sa.Column('n2_status', sa.String(length=32), nullable=True)) + batch_op.add_column(sa.Column('pcr_results', sa.JSON(), nullable=True)) + + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! 
### + with op.batch_alter_table('_submission_sample', schema=None) as batch_op: + batch_op.drop_column('pcr_results') + batch_op.drop_column('n2_status') + batch_op.drop_column('n1_status') + batch_op.drop_column('ct_n2') + batch_op.drop_column('ct_n1') + batch_op.drop_column('base_sub_type') + + with op.batch_alter_table('_samples', schema=None) as batch_op: + batch_op.add_column(sa.Column('ct_n1', sa.FLOAT(), nullable=True)) + batch_op.add_column(sa.Column('n2_status', sa.VARCHAR(length=32), nullable=True)) + batch_op.add_column(sa.Column('pcr_results', sqlite.JSON(), nullable=True)) + batch_op.add_column(sa.Column('n1_status', sa.VARCHAR(length=32), nullable=True)) + batch_op.add_column(sa.Column('ct_n2', sa.FLOAT(), nullable=True)) + + # ### end Alembic commands ### diff --git a/src/submissions/__init__.py b/src/submissions/__init__.py index 31cffb3..f4f8f2b 100644 --- a/src/submissions/__init__.py +++ b/src/submissions/__init__.py @@ -4,7 +4,7 @@ from pathlib import Path # Version of the realpython-reader package __project__ = "submissions" -__version__ = "202307.4b" +__version__ = "202308.1b" __author__ = {"name":"Landon Wark", "email":"Landon.Wark@phac-aspc.gc.ca"} __copyright__ = "2022-2023, Government of Canada" diff --git a/src/submissions/backend/db/functions.py b/src/submissions/backend/db/functions.py index 26604e5..be8c1dd 100644 --- a/src/submissions/backend/db/functions.py +++ b/src/submissions/backend/db/functions.py @@ -3,15 +3,16 @@ Convenience functions for interacting with the database. ''' from . import models -from .models.kits import reagenttypes_kittypes, KitType -from .models.submissions import reagents_submissions, BasicSubmission +# from .models.kits import KitType +# from .models.submissions import BasicSample, reagents_submissions, BasicSubmission, SubmissionSampleAssociation +# from .models import submissions import pandas as pd import sqlalchemy.exc import sqlite3 import logging from datetime import date, datetime, timedelta -from sqlalchemy import and_ -from sqlalchemy import JSON, event +from sqlalchemy import and_, JSON, event +from sqlalchemy.exc import IntegrityError, OperationalError, SAWarning from sqlalchemy.engine import Engine import json from getpass import getuser @@ -19,6 +20,7 @@ import numpy as np import yaml from pathlib import Path from tools import Settings, check_regex_match, RSLNamer +from typing import List @@ -32,7 +34,7 @@ def set_sqlite_pragma(dbapi_connection, connection_record): cursor.close() -def store_submission(ctx:Settings, base_submission:models.BasicSubmission) -> None|dict: +def store_submission(ctx:Settings, base_submission:models.BasicSubmission, samples:List[dict]=[]) -> None|dict: """ Upserts submissions into database @@ -47,26 +49,37 @@ def store_submission(ctx:Settings, base_submission:models.BasicSubmission) -> No # Add all samples to sample table typer = RSLNamer(ctx=ctx, instr=base_submission.rsl_plate_num) base_submission.rsl_plate_num = typer.parsed_name - for sample in base_submission.samples: - logger.debug(f"Typer: {typer.submission_type}") - logger.debug(f"sample going in: {type(sample)}\n{sample.__dict__}") - # Suuuuuper hacky way to be sure that the artic doesn't overwrite the ww plate in a ww sample - # need something more elegant - if "_artic" not in typer.submission_type: - sample.rsl_plate = base_submission - else: - logger.debug(f"{sample.ww_sample_full_id} is an ARTIC sample.") - # base_submission.samples.remove(sample) - # sample.rsl_plate = sample.rsl_plate - # sample.artic_rsl_plate = 
base_submission - logger.debug(f"Attempting to add sample: {sample.to_string()}") - try: - # ctx['database_session'].add(sample) - ctx.database_session.add(sample) - except (sqlite3.IntegrityError, sqlalchemy.exc.IntegrityError) as e: - logger.debug(f"Hit an integrity error : {e}") - continue - logger.debug(f"Here is the sample to be stored in the DB: {sample.__dict__}") + # for sample in samples: + # instance = sample['sample'] + # logger.debug(f"Typer: {typer.submission_type}") + # logger.debug(f"sample going in: {type(sample['sample'])}\n{sample['sample'].__dict__}") + # # Suuuuuper hacky way to be sure that the artic doesn't overwrite the ww plate in a ww sample + # # need something more elegant + # # if "_artic" not in typer.submission_type: + # # sample.rsl_plate = base_submission + # # else: + # # logger.debug(f"{sample.ww_sample_full_id} is an ARTIC sample.") + # # # base_submission.samples.remove(sample) + # # # sample.rsl_plate = sample.rsl_plate + # # # sample.artic_rsl_plate = base_submission + # # logger.debug(f"Attempting to add sample: {sample.to_string()}") + # # try: + # # ctx['database_session'].add(sample) + # # ctx.database_session.add(instance) + # # ctx.database_session.commit() + # # logger.debug(f"Submitter id: {sample['sample'].submitter_id} and table id: {sample['sample'].id}") + # logger.debug(f"Submitter id: {instance.submitter_id} and table id: {instance.id}") + # assoc = SubmissionSampleAssociation(submission=base_submission, sample=instance, row=sample['row'], column=sample['column']) + + # # except (sqlite3.IntegrityError, sqlalchemy.exc.IntegrityError) as e: + # # logger.debug(f"Hit an integrity error : {e}") + # # continue + # try: + # base_submission.submission_sample_associations.append(assoc) + # except IntegrityError as e: + # logger.critical(e) + # continue + # logger.debug(f"Here is the sample to be stored in the DB: {sample.__dict__}") # Add submission to submission table # ctx['database_session'].add(base_submission) ctx.database_session.add(base_submission) @@ -148,14 +161,15 @@ def construct_submission_info(ctx:Settings, info_dict:dict) -> models.BasicSubmi code = 1 msg = "This submission already exists.\nWould you like to overwrite?" for item in info_dict: - logger.debug(f"Setting {item} to {info_dict[item]}") + value = info_dict[item] + logger.debug(f"Setting {item} to {value}") # set fields based on keys in dictionary match item: case "extraction_kit": - q_str = info_dict[item] - logger.debug(f"Looking up kit {q_str}") + # q_str = info_dict[item] + logger.debug(f"Looking up kit {value}") try: - field_value = lookup_kittype_by_name(ctx=ctx, name=q_str) + field_value = lookup_kittype_by_name(ctx=ctx, name=value) except (sqlite3.IntegrityError, sqlalchemy.exc.IntegrityError) as e: logger.error(f"Hit an integrity error looking up kit type: {e}") logger.error(f"Details: {e.__dict__}") @@ -164,29 +178,62 @@ def construct_submission_info(ctx:Settings, info_dict:dict) -> models.BasicSubmi else: msg = "SQL integrity error of unknown origin." 
return instance, dict(code=2, message=msg) - logger.debug(f"Got {field_value} for kit {q_str}") + logger.debug(f"Got {field_value} for kit {value}") case "submitting_lab": - q_str = info_dict[item].replace(" ", "_").lower() - logger.debug(f"Looking up organization: {q_str}") - field_value = lookup_org_by_name(ctx=ctx, name=q_str) - logger.debug(f"Got {field_value} for organization {q_str}") + value = value.replace(" ", "_").lower() + logger.debug(f"Looking up organization: {value}") + field_value = lookup_org_by_name(ctx=ctx, name=value) + logger.debug(f"Got {field_value} for organization {value}") case "submitter_plate_num": # Because of unique constraint, there will be problems with # multiple submissions named 'None', so... # Should be depreciated with use of pydantic validator - logger.debug(f"Submitter plate id: {info_dict[item]}") + logger.debug(f"Submitter plate id: {value}") # if info_dict[item] == None or info_dict[item] == "None" or info_dict[item] == "": # logger.debug(f"Got None as a submitter plate number, inserting random string to preserve database unique constraint.") # info_dict[item] = uuid.uuid4().hex.upper() - field_value = info_dict[item] + field_value = value + case "samples": + for sample in value: + sample_instance = lookup_sample_by_submitter_id(ctx=ctx, submitter_id=sample['sample'].submitter_id) + if sample_instance == None: + sample_instance = sample['sample'] + else: + logger.warning(f"Sample {sample} already exists, creating association.") + if sample_instance in instance.samples: + logger.error(f"Looks like there's a duplicate sample on this plate: {sample_instance.submitter_id}!") + continue + try: + with ctx.database_session.no_autoflush: + try: + logger.debug(f"Here is the sample instance type: {sample_instance.sample_type}") + try: + assoc = getattr(models, f"{sample_instance.sample_type.replace('_sample', '').replace('_', ' ').title().replace(' ', '')}Association") + except AttributeError as e: + assoc = models.SubmissionSampleAssociation + # assoc = models.SubmissionSampleAssociation(submission=instance, sample=sample_instance, row=sample['row'], column=sample['column']) + assoc = assoc(submission=instance, sample=sample_instance, row=sample['row'], column=sample['column']) + instance.submission_sample_associations.append(assoc) + except IntegrityError: + logger.error(f"Hit integrity error for: {sample}") + continue + except SAWarning: + logger.error(f"Looks like the association already exists for submission: {instance} and sample: {sample_instance}") + continue + except IntegrityError as e: + logger.critical(e) + continue + continue case _: - field_value = info_dict[item] + field_value = value # insert into field try: setattr(instance, item, field_value) except AttributeError: logger.debug(f"Could not set attribute: {item} to {info_dict[item]}") continue + except KeyError: + continue # calculate cost of the run: immutable cost + mutable times number of columns # This is now attached to submission upon creation to preserve at-run costs incase of cost increase in the future. 
try: @@ -202,8 +249,9 @@ def construct_submission_info(ctx:Settings, info_dict:dict) -> models.BasicSubmi logger.debug("Checking and applying discounts...") discounts = [item.amount for item in lookup_discounts_by_org_and_kit(ctx=ctx, kit_id=instance.extraction_kit.id, lab_id=instance.submitting_lab.id)] logger.debug(f"We got discounts: {discounts}") - discounts = sum(discounts) - instance.run_cost = instance.run_cost - discounts + if len(discounts) > 0: + discounts = sum(discounts) + instance.run_cost = instance.run_cost - discounts except Exception as e: logger.error(f"An unknown exception occurred when calculating discounts: {e}") # We need to make sure there's a proper rsl plate number @@ -307,10 +355,15 @@ def lookup_kittype_by_name(ctx:Settings, name:str) -> models.KitType: Returns: models.KitType: retrieved kittype """ + if isinstance(name, dict): + name = name['value'] logger.debug(f"Querying kittype: {name}") # return ctx['database_session'].query(models.KitType).filter(models.KitType.name==name).first() return ctx.database_session.query(models.KitType).filter(models.KitType.name==name).first() +def lookup_kittype_by_id(ctx:Settings, id:int) -> models.KitType: + return ctx.database_session.query(models.KitType).filter(models.KitType.id==id).first() + def lookup_regent_by_type_name(ctx:Settings, type_name:str) -> list[models.Reagent]: """ Lookup reagents by their type name @@ -519,18 +572,21 @@ def create_kit_from_yaml(ctx:Settings, exp:dict) -> dict: # look_up = ctx['database_session'].query(models.ReagentType).filter(models.ReagentType.name==r).first() look_up = ctx.database_session.query(models.ReagentType).filter(models.ReagentType.name==r).first() if look_up == None: - rt = models.ReagentType(name=r.replace(" ", "_").lower(), eol_ext=timedelta(30*exp[type]['kits'][kt]['reagenttypes'][r]['eol_ext']), kits=[kit], required=1) + # rt = models.ReagentType(name=r.replace(" ", "_").lower(), eol_ext=timedelta(30*exp[type]['kits'][kt]['reagenttypes'][r]['eol_ext']), kits=[kit], required=1) + rt = models.ReagentType(name=r.replace(" ", "_").lower(), eol_ext=timedelta(30*exp[type]['kits'][kt]['reagenttypes'][r]['eol_ext']), last_used="") else: rt = look_up - rt.kits.append(kit) + # rt.kits.append(kit) # add this because I think it's necessary to get proper back population - try: - kit.reagent_types_id.append(rt.id) - except AttributeError as e: - logger.error(f"Error appending reagent id to kit.reagent_types_id: {e}, creating new.") + # try: + # kit.reagent_types_id.append(rt.id) + # except AttributeError as e: + # logger.error(f"Error appending reagent id to kit.reagent_types_id: {e}, creating new.") # kit.reagent_types_id = [rt.id] + assoc = models.KitTypeReagentTypeAssociation(kit_type=kit, reagent_type=rt, uses=kit.used_for) # ctx['database_session'].add(rt) ctx.database_session.add(rt) + kit.kit_reagenttype_associations.append(assoc) logger.debug(f"Kit construction reagent type: {rt.__dict__}") logger.debug(f"Kit construction kit: {kit.__dict__}") # ctx['database_session'].add(kit) @@ -727,19 +783,25 @@ def delete_submission_by_id(ctx:Settings, id:int) -> None: yaml.dump(backup, f) except KeyError: pass - sub.reagents = [] - for sample in sub.samples: - if sample.rsl_plate == sub: - # ctx['database_session'].delete(sample) - ctx.database_session.delete(sample) - else: - logger.warning(f"Not deleting sample {sample.ww_sample_full_id} because it belongs to another plate.") + # sub.reagents = [] + # for assoc in sub.submission_sample_associations: + # # if sample.rsl_plate == sub: + # 
if sub in sample.submissions: + # # ctx['database_session'].delete(sample) + # ctx.database_session.delete(assoc) + # else: + # logger.warning(f"Not deleting sample {sample.ww_sample_full_id} because it belongs to another plate.") # ctx["database_session"].delete(sub) # ctx["database_session"].commit() + ctx.database_session.delete(sub) - ctx.database_session.commit() + try: + ctx.database_session.commit() + except (IntegrityError, OperationalError) as e: + ctx.database_session.rollback() + raise e -def lookup_ww_sample_by_rsl_sample_number(ctx:Settings, rsl_number:str) -> models.WWSample: +def lookup_ww_sample_by_rsl_sample_number(ctx:Settings, rsl_number:str) -> models.WastewaterSample: """ Retrieves wastewater sample from database by rsl sample number @@ -751,9 +813,9 @@ def lookup_ww_sample_by_rsl_sample_number(ctx:Settings, rsl_number:str) -> model models.WWSample: instance of wastewater sample """ # return ctx['database_session'].query(models.WWSample).filter(models.WWSample.rsl_number==rsl_number).first() - return ctx.database_session.query(models.WWSample).filter(models.WWSample.rsl_number==rsl_number).first() + return ctx.database_session.query(models.WastewaterSample).filter(models.WastewaterSample.rsl_number==rsl_number).first() -def lookup_ww_sample_by_ww_sample_num(ctx:Settings, sample_number:str) -> models.WWSample: +def lookup_ww_sample_by_ww_sample_num(ctx:Settings, sample_number:str) -> models.WastewaterSample: """ Retrieves wastewater sample from database by ww sample number @@ -764,9 +826,9 @@ def lookup_ww_sample_by_ww_sample_num(ctx:Settings, sample_number:str) -> models Returns: models.WWSample: instance of wastewater sample """ - return ctx.database_session.query(models.WWSample).filter(models.WWSample.ww_sample_full_id==sample_number).first() + return ctx.database_session.query(models.WastewaterSample).filter(models.WastewaterSample.submitter_id==sample_number).first() -def lookup_ww_sample_by_sub_sample_rsl(ctx:Settings, sample_rsl:str, plate_rsl:str) -> models.WWSample: +def lookup_ww_sample_by_sub_sample_rsl(ctx:Settings, sample_rsl:str, plate_rsl:str) -> models.WastewaterSample: """ Retrieves a wastewater sample from the database by its rsl sample number and parent rsl plate number. This will likely replace simply looking up by the sample rsl above cine I need to control for repeats. 
@@ -780,9 +842,10 @@ def lookup_ww_sample_by_sub_sample_rsl(ctx:Settings, sample_rsl:str, plate_rsl:s models.WWSample: Relevant wastewater object """ # return ctx['database_session'].query(models.WWSample).join(models.BasicSubmission).filter(models.BasicSubmission.rsl_plate_num==plate_rsl).filter(models.WWSample.rsl_number==sample_rsl).first() - return ctx.database_session.query(models.WWSample).join(models.BasicSubmission).filter(models.BasicSubmission.rsl_plate_num==plate_rsl).filter(models.WWSample.rsl_number==sample_rsl).first() + # return ctx.database_session.query(models.BasicSample).join(models.BasicSubmission).filter(models.BasicSubmission.rsl_plate_num==plate_rsl).filter(models.BasicSample.submitter_id==sample_rsl).first() + return ctx.database_session.query(models.BasicSample).filter(models.BasicSample.submissions.any(models.BasicSubmission.rsl_plate_num==plate_rsl)).filter(models.WastewaterSample.rsl_number==sample_rsl).first() -def lookup_ww_sample_by_sub_sample_well(ctx:Settings, sample_rsl:str, well_num:str, plate_rsl:str) -> models.WWSample: +def lookup_ww_sample_by_sub_sample_well(ctx:Settings, sample_rsl:str, well_num:str, plate_rsl:str) -> models.WastewaterSample: """ Retrieves a wastewater sample from the database by its rsl sample number and parent rsl plate number. This will likely replace simply looking up by the sample rsl above cine I need to control for repeats. @@ -800,10 +863,10 @@ def lookup_ww_sample_by_sub_sample_well(ctx:Settings, sample_rsl:str, well_num:s # .filter(models.BasicSubmission.rsl_plate_num==plate_rsl) \ # .filter(models.WWSample.rsl_number==sample_rsl) \ # .filter(models.WWSample.well_number==well_num).first() - return ctx.database_session.query(models.WWSample).join(models.BasicSubmission) \ + return ctx.database_session.query(models.WastewaterSample).join(models.BasicSubmission) \ .filter(models.BasicSubmission.rsl_plate_num==plate_rsl) \ - .filter(models.WWSample.rsl_number==sample_rsl) \ - .filter(models.WWSample.well_number==well_num).first() + .filter(models.WastewaterSample.rsl_number==sample_rsl) \ + .filter(models.WastewaterSample.well_number==well_num).first() def update_ww_sample(ctx:Settings, sample_obj:dict): """ @@ -815,25 +878,26 @@ def update_ww_sample(ctx:Settings, sample_obj:dict): """ # ww_samp = lookup_ww_sample_by_rsl_sample_number(ctx=ctx, rsl_number=sample_obj['sample']) logger.debug(f"Looking up {sample_obj['sample']} in plate {sample_obj['plate_rsl']}") - ww_samp = lookup_ww_sample_by_sub_sample_rsl(ctx=ctx, sample_rsl=sample_obj['sample'], plate_rsl=sample_obj['plate_rsl']) + # ww_samp = lookup_ww_sample_by_sub_sample_rsl(ctx=ctx, sample_rsl=sample_obj['sample'], plate_rsl=sample_obj['plate_rsl']) + assoc = lookup_ww_association_by_plate_sample(ctx=ctx, rsl_plate_num=sample_obj['plate_rsl'], rsl_sample_num=sample_obj['sample']) # ww_samp = lookup_ww_sample_by_sub_sample_well(ctx=ctx, sample_rsl=sample_obj['sample'], well_num=sample_obj['well_num'], plate_rsl=sample_obj['plate_rsl']) - if ww_samp != None: + if assoc != None: # del sample_obj['well_number'] for key, value in sample_obj.items(): # set attribute 'key' to 'value' try: - check = getattr(ww_samp, key) + check = getattr(assoc, key) except AttributeError: continue if check == None: logger.debug(f"Setting {key} to {value}") - setattr(ww_samp, key, value) + setattr(assoc, key, value) else: logger.error(f"Unable to find sample {sample_obj['sample']}") return # ctx['database_session'].add(ww_samp) # ctx["database_session"].commit() - 
ctx.database_session.add(ww_samp) + ctx.database_session.add(assoc) ctx.database_session.commit() def lookup_discounts_by_org_and_kit(ctx:Settings, kit_id:int, lab_id:int) -> list: @@ -860,7 +924,7 @@ def lookup_discounts_by_org_and_kit(ctx:Settings, kit_id:int, lab_id:int) -> lis def hitpick_plate(submission:models.BasicSubmission, plate_number:int=0) -> list: """ Creates a list of sample positions and statuses to be used by plate mapping and csv output to biomek software. - + DEPRECATED: replaced by BasicSubmission.hitpick_plate Args: submission (models.BasicSubmission): Input submission plate_number (int, optional): plate position in the series of selected plates. Defaults to 0. @@ -881,7 +945,7 @@ def hitpick_plate(submission:models.BasicSubmission, plate_number:int=0) -> list this_sample = dict( plate_number = plate_number, sample_name = samp['name'], - column = samp['col'], + column = samp['column'], row = samp['row'], positive = samp['positive'], plate_name = submission.rsl_plate_num @@ -966,7 +1030,7 @@ def lookup_last_used_reagenttype_lot(ctx:Settings, type_name:str) -> models.Reag except AttributeError: return None -def check_kit_integrity(sub:BasicSubmission|KitType, reagenttypes:list|None=None) -> dict|None: +def check_kit_integrity(sub:models.BasicSubmission|models.KitType, reagenttypes:list|None=None) -> dict|None: """ Ensures all reagents expected in kit are listed in Submission @@ -980,16 +1044,20 @@ def check_kit_integrity(sub:BasicSubmission|KitType, reagenttypes:list|None=None logger.debug(type(sub)) # What type is sub? match sub: - case BasicSubmission(): + case models.BasicSubmission(): # Get all required reagent types for this kit. - ext_kit_rtypes = [reagenttype.name for reagenttype in sub.extraction_kit.reagent_types if reagenttype.required == 1] + # ext_kit_rtypes = [reagenttype.name for reagenttype in sub.extraction_kit.reagent_types if reagenttype.required == 1] + ext_kit_rtypes = [item.name for item in sub.extraction_kit.get_reagents(required=True)] # Overwrite function parameter reagenttypes try: reagenttypes = [reagent.type.name for reagent in sub.reagents] except AttributeError as e: logger.error(f"Problem parsing reagents: {[f'{reagent.lot}, {reagent.type}' for reagent in sub.reagents]}") - case KitType(): - ext_kit_rtypes = [reagenttype.name for reagenttype in sub.reagent_types if reagenttype.required == 1] + case models.KitType(): + # ext_kit_rtypes = [reagenttype.name for reagenttype in sub.reagent_types if reagenttype.required == 1] + ext_kit_rtypes = [item.name for item in sub.get_reagents(required=True)] + case _: + raise ValueError("There was no match for the integrity object. Check that it was imported from the same models module used here, since the structural match depends on the class identity.") logger.debug(f"Kit reagents: {ext_kit_rtypes}") logger.debug(f"Submission reagents: {reagenttypes}") # check if lists are equal @@ -1003,4 +1071,83 @@ def check_kit_integrity(sub:BasicSubmission|KitType, reagenttypes:list|None=None result = None else: result = {'message' : f"The submission you are importing is missing some reagents expected by the kit.\n\nIt looks like you are missing: {[item.upper() for item in missing]}\n\nAlternatively, you may have set the wrong extraction kit.\n\nThe program will populate lists using existing reagents.\n\nPlease make sure you check the lots carefully!", 'missing': missing} - return result \ No newline at end of file + return result + +def lookup_sample_by_submitter_id(ctx:Settings, submitter_id:str) -> models.BasicSample: + """ + Retrieves a sample from the database by the id given to it by the submitter. + + Args: + ctx (Settings):
settings object containing the database session + submitter_id (str): sample id assigned by the submitter + + Returns: + BasicSample: the matching sample, or None if no match is found + """ + return ctx.database_session.query(models.BasicSample).filter(models.BasicSample.submitter_id==submitter_id).first() + +def get_all_submission_types(ctx:Settings) -> List[str]: + """ + Gets all submission type names this app can handle, taken from the 'used_for' keys of every kit. + + Args: + ctx (Settings): settings object containing the database session + + Returns: + List[str]: sorted list of unique submission type names + """ + kits = ctx.database_session.query(models.KitType).all() + uses = [list(item.used_for.keys()) for item in kits] + flat_list = [item for sublist in uses for item in sublist] + return sorted(set(flat_list)) + +def get_reagents_in_extkit(ctx:Settings, kit_name:str) -> List[str]: + """ + Gets all reagent types associated with a given extraction kit. + DEPRECATED, use kit.get_reagents() instead + + Args: + ctx (Settings): settings object containing the database session + kit_name (str): name of the extraction kit to look up + + Returns: + List[str]: reagent types used by the kit + """ + kit = lookup_kittype_by_name(ctx=ctx, name=kit_name) + return kit.get_reagents(required=False) + +def lookup_ww_association_by_plate_sample(ctx:Settings, rsl_plate_num:str, rsl_sample_num:str) -> models.SubmissionSampleAssociation: + """ + Retrieves the submission/sample association for a wastewater sample on a given plate. + + Args: + ctx (Settings): settings object containing the database session + rsl_plate_num (str): RSL plate number of the submission + rsl_sample_num (str): RSL number of the wastewater sample + + Returns: + models.SubmissionSampleAssociation: association linking the sample to the plate, or None if not found + """ + return ctx.database_session.query(models.SubmissionSampleAssociation)\ + .join(models.BasicSubmission)\ + .join(models.WastewaterSample)\ + .filter(models.BasicSubmission.rsl_plate_num==rsl_plate_num)\ + .filter(models.WastewaterSample.rsl_number==rsl_sample_num)\ + .first() + +def lookup_all_reagent_names_by_role(ctx:Settings, role_name:str) -> List[str]: + """ + Gets the names of all reagent instances belonging to a given reagent type (role). + + Args: + ctx (Settings): settings object containing the database session + role_name (str): name of the reagent type (role) + + Returns: + List[str]: names of all reagents of that role, or an empty list if the role doesn't exist + """ + role = lookup_reagenttype_by_name(ctx=ctx, rt_name=role_name) + try: + return [reagent.name for reagent in role.instances] + except AttributeError: + return [] \ No newline at end of file diff --git a/src/submissions/backend/db/models/__init__.py b/src/submissions/backend/db/models/__init__.py index ac2d028..d2758a4 100644 --- a/src/submissions/backend/db/models/__init__.py +++ b/src/submissions/backend/db/models/__init__.py @@ -7,7 +7,7 @@ Base = declarative_base() metadata = Base.metadata from .controls import Control, ControlType -from .kits import KitType, ReagentType, Reagent, Discount +from .kits import KitType, ReagentType, Reagent, Discount, KitTypeReagentTypeAssociation from .organizations import Organization, Contact -from .samples import WWSample, BCSample -from .submissions import BasicSubmission, BacterialCulture, Wastewater, WastewaterArtic +# from .samples import WWSample, BCSample, BasicSample +from .submissions import BasicSubmission, BacterialCulture, Wastewater, WastewaterArtic, WastewaterSample, BacterialCultureSample, BasicSample, SubmissionSampleAssociation, WastewaterAssociation diff --git a/src/submissions/backend/db/models/kits.py b/src/submissions/backend/db/models/kits.py index e26e240..a834414 100644 --- a/src/submissions/backend/db/models/kits.py +++ b/src/submissions/backend/db/models/kits.py @@ -4,14 +4,23 @@ All kit and reagent related models from .
import Base from sqlalchemy import Column, String, TIMESTAMP, JSON, INTEGER, ForeignKey, Interval, Table, FLOAT, CheckConstraint from sqlalchemy.orm import relationship, validates +from sqlalchemy.ext.associationproxy import association_proxy + from datetime import date import logging logger = logging.getLogger(f'submissions.{__name__}') -# Table containing reagenttype-kittype relationships -reagenttypes_kittypes = Table("_reagentstypes_kittypes", Base.metadata, Column("reagent_types_id", INTEGER, ForeignKey("_reagent_types.id")), Column("kits_id", INTEGER, ForeignKey("_kits.id"))) +# # Table containing reagenttype-kittype relationships +# reagenttypes_kittypes = Table("_reagentstypes_kittypes", Base.metadata, +# Column("reagent_types_id", INTEGER, ForeignKey("_reagent_types.id")), +# Column("kits_id", INTEGER, ForeignKey("_kits.id")), +# # The entry will look like ["Bacteria Culture":{"row":1, "column":4}] +# Column("uses", JSON), +# # is the reagent required for that kit? +# Column("required", INTEGER) +# ) class KitType(Base): @@ -25,12 +34,24 @@ class KitType(Base): submissions = relationship("BasicSubmission", back_populates="extraction_kit") #: submissions this kit was used for used_for = Column(JSON) #: list of names of sample types this kit can process cost_per_run = Column(FLOAT(2)) #: dollar amount for each full run of this kit NOTE: depreciated, use the constant and mutable costs instead - # TODO: Change below to 'mutable_cost_column' and 'mutable_cost_sample' before moving to production. mutable_cost_column = Column(FLOAT(2)) #: dollar amount per 96 well plate that can change with number of columns (reagents, tips, etc) mutable_cost_sample = Column(FLOAT(2)) #: dollar amount that can change with number of samples (reagents, tips, etc) constant_cost = Column(FLOAT(2)) #: dollar amount per plate that will remain constant (plates, man hours, etc) - reagent_types = relationship("ReagentType", back_populates="kits", uselist=True, secondary=reagenttypes_kittypes) #: reagent types this kit contains - reagent_types_id = Column(INTEGER, ForeignKey("_reagent_types.id", ondelete='SET NULL', use_alter=True, name="fk_KT_reagentstype_id")) #: joined reagent type id + # reagent_types = relationship("ReagentType", back_populates="kits", uselist=True, secondary=reagenttypes_kittypes) #: reagent types this kit contains + # reagent_types_id = Column(INTEGER, ForeignKey("_reagent_types.id", ondelete='SET NULL', use_alter=True, name="fk_KT_reagentstype_id")) #: joined reagent type id + # kit_reagenttype_association = + + kit_reagenttype_associations = relationship( + "KitTypeReagentTypeAssociation", + back_populates="kit_type", + cascade="all, delete-orphan", + ) + # association proxy of the "kit_reagenttype_associations" collection + # to the "reagenttype" attribute + reagent_types = association_proxy("kit_reagenttype_associations", "reagenttype") + + def __repr__(self) -> str: + return f"KitType({self.name})" def __str__(self) -> str: """ @@ -41,6 +62,61 @@ class KitType(Base): """ return self.name + def get_reagents(self, required:bool=False) -> list: + if required: + return [item.reagenttype for item in self.kit_reagenttype_associations if item.required == 1] + else: + return [item.reagenttype for item in self.kit_reagenttype_associations] + + + def construct_xl_map_for_use(self, use:str) -> dict: + # map = self.used_for[use] + map = {} + assocs = [item for item in self.kit_reagenttype_associations if use in item.uses] + for assoc in assocs: + try: + map[assoc.reagenttype.name] = assoc.uses[use] + except
TypeError: + continue + return map + + +class KitTypeReagentTypeAssociation(Base): + """ + table containing reagenttype/kittype associations + DOC: https://docs.sqlalchemy.org/en/14/orm/extensions/associationproxy.html + """ + __tablename__ = "_reagenttypes_kittypes" + reagent_types_id = Column(INTEGER, ForeignKey("_reagent_types.id"), primary_key=True) + kits_id = Column(INTEGER, ForeignKey("_kits.id"), primary_key=True) + uses = Column(JSON) + required = Column(INTEGER) + # reagent_type_name = Column(INTEGER, ForeignKey("_reagent_types.name")) + + kit_type = relationship(KitType, back_populates="kit_reagenttype_associations") + + # reference to the "ReagentType" object + reagenttype = relationship("ReagentType") + + def __init__(self, kit_type=None, reagent_type=None, uses=None, required=1): + self.kit_type = kit_type + self.reagenttype = reagent_type + self.uses = uses + self.required = required + + @validates('required') + def validate_required(self, key, value): + if not 0 <= value < 2: + raise ValueError(f'Invalid required value {value}. Must be 0 or 1.') + return value + + @validates('reagenttype') + def validate_reagenttype(self, key, value): + if not isinstance(value, ReagentType): + raise ValueError(f'{value} is not a reagenttype') + return value + + class ReagentType(Base): """ @@ -50,17 +126,17 @@ class ReagentType(Base): id = Column(INTEGER, primary_key=True) #: primary key name = Column(String(64)) #: name of reagent type - kit_id = Column(INTEGER, ForeignKey("_kits.id", ondelete="SET NULL", use_alter=True, name="fk_RT_kits_id")) #: id of joined kit type - kits = relationship("KitType", back_populates="reagent_types", uselist=True, foreign_keys=[kit_id]) #: kits this reagent is used in + # kit_id = Column(INTEGER, ForeignKey("_kits.id", ondelete="SET NULL", use_alter=True, name="fk_RT_kits_id")) #: id of joined kit type + # kits = relationship("KitType", back_populates="reagent_types", uselist=True, foreign_keys=[kit_id]) #: kits this reagent is used in instances = relationship("Reagent", back_populates="type") #: concrete instances of this reagent type eol_ext = Column(Interval()) #: extension of life interval - required = Column(INTEGER, server_default="1") #: sqlite boolean to determine if reagent type is essential for the kit + # required = Column(INTEGER, server_default="1") #: sqlite boolean to determine if reagent type is essential for the kit last_used = Column(String(32)) #: last used lot number of this type of reagent @validates('required') def validate_age(self, key, value): if not 0 <= value < 2: - raise ValueError(f'Invalid required value {value}') + raise ValueError(f'Invalid required value {value}.
Must be 0 or 1.') return value def __str__(self) -> str: @@ -71,6 +147,9 @@ class ReagentType(Base): str: string representing this object's name """ return self.name + + def __repr__(self): + return f"ReagentType({self.name})" class Reagent(Base): @@ -87,6 +166,13 @@ class Reagent(Base): expiry = Column(TIMESTAMP) #: expiry date - extended by eol_ext of parent programmatically submissions = relationship("BasicSubmission", back_populates="reagents", uselist=True) #: submissions this reagent is used in + def __repr__(self): + if self.name != None: + return f"Reagent({self.name}-{self.lot})" + else: + return f"Reagent({self.type.name}-{self.lot})" + + def __str__(self) -> str: """ string representing this object @@ -142,4 +228,6 @@ class Discount(Base): client = relationship("Organization") #: joined client lab client_id = Column(INTEGER, ForeignKey("_organizations.id", ondelete='SET NULL', name="fk_org_id")) name = Column(String(128)) - amount = Column(FLOAT(2)) \ No newline at end of file + amount = Column(FLOAT(2)) + + diff --git a/src/submissions/backend/db/models/organizations.py b/src/submissions/backend/db/models/organizations.py index e857e87..45a46f4 100644 --- a/src/submissions/backend/db/models/organizations.py +++ b/src/submissions/backend/db/models/organizations.py @@ -21,7 +21,7 @@ class Organization(Base): submissions = relationship("BasicSubmission", back_populates="submitting_lab") #: submissions this organization has submitted cost_centre = Column(String()) #: cost centre used by org for payment contacts = relationship("Contact", back_populates="organization", secondary=orgs_contacts) #: contacts involved with this org - contact_ids = Column(INTEGER, ForeignKey("_contacts.id", ondelete="SET NULL", name="fk_org_contact_id")) #: contact ids of this organization + # contact_ids = Column(INTEGER, ForeignKey("_contacts.id", ondelete="SET NULL", name="fk_org_contact_id")) #: contact ids of this organization def __str__(self) -> str: """ @@ -44,5 +44,5 @@ class Contact(Base): email = Column(String(64)) #: contact email phone = Column(String(32)) #: contact phone number organization = relationship("Organization", back_populates="contacts", uselist=True, secondary=orgs_contacts) #: relationship to joined organization - organization_id = Column(INTEGER, ForeignKey("_organizations.id", ondelete="SET NULL", name="fk_contact_org_id")) #: joined organization ids + # organization_id = Column(INTEGER, ForeignKey("_organizations.id", ondelete="SET NULL", name="fk_contact_org_id")) #: joined organization ids diff --git a/src/submissions/backend/db/models/samples.py b/src/submissions/backend/db/models/samples.py deleted file mode 100644 index afbfab8..0000000 --- a/src/submissions/backend/db/models/samples.py +++ /dev/null @@ -1,158 +0,0 @@ -''' -All models for individual samples. -''' -from . 
import Base -from sqlalchemy import Column, String, TIMESTAMP, INTEGER, ForeignKey, FLOAT, BOOLEAN, JSON -from sqlalchemy.orm import relationship -import logging - - -logger = logging.getLogger(f"submissions.{__name__}") - - -class WWSample(Base): - """ - Base wastewater sample - """ - __tablename__ = "_ww_samples" - - id = Column(INTEGER, primary_key=True) #: primary key - ww_processing_num = Column(String(64)) #: wastewater processing number - ww_sample_full_id = Column(String(64), nullable=False, unique=True) - rsl_number = Column(String(64)) #: rsl plate identification number - rsl_plate = relationship("Wastewater", back_populates="samples") #: relationship to parent plate - rsl_plate_id = Column(INTEGER, ForeignKey("_submissions.id", ondelete="SET NULL", name="fk_WWS_submission_id")) - collection_date = Column(TIMESTAMP) #: Date submission received - well_number = Column(String(8)) #: location on 96 well plate - # The following are fields from the sample tracking excel sheet Ruth put together. - # I have no idea when they will be implemented or how. - testing_type = Column(String(64)) - site_status = Column(String(64)) - notes = Column(String(2000)) - ct_n1 = Column(FLOAT(2)) #: AKA ct for N1 - ct_n2 = Column(FLOAT(2)) #: AKA ct for N2 - n1_status = Column(String(32)) - n2_status = Column(String(32)) - seq_submitted = Column(BOOLEAN()) - ww_seq_run_id = Column(String(64)) - sample_type = Column(String(8)) - pcr_results = Column(JSON) - well_24 = Column(String(8)) #: location on 24 well plate - artic_rsl_plate = relationship("WastewaterArtic", back_populates="samples") - artic_well_number = Column(String(8)) - - - def to_string(self) -> str: - """ - string representing sample object - - Returns: - str: string representing location and sample id - """ - return f"{self.well_number}: {self.ww_sample_full_id}" - - def to_sub_dict(self) -> dict: - """ - gui friendly dictionary - - Returns: - dict: well location and id NOTE: keys must sync with BCSample to_sub_dict below - """ - if self.ct_n1 != None and self.ct_n2 != None: - # logger.debug(f"Using well info in name.") - name = f"{self.ww_sample_full_id}\n\t- ct N1: {'{:.2f}'.format(self.ct_n1)} ({self.n1_status})\n\t- ct N2: {'{:.2f}'.format(self.ct_n2)} ({self.n2_status})" - else: - # logger.debug(f"NOT using well info in name for: {self.ww_sample_full_id}") - name = self.ww_sample_full_id - return { - "well": self.well_number, - "name": name, - } - - def to_hitpick(self) -> dict|None: - """ - Outputs a dictionary of locations if sample is positive - - Returns: - dict: dictionary of sample id, row and column in elution plate - """ - # dictionary to translate row letters into numbers - row_dict = dict(A=1, B=2, C=3, D=4, E=5, F=6, G=7, H=8) - # if either n1 or n2 is positive, include this sample - try: - positive = any(["positive" in item for item in [self.n1_status, self.n2_status]]) - except TypeError as e: - logger.error(f"Couldn't check positives for {self.rsl_number}. 
Looks like there isn't PCR data.") - return None - well_row = row_dict[self.well_number[0]] - well_col = self.well_number[1:] - # if positive: - # try: - # # The first character of the elution well is the row - # well_row = row_dict[self.elution_well[0]] - # # The remaining charagers are the columns - # well_col = self.elution_well[1:] - # except TypeError as e: - # logger.error(f"This sample doesn't have elution plate info.") - # return None - return dict(name=self.ww_sample_full_id, - row=well_row, - col=well_col, - positive=positive) - # else: - # return None - - -class BCSample(Base): - """ - base of bacterial culture sample - """ - __tablename__ = "_bc_samples" - - id = Column(INTEGER, primary_key=True) #: primary key - well_number = Column(String(8)) #: location on parent plate - sample_id = Column(String(64), nullable=False, unique=True) #: identification from submitter - organism = Column(String(64)) #: bacterial specimen - concentration = Column(String(16)) #: - rsl_plate_id = Column(INTEGER, ForeignKey("_submissions.id", ondelete="SET NULL", name="fk_BCS_sample_id")) #: id of parent plate - rsl_plate = relationship("BacterialCulture", back_populates="samples") #: relationship to parent plate - - def to_string(self) -> str: - """ - string representing object - - Returns: - str: string representing well location, sample id and organism - """ - return f"{self.well_number}: {self.sample_id} - {self.organism}" - - def to_sub_dict(self) -> dict: - """ - gui friendly dictionary - - Returns: - dict: well location and name (sample id, organism) NOTE: keys must sync with WWSample to_sub_dict above - """ - return { - "well": self.well_number, - "name": f"{self.sample_id} - ({self.organism})", - } - - def to_hitpick(self) -> dict|None: - """ - Outputs a dictionary of locations - - Returns: - dict: dictionary of sample id, row and column in elution plate - """ - # dictionary to translate row letters into numbers - row_dict = dict(A=1, B=2, C=3, D=4, E=5, F=6, G=7, H=8) - # if either n1 or n2 is positive, include this sample - well_row = row_dict[self.well_number[0]] - # The remaining charagers are the columns - well_col = self.well_number[1:] - return dict(name=self.sample_id, - row=well_row, - col=well_col, - positive=False) - diff --git a/src/submissions/backend/db/models/submissions.py b/src/submissions/backend/db/models/submissions.py index 68aefff..436edf8 100644 --- a/src/submissions/backend/db/models/submissions.py +++ b/src/submissions/backend/db/models/submissions.py @@ -3,12 +3,15 @@ Models for the main submission types. ''' import math from . import Base -from sqlalchemy import Column, String, TIMESTAMP, INTEGER, ForeignKey, Table, JSON, FLOAT -from sqlalchemy.orm import relationship +from sqlalchemy import Column, String, TIMESTAMP, INTEGER, ForeignKey, Table, JSON, FLOAT, BOOLEAN +from sqlalchemy.orm import relationship, validates import logging import json from json.decoder import JSONDecodeError from math import ceil +from sqlalchemy.ext.associationproxy import association_proxy +import uuid +from . import Base logger = logging.getLogger(f"submissions.{__name__}") @@ -40,6 +43,15 @@ class BasicSubmission(Base): uploaded_by = Column(String(32)) #: user name of person who submitted the submission to the database. 
comment = Column(JSON) + submission_sample_associations = relationship( + "SubmissionSampleAssociation", + back_populates="submission", + cascade="all, delete-orphan", + ) + # association proxy of "user_keyword_associations" collection + # to "keyword" attribute + samples = association_proxy("submission_sample_associations", "sample") + # Allows for subclassing into ex. BacterialCulture, Wastewater, etc. __mapper_args__ = { "polymorphic_identity": "basic_submission", @@ -47,6 +59,9 @@ class BasicSubmission(Base): "with_polymorphic": "*", } + def __repr__(self): + return f"{self.submission_type}Submission({self.rsl_plate_num})" + def to_string(self) -> str: """ string presenting basic submission @@ -64,6 +79,7 @@ class BasicSubmission(Base): dict: dictionary used in submissions summary """ # get lab from nested organization object + try: sub_lab = self.submitting_lab.name except AttributeError: @@ -90,10 +106,20 @@ class BasicSubmission(Base): except Exception as e: logger.error(f"We got an error retrieving reagents: {e}") reagents = None - try: - samples = [item.to_sub_dict() for item in self.samples] - except: - samples = None + # try: + # samples = [item.sample.to_sub_dict(item.__dict__()) for item in self.submission_sample_associations] + # except Exception as e: + # logger.error(f"Problem making list of samples: {e}") + # samples = None + samples = [] + for item in self.submission_sample_associations: + sample = item.sample.to_sub_dict(submission_rsl=self.rsl_plate_num) + # try: + # sample['well'] = f"{row_map[item.row]}{item.column}" + # except KeyError as e: + # logger.error(f"Unable to find row {item.row} in row_map.") + # sample['well'] = None + samples.append(sample) try: comments = self.comment except: @@ -115,11 +141,8 @@ class BasicSubmission(Base): "ext_info": ext_info, "comments": comments } - # logger.debug(f"{self.rsl_plate_num} extraction: {output['Extraction Status']}") - # logger.debug(f"Output dict: {output}") return output - def report_dict(self) -> dict: """ dictionary used in creating reports @@ -141,13 +164,6 @@ class BasicSubmission(Base): ext_kit = self.extraction_kit.name except AttributeError: ext_kit = None - # get extraction kit cost from nested kittype object - # depreciated as it will change kit cost overtime - # try: - # cost = self.extraction_kit.cost_per_run - # except AttributeError: - # cost = None - output = { "id": self.id, "Plate Number": self.rsl_plate_num, @@ -168,24 +184,47 @@ class BasicSubmission(Base): except Exception as e: logger.error(f"Column count error: {e}") # cols_count_24 = ceil(int(self.sample_count) / 3) - try: - self.run_cost = self.extraction_kit.constant_cost + (self.extraction_kit.mutable_cost_column * cols_count_96) + (self.extraction_kit.mutable_cost_sample * int(self.sample_count)) - except Exception as e: - logger.error(f"Calculation error: {e}") + if all(item == 0.0 for item in [self.extraction_kit.constant_cost, self.extraction_kit.mutable_cost_column, self.extraction_kit.mutable_cost_sample]): + try: + self.run_cost = self.extraction_kit.cost_per_run + except Exception as e: + logger.error(f"Calculation error: {e}") + else: + try: + self.run_cost = self.extraction_kit.constant_cost + (self.extraction_kit.mutable_cost_column * cols_count_96) + (self.extraction_kit.mutable_cost_sample * int(self.sample_count)) + except Exception as e: + logger.error(f"Calculation error: {e}") def calculate_column_count(self): - columns = [int(sample.well_number[-2:]) for sample in self.samples] - logger.debug(f"Here are the columns for 
{self.rsl_plate_num}: {columns}") - return max(columns) + logger.debug(f"Here's the samples: {self.samples}") + # columns = [int(sample.well_number[-2:]) for sample in self.samples] + columns = [assoc.column for assoc in self.submission_sample_associations] + logger.debug(f"Here are the columns for {self.rsl_plate_num}: {columns}") + return max(columns) + + def hitpick_plate(self, plate_number:int|None=None) -> list: + output_list = [] + for assoc in self.submission_sample_associations: + samp = assoc.sample.to_hitpick(submission_rsl=self.rsl_plate_num) + if samp != None: + if plate_number != None: + samp['plate_number'] = plate_number + samp['row'] = assoc.row + samp['column'] = assoc.column + samp['plate_name'] = self.rsl_plate_num + output_list.append(samp) + else: + continue + return output_list # Below are the custom submission types -class BacterialCulture(BasicSubmission): +class BacterialCulture(BasicSubmission): """ derivative submission type from BasicSubmission """ controls = relationship("Control", back_populates="submission", uselist=True) #: A control sample added to submission - samples = relationship("BCSample", back_populates="rsl_plate", uselist=True) + # samples = relationship("BCSample", back_populates="rsl_plate", uselist=True) __mapper_args__ = {"polymorphic_identity": "bacterial_culture", "polymorphic_load": "inline"} def to_dict(self) -> dict: @@ -197,26 +236,13 @@ class BacterialCulture(BasicSubmission): """ output = super().to_dict() output['controls'] = [item.to_sub_dict() for item in self.controls] - return output - - - # def calculate_base_cost(self): - # try: - # cols_count_96 = ceil(int(self.sample_count) / 8) - # except Exception as e: - # logger.error(f"Column count error: {e}") - # # cols_count_24 = ceil(int(self.sample_count) / 3) - # try: - # self.run_cost = self.extraction_kit.constant_cost + (self.extraction_kit.mutable_cost_column * cols_count_96) + (self.extraction_kit.mutable_cost_sample * int(self.sample_count)) - # except Exception as e: - # logger.error(f"Calculation error: {e}") - + return output class Wastewater(BasicSubmission): """ derivative submission type from BasicSubmission """ - samples = relationship("WWSample", back_populates="rsl_plate", uselist=True) + # samples = relationship("WWSample", back_populates="rsl_plate", uselist=True) pcr_info = Column(JSON) # ww_sample_id = Column(String, ForeignKey("_ww_samples.id", ondelete="SET NULL", name="fk_WW_sample_id")) __mapper_args__ = {"polymorphic_identity": "wastewater", "polymorphic_load": "inline"} @@ -235,23 +261,11 @@ class Wastewater(BasicSubmission): pass return output - # def calculate_base_cost(self): - # try: - # cols_count_96 = ceil(int(self.sample_count) / 8) + 1 #: Adding in one column to account for 24 samples + ext negatives - # except Exception as e: - # logger.error(f"Column count error: {e}") - # # cols_count_24 = ceil(int(self.sample_count) / 3) - # try: - # self.run_cost = self.extraction_kit.constant_cost + (self.extraction_kit.mutable_cost_column * cols_count_96) + (self.extraction_kit.mutable_cost_sample * int(self.sample_count)) - # except Exception as e: - # logger.error(f"Calculation error: {e}") - - class WastewaterArtic(BasicSubmission): """ derivative submission type for artic wastewater """ - samples = relationship("WWSample", back_populates="artic_rsl_plate", uselist=True) + # samples = relationship("WWSample", back_populates="artic_rsl_plate", uselist=True) # Can it use the pcr_info from the wastewater? 
Cause I can't define pcr_info here due to conflicts with that # Not necessary because we don't get any results for this procedure. __mapper_args__ = {"polymorphic_identity": "wastewater_artic", "polymorphic_load": "inline"} @@ -273,3 +287,252 @@ class WastewaterArtic(BasicSubmission): self.run_cost = const_cost + (self.extraction_kit.mutable_cost_column * cols_count_96) + (self.extraction_kit.mutable_cost_sample * int(self.sample_count)) except Exception as e: logger.error(f"Calculation error: {e}") + +class BasicSample(Base): + """ + Base of basic sample which polymorphs into BCSample and WWSample + """ + + __tablename__ = "_samples" + + id = Column(INTEGER, primary_key=True) #: primary key + submitter_id = Column(String(64), nullable=False, unique=True) #: identification from submitter + sample_type = Column(String(32)) + + sample_submission_associations = relationship( + "SubmissionSampleAssociation", + back_populates="sample", + cascade="all, delete-orphan", + ) + + __mapper_args__ = { + "polymorphic_identity": "basic_sample", + "polymorphic_on": sample_type, + "with_polymorphic": "*", + } + + submissions = association_proxy("sample_submission_associations", "submission") + + @validates('submitter_id') + def create_id(self, key, value): + logger.debug(f"validating sample_id of: {value}") + if value == None: + return uuid.uuid4().hex.upper() + else: + return value + + def __repr__(self) -> str: + return f"{self.sample_type}Sample({self.submitter_id})" + + def to_sub_dict(self, submission_rsl:str) -> dict: + row_map = {1:"A", 2:"B", 3:"C", 4:"D", 5:"E", 6:"F", 7:"G", 8:"H"} + self.assoc = [item for item in self.sample_submission_associations if item.submission.rsl_plate_num==submission_rsl][0] + sample = {} + try: + sample['well'] = f"{row_map[self.assoc.row]}{self.assoc.column}" + except KeyError as e: + logger.error(f"Unable to find row {self.assoc.row} in row_map.") + sample['well'] = None + sample['name'] = self.submitter_id + return sample + + def to_hitpick(self, submission_rsl:str) -> dict|None: + """ + Outputs a dictionary of locations + + Returns: + dict: dictionary of sample id, row and column in elution plate + """ + self.assoc = [item for item in self.sample_submission_associations if item.submission.rsl_plate_num==submission_rsl][0] + # dictionary to translate row letters into numbers + # row_dict = dict(A=1, B=2, C=3, D=4, E=5, F=6, G=7, H=8) + # if either n1 or n2 is positive, include this sample + # well_row = row_dict[self.well_number[0]] + # The remaining charagers are the columns + # well_col = self.well_number[1:] + return dict(name=self.submitter_id, + # row=well_row, + # col=well_col, + positive=False) + +class WastewaterSample(BasicSample): + """ + Base wastewater sample + """ + # __tablename__ = "_ww_samples" + + # id = Column(INTEGER, primary_key=True) #: primary key + ww_processing_num = Column(String(64)) #: wastewater processing number + # ww_sample_full_id = Column(String(64), nullable=False, unique=True) + rsl_number = Column(String(64)) #: rsl plate identification number + # rsl_plate = relationship("Wastewater", back_populates="samples") #: relationship to parent plate + # rsl_plate_id = Column(INTEGER, ForeignKey("_submissions.id", ondelete="SET NULL", name="fk_WWS_submission_id")) + collection_date = Column(TIMESTAMP) #: Date submission received + # well_number = Column(String(8)) #: location on 96 well plate + # The following are fields from the sample tracking excel sheet Ruth put together. + # I have no idea when they will be implemented or how. 
+ testing_type = Column(String(64)) + site_status = Column(String(64)) + notes = Column(String(2000)) + # ct_n1 = Column(FLOAT(2)) #: AKA ct for N1 + # ct_n2 = Column(FLOAT(2)) #: AKA ct for N2 + # n1_status = Column(String(32)) + # n2_status = Column(String(32)) + seq_submitted = Column(BOOLEAN()) + ww_seq_run_id = Column(String(64)) + # sample_type = Column(String(16)) + # pcr_results = Column(JSON) + well_24 = Column(String(8)) #: location on 24 well plate + # artic_rsl_plate = relationship("WastewaterArtic", back_populates="samples") + # artic_well_number = Column(String(8)) + + __mapper_args__ = {"polymorphic_identity": "wastewater_sample", "polymorphic_load": "inline"} + + # def to_string(self) -> str: + # """ + # string representing sample object + + # Returns: + # str: string representing location and sample id + # """ + # return f"{self.well_number}: {self.ww_sample_full_id}" + + def to_sub_dict(self, submission_rsl:str) -> dict: + """ + Gui friendly dictionary. Inherited from BasicSample + This version will include PCR status. + + Args: + submission_rsl (str): RSL plate number (passed down from the submission.to_dict() functino) + + Returns: + dict: Alphanumeric well id and sample name + """ + # Get the relevant submission association for this sample + sample = super().to_sub_dict(submission_rsl=submission_rsl) + try: + check = self.assoc.ct_n1 != None and self.assoc.ct_n2 != None + except AttributeError as e: + check = False + if check: + logger.debug(f"Using well info in name.") + sample['name'] = f"{self.submitter_id}\n\t- ct N1: {'{:.2f}'.format(self.assoc.ct_n1)} ({self.assoc.n1_status})\n\t- ct N2: {'{:.2f}'.format(self.assoc.ct_n2)} ({self.assoc.n2_status})" + else: + logger.error(f"Couldn't get the pcr info") + return sample + + def to_hitpick(self, submission_rsl:str) -> dict|None: + """ + Outputs a dictionary of locations if sample is positive + + Returns: + dict: dictionary of sample id, row and column in elution plate + """ + sample = super().to_hitpick(submission_rsl=submission_rsl) + # dictionary to translate row letters into numbers + # row_dict = dict(A=1, B=2, C=3, D=4, E=5, F=6, G=7, H=8) + # if either n1 or n2 is positive, include this sample + try: + sample['positive'] = any(["positive" in item for item in [self.assoc.n1_status, self.assoc.n2_status]]) + except (TypeError, AttributeError) as e: + logger.error(f"Couldn't check positives for {self.rsl_number}. 
Looks like there isn't PCR data.") + # return None + # positive = False + # well_row = row_dict[self.well_number[0]] + # well_col = self.well_number[1:] + # if positive: + # try: + # # The first character of the elution well is the row + # well_row = row_dict[self.elution_well[0]] + # # The remaining characters are the columns + # well_col = self.elution_well[1:] + # except TypeError as e: + # logger.error(f"This sample doesn't have elution plate info.") + # return None + return sample + + +class BacterialCultureSample(BasicSample): + """ + base of bacterial culture sample + """ + # __tablename__ = "_bc_samples" + + # id = Column(INTEGER, primary_key=True) #: primary key + # well_number = Column(String(8)) #: location on parent plate + # sample_id = Column(String(64), nullable=False, unique=True) #: identification from submitter + organism = Column(String(64)) #: bacterial specimen + concentration = Column(String(16)) #: + # sample_type = Column(String(16)) + # rsl_plate_id = Column(INTEGER, ForeignKey("_submissions.id", ondelete="SET NULL", name="fk_BCS_sample_id")) #: id of parent plate + # rsl_plate = relationship("BacterialCulture", back_populates="samples") #: relationship to parent plate + + __mapper_args__ = {"polymorphic_identity": "bacterial_culture_sample", "polymorphic_load": "inline"} + + # def to_string(self) -> str: + # """ + # string representing object + + # Returns: + # str: string representing well location, sample id and organism + # """ + # return f"{self.well_number}: {self.sample_id} - {self.organism}" + + def to_sub_dict(self, submission_rsl:str) -> dict: + """ + gui friendly dictionary + + Returns: + dict: well location and name (sample id, organism) NOTE: keys must sync with WastewaterSample to_sub_dict above + """ + sample = super().to_sub_dict(submission_rsl=submission_rsl) + sample['name'] = f"{self.submitter_id} - ({self.organism})" + # return { + # # "well": self.well_number, + # "name": f"{self.submitter_id} - ({self.organism})", + # } + return sample + + + +class SubmissionSampleAssociation(Base): + """ + table containing submission/sample associations + DOC: https://docs.sqlalchemy.org/en/14/orm/extensions/associationproxy.html + """ + __tablename__ = "_submission_sample" + sample_id = Column(INTEGER, ForeignKey("_samples.id"), primary_key=True) + submission_id = Column(INTEGER, ForeignKey("_submissions.id"), primary_key=True) + row = Column(INTEGER) + column = Column(INTEGER) + + submission = relationship(BasicSubmission, back_populates="submission_sample_associations") + + # reference to the "BasicSample" object + # sample = relationship("BasicSample") + sample = relationship(BasicSample, back_populates="sample_submission_associations") + + base_sub_type = Column(String) + # """Refers to the type of parent.""" + + __mapper_args__ = { + "polymorphic_identity": "basic_association", + "polymorphic_on": base_sub_type, + "with_polymorphic": "*", + } + + def __init__(self, submission:BasicSubmission=None, sample:BasicSample=None, row:int=1, column:int=1): + self.submission = submission + self.sample = sample + self.row = row + self.column = column + +class WastewaterAssociation(SubmissionSampleAssociation): + + ct_n1 = Column(FLOAT(2)) #: AKA ct for N1 + ct_n2 = Column(FLOAT(2)) #: AKA ct for N2 + n1_status = Column(String(32)) + n2_status = Column(String(32)) + pcr_results = Column(JSON) + + __mapper_args__ = {"polymorphic_identity": "wastewater", "polymorphic_load": "inline"} \ No newline at end of file diff --git a/src/submissions/backend/excel/parser.py 
b/src/submissions/backend/excel/parser.py index d62681b..bb260a7 100644 --- a/src/submissions/backend/excel/parser.py +++ b/src/submissions/backend/excel/parser.py @@ -6,8 +6,8 @@ import pprint from typing import Tuple import pandas as pd from pathlib import Path -from backend.db.models import WWSample, BCSample -from backend.db import lookup_ww_sample_by_ww_sample_num +from backend.db.models import WastewaterSample, BacterialCultureSample +from backend.db import lookup_ww_sample_by_ww_sample_num, lookup_sample_by_submitter_id, get_reagents_in_extkit, lookup_kittype_by_name, lookup_kittype_by_use from backend.pydant import PydSubmission, PydReagent import logging from collections import OrderedDict @@ -15,10 +15,14 @@ import re import numpy as np from datetime import date, datetime import uuid +# from submissions.backend.db.functions import from tools import check_not_nan, RSLNamer, massage_common_reagents, convert_nans_to_nones, Settings +from frontend.custom_widgets.pop_ups import SubmissionTypeSelector, KitSelector logger = logging.getLogger(f"submissions.{__name__}") +row_keys = dict(A=1, B=2, C=3, D=4, E=5, F=6, G=7, H=8) + class SheetParser(object): """ object to pull and contain data from excel file @@ -46,9 +50,13 @@ class SheetParser(object): # make decision about type of sample we have self.sub['submission_type'] = self.type_decider() # select proper parser based on sample type - parse_sub = getattr(self, f"parse_{self.sub['submission_type'].lower()}") + parse_sub = getattr(self, f"parse_{self.sub['submission_type'].replace(' ', '_').lower()}") parse_sub() # self.calculate_column_count() + self.import_kit_validation_check() + self.parse_reagents() + self.import_reagent_validation_check() + def type_decider(self) -> str: """ @@ -74,7 +82,13 @@ class SheetParser(object): return "Unknown" except Exception as e: logger.warning(f"We were unable to parse the submission type due to: {e}") - return "Unknown" + # return "Unknown" + dlg = SubmissionTypeSelector(ctx=self.ctx, title="Select Submission Type", message="We were unable to find the submission type from the excel metadata. Please select from below.") + if dlg.exec(): + return dlg.getValues() + else: + logger.warning(f"Last attempt at getting submission was rejected.") + raise ValueError("Submission Type needed.") def parse_unknown(self) -> None: """ @@ -173,9 +187,10 @@ class SheetParser(object): self.sub['reagents'] = [] reagent_range = submission_info.iloc[1:14, 4:8] logger.debug(reagent_range) - parse_reagents(reagent_range) + # parse_reagents(reagent_range) # get individual sample info sample_parser = SampleParser(self.ctx, submission_info.iloc[16:112]) + logger.debug(f"Sample type: {self.sub['submission_type']}") sample_parse = getattr(sample_parser, f"parse_{self.sub['submission_type']['value'].replace(' ', '_').lower()}_samples") logger.debug(f"Parser result: {self.sub}") self.sample_result, self.sub['samples'] = sample_parse() @@ -200,7 +215,7 @@ class SheetParser(object): """ # iterate through sub-df rows for ii, row in df.iterrows(): - logger.debug(f"Parsing this row for reagents: {row}") + # logger.debug(f"Parsing this row for reagents: {row}") if check_not_nan(row[5]): # must be prefixed with 'lot_' to be recognized by gui # regex below will remove 80% from 80% ethanol in the Wastewater kit. 
@@ -246,9 +261,9 @@ class SheetParser(object): parsed = False self.sub['technician'] = dict(value=f"Enr: {enrichment_info.columns[2]}, Ext: {extraction_info.columns[2]}, PCR: {qprc_info.columns[2]}", parsed=parsed) self.sub['reagents'] = [] - parse_reagents(enr_reagent_range) - parse_reagents(ext_reagent_range) - parse_reagents(pcr_reagent_range) + # parse_reagents(enr_reagent_range) + # parse_reagents(ext_reagent_range) + # parse_reagents(pcr_reagent_range) # parse samples sample_parser = SampleParser(self.ctx, submission_info.iloc[16:], elution_map=retrieve_elution_map()) sample_parse = getattr(sample_parser, f"parse_{self.sub['submission_type']['value'].lower()}_samples") @@ -263,7 +278,7 @@ class SheetParser(object): def parse_reagents(df:pd.DataFrame): logger.debug(df) for ii, row in df.iterrows(): - if check_not_nan(row[0]): + if check_not_nan(row[1]): try: output_key = re.sub(r"\(.+?\)", "", row[0].lower().strip().replace(' ', '_')) except AttributeError: @@ -294,7 +309,20 @@ class SheetParser(object): self.sub['reagents'].append(PydReagent(type=output_key, lot=output_var, exp=expiry)) else: continue - def massage_samples(df:pd.DataFrame) -> pd.DataFrame: + def massage_samples(df:pd.DataFrame, lookup_table:pd.DataFrame) -> pd.DataFrame: + """ + Takes sample info from Artic sheet format and converts to regular formate + + Args: + df (pd.DataFrame): Elution plate map + lookup_table (pd.DataFrame): Sample submission form map. + + Returns: + pd.DataFrame: _description_ + """ + lookup_table.set_index(lookup_table.columns[0], inplace=True) + lookup_table.columns = lookup_table.iloc[0] + logger.debug(f"Massaging samples from {lookup_table}") df.set_index(df.columns[0], inplace=True) df.columns = df.iloc[0] logger.debug(f"df to massage\n: {df}") @@ -305,10 +333,17 @@ class SheetParser(object): continue logger.debug(f"Checking {ii.name}{c}") if check_not_nan(df.loc[ii.name, int(c)]) and df.loc[ii.name, int(c)] != "EMPTY": + sample_name = df.loc[ii.name, int(c)] + row = lookup_table.loc[lookup_table['Sample Name (WW)'] == sample_name] + logger.debug(f"Looking up {row['Sample Name (LIMS)'][-1]}") try: - return_list.append(dict(sample_name=re.sub(r"\s?\(.*\)", "", df.loc[ii.name, int(c)]), \ - well=f"{ii.name}{c}", - artic_plate=self.sub['rsl_plate_num'])) + return_list.append(dict(submitter_id=re.sub(r"\s?\(.*\)", "", df.loc[ii.name, int(c)]), \ + # well=f"{ii.name}{c}", + row = row_keys[ii.name], + column = c, + artic_plate=self.sub['rsl_plate_num'], + sample_name=row['Sample Name (LIMS)'][-1] + )) except TypeError as e: logger.error(f"Got an int for {c}, skipping.") continue @@ -333,13 +368,69 @@ class SheetParser(object): self.sub['extraction_kit'] = "ArticV4.1" self.sub['technician'] = f"MM: {biomek_info.iloc[2][1]}, Bio: {biomek_info.iloc[3][1]}" self.sub['reagents'] = [] - parse_reagents(sub_reagent_range) - parse_reagents(biomek_reagent_range) - samples = massage_samples(biomek_info.iloc[22:31, 0:]) + # parse_reagents(sub_reagent_range) + # parse_reagents(biomek_reagent_range) + samples = massage_samples(biomek_info.iloc[22:31, 0:], submission_info.iloc[4:37, 1:5]) # samples = massage_samples(biomek_info.iloc[25:33, 0:]) sample_parser = SampleParser(self.ctx, pd.DataFrame.from_records(samples)) sample_parse = getattr(sample_parser, f"parse_{self.sub['submission_type']['value'].lower()}_samples") self.sample_result, self.sub['samples'] = sample_parse() + + def parse_reagents(self): + ext_kit = lookup_kittype_by_name(ctx=self.ctx, name=self.sub['extraction_kit']) + if ext_kit != None: + 
logger.debug(f"Querying extraction kit: {self.sub['submission_type']}") + reagent_map = ext_kit.construct_xl_map_for_use(use=self.sub['submission_type']['value']) + logger.debug(f"Reagent map: {pprint.pformat(reagent_map)}") + else: + raise AttributeError("No extraction kit found, unable to parse reagents") + for sheet in self.xl.sheet_names: + df = self.xl.parse(sheet) + relevant = {k:v for k,v in reagent_map.items() if sheet in reagent_map[k]['sheet']} + logger.debug(f"relevant map for {sheet}: {pprint.pformat(relevant)}") + if relevant == {}: + continue + for item in reagent_map: + try: + # role = item + name = df.iat[relevant[item]['name']['row']-2, relevant[item]['name']['column']-1] + lot = df.iat[relevant[item]['lot']['row']-2, relevant[item]['lot']['column']-1] + expiry = df.iat[relevant[item]['expiry']['row']-2, relevant[item]['expiry']['column']-1] + except (KeyError, IndexError): + continue + # self.sub['reagents'].append(dict(name=name, lot=lot, expiry=expiry, role=role)) + self.sub['reagents'].append(PydReagent(type=item, lot=lot, exp=expiry, name=name)) + + + def import_kit_validation_check(self): + """ + Enforce that the parser has an extraction kit + + Args: + ctx (Settings): Settings obj passed down from gui + parser_sub (dict): The parser dictionary before going to pydantic + + Returns: + List[PydReagent]: List of reagents + """ + if not check_not_nan(self.sub['extraction_kit']): + dlg = KitSelector(ctx=self.ctx, title="Kit Needed", message="At minimum a kit is needed. Please select one.") + if dlg.exec(): + self.sub['extraction_kit'] = dict(value=dlg.getValues(), parsed=False) + else: + raise ValueError("Extraction kit needed.") + else: + self.sub['extraction_kit'] = dict(value=self.sub['extraction_kit'], parsed=False) + + # logger.debug(f"Here is the validated parser dictionary:\n\n{pprint.pformat(self.sub)}\n\n") + # return parser_sub + + def import_reagent_validation_check(self): + """ + Enforce that only allowed reagents get into the Pydantic Model + """ + allowed_reagents = [item.name for item in get_reagents_in_extkit(ctx=self.ctx, kit_name=self.sub['extraction_kit']['value'])] + self.sub['reagents'] = [reagent for reagent in self.sub['reagents'] if reagent.type in allowed_reagents] def to_pydantic(self) -> PydSubmission: """ @@ -352,8 +443,9 @@ class SheetParser(object): psm = PydSubmission(ctx=self.ctx, filepath=self.filepath, **self.sub) delattr(psm, "filepath") return psm - + + class SampleParser(object): """ object to pull data for samples in excel sheet and construct individual sample objects @@ -373,34 +465,36 @@ class SampleParser(object): self.elution_map = elution_map - def parse_bacterial_culture_samples(self) -> Tuple[str|None, list[BCSample]]: + def parse_bacterial_culture_samples(self) -> Tuple[str|None, list[dict]]: """ construct bacterial culture specific sample objects Returns: list[BCSample]: list of sample objects """ - # logger.debug(f"Samples: {self.samples}") + # logger.debug(f"Samples: {self.samples}") + new_list = [] for sample in self.samples: - new = BCSample() - new.well_number = sample['This section to be filled in completely by submittor'] - new.sample_id = sample['Unnamed: 1'] - new.organism = sample['Unnamed: 2'] - new.concentration = sample['Unnamed: 3'] + logger.debug(f"Well info: {sample['This section to be filled in completely by submittor']}") + instance = lookup_sample_by_submitter_id(ctx=self.ctx, submitter_id=sample['Unnamed: 1']) + if instance == None: + instance = BacterialCultureSample() + well_number = sample['This section 
to be filled in completely by submittor'] + row = row_keys[well_number[0]] + column = int(well_number[1:]) + instance.submitter_id = sample['Unnamed: 1'] + instance.organism = sample['Unnamed: 2'] + instance.concentration = sample['Unnamed: 3'] # logger.debug(f"Sample object: {new.sample_id} = {type(new.sample_id)}") - logger.debug(f"Got sample_id: {new.sample_id}") + logger.debug(f"Got sample_id: {instance.submitter_id}") # need to exclude empties and blanks - try: - not_a_nan = not np.isnan(new.sample_id) and str(new.sample_id).lower() != 'blank' - except TypeError: - not_a_nan = True - if not_a_nan: - new_list.append(new) + if check_not_nan(instance.submitter_id): + new_list.append(dict(sample=instance, row=row, column=column)) return None, new_list - def parse_wastewater_samples(self) -> Tuple[str|None, list[WWSample]]: + def parse_wastewater_samples(self) -> Tuple[str|None, list[dict]]: """ construct wastewater specific sample objects @@ -408,60 +502,95 @@ class SampleParser(object): list[WWSample]: list of sample objects """ def search_df_for_sample(sample_rsl:str): - logger.debug(f"Attempting to find sample {sample_rsl} in \n {self.elution_map}") + # logger.debug(f"Attempting to find sample {sample_rsl} in \n {self.elution_map}") well = self.elution_map.where(self.elution_map==sample_rsl) # logger.debug(f"Well: {well}") well = well.dropna(how='all').dropna(axis=1, how="all") if well.size > 1: well = well.iloc[0].to_frame().dropna().T - logger.debug(f"well {sample_rsl} post processing: {well.size}: {type(well)}, {well.index[0]}, {well.columns[0]}") - self.elution_map.at[well.index[0], well.columns[0]] = np.nan + logger.debug(f"well {sample_rsl} post processing: {well.size}: {type(well)}")#, {well.index[0]}, {well.columns[0]}") try: - col = str(int(well.columns[0])).zfill(2) - except ValueError: - col = str(well.columns[0]).zfill(2) + self.elution_map.at[well.index[0], well.columns[0]] = np.nan + except IndexError as e: + logger.error(f"Couldn't find the well for {sample_rsl}") + return 0, 0 + try: + column = int(well.columns[0]) except TypeError as e: logger.error(f"Problem parsing out column number for {well}:\n {e}") - return f"{well.index[0]}{col}" + row = row_keys[well.index[0]] + return row, column new_list = [] return_val = None for sample in self.samples: - new = WWSample() - if check_not_nan(sample["Unnamed: 7"]): - new.rsl_number = sample['Unnamed: 7'] # previously Unnamed: 9 + logger.debug(f"Sample: {sample}") + instance = lookup_ww_sample_by_ww_sample_num(ctx=self.ctx, sample_number=sample['Unnamed: 3']) + if instance == None: + instance = WastewaterSample() + if check_not_nan(sample["Unnamed: 7"]): + if sample["Unnamed: 7"] != "Fixed" and sample['Unnamed: 7'] != "Flex": + instance.rsl_number = sample['Unnamed: 7'] # previously Unnamed: 9 + elif check_not_nan(sample['Unnamed: 9']): + instance.rsl_number = sample['Unnamed: 9'] # previously Unnamed: 9 + else: + logger.error(f"No RSL sample number found for this sample.") + continue + else: + logger.error(f"No RSL sample number found for this sample.") + continue + instance.ww_processing_num = sample['Unnamed: 2'] + # need to ensure we have a sample id for database integrity + # if we don't have a sample full id, make one up + if check_not_nan(sample['Unnamed: 3']): + logger.debug(f"Sample name: {sample['Unnamed: 3']}") + instance.submitter_id = sample['Unnamed: 3'] + else: + instance.submitter_id = uuid.uuid4().hex.upper() + # logger.debug(f"The Submitter sample id is: {instance.submitter_id}") + # need to ensure we get a 
collection date + if check_not_nan(sample['Unnamed: 5']): + instance.collection_date = sample['Unnamed: 5'] + else: + instance.collection_date = date.today() + # new.testing_type = sample['Unnamed: 6'] + # new.site_status = sample['Unnamed: 7'] + instance.notes = str(sample['Unnamed: 6']) # previously Unnamed: 8 + instance.well_24 = sample['Unnamed: 1'] else: - logger.error(f"No RSL sample number found for this sample.") - continue - new.ww_processing_num = sample['Unnamed: 2'] - # need to ensure we have a sample id for database integrity - # if we don't have a sample full id, make one up - if check_not_nan(sample['Unnamed: 3']): - new.ww_sample_full_id = sample['Unnamed: 3'] - else: - new.ww_sample_full_id = uuid.uuid4().hex.upper() - # need to ensure we get a collection date - if check_not_nan(sample['Unnamed: 5']): - new.collection_date = sample['Unnamed: 5'] - else: - new.collection_date = date.today() - # new.testing_type = sample['Unnamed: 6'] - # new.site_status = sample['Unnamed: 7'] - new.notes = str(sample['Unnamed: 6']) # previously Unnamed: 8 - new.well_24 = sample['Unnamed: 1'] - elu_well = search_df_for_sample(new.rsl_number) - if elu_well != None: - row = elu_well[0] - col = elu_well[1:].zfill(2) - new.well_number = f"{row}{col}" - else: - # try: - return_val += f"{new.rsl_number}\n" - # except TypeError: - # return_val = f"{new.rsl_number}\n" - new_list.append(new) + # What to do if the sample already exists + assert isinstance(instance, WastewaterSample) + if instance.rsl_number == None: + if check_not_nan(sample["Unnamed: 7"]): + if sample["Unnamed: 7"] != "Fixed" and sample['Unnamed: 7'] != "Flex": + instance.rsl_number = sample['Unnamed: 7'] # previously Unnamed: 9 + elif check_not_nan(sample['Unnamed: 9']): + instance.rsl_number = sample['Unnamed: 9'] # previously Unnamed: 9 + else: + logger.error(f"No RSL sample number found for this sample.") + if instance.collection_date == None: + if check_not_nan(sample['Unnamed: 5']): + instance.collection_date = sample['Unnamed: 5'] + else: + instance.collection_date = date.today() + if instance.notes == None: + instance.notes = str(sample['Unnamed: 6']) # previously Unnamed: 8 + if instance.well_24 == None: + instance.well_24 = sample['Unnamed: 1'] + logger.debug(f"Already have that sample, going to add association to this plate.") + row, column = search_df_for_sample(instance.rsl_number) + # if elu_well != None: + # row = elu_well[0] + # col = elu_well[1:].zfill(2) + # # new.well_number = f"{row}{col}" + # else: + # # try: + # return_val += f"{new.rsl_number}\n" + # # except TypeError: + # # return_val = f"{new.rsl_number}\n" + new_list.append(dict(sample=instance, row=row, column=column)) return return_val, new_list - def parse_wastewater_artic_samples(self) -> Tuple[str|None, list[WWSample]]: + def parse_wastewater_artic_samples(self) -> Tuple[str|None, list[WastewaterSample]]: """ The artic samples are the wastewater samples that are to be sequenced So we will need to lookup existing ww samples and append Artic well # and plate relation @@ -469,27 +598,32 @@ class SampleParser(object): Returns: list[WWSample]: list of wastewater samples to be updated """ + new_list = [] missed_samples = [] for sample in self.samples: with self.ctx.database_session.no_autoflush: instance = lookup_ww_sample_by_ww_sample_num(ctx=self.ctx, sample_number=sample['sample_name']) - logger.debug(f"Checking: {sample['sample_name']}") + logger.debug(f"Checking: {sample}") if instance == None: - logger.error(f"Unable to find match for: 
{sample['sample_name']}") + logger.error(f"Unable to find match for: {sample['sample_name']}. Making new instance using {sample['submitter_id']}.") + instance = WastewaterSample() + instance.ww_processing_num = sample['sample_name'] + instance.submitter_id = sample['submitter_id'] missed_samples.append(sample['sample_name']) - continue - logger.debug(f"Got instance: {instance.ww_sample_full_id}") - if sample['well'] != None: - row = sample['well'][0] - col = sample['well'][1:].zfill(2) - sample['well'] = f"{row}{col}" - instance.artic_well_number = sample['well'] - new_list.append(instance) + # continue + logger.debug(f"Got instance: {instance.submitter_id}") + # if sample['row'] != None: + # row = int(row_keys[sample['well'][0]]) + # if sample['column'] != None: + # column = int(sample['well'][1:]) + # sample['well'] = f"{row}{col}" + # instance.artic_well_number = sample['well'] + if instance.submitter_id != "NTC1" and instance.submitter_id != "NTC2": + new_list.append(dict(sample=instance, row=sample['row'], column=sample['column'])) missed_str = "\n\t".join(missed_samples) return f"Could not find matches for the following samples:\n\t {missed_str}", new_list - class PCRParser(object): """ Object to pull data from Design and Analysis PCR export file. diff --git a/src/submissions/backend/pydant/__init__.py b/src/submissions/backend/pydant/__init__.py index a784f78..c1913b3 100644 --- a/src/submissions/backend/pydant/__init__.py +++ b/src/submissions/backend/pydant/__init__.py @@ -1,6 +1,8 @@ import uuid from pydantic import BaseModel, field_validator, model_validator, Extra from datetime import date, datetime +from dateutil.parser import parse +from dateutil.parser._parser import ParserError from typing import List, Any from tools import RSLNamer from pathlib import Path @@ -8,6 +10,7 @@ import re import logging from tools import check_not_nan, convert_nans_to_nones, Settings import numpy as np +from backend.db.functions import lookup_submission_by_rsl_num @@ -17,6 +20,7 @@ class PydReagent(BaseModel): type: str|None lot: str|None exp: date|None + name: str|None @field_validator("type", mode='before') @classmethod @@ -37,10 +41,13 @@ class PydReagent(BaseModel): @field_validator("exp", mode="before") @classmethod def enforce_date(cls, value): - if isinstance(value, float) or value == np.nan: - raise ValueError(f"Date cannot be a float: {value}") - else: - return value + # if isinstance(value, float) or value == np.nan: + # raise ValueError(f"Date cannot be a float: {value}") + # else: + # return value + if value != None: + return convert_nans_to_nones(str(value)) + return value @@ -50,7 +57,7 @@ class PydSubmission(BaseModel, extra=Extra.allow): submission_type: str|dict|None submitter_plate_num: str|None rsl_plate_num: str|dict|None - submitted_date: date + submitted_date: date|dict submitting_lab: str|None sample_count: int extraction_kit: str|dict|None @@ -65,10 +72,19 @@ class PydSubmission(BaseModel, extra=Extra.allow): if not check_not_nan(value): value = date.today() if isinstance(value, datetime): - return value + return dict(value=value, parsed=True) if isinstance(value, date): return value - return re.sub(r"_\d$", "", value) + string = re.sub(r"(_|-)\d$", "", value) + try: + output = dict(value=parse(string).date(), parsed=False) + except ParserError as e: + logger.error(f"Problem parsing date: {e}") + try: + output = dict(value=parse(string.replace("-","")).date(), parsed=False) + except Exception as e: + logger.error(f"Problem with parse fallback: {e}") + return output 
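For reference, a minimal sketch of the date-rescue behaviour added to the submitted_date validator above; the example input is hypothetical and dateutil is assumed to be installed. It roughly mirrors the fallback logic (strip a trailing repeat suffix, parse, retry with hyphens removed) without the logging.

# Sketch only: approximates the submitted_date fallback above; example input is hypothetical.
import re
from dateutil.parser import parse
from dateutil.parser._parser import ParserError

def rescue_date(value: str) -> dict:
    # strip a trailing repeat suffix such as "_2" or "-2" before parsing
    string = re.sub(r"(_|-)\d$", "", value)
    try:
        return dict(value=parse(string).date(), parsed=False)
    except ParserError:
        # fall back to the same string with hyphens removed, e.g. "20230104"
        return dict(value=parse(string.replace("-", "")).date(), parsed=False)

print(rescue_date("2023-01-04_2"))  # {'value': datetime.date(2023, 1, 4), 'parsed': False}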
@field_validator("submitter_plate_num") @classmethod @@ -87,13 +103,20 @@ class PydSubmission(BaseModel, extra=Extra.allow): @classmethod def rsl_from_file(cls, value, values): logger.debug(f"RSL-plate initial value: {value}") + if isinstance(values.data['submission_type'], dict): + sub_type = values.data['submission_type']['value'] + elif isinstance(values.data['submission_type'], str): + sub_type = values.data['submission_type'] if check_not_nan(value): - if isinstance(value, str): + if lookup_submission_by_rsl_num(ctx=values.data['ctx'], rsl_num=value) == None: return dict(value=value, parsed=True) else: - return value + logger.warning(f"Submission number {value} already exists in DB, attempting salvage with filepath") + output = RSLNamer(ctx=values.data['ctx'], instr=values.data['filepath'].__str__(), sub_type=sub_type).parsed_name + return dict(value=output, parsed=False) else: - return dict(value=RSLNamer(ctx=values.data['ctx'], instr=values.data['filepath'].__str__()).parsed_name, parsed=False) + output = RSLNamer(ctx=values.data['ctx'], instr=values.data['filepath'].__str__(), sub_type=sub_type).parsed_name + return dict(value=output, parsed=False) @field_validator("technician", mode="before") @classmethod @@ -130,16 +153,16 @@ class PydSubmission(BaseModel, extra=Extra.allow): @field_validator("extraction_kit", mode='before') @classmethod - def get_kit_if_none(cls, value, values): - from frontend.custom_widgets.pop_ups import KitSelector + def get_kit_if_none(cls, value): + # from frontend.custom_widgets.pop_ups import KitSelector if check_not_nan(value): - return dict(value=value, parsed=True) + if isinstance(value, str): + return dict(value=value, parsed=True) + elif isinstance(value, dict): + return value else: - dlg = KitSelector(ctx=values.data['ctx'], title="Kit Needed", message="At minimum a kit is needed. 
Please select one.") - if dlg.exec(): - return dict(value=dlg.getValues(), parsed=False) - else: - raise ValueError("Extraction kit needed.") + raise ValueError(f"No extraction kit found.") + @field_validator("submission_type", mode='before') @classmethod diff --git a/src/submissions/frontend/custom_widgets/misc.py b/src/submissions/frontend/custom_widgets/misc.py index 3c984aa..eca8921 100644 --- a/src/submissions/frontend/custom_widgets/misc.py +++ b/src/submissions/frontend/custom_widgets/misc.py @@ -10,11 +10,10 @@ from PyQt6.QtWidgets import ( QHBoxLayout ) from PyQt6.QtCore import Qt, QDate, QSize -from tools import check_not_nan +from tools import check_not_nan, jinja_template_loading, Settings from ..all_window_functions import extract_form_info from backend.db import get_all_reagenttype_names, lookup_all_sample_types, create_kit_from_yaml, \ - lookup_regent_by_type_name, lookup_last_used_reagenttype_lot -from tools import jinja_template_loading + lookup_regent_by_type_name, lookup_last_used_reagenttype_lot, lookup_all_reagent_names_by_role import logging import numpy as np from .pop_ups import AlertPop @@ -28,9 +27,9 @@ class AddReagentForm(QDialog): """ dialog to add gather info about new reagent """ - def __init__(self, ctx:dict, reagent_lot:str|None, reagent_type:str|None, expiry:date|None=None) -> None: + def __init__(self, ctx:dict, reagent_lot:str|None, reagent_type:str|None, expiry:date|None=None, reagent_name:str|None=None) -> None: super().__init__() - + self.ctx = ctx if reagent_lot == None: reagent_lot = "" @@ -42,21 +41,26 @@ class AddReagentForm(QDialog): self.buttonBox.accepted.connect(self.accept) self.buttonBox.rejected.connect(self.reject) # widget to get lot info - lot_input = QLineEdit() - lot_input.setObjectName("lot") - lot_input.setText(reagent_lot) + self.name_input = QComboBox() + self.name_input.setObjectName("name") + self.name_input.setEditable(True) + self.name_input.setCurrentText(reagent_name) + # self.name_input.setText(reagent_name) + self.lot_input = QLineEdit() + self.lot_input.setObjectName("lot") + self.lot_input.setText(reagent_lot) # widget to get expiry info - exp_input = QDateEdit(calendarPopup=True) - exp_input.setObjectName('expiry') + self.exp_input = QDateEdit(calendarPopup=True) + self.exp_input.setObjectName('expiry') # if expiry is not passed in from gui, use today if expiry == None: - exp_input.setDate(QDate.currentDate()) + self.exp_input.setDate(QDate.currentDate()) else: - exp_input.setDate(expiry) + self.exp_input.setDate(expiry) # widget to get reagent type info - type_input = QComboBox() - type_input.setObjectName('type') - type_input.addItems([item.replace("_", " ").title() for item in get_all_reagenttype_names(ctx=ctx)]) + self.type_input = QComboBox() + self.type_input.setObjectName('type') + self.type_input.addItems([item.replace("_", " ").title() for item in get_all_reagenttype_names(ctx=ctx)]) logger.debug(f"Trying to find index of {reagent_type}") # convert input to user friendly string? 
try: @@ -64,18 +68,26 @@ class AddReagentForm(QDialog): except AttributeError: reagent_type = None # set parsed reagent type to top of list - index = type_input.findText(reagent_type, Qt.MatchFlag.MatchEndsWith) + index = self.type_input.findText(reagent_type, Qt.MatchFlag.MatchEndsWith) if index >= 0: - type_input.setCurrentIndex(index) + self.type_input.setCurrentIndex(index) self.layout = QVBoxLayout() + self.layout.addWidget(QLabel("Name:")) + self.layout.addWidget(self.name_input) self.layout.addWidget(QLabel("Lot:")) - self.layout.addWidget(lot_input) + self.layout.addWidget(self.lot_input) self.layout.addWidget(QLabel("Expiry:\n(use exact date on reagent.\nEOL will be calculated from kit automatically)")) - self.layout.addWidget(exp_input) + self.layout.addWidget(self.exp_input) self.layout.addWidget(QLabel("Type:")) - self.layout.addWidget(type_input) + self.layout.addWidget(self.type_input) self.layout.addWidget(self.buttonBox) self.setLayout(self.layout) + self.type_input.currentTextChanged.connect(self.update_names) + + def update_names(self): + logger.debug(self.type_input.currentText()) + self.name_input.clear() + self.name_input.addItems(item for item in lookup_all_reagent_names_by_role(ctx=self.ctx, role_name=self.type_input.currentText().replace(" ", "_").lower())) class ReportDatePicker(QDialog): @@ -111,7 +123,7 @@ class KitAdder(QWidget): """ dialog to get information to add kit """ - def __init__(self, parent_ctx:dict) -> None: + def __init__(self, parent_ctx:Settings) -> None: super().__init__() self.ctx = parent_ctx self.grid = QGridLayout() @@ -196,6 +208,7 @@ class KitAdder(QWidget): result = create_kit_from_yaml(ctx=self.ctx, exp=yml_type) msg = AlertPop(message=result['message'], status=result['status']) msg.exec() + self.__init__(self.ctx) class ReagentTypeForm(QWidget): diff --git a/src/submissions/frontend/custom_widgets/pop_ups.py b/src/submissions/frontend/custom_widgets/pop_ups.py index 001e4df..faf4c65 100644 --- a/src/submissions/frontend/custom_widgets/pop_ups.py +++ b/src/submissions/frontend/custom_widgets/pop_ups.py @@ -7,7 +7,7 @@ from PyQt6.QtWidgets import ( ) from tools import jinja_template_loading import logging -from backend.db.functions import lookup_kittype_by_use +from backend.db.functions import lookup_kittype_by_use, lookup_all_sample_types logger = logging.getLogger(f"submissions.{__name__}") @@ -78,4 +78,31 @@ class KitSelector(QDialog): # r = dlg.exec_() # if r: # return dlg.getValues() - # return None \ No newline at end of file + # return None + +class SubmissionTypeSelector(QDialog): + """ + dialog to ask yes/no questions + """ + def __init__(self, ctx:dict, title:str, message:str) -> QDialog: + super().__init__() + self.setWindowTitle(title) + self.widget = QComboBox() + sub_type = lookup_all_sample_types(ctx=ctx) + self.widget.addItems(sub_type) + self.widget.setEditable(False) + # set yes/no buttons + QBtn = QDialogButtonBox.StandardButton.Ok | QDialogButtonBox.StandardButton.Cancel + self.buttonBox = QDialogButtonBox(QBtn) + self.buttonBox.accepted.connect(self.accept) + self.buttonBox.rejected.connect(self.reject) + self.layout = QVBoxLayout() + # Text for the yes/no question + message = QLabel(message) + self.layout.addWidget(message) + self.layout.addWidget(self.widget) + self.layout.addWidget(self.buttonBox) + self.setLayout(self.layout) + + def getValues(self): + return self.widget.currentText() diff --git a/src/submissions/frontend/custom_widgets/sub_details.py b/src/submissions/frontend/custom_widgets/sub_details.py index 
525d400..c89cbd0 100644 --- a/src/submissions/frontend/custom_widgets/sub_details.py +++ b/src/submissions/frontend/custom_widgets/sub_details.py @@ -4,6 +4,7 @@ Contains widgets specific to the submission summary and submission details. import base64 from datetime import datetime from io import BytesIO +import pprint from PyQt6 import QtPrintSupport from PyQt6.QtWidgets import ( QVBoxLayout, QDialog, QTableView, @@ -215,7 +216,8 @@ class SubmissionsSheet(QTableView): if iii > 3: logger.error(f"Error: Had to truncate number of plates to 4.") continue - plate_dicto = hitpick_plate(submission=sub, plate_number=iii+1) + # plate_dicto = hitpick_plate(submission=sub, plate_number=iii+1) + plate_dicto = sub.hitpick_plate(plate_number=iii+1) if plate_dicto == None: continue image = make_plate_map(plate_dicto) @@ -236,7 +238,7 @@ class SubmissionsSheet(QTableView): return date = datetime.strftime(datetime.today(), "%Y-%m-%d") # ask for filename and save as csv. - home_dir = Path(self.ctx["directory_path"]).joinpath(f"Hitpicks_{date}.csv").resolve().__str__() + home_dir = Path(self.ctx.directory_path).joinpath(f"Hitpicks_{date}.csv").resolve().__str__() fname = Path(QFileDialog.getSaveFileName(self, "Save File", home_dir, filter=".csv")[0]) if fname.__str__() == ".": logger.debug("Saving csv was cancelled.") @@ -265,7 +267,7 @@ class SubmissionDetails(QDialog): interior.setParent(self) # get submision from db data = lookup_submission_by_id(ctx=ctx, id=id) - logger.debug(f"Submission details data:\n{data.to_dict()}") + logger.debug(f"Submission details data:\n{pprint.pformat(data.to_dict())}") self.base_dict = data.to_dict() # don't want id del self.base_dict['id'] @@ -291,7 +293,8 @@ class SubmissionDetails(QDialog): # interior.setWidget(txt_editor) self.base_dict['barcode'] = base64.b64encode(make_plate_barcode(self.base_dict['Plate Number'], width=120, height=30)).decode('utf-8') sub = lookup_submission_by_rsl_num(ctx=self.ctx, rsl_num=self.base_dict['Plate Number']) - plate_dicto = hitpick_plate(sub) + # plate_dicto = hitpick_plate(sub) + plate_dicto = sub.hitpick_plate() platemap = make_plate_map(plate_dicto) logger.debug(f"platemap: {platemap}") image_io = BytesIO() diff --git a/src/submissions/frontend/main_window_functions.py b/src/submissions/frontend/main_window_functions.py index 312e0c4..b4f2a6b 100644 --- a/src/submissions/frontend/main_window_functions.py +++ b/src/submissions/frontend/main_window_functions.py @@ -8,7 +8,8 @@ import inspect import pprint import yaml import json -from typing import Tuple +from typing import Tuple, List +from openpyxl import load_workbook from openpyxl.utils import get_column_letter from xhtml2pdf import pisa import pandas as pd @@ -25,19 +26,16 @@ from backend.db.functions import ( construct_submission_info, lookup_reagent, store_submission, lookup_submissions_by_date_range, create_kit_from_yaml, create_org_from_yaml, get_control_subtypes, get_all_controls_by_type, lookup_all_submissions_by_type, get_all_controls, lookup_submission_by_rsl_num, update_ww_sample, - check_kit_integrity + check_kit_integrity, get_reagents_in_extkit ) from backend.excel.parser import SheetParser, PCRParser from backend.excel.reports import make_report_html, make_report_xlsx, convert_data_list_to_df from backend.pydant import PydReagent from tools import check_not_nan -from .custom_widgets.pop_ups import AlertPop, QuestionAsker +from .custom_widgets.pop_ups import AlertPop, KitSelector, QuestionAsker from .custom_widgets import ReportDatePicker from .custom_widgets.misc 
import ImportReagent from .visualizations.control_charts import create_charts, construct_html -from typing import List -from openpyxl import load_workbook - logger = logging.getLogger(f"submissions.{__name__}") @@ -71,6 +69,7 @@ def import_submission_function(obj:QMainWindow) -> Tuple[QMainWindow, dict|None] except PermissionError: logger.error(f"Couldn't get permission to access file: {fname}") return obj, result + # prsr.sub = import_validation_check(ctx=obj.ctx, parser_sub=prsr.sub) # obj.column_count = prsr.column_count try: logger.debug(f"Submission dictionary: {prsr.sub}") @@ -260,7 +259,7 @@ def kit_integrity_completion_function(obj:QMainWindow) -> Tuple[QMainWindow, dic obj.missing_reagents = kit_integrity['missing'] for item in kit_integrity['missing']: obj.table_widget.formlayout.addWidget(QLabel(f"Lot {item.replace('_', ' ').title()}")) - reagent = dict(type=item, lot=None, exp=None) + reagent = dict(type=item, lot=None, exp=None, name=None) add_widget = ImportReagent(ctx=obj.ctx, reagent=PydReagent(**reagent))#item=item) obj.table_widget.formlayout.addWidget(add_widget) submit_btn = QPushButton("Submit") @@ -306,9 +305,11 @@ def submit_new_sample_function(obj:QMainWindow) -> Tuple[QMainWindow, dict]: else: # In this case we will have an empty reagent and the submission will fail kit integrity check logger.debug("Will not add reagent.") - if wanted_reagent != None: - parsed_reagents.append(wanted_reagent) - wanted_reagent.type.last_used = reagents[reagent] + # obj.ctx.database_session.rollback() + return obj, dict(message="Failed integrity check", status="critical") + # if wanted_reagent != None: + parsed_reagents.append(wanted_reagent) + wanted_reagent.type.last_used = reagents[reagent] # move samples into preliminary submission dict info['samples'] = obj.samples info['uploaded_by'] = getuser() @@ -325,6 +326,7 @@ def submit_new_sample_function(obj:QMainWindow) -> Tuple[QMainWindow, dict]: # Do not add duplicate reagents. base_submission.reagents = [] else: + obj.ctx.database_session.rollback() return obj, dict(message="Overwrite cancelled", status="Information") # code 2: No RSL plate number given case 2: @@ -340,7 +342,7 @@ def submit_new_sample_function(obj:QMainWindow) -> Tuple[QMainWindow, dict]: if kit_integrity != None: return obj, dict(message=kit_integrity['message'], status="critical") logger.debug(f"Sending submission: {base_submission.rsl_plate_num} to database.") - result = store_submission(ctx=obj.ctx, base_submission=base_submission) + result = store_submission(ctx=obj.ctx, base_submission=base_submission, samples=obj.samples) # check result of storing for issues # update summary sheet obj.table_widget.sub_wid.setData() @@ -353,7 +355,10 @@ def submit_new_sample_function(obj:QMainWindow) -> Tuple[QMainWindow, dict]: extraction_kit = lookup_kittype_by_name(obj.ctx, name=obj.ext_kit) logger.debug(f"We have the extraction kit: {extraction_kit.name}") logger.debug(f"Extraction kit map:\n\n{extraction_kit.used_for[obj.current_submission_type.replace('_', ' ')]}") - excel_map = extraction_kit.used_for[obj.current_submission_type.replace('_', ' ')] + # TODO replace below with function in KitType object. Update Kittype associations. 
+ # excel_map = extraction_kit.used_for[obj.current_submission_type.replace('_', ' ')] + excel_map = extraction_kit.construct_xl_map_for_use(obj.current_submission_type.replace('_', ' ').title()) + excel_map.update(extraction_kit.used_for[obj.current_submission_type.replace('_', ' ').title()]) input_reagents = [item.to_reagent_dict() for item in parsed_reagents] autofill_excel(obj=obj, xl_map=excel_map, reagents=input_reagents, missing_reagents=obj.missing_reagents, info=info) if hasattr(obj, 'csv'): @@ -430,7 +435,7 @@ def add_kit_function(obj:QMainWindow) -> Tuple[QMainWindow, dict]: """ result = None # setup file dialog to find yaml flie - fname = select_open_file(obj, extension="yml") + fname = select_open_file(obj, file_extension="yml") assert fname.exists() # read yaml file try: @@ -587,7 +592,7 @@ def link_controls_function(obj:QMainWindow) -> Tuple[QMainWindow, dict]: for bcs in all_bcs: logger.debug(f"Running for {bcs.rsl_plate_num}") logger.debug(f"Here is the current control: {[control.name for control in bcs.controls]}") - samples = [sample.sample_id for sample in bcs.samples] + samples = [sample.submitter_id for sample in bcs.samples] logger.debug(bcs.controls) for sample in samples: # replace below is a stopgap method because some dingus decided to add spaces in some of the ATCC49... so it looks like "ATCC 49"... @@ -897,6 +902,7 @@ def autofill_excel(obj:QMainWindow, xl_map:dict, reagents:List[dict], missing_re worksheet.cell(row=item['location']['row'], column=item['location']['column'], value=item['value']) # Hacky way to if info['submission_type'] == "Bacterial Culture": - workbook["Sample List"].cell(row=14, column=2, value=getuser()) + workbook["Sample List"].cell(row=14, column=2, value=getuser()[0:2].upper()) fname = select_save_file(obj=obj, default_name=info['rsl_plate_num'], extension="xlsx") workbook.save(filename=fname.__str__()) + diff --git a/src/submissions/templates/submission_details.html b/src/submissions/templates/submission_details.html index d3b66cb..5cc664b 100644 --- a/src/submissions/templates/submission_details.html +++ b/src/submissions/templates/submission_details.html @@ -7,28 +7,28 @@

Submission Details for {{ sub['Plate Number'] }}

   

{% for key, value in sub.items() if key not in excluded %} - {% if loop.index == 1 %} -    {{ key }}: {% if key=='Cost' %}{{ "${:,.2f}".format(value) }}{% else %}{{ value }}{% endif %}
- {% else %} + + +     {{ key }}: {% if key=='Cost' %} {{ "${:,.2f}".format(value) }}{% else %}{{ value }}{% endif %}
- {% endif %} + {% endfor %}

Reagents:

{% for item in sub['reagents'] %} - {% if loop.index == 1%} -    {{ item['type'] }}: {{ item['lot'] }} (EXP: {{ item['expiry'] }})
- {% else %} + + +     {{ item['type'] }}: {{ item['lot'] }} (EXP: {{ item['expiry'] }})
- {% endif %} + {% endfor %}

{% if sub['samples'] %}

Samples:

{% for item in sub['samples'] %} - {% if loop.index == 1 %} -    {{ item['well'] }}: {{ item['name']|replace('\n\t', '
        ') }}
- {% else %} + + +     {{ item['well'] }}: {{ item['name']|replace('\n\t', '
        ') }}
- {% endif %} + {% endfor %}

{% endif %} {% if sub['controls'] %} @@ -38,11 +38,11 @@ {% if item['kraken'] %}

   {{ item['name'] }} Top 5 Kraken Results:

{% for genera in item['kraken'] %} - {% if loop.index == 1 %} -        {{ genera['name'] }}: {{ genera['kraken_count'] }} ({{ genera['kraken_percent'] }})
- {% else %} + + +         {{ genera['name'] }}: {{ genera['kraken_count'] }} ({{ genera['kraken_percent'] }})
- {% endif %} + {% endfor %}

{% endif %} {% endfor %} @@ -51,15 +51,15 @@ {% for entry in sub['ext_info'] %}

Extraction Status:

{% for key, value in entry.items() %} - {% if loop.index == 1 %} -    {{ key|replace('_', ' ')|title() }}: {{ value }}
- {% else %} + + + {% if "column" in key %}     {{ key|replace('_', ' ')|title() }}: {{ value }}uL
{% else %}     {{ key|replace('_', ' ')|title() }}: {{ value }}
{% endif %} - {% endif %} + {% endfor %}

{% endfor %} {% endif %} @@ -71,26 +71,26 @@

qPCR Status:

{% endif %}

{% for key, value in entry.items() if key != 'imported_by'%} - {% if loop.index == 1 %} -    {{ key|replace('_', ' ')|title() }}: {{ value }}
- {% else %} + + + {% if "column" in key %}     {{ key|replace('_', ' ')|title() }}: {{ value }}uL
{% else %}     {{ key|replace('_', ' ')|title() }}: {{ value }}
{% endif %} - {% endif %} + {% endfor %}

{% endfor %} {% endif %} {% if sub['comments'] %}

Comments:

{% for entry in sub['comments'] %} - {% if loop.index == 1 %} -    {{ entry['name'] }}:
{{ entry['text'] }}
- {{ entry['time'] }}
- {% else %} + + +      {{ entry['name'] }}:
{{ entry['text'] }}
- {{ entry['time'] }}
- {% endif %} + {% endfor %}

{% endif %} {% if sub['platemap'] %} diff --git a/src/submissions/tools/__init__.py b/src/submissions/tools/__init__.py index b9c270b..2dd7d05 100644 --- a/src/submissions/tools/__init__.py +++ b/src/submissions/tools/__init__.py @@ -18,8 +18,6 @@ from sqlalchemy import create_engine from pydantic import field_validator from pydantic_settings import BaseSettings, SettingsConfigDict from typing import Any, Tuple -import __init__ as package - logger = logging.getLogger(f"submissions.{__name__}") @@ -50,7 +48,7 @@ def check_not_nan(cell_contents) -> bool: """ # check for nan as a string first try: - if "Unnamed:" in cell_contents: + if "Unnamed:" in cell_contents or "blank" in cell_contents.lower(): cell_contents = np.nan cell_contents = cell_contents.lower() except (TypeError, AttributeError): @@ -59,7 +57,6 @@ cell_contents = np.nan if cell_contents == None: cell_contents = np.nan - try: if pd.isnull(cell_contents): cell_contents = np.nan @@ -170,11 +167,12 @@ class RSLNamer(object): """ Object that will enforce proper formatting on RSL plate names. """ - def __init__(self, ctx, instr:str): + def __init__(self, ctx, instr:str, sub_type:str|None=None): self.ctx = ctx + self.submission_type = sub_type self.retrieve_rsl_number(in_str=instr) if self.submission_type != None: - parser = getattr(self, f"enforce_{self.submission_type}") + parser = getattr(self, f"enforce_{self.submission_type.lower()}") parser() self.parsed_name = self.parsed_name.replace("_", "-") @@ -187,35 +185,37 @@ """ if not isinstance(in_str, Path): in_str = Path(in_str) - out_str = in_str.stem - logger.debug(f"Attempting match of {out_str}") - logger.debug(f"The initial plate name is: {out_str}") + self.out_str = in_str.stem + logger.debug(f"Attempting match of {self.out_str}") + logger.debug(f"The initial plate name is: {self.out_str}") regex = re.compile(r""" - # (?P<wastewater>RSL(?:-|_)?WW(?:-|_)?20\d{2}-?\d{2}-?\d{2}(?:(?:_|-)\d?((?!\d)|R)?\d(?!\d))?)| - (?P<wastewater>RSL(?:-|_)?WW(?:-|_)?20\d{2}-?\d{2}-?\d{2}(?:(_|-)\d?(\D|$)R?\d?)?)| - (?P<bacterial_culture>RSL-?\d{2}-?\d{4})| - (?P<wastewater_artic>(\d{4}-\d{2}-\d{2}_(?:\d_)?artic)|(RSL(?:-|_)?AR(?:-|_)?20\d{2}-?\d{2}-?\d{2}(?:(_|-)\d?(\D|$)R?\d?)?)) - """, flags = re.IGNORECASE | re.VERBOSE) - m = regex.search(out_str) + # (?P<wastewater>RSL(?:-|_)?WW(?:-|_)?20\d{2}-?\d{2}-?\d{2}(?:(?:_|-)\d?((?!\d)|R)?\d(?!\d))?)| + (?P<wastewater>RSL(?:-|_)?WW(?:-|_)?20\d{2}-?\d{2}-?\d{2}(?:(_|-)\d?(\D|$)R?\d?)?)| + (?P<bacterial_culture>RSL-?\d{2}-?\d{4})| + (?P<wastewater_artic>(\d{4}-\d{2}-\d{2}_(?:\d_)?artic)|(RSL(?:-|_)?AR(?:-|_)?20\d{2}-?\d{2}-?\d{2}(?:(_|-)\d?(\D|$)R?\d?)?)) + """, flags = re.IGNORECASE | re.VERBOSE) + m = regex.search(self.out_str) if m != None: - try: - self.parsed_name = m.group().upper().strip(".") - logger.debug(f"Got parsed submission name: {self.parsed_name}") - self.submission_type = m.lastgroup - except AttributeError as e: - logger.critical("No RSL plate number found or submission type found!") - logger.debug(f"The cause of the above error was: {e}") - else: - logger.warning(f"We're going to have to create the submission type from the excel sheet properties...") - if in_str.exists(): - my_xl = pd.ExcelFile(in_str) - if my_xl.book.properties.category != None: - categories = [item.strip().title() for item in my_xl.book.properties.category.split(";")] - self.submission_type = categories[0].replace(" ", "_").lower() - else: - raise AttributeError(f"File {in_str.__str__()} has no categories.") - else: - raise FileNotFoundError() + self.parsed_name = m.group().upper().strip(".") + logger.debug(f"Got parsed 
submission name: {self.parsed_name}") + if self.submission_type == None: + try: + self.submission_type = m.lastgroup + except AttributeError as e: + logger.critical("No RSL plate number found or submission type found!") + logger.debug(f"The cause of the above error was: {e}") + logger.warning(f"We're going to have to create the submission type from the excel sheet properties...") + if in_str.exists(): + my_xl = pd.ExcelFile(in_str) + if my_xl.book.properties.category != None: + categories = [item.strip().title() for item in my_xl.book.properties.category.split(";")] + self.submission_type = categories[0].replace(" ", "_").lower() + else: + raise AttributeError(f"File {in_str.__str__()} has no categories.") + else: + raise FileNotFoundError() + # else: + # raise ValueError(f"No parsed name could be created for {self.out_str}.") def enforce_wastewater(self): """ @@ -223,10 +223,11 @@ class RSLNamer(object): """ def construct(): today = datetime.now() - return f"RSL-WW-{today.year}{str(today.month).zfill(2)}{str(today.day).zfill(2)}" + return f"RSL-WW-{today.year}{str(today.month).zfill(2)}{str(today.day).zfill(2)}" try: self.parsed_name = re.sub(r"PCR(-|_)", "", self.parsed_name) except AttributeError as e: + logger.error(f"Problem using regex: {e}") self.parsed_name = construct() self.parsed_name = self.parsed_name.replace("RSLWW", "RSL-WW") self.parsed_name = re.sub(r"WW(\d{4})", r"WW-\1", self.parsed_name, flags=re.IGNORECASE) @@ -413,6 +414,7 @@ class Settings(BaseSettings): @field_validator('package', mode="before") @classmethod def import_package(cls, value): + import __init__ as package if value == None: return package
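For orientation, a minimal sketch of the new submission/sample association pattern defined in submissions.py above; the import path, plate number and sample id are assumptions, and no database session is needed for the in-memory wiring shown.

# Sketch only: demonstrates the SubmissionSampleAssociation / association_proxy layout above.
# Import path and identifiers are assumed for illustration.
from backend.db.models.submissions import BasicSample, BasicSubmission, SubmissionSampleAssociation

sub = BasicSubmission(rsl_plate_num="RSL-WW-20230802")   # hypothetical plate
samp = BasicSample(submitter_id="EXAMPLE-001")           # hypothetical sample
# The association row carries the plate position for this particular pairing.
assoc = SubmissionSampleAssociation(submission=sub, sample=samp, row=1, column=2)
# association_proxy exposes the linked objects directly on either side.
assert list(sub.samples) == [samp]
assert list(samp.submissions) == [sub]
assert (assoc.row, assoc.column) == (1, 2)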