diff --git a/alembic/versions/b879020f2a91_rebuild_database.py b/alembic/versions/cac89ced412b_rebuild_database.py similarity index 86% rename from alembic/versions/b879020f2a91_rebuild_database.py rename to alembic/versions/cac89ced412b_rebuild_database.py index 1a60c55..b73e843 100644 --- a/alembic/versions/b879020f2a91_rebuild_database.py +++ b/alembic/versions/cac89ced412b_rebuild_database.py @@ -1,8 +1,8 @@ """rebuild database -Revision ID: b879020f2a91 +Revision ID: cac89ced412b Revises: -Create Date: 2023-08-02 09:16:12.792995 +Create Date: 2023-08-25 14:03:48.883090 """ from alembic import op @@ -10,7 +10,7 @@ import sqlalchemy as sa # revision identifiers, used by Alembic. -revision = 'b879020f2a91' +revision = 'cac89ced412b' down_revision = None branch_labels = None depends_on = None @@ -35,11 +35,6 @@ def upgrade() -> None: op.create_table('_kits', sa.Column('id', sa.INTEGER(), nullable=False), sa.Column('name', sa.String(length=64), nullable=True), - sa.Column('used_for', sa.JSON(), nullable=True), - sa.Column('cost_per_run', sa.FLOAT(precision=2), nullable=True), - sa.Column('mutable_cost_column', sa.FLOAT(precision=2), nullable=True), - sa.Column('mutable_cost_sample', sa.FLOAT(precision=2), nullable=True), - sa.Column('constant_cost', sa.FLOAT(precision=2), nullable=True), sa.PrimaryKeyConstraint('id'), sa.UniqueConstraint('name') ) @@ -61,24 +56,24 @@ def upgrade() -> None: sa.Column('submitter_id', sa.String(length=64), nullable=False), sa.Column('sample_type', sa.String(length=32), nullable=True), sa.Column('ww_processing_num', sa.String(length=64), nullable=True), + sa.Column('ww_sample_full_id', sa.String(length=64), nullable=True), sa.Column('rsl_number', sa.String(length=64), nullable=True), sa.Column('collection_date', sa.TIMESTAMP(), nullable=True), - sa.Column('testing_type', sa.String(length=64), nullable=True), - sa.Column('site_status', sa.String(length=64), nullable=True), + sa.Column('received_date', sa.TIMESTAMP(), nullable=True), sa.Column('notes', sa.String(length=2000), nullable=True), - sa.Column('ct_n1', sa.FLOAT(precision=2), nullable=True), - sa.Column('ct_n2', sa.FLOAT(precision=2), nullable=True), - sa.Column('n1_status', sa.String(length=32), nullable=True), - sa.Column('n2_status', sa.String(length=32), nullable=True), - sa.Column('seq_submitted', sa.BOOLEAN(), nullable=True), - sa.Column('ww_seq_run_id', sa.String(length=64), nullable=True), - sa.Column('pcr_results', sa.JSON(), nullable=True), - sa.Column('well_24', sa.String(length=8), nullable=True), + sa.Column('sample_location', sa.String(length=8), nullable=True), sa.Column('organism', sa.String(length=64), nullable=True), sa.Column('concentration', sa.String(length=16), nullable=True), sa.PrimaryKeyConstraint('id'), sa.UniqueConstraint('submitter_id') ) + op.create_table('_submission_types', + sa.Column('id', sa.INTEGER(), nullable=False), + sa.Column('name', sa.String(length=128), nullable=True), + sa.Column('info_map', sa.JSON(), nullable=True), + sa.PrimaryKeyConstraint('id'), + sa.UniqueConstraint('name') + ) op.create_table('_discounts', sa.Column('id', sa.INTEGER(), nullable=False), sa.Column('kit_id', sa.INTEGER(), nullable=True), @@ -113,6 +108,16 @@ def upgrade() -> None: sa.ForeignKeyConstraint(['reagent_types_id'], ['_reagent_types.id'], ), sa.PrimaryKeyConstraint('reagent_types_id', 'kits_id') ) + op.create_table('_submissiontypes_kittypes', + sa.Column('submission_types_id', sa.INTEGER(), nullable=False), + sa.Column('kits_id', sa.INTEGER(), nullable=False), + sa.Column('mutable_cost_column', sa.FLOAT(precision=2), nullable=True), + sa.Column('mutable_cost_sample', sa.FLOAT(precision=2), nullable=True), + sa.Column('constant_cost', sa.FLOAT(precision=2), nullable=True), + sa.ForeignKeyConstraint(['kits_id'], ['_kits.id'], ), + sa.ForeignKeyConstraint(['submission_types_id'], ['_submission_types.id'], ), + sa.PrimaryKeyConstraint('submission_types_id', 'kits_id') + ) op.create_table('_submissions', sa.Column('id', sa.INTEGER(), nullable=False), sa.Column('rsl_plate_num', sa.String(length=32), nullable=False), @@ -121,7 +126,7 @@ def upgrade() -> None: sa.Column('submitting_lab_id', sa.INTEGER(), nullable=True), sa.Column('sample_count', sa.INTEGER(), nullable=True), sa.Column('extraction_kit_id', sa.INTEGER(), nullable=True), - sa.Column('submission_type', sa.String(length=32), nullable=True), + sa.Column('submission_type_name', sa.String(), nullable=True), sa.Column('technician', sa.String(length=64), nullable=True), sa.Column('reagents_id', sa.String(), nullable=True), sa.Column('extraction_info', sa.JSON(), nullable=True), @@ -131,6 +136,7 @@ def upgrade() -> None: sa.Column('pcr_info', sa.JSON(), nullable=True), sa.ForeignKeyConstraint(['extraction_kit_id'], ['_kits.id'], name='fk_BS_extkit_id', ondelete='SET NULL'), sa.ForeignKeyConstraint(['reagents_id'], ['_reagents.id'], name='fk_BS_reagents_id', ondelete='SET NULL'), + sa.ForeignKeyConstraint(['submission_type_name'], ['_submission_types.name'], name='fk_BS_subtype_name', ondelete='SET NULL'), sa.ForeignKeyConstraint(['submitting_lab_id'], ['_organizations.id'], name='fk_BS_sublab_id', ondelete='SET NULL'), sa.PrimaryKeyConstraint('id'), sa.UniqueConstraint('rsl_plate_num'), @@ -164,6 +170,12 @@ def upgrade() -> None: sa.Column('submission_id', sa.INTEGER(), nullable=False), sa.Column('row', sa.INTEGER(), nullable=True), sa.Column('column', sa.INTEGER(), nullable=True), + sa.Column('base_sub_type', sa.String(), nullable=True), + sa.Column('ct_n1', sa.FLOAT(precision=2), nullable=True), + sa.Column('ct_n2', sa.FLOAT(precision=2), nullable=True), + sa.Column('n1_status', sa.String(length=32), nullable=True), + sa.Column('n2_status', sa.String(length=32), nullable=True), + sa.Column('pcr_results', sa.JSON(), nullable=True), sa.ForeignKeyConstraint(['sample_id'], ['_samples.id'], ), sa.ForeignKeyConstraint(['submission_id'], ['_submissions.id'], ), sa.PrimaryKeyConstraint('sample_id', 'submission_id') @@ -177,10 +189,12 @@ def downgrade() -> None: op.drop_table('_reagents_submissions') op.drop_table('_control_samples') op.drop_table('_submissions') + op.drop_table('_submissiontypes_kittypes') op.drop_table('_reagenttypes_kittypes') op.drop_table('_reagents') op.drop_table('_orgs_contacts') op.drop_table('_discounts') + op.drop_table('_submission_types') op.drop_table('_samples') op.drop_table('_reagent_types') op.drop_table('_organizations') diff --git a/alembic/versions/da94eca9d381_polymorpherizing_associations.py b/alembic/versions/da94eca9d381_polymorpherizing_associations.py deleted file mode 100644 index 5450e58..0000000 --- a/alembic/versions/da94eca9d381_polymorpherizing_associations.py +++ /dev/null @@ -1,56 +0,0 @@ -"""polymorpherizing associations - -Revision ID: da94eca9d381 -Revises: b879020f2a91 -Create Date: 2023-08-03 13:30:34.056316 - -""" -from alembic import op -import sqlalchemy as sa -from sqlalchemy.dialects import sqlite - -# revision identifiers, used by Alembic. -revision = 'da94eca9d381' -down_revision = 'b879020f2a91' -branch_labels = None -depends_on = None - - -def upgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('_samples', schema=None) as batch_op: - batch_op.drop_column('ct_n2') - batch_op.drop_column('n1_status') - batch_op.drop_column('pcr_results') - batch_op.drop_column('n2_status') - batch_op.drop_column('ct_n1') - - with op.batch_alter_table('_submission_sample', schema=None) as batch_op: - batch_op.add_column(sa.Column('base_sub_type', sa.String(), nullable=True)) - batch_op.add_column(sa.Column('ct_n1', sa.FLOAT(precision=2), nullable=True)) - batch_op.add_column(sa.Column('ct_n2', sa.FLOAT(precision=2), nullable=True)) - batch_op.add_column(sa.Column('n1_status', sa.String(length=32), nullable=True)) - batch_op.add_column(sa.Column('n2_status', sa.String(length=32), nullable=True)) - batch_op.add_column(sa.Column('pcr_results', sa.JSON(), nullable=True)) - - # ### end Alembic commands ### - - -def downgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('_submission_sample', schema=None) as batch_op: - batch_op.drop_column('pcr_results') - batch_op.drop_column('n2_status') - batch_op.drop_column('n1_status') - batch_op.drop_column('ct_n2') - batch_op.drop_column('ct_n1') - batch_op.drop_column('base_sub_type') - - with op.batch_alter_table('_samples', schema=None) as batch_op: - batch_op.add_column(sa.Column('ct_n1', sa.FLOAT(), nullable=True)) - batch_op.add_column(sa.Column('n2_status', sa.VARCHAR(length=32), nullable=True)) - batch_op.add_column(sa.Column('pcr_results', sqlite.JSON(), nullable=True)) - batch_op.add_column(sa.Column('n1_status', sa.VARCHAR(length=32), nullable=True)) - batch_op.add_column(sa.Column('ct_n2', sa.FLOAT(), nullable=True)) - - # ### end Alembic commands ### diff --git a/src/submissions/backend/db/functions.py b/src/submissions/backend/db/functions.py index be8c1dd..8320205 100644 --- a/src/submissions/backend/db/functions.py +++ b/src/submissions/backend/db/functions.py @@ -150,7 +150,7 @@ def construct_submission_info(ctx:Settings, info_dict:dict) -> models.BasicSubmi logger.debug(f"Looking at models for submission type: {query}") model = getattr(models, query) logger.debug(f"We've got the model: {type(model)}") - info_dict['submission_type'] = info_dict['submission_type'].replace(" ", "_").lower() + # info_dict['submission_type'] = info_dict['submission_type'].replace(" ", "_").lower() # if query return nothing, ie doesn't already exist in db if instance == None: instance = model() @@ -224,6 +224,9 @@ def construct_submission_info(ctx:Settings, info_dict:dict) -> models.BasicSubmi logger.critical(e) continue continue + case "submission_type": + # item = "submission_type" + field_value = lookup_submissiontype_by_name(ctx=ctx, type_name=value) case _: field_value = value # insert into field @@ -276,7 +279,7 @@ def construct_reagent(ctx:Settings, info_dict:dict) -> models.Reagent: """ reagent = models.Reagent() for item in info_dict: - logger.debug(f"Reagent info item: {item}") + logger.debug(f"Reagent info item for {item}: {info_dict[item]}") # set fields based on keys in dictionary match item: case "lot": @@ -284,7 +287,12 @@ def construct_reagent(ctx:Settings, info_dict:dict) -> models.Reagent: case "expiry": reagent.expiry = info_dict[item] case "type": - reagent.type = lookup_reagenttype_by_name(ctx=ctx, rt_name=info_dict[item].replace(" ", "_").lower()) + reagent.type = lookup_reagenttype_by_name(ctx=ctx, rt_name=info_dict[item]) + case "name": + if item == None: + reagent.name = reagent.type.name + else: + reagent.name = info_dict[item] # add end-of-life extension from reagent type to expiry date # NOTE: this will now be done only in the reporting phase to account for potential changes in end-of-life extensions # try: @@ -320,7 +328,7 @@ def lookup_reagenttype_by_name(ctx:Settings, rt_name:str) -> models.ReagentType: Returns: models.ReagentType: looked up reagent type """ - logger.debug(f"Looking up ReagentType by name: {rt_name}") + logger.debug(f"Looking up ReagentType by name: {rt_name.title()}") # lookedup = ctx['database_session'].query(models.ReagentType).filter(models.ReagentType.name==rt_name).first() lookedup = ctx.database_session.query(models.ReagentType).filter(models.ReagentType.name==rt_name).first() logger.debug(f"Found ReagentType: {lookedup}") @@ -339,12 +347,13 @@ def lookup_kittype_by_use(ctx:Settings, used_by:str|None=None) -> list[models.Ki """ if used_by != None: # return ctx['database_session'].query(models.KitType).filter(models.KitType.used_for.contains(used_by)).all() - return ctx.database_session.query(models.KitType).filter(models.KitType.used_for.contains(used_by)).all() + # return ctx.database_session.query(models.KitType).filter(models.KitType.used_for.contains(used_by)).all() + return ctx.database_session.query(models.KitType).filter(models.KitType.used_for.any(name=used_by)).all() else: # return ctx['database_session'].query(models.KitType).all() return ctx.database_session.query(models.KitType).all() -def lookup_kittype_by_name(ctx:Settings, name:str) -> models.KitType: +def lookup_kittype_by_name(ctx:Settings, name:str|dict) -> models.KitType: """ Lookup a kit type by name @@ -359,7 +368,8 @@ def lookup_kittype_by_name(ctx:Settings, name:str) -> models.KitType: name = name['value'] logger.debug(f"Querying kittype: {name}") # return ctx['database_session'].query(models.KitType).filter(models.KitType.name==name).first() - return ctx.database_session.query(models.KitType).filter(models.KitType.name==name).first() + with ctx.database_session.no_autoflush: + return ctx.database_session.query(models.KitType).filter(models.KitType.name==name).first() def lookup_kittype_by_id(ctx:Settings, id:int) -> models.KitType: return ctx.database_session.query(models.KitType).filter(models.KitType.id==id).first() @@ -559,12 +569,17 @@ def create_kit_from_yaml(ctx:Settings, exp:dict) -> dict: # continue # A submission type may use multiple kits. for kt in exp[type]['kits']: + submission_type = lookup_submissiontype_by_name(ctx=ctx, type_name=type) kit = models.KitType(name=kt, - used_for=[type.replace("_", " ").title()], - constant_cost=exp[type]["kits"][kt]["constant_cost"], - mutable_cost_column=exp[type]["kits"][kt]["mutable_cost_column"], - mutable_cost_sample=exp[type]["kits"][kt]["mutable_cost_sample"] + # constant_cost=exp[type]["kits"][kt]["constant_cost"], + # mutable_cost_column=exp[type]["kits"][kt]["mutable_cost_column"], + # mutable_cost_sample=exp[type]["kits"][kt]["mutable_cost_sample"] ) + kt_st_assoc = models.SubmissionTypeKitTypeAssociation(kit_type=kit, submission_type=submission_type) + kt_st_assoc.constant_cost = exp[type]["kits"][kt]["constant_cost"] + kt_st_assoc.mutable_cost_column = exp[type]["kits"][kt]["mutable_cost_column"] + kt_st_assoc.mutable_cost_sample = exp[type]["kits"][kt]["mutable_cost_sample"] + kit.kit_submissiontype_associations.append(kt_st_assoc) # A kit contains multiple reagent types. for r in exp[type]['kits'][kt]['reagenttypes']: # check if reagent type already exists. @@ -573,7 +588,7 @@ def create_kit_from_yaml(ctx:Settings, exp:dict) -> dict: look_up = ctx.database_session.query(models.ReagentType).filter(models.ReagentType.name==r).first() if look_up == None: # rt = models.ReagentType(name=r.replace(" ", "_").lower(), eol_ext=timedelta(30*exp[type]['kits'][kt]['reagenttypes'][r]['eol_ext']), kits=[kit], required=1) - rt = models.ReagentType(name=r.replace(" ", "_").lower(), eol_ext=timedelta(30*exp[type]['kits'][kt]['reagenttypes'][r]['eol_ext']), last_used="") + rt = models.ReagentType(name=r.replace(" ", "_").lower().strip(), eol_ext=timedelta(30*exp[type]['kits'][kt]['reagenttypes'][r]['eol_ext']), last_used="") else: rt = look_up # rt.kits.append(kit) @@ -583,7 +598,7 @@ def create_kit_from_yaml(ctx:Settings, exp:dict) -> dict: # except AttributeError as e: # logger.error(f"Error appending reagent id to kit.reagent_types_id: {e}, creating new.") # kit.reagent_types_id = [rt.id] - assoc = models.KitTypeReagentTypeAssociation(kit_type=kit, reagent_type=rt, uses=kit.used_for) + assoc = models.KitTypeReagentTypeAssociation(kit_type=kit, reagent_type=rt, uses={}) # ctx['database_session'].add(rt) ctx.database_session.add(rt) kit.kit_reagenttype_associations.append(assoc) @@ -646,10 +661,11 @@ def lookup_all_sample_types(ctx:Settings) -> list[str]: list[str]: list of sample type names """ # uses = [item.used_for for item in ctx['database_session'].query(models.KitType).all()] - uses = [item.used_for for item in ctx.database_session.query(models.KitType).all()] + # uses = [item.used_for for item in ctx.database_session.query(models.KitType).all()] # flattened list of lists - uses = list(set([item for sublist in uses for item in sublist])) - return uses + # uses = list(set([item for sublist in uses for item in sublist])) + + return [item.name for item in ctx.database_session.query(models.SubmissionType).all()] def get_all_available_modes(ctx:Settings) -> list[str]: """ @@ -1084,7 +1100,8 @@ def lookup_sample_by_submitter_id(ctx:Settings, submitter_id:str) -> models.Basi Returns: BasicSample: _description_ """ - return ctx.database_session.query(models.BasicSample).filter(models.BasicSample.submitter_id==submitter_id).first() + with ctx.database_session.no_autoflush: + return ctx.database_session.query(models.BasicSample).filter(models.BasicSample.submitter_id==submitter_id).first() def get_all_submission_types(ctx:Settings) -> List[str]: """ @@ -1150,4 +1167,18 @@ def lookup_all_reagent_names_by_role(ctx:Settings, role_name:str) -> List[str]: try: return [reagent.name for reagent in role.instances] except AttributeError: - return [] \ No newline at end of file + return [] + +def lookup_submissiontype_by_name(ctx:Settings, type_name:str) -> models.SubmissionType: + """ + _summary_ + + Args: + ctx (Settings): _description_ + type_name (str): _description_ + + Returns: + models.SubmissionType: _description_ + """ + + return ctx.database_session.query(models.SubmissionType).filter(models.SubmissionType.name==type_name).first() \ No newline at end of file diff --git a/src/submissions/backend/db/models/__init__.py b/src/submissions/backend/db/models/__init__.py index d2758a4..cf0ed2d 100644 --- a/src/submissions/backend/db/models/__init__.py +++ b/src/submissions/backend/db/models/__init__.py @@ -7,7 +7,7 @@ Base = declarative_base() metadata = Base.metadata from .controls import Control, ControlType -from .kits import KitType, ReagentType, Reagent, Discount, KitTypeReagentTypeAssociation +from .kits import KitType, ReagentType, Reagent, Discount, KitTypeReagentTypeAssociation, SubmissionType, SubmissionTypeKitTypeAssociation from .organizations import Organization, Contact # from .samples import WWSample, BCSample, BasicSample from .submissions import BasicSubmission, BacterialCulture, Wastewater, WastewaterArtic, WastewaterSample, BacterialCultureSample, BasicSample, SubmissionSampleAssociation, WastewaterAssociation diff --git a/src/submissions/backend/db/models/kits.py b/src/submissions/backend/db/models/kits.py index a834414..30b9797 100644 --- a/src/submissions/backend/db/models/kits.py +++ b/src/submissions/backend/db/models/kits.py @@ -32,11 +32,9 @@ class KitType(Base): id = Column(INTEGER, primary_key=True) #: primary key name = Column(String(64), unique=True) #: name of kit submissions = relationship("BasicSubmission", back_populates="extraction_kit") #: submissions this kit was used for - used_for = Column(JSON) #: list of names of sample types this kit can process - cost_per_run = Column(FLOAT(2)) #: dollar amount for each full run of this kit NOTE: depreciated, use the constant and mutable costs instead - mutable_cost_column = Column(FLOAT(2)) #: dollar amount per 96 well plate that can change with number of columns (reagents, tips, etc) - mutable_cost_sample = Column(FLOAT(2)) #: dollar amount that can change with number of samples (reagents, tips, etc) - constant_cost = Column(FLOAT(2)) #: dollar amount per plate that will remain constant (plates, man hours, etc) + # used_for = Column(JSON) #: list of names of sample types this kit can process + # used_for = relationship("SubmissionType", back_populates="extraction_kits", uselist=True, secondary=submissiontype_kittypes) + # cost_per_run = Column(FLOAT(2)) #: dollar amount for each full run of this kit NOTE: depreciated, use the constant and mutable costs instead # reagent_types = relationship("ReagentType", back_populates="kits", uselist=True, secondary=reagenttypes_kittypes) #: reagent types this kit contains # reagent_types_id = Column(INTEGER, ForeignKey("_reagent_types.id", ondelete='SET NULL', use_alter=True, name="fk_KT_reagentstype_id")) #: joined reagent type id # kit_reagenttype_association = @@ -46,12 +44,23 @@ class KitType(Base): back_populates="kit_type", cascade="all, delete-orphan", ) + # association proxy of "user_keyword_associations" collection # to "keyword" attribute reagent_types = association_proxy("kit_reagenttype_associations", "reagenttype") + + kit_submissiontype_associations = relationship( + "SubmissionTypeKitTypeAssociation", + back_populates="kit_type", + cascade="all, delete-orphan", + ) + + used_for = association_proxy("kit_submissiontype_associations", "submission_type") + + def __repr__(self) -> str: - return f"KitType({self.name})" + return f"" def __str__(self) -> str: """ @@ -64,9 +73,9 @@ class KitType(Base): def get_reagents(self, required:bool=False) -> list: if required: - return [item.reagenttype for item in self.kit_reagenttype_associations if item.required == 1] + return [item.reagent_type for item in self.kit_reagenttype_associations if item.required == 1] else: - return [item.reagenttype for item in self.kit_reagenttype_associations] + return [item.reagent_type for item in self.kit_reagenttype_associations] def construct_xl_map_for_use(self, use:str) -> dict: @@ -75,12 +84,16 @@ class KitType(Base): assocs = [item for item in self.kit_reagenttype_associations if use in item.uses] for assoc in assocs: try: - map[assoc.reagenttype.name] = assoc.uses[use] + map[assoc.reagent_type.name] = assoc.uses[use] except TypeError: continue + try: + st_assoc = [item for item in self.used_for if use == item.name][0] + map['info'] = st_assoc.info_map + except IndexError as e: + map['info'] = {} return map - class KitTypeReagentTypeAssociation(Base): """ table containing reagenttype/kittype associations @@ -96,11 +109,11 @@ class KitTypeReagentTypeAssociation(Base): kit_type = relationship(KitType, back_populates="kit_reagenttype_associations") # reference to the "ReagentType" object - reagenttype = relationship("ReagentType") + reagent_type = relationship("ReagentType") def __init__(self, kit_type=None, reagent_type=None, uses=None, required=1): - self.kit = kit_type - self.reagenttype = reagent_type + self.kit_type = kit_type + self.reagent_type = reagent_type self.uses = uses self.required = required @@ -116,8 +129,6 @@ class KitTypeReagentTypeAssociation(Base): raise ValueError(f'{value} is not a reagenttype') return value - - class ReagentType(Base): """ Base of reagent type abstract @@ -151,7 +162,6 @@ class ReagentType(Base): def __repr__(self): return f"ReagentType({self.name})" - class Reagent(Base): """ Concrete reagent instance @@ -215,7 +225,6 @@ class Reagent(Base): "expiry": self.expiry.strftime("%Y-%m-%d") } - class Discount(Base): """ Relationship table for client labs for certain kits. @@ -230,4 +239,44 @@ class Discount(Base): name = Column(String(128)) amount = Column(FLOAT(2)) +class SubmissionType(Base): + + __tablename__ = "_submission_types" + + id = Column(INTEGER, primary_key=True) #: primary key + name = Column(String(128), unique=True) #: name of submission type + info_map = Column(JSON) + instances = relationship("BasicSubmission", backref="submission_type") + submissiontype_kit_associations = relationship( + "SubmissionTypeKitTypeAssociation", + back_populates="submission_type", + cascade="all, delete-orphan", + ) + + kit_types = association_proxy("kit_submissiontype_associations", "kit_type") + + def __repr__(self) -> str: + return f"" + +class SubmissionTypeKitTypeAssociation(Base): + + __tablename__ = "_submissiontypes_kittypes" + submission_types_id = Column(INTEGER, ForeignKey("_submission_types.id"), primary_key=True) + kits_id = Column(INTEGER, ForeignKey("_kits.id"), primary_key=True) + mutable_cost_column = Column(FLOAT(2)) #: dollar amount per 96 well plate that can change with number of columns (reagents, tips, etc) + mutable_cost_sample = Column(FLOAT(2)) #: dollar amount that can change with number of samples (reagents, tips, etc) + constant_cost = Column(FLOAT(2)) #: dollar amount per plate that will remain constant (plates, man hours, etc) + # reagent_type_name = Column(INTEGER, ForeignKey("_reagent_types.name")) + + kit_type = relationship(KitType, back_populates="kit_submissiontype_associations") + + # reference to the "ReagentType" object + submission_type = relationship(SubmissionType, back_populates="submissiontype_kit_associations") + + def __init__(self, kit_type=None, submission_type=None): + self.kit_type = kit_type + self.submission_type = submission_type + self.mutable_cost_column = 0.00 + self.mutable_cost_sample = 0.00 + self.constant_cost = 0.00 \ No newline at end of file diff --git a/src/submissions/backend/db/models/organizations.py b/src/submissions/backend/db/models/organizations.py index 45a46f4..480e93d 100644 --- a/src/submissions/backend/db/models/organizations.py +++ b/src/submissions/backend/db/models/organizations.py @@ -31,6 +31,9 @@ class Organization(Base): str: string representing organization name """ return self.name.replace("_", " ").title() + + def __repr__(self) -> str: + return f"" class Contact(Base): diff --git a/src/submissions/backend/db/models/submissions.py b/src/submissions/backend/db/models/submissions.py index 436edf8..d2beb47 100644 --- a/src/submissions/backend/db/models/submissions.py +++ b/src/submissions/backend/db/models/submissions.py @@ -12,6 +12,9 @@ from math import ceil from sqlalchemy.ext.associationproxy import association_proxy import uuid from . import Base +from pandas import Timestamp +from dateutil.parser import parse +import pprint logger = logging.getLogger(f"submissions.{__name__}") @@ -33,7 +36,8 @@ class BasicSubmission(Base): sample_count = Column(INTEGER) #: Number of samples in the submission extraction_kit = relationship("KitType", back_populates="submissions") #: The extraction kit used extraction_kit_id = Column(INTEGER, ForeignKey("_kits.id", ondelete="SET NULL", name="fk_BS_extkit_id")) - submission_type = Column(String(32)) #: submission type (should be string in D3 of excel sheet) + # submission_type = Column(String(32)) #: submission type (should be string in D3 of excel sheet) + submission_type_name = Column(String, ForeignKey("_submission_types.name", ondelete="SET NULL", name="fk_BS_subtype_name")) technician = Column(String(64)) #: initials of processing tech(s) # Move this into custom types? reagents = relationship("Reagent", back_populates="submissions", secondary=reagents_submissions) #: relationship to reagents @@ -55,7 +59,7 @@ class BasicSubmission(Base): # Allows for subclassing into ex. BacterialCulture, Wastewater, etc. __mapper_args__ = { "polymorphic_identity": "basic_submission", - "polymorphic_on": submission_type, + "polymorphic_on": submission_type_name, "with_polymorphic": "*", } @@ -128,7 +132,7 @@ class BasicSubmission(Base): output = { "id": self.id, "Plate Number": self.rsl_plate_num, - "Submission Type": self.submission_type.replace("_", " ").title(), + "Submission Type": self.submission_type_name, "Submitter Plate Number": self.submitter_plate_num, "Submitted Date": self.submitted_date.strftime("%Y-%m-%d"), "Submitting Lab": sub_lab, @@ -184,14 +188,18 @@ class BasicSubmission(Base): except Exception as e: logger.error(f"Column count error: {e}") # cols_count_24 = ceil(int(self.sample_count) / 3) - if all(item == 0.0 for item in [self.extraction_kit.constant_cost, self.extraction_kit.mutable_cost_column, self.extraction_kit.mutable_cost_sample]): + logger.debug(f"Pre-association check. {pprint.pformat(self.__dict__)}") + assoc = [item for item in self.extraction_kit.kit_submissiontype_associations if item.submission_type == self.submission_type][0] + logger.debug(f"Came up with association: {assoc}") + # if all(item == 0.0 for item in [self.extraction_kit.constant_cost, self.extraction_kit.mutable_cost_column, self.extraction_kit.mutable_cost_sample]): + if all(item == 0.0 for item in [assoc.constant_cost, assoc.mutable_cost_column, assoc.mutable_cost_sample]): try: self.run_cost = self.extraction_kit.cost_per_run except Exception as e: logger.error(f"Calculation error: {e}") else: try: - self.run_cost = self.extraction_kit.constant_cost + (self.extraction_kit.mutable_cost_column * cols_count_96) + (self.extraction_kit.mutable_cost_sample * int(self.sample_count)) + self.run_cost = assoc.constant_cost + (assoc.mutable_cost_column * cols_count_96) + (assoc.mutable_cost_sample * int(self.sample_count)) except Exception as e: logger.error(f"Calculation error: {e}") @@ -225,7 +233,7 @@ class BacterialCulture(BasicSubmission): """ controls = relationship("Control", back_populates="submission", uselist=True) #: A control sample added to submission # samples = relationship("BCSample", back_populates="rsl_plate", uselist=True) - __mapper_args__ = {"polymorphic_identity": "bacterial_culture", "polymorphic_load": "inline"} + __mapper_args__ = {"polymorphic_identity": "Bacterial Culture", "polymorphic_load": "inline"} def to_dict(self) -> dict: """ @@ -245,7 +253,7 @@ class Wastewater(BasicSubmission): # samples = relationship("WWSample", back_populates="rsl_plate", uselist=True) pcr_info = Column(JSON) # ww_sample_id = Column(String, ForeignKey("_ww_samples.id", ondelete="SET NULL", name="fk_WW_sample_id")) - __mapper_args__ = {"polymorphic_identity": "wastewater", "polymorphic_load": "inline"} + __mapper_args__ = {"polymorphic_identity": "Wastewater", "polymorphic_load": "inline"} def to_dict(self) -> dict: """ @@ -315,14 +323,14 @@ class BasicSample(Base): @validates('submitter_id') def create_id(self, key, value): - logger.debug(f"validating sample_id of: {value}") + # logger.debug(f"validating sample_id of: {value}") if value == None: return uuid.uuid4().hex.upper() else: return value def __repr__(self) -> str: - return f"{self.sample_type}Sample({self.submitter_id})" + return f"<{self.sample_type.replace('_', ' ').title(). replace(' ', '')}({self.submitter_id})>" def to_sub_dict(self, submission_rsl:str) -> dict: row_map = {1:"A", 2:"B", 3:"C", 4:"D", 5:"E", 6:"F", 7:"G", 8:"H"} @@ -363,30 +371,31 @@ class WastewaterSample(BasicSample): # id = Column(INTEGER, primary_key=True) #: primary key ww_processing_num = Column(String(64)) #: wastewater processing number - # ww_sample_full_id = Column(String(64), nullable=False, unique=True) + ww_sample_full_id = Column(String(64)) rsl_number = Column(String(64)) #: rsl plate identification number # rsl_plate = relationship("Wastewater", back_populates="samples") #: relationship to parent plate # rsl_plate_id = Column(INTEGER, ForeignKey("_submissions.id", ondelete="SET NULL", name="fk_WWS_submission_id")) - collection_date = Column(TIMESTAMP) #: Date submission received + collection_date = Column(TIMESTAMP) #: Date sample collected + received_date = Column(TIMESTAMP) #: Date sample received # well_number = Column(String(8)) #: location on 96 well plate # The following are fields from the sample tracking excel sheet Ruth put together. # I have no idea when they will be implemented or how. - testing_type = Column(String(64)) - site_status = Column(String(64)) + # testing_type = Column(String(64)) + # site_status = Column(String(64)) notes = Column(String(2000)) # ct_n1 = Column(FLOAT(2)) #: AKA ct for N1 # ct_n2 = Column(FLOAT(2)) #: AKA ct for N2 # n1_status = Column(String(32)) # n2_status = Column(String(32)) - seq_submitted = Column(BOOLEAN()) - ww_seq_run_id = Column(String(64)) + # seq_submitted = Column(BOOLEAN()) + # ww_seq_run_id = Column(String(64)) # sample_type = Column(String(16)) # pcr_results = Column(JSON) - well_24 = Column(String(8)) #: location on 24 well plate + sample_location = Column(String(8)) #: location on 24 well plate # artic_rsl_plate = relationship("WastewaterArtic", back_populates="samples") # artic_well_number = Column(String(8)) - __mapper_args__ = {"polymorphic_identity": "wastewater_sample", "polymorphic_load": "inline"} + __mapper_args__ = {"polymorphic_identity": "Wastewater Sample", "polymorphic_load": "inline"} # def to_string(self) -> str: # """ @@ -397,6 +406,42 @@ class WastewaterSample(BasicSample): # """ # return f"{self.well_number}: {self.ww_sample_full_id}" + # @validates("received-date") + # def convert_rdate_time(self, key, value): + # if isinstance(value, Timestamp): + # return value.date() + # return value + + @validates("collected-date") + def convert_cdate_time(self, key, value): + logger.debug(f"Validating {key}: {value}") + if isinstance(value, Timestamp): + return value.date() + if isinstance(value, str): + return parse(value) + return value + + # @collection_date.setter + # def collection_date(self, value): + # match value: + # case Timestamp(): + # self.collection_date = value.date() + # case str(): + # self.collection_date = parse(value) + # case _: + # self.collection_date = value + + + def __init__(self, **kwargs): + if 'collection_date' in kwargs.keys(): + logger.debug(f"Got collection_date: {kwargs['collection_date']}. Attempting parse.") + if isinstance(kwargs['collection_date'], str): + logger.debug(f"collection_date is a string...") + kwargs['collection_date'] = parse(kwargs['collection_date']) + logger.debug(f"output is {kwargs['collection_date']}") + super().__init__(**kwargs) + + def to_sub_dict(self, submission_rsl:str) -> dict: """ Gui friendly dictionary. Inherited from BasicSample @@ -451,7 +496,6 @@ class WastewaterSample(BasicSample): # return None return sample - class BacterialCultureSample(BasicSample): """ base of bacterial culture sample @@ -493,8 +537,6 @@ class BacterialCultureSample(BasicSample): # } return sample - - class SubmissionSampleAssociation(Base): """ table containing submission/sample associations diff --git a/src/submissions/backend/excel/parser.py b/src/submissions/backend/excel/parser.py index bb260a7..effa183 100644 --- a/src/submissions/backend/excel/parser.py +++ b/src/submissions/backend/excel/parser.py @@ -3,17 +3,17 @@ contains parser object for pulling values from client generated submission sheet ''' from getpass import getuser import pprint -from typing import Tuple +from typing import List, Tuple import pandas as pd from pathlib import Path -from backend.db.models import WastewaterSample, BacterialCultureSample -from backend.db import lookup_ww_sample_by_ww_sample_num, lookup_sample_by_submitter_id, get_reagents_in_extkit, lookup_kittype_by_name, lookup_kittype_by_use +from backend.db import lookup_ww_sample_by_ww_sample_num, lookup_sample_by_submitter_id, get_reagents_in_extkit, lookup_kittype_by_name, lookup_submissiontype_by_name, models from backend.pydant import PydSubmission, PydReagent import logging from collections import OrderedDict import re import numpy as np from datetime import date, datetime +from dateutil.parser import parse, ParserError import uuid # from submissions.backend.db.functions import from tools import check_not_nan, RSLNamer, massage_common_reagents, convert_nans_to_nones, Settings @@ -49,13 +49,13 @@ class SheetParser(object): self.sub = OrderedDict() # make decision about type of sample we have self.sub['submission_type'] = self.type_decider() - # select proper parser based on sample type - parse_sub = getattr(self, f"parse_{self.sub['submission_type'].replace(' ', '_').lower()}") - parse_sub() - # self.calculate_column_count() + # # grab the info map from the submission type in database + # self.info_map = self.fetch_kit_info_map() + self.parse_info() self.import_kit_validation_check() self.parse_reagents() self.import_reagent_validation_check() + self.parse_samples() def type_decider(self) -> str: @@ -69,7 +69,7 @@ class SheetParser(object): if self.xl.book.properties.category != None: logger.debug("Using file properties to find type...") categories = [item.strip().title() for item in self.xl.book.properties.category.split(";")] - return categories[0].replace(" ", "_") + return dict(value=categories[0], parsed=False) else: # This code is going to be depreciated once there is full adoption of the client sheets # with updated metadata... but how will it work for Artic? @@ -78,120 +78,107 @@ class SheetParser(object): for type in self.ctx.submission_types: # This gets the *first* submission type that matches the sheet names in the workbook if self.xl.sheet_names == self.ctx.submission_types[type]['excel_map']: - return type.title() + return dict(value=type.title(), parsed=True) return "Unknown" except Exception as e: logger.warning(f"We were unable to parse the submission type due to: {e}") # return "Unknown" dlg = SubmissionTypeSelector(ctx=self.ctx, title="Select Submission Type", message="We were unable to find the submission type from the excel metadata. Please select from below.") if dlg.exec(): - return dlg.getValues() + return dict(value=dlg.getValues(), parsed=False) else: logger.warning(f"Last attempt at getting submission was rejected.") raise ValueError("Submission Type needed.") - - def parse_unknown(self) -> None: + + def parse_info(self): """ - Dummy function to handle unknown excel structures - """ - logger.error(f"Unknown excel workbook structure. Cannot parse.") - self.sub = None - - def parse_generic(self, sheet_name:str) -> pd.DataFrame: - """ - Pulls information common to all wasterwater/bacterial culture types and passes on dataframe + _summary_ + """ + info = InfoParser(ctx=self.ctx, xl=self.xl, submission_type=self.sub['submission_type']).parse_info() + for k,v in info.items(): + if k != "sample": + self.sub[k] = v + logger.debug(f"Parser.sub after info scrape: {pprint.pformat(self.sub)}") - Args: - sheet_name (str): name of excel worksheet to pull from + def parse_reagents(self): + self.sub['reagents'] = ReagentParser(ctx=self.ctx, xl=self.xl, submission_type=self.sub['submission_type'], extraction_kit=self.sub['extraction_kit']).parse_reagents() - Returns: - pd.DataFrame: relevant dataframe from excel sheet - """ - # self.xl is a pd.ExcelFile so we need to parse it into a df - submission_info = self.xl.parse(sheet_name=sheet_name, dtype=object) - self.sub['submitter_plate_num'] = submission_info.iloc[0][1] - if check_not_nan(submission_info.iloc[10][1]): - self.sub['rsl_plate_num'] = RSLNamer(ctx=self.ctx, instr=submission_info.iloc[10][1]).parsed_name - else: - # self.sub['rsl_plate_num'] = RSLNamer(self.filepath).parsed_name - self.sub['rsl_plate_num'] = None - self.sub['submitted_date'] = submission_info.iloc[1][1] - self.sub['submitting_lab'] = submission_info.iloc[0][3] - self.sub['sample_count'] = submission_info.iloc[2][3] - self.sub['extraction_kit'] = submission_info.iloc[3][3] - if check_not_nan(submission_info.iloc[1][3]): - self.sub['submission_type'] = dict(value=submission_info.iloc[1][3], parsed=True) - else: - self.sub['submission_type'] = dict(value=self.sub['submission_type'], parsed=False) - return submission_info + def parse_samples(self): + self.sample_result, self.sub['samples'] = SampleParser(ctx=self.ctx, xl=self.xl, submission_type=self.sub['submission_type']['value']).parse_samples() def parse_bacterial_culture(self) -> None: """ pulls info specific to bacterial culture sample type """ - def parse_reagents(df:pd.DataFrame) -> None: - """ - Pulls reagents from the bacterial sub-dataframe + # def parse_reagents(df:pd.DataFrame) -> None: + # """ + # Pulls reagents from the bacterial sub-dataframe - Args: - df (pd.DataFrame): input sub dataframe - """ - for ii, row in df.iterrows(): - # skip positive control - logger.debug(f"Running reagent parse for {row[1]} with type {type(row[1])} and value: {row[2]} with type {type(row[2])}") - # if the lot number isn't a float and the reagent type isn't blank - # if not isinstance(row[2], float) and check_not_nan(row[1]): - if check_not_nan(row[1]): - # must be prefixed with 'lot_' to be recognized by gui - # This is no longer true since reagents are loaded into their own key in dictionary - try: - reagent_type = row[1].replace(' ', '_').lower().strip() - except AttributeError: - pass - # If there is a double slash in the type field, such as ethanol/iso - # Use the cell to the left for reagent type. - if reagent_type == "//": - if check_not_nan(row[2]): - reagent_type = row[0].replace(' ', '_').lower().strip() - else: - continue - try: - output_var = convert_nans_to_nones(str(row[2]).upper()) - except AttributeError: - logger.debug(f"Couldn't upperize {row[2]}, must be a number") - output_var = convert_nans_to_nones(str(row[2])) - logger.debug(f"Output variable is {output_var}") - logger.debug(f"Expiry date for imported reagent: {row[3]}") - if check_not_nan(row[3]): - try: - expiry = row[3].date() - except AttributeError as e: - try: - expiry = datetime.strptime(row[3], "%Y-%m-%d") - except TypeError as e: - expiry = datetime.fromordinal(datetime(1900, 1, 1).toordinal() + row[3] - 2) - else: - logger.debug(f"Date: {row[3]}") - # expiry = date.today() - expiry = date(year=1970, month=1, day=1) - # self.sub[f"lot_{reagent_type}"] = {'lot':output_var, 'exp':expiry} - # self.sub['reagents'].append(dict(type=reagent_type, lot=output_var, exp=expiry)) - self.sub['reagents'].append(PydReagent(type=reagent_type, lot=output_var, exp=expiry)) - submission_info = self.parse_generic("Sample List") - # iloc is [row][column] and the first row is set as header row so -2 - self.sub['technician'] = str(submission_info.iloc[11][1]) - # reagents - # must be prefixed with 'lot_' to be recognized by gui - # This is no longer true wince the creation of self.sub['reagents'] - self.sub['reagents'] = [] - reagent_range = submission_info.iloc[1:14, 4:8] - logger.debug(reagent_range) + # Args: + # df (pd.DataFrame): input sub dataframe + # """ + # for ii, row in df.iterrows(): + # # skip positive control + # logger.debug(f"Running reagent parse for {row[1]} with type {type(row[1])} and value: {row[2]} with type {type(row[2])}") + # # if the lot number isn't a float and the reagent type isn't blank + # # if not isinstance(row[2], float) and check_not_nan(row[1]): + # if check_not_nan(row[1]): + # # must be prefixed with 'lot_' to be recognized by gui + # # This is no longer true since reagents are loaded into their own key in dictionary + # try: + # reagent_type = row[1].replace(' ', '_').lower().strip() + # except AttributeError: + # pass + # # If there is a double slash in the type field, such as ethanol/iso + # # Use the cell to the left for reagent type. + # if reagent_type == "//": + # if check_not_nan(row[2]): + # reagent_type = row[0].replace(' ', '_').lower().strip() + # else: + # continue + # try: + # output_var = convert_nans_to_nones(str(row[2]).upper()) + # except AttributeError: + # logger.debug(f"Couldn't upperize {row[2]}, must be a number") + # output_var = convert_nans_to_nones(str(row[2])) + # logger.debug(f"Output variable is {output_var}") + # logger.debug(f"Expiry date for imported reagent: {row[3]}") + # if check_not_nan(row[3]): + # try: + # expiry = row[3].date() + # except AttributeError as e: + # try: + # expiry = datetime.strptime(row[3], "%Y-%m-%d") + # except TypeError as e: + # expiry = datetime.fromordinal(datetime(1900, 1, 1).toordinal() + row[3] - 2) + # else: + # logger.debug(f"Date: {row[3]}") + # # expiry = date.today() + # expiry = date(year=1970, month=1, day=1) + # # self.sub[f"lot_{reagent_type}"] = {'lot':output_var, 'exp':expiry} + # # self.sub['reagents'].append(dict(type=reagent_type, lot=output_var, exp=expiry)) + # self.sub['reagents'].append(PydReagent(type=reagent_type, lot=output_var, exp=expiry)) + # submission_info = self.xl.parse(sheet_name="Sample List", dtype=object) + # self.sub['extraction_kit'] = submission_info.iloc[3][3] + # submission_info = self.parse_generic("Sample List") + # # iloc is [row][column] and the first row is set as header row so -2 + # self.sub['technician'] = str(submission_info.iloc[11][1]) + # # reagents + # # must be prefixed with 'lot_' to be recognized by gui + # # This is no longer true wince the creation of self.sub['reagents'] + # self.sub['reagents'] = [] + # reagent_range = submission_info.iloc[1:14, 4:8] + # logger.debug(reagent_range) # parse_reagents(reagent_range) # get individual sample info sample_parser = SampleParser(self.ctx, submission_info.iloc[16:112]) logger.debug(f"Sample type: {self.sub['submission_type']}") - sample_parse = getattr(sample_parser, f"parse_{self.sub['submission_type']['value'].replace(' ', '_').lower()}_samples") + if isinstance(self.sub['submission_type'], dict): + getter = self.sub['submission_type']['value'] + else: + getter = self.sub['submission_type'] + sample_parse = getattr(sample_parser, f"parse_{getter.replace(' ', '_').lower()}_samples") logger.debug(f"Parser result: {self.sub}") self.sample_result, self.sub['samples'] = sample_parse() @@ -206,40 +193,40 @@ class SheetParser(object): elu_map.columns = elu_map.iloc[0] elu_map = elu_map.tail(-1) return elu_map - def parse_reagents(df:pd.DataFrame) -> None: - """ - Pulls reagents from the bacterial sub-dataframe + # def parse_reagents(df:pd.DataFrame) -> None: + # """ + # Pulls reagents from the bacterial sub-dataframe - Args: - df (pd.DataFrame): input sub dataframe - """ - # iterate through sub-df rows - for ii, row in df.iterrows(): - # logger.debug(f"Parsing this row for reagents: {row}") - if check_not_nan(row[5]): - # must be prefixed with 'lot_' to be recognized by gui - # regex below will remove 80% from 80% ethanol in the Wastewater kit. - output_key = re.sub(r"^\d{1,3}%\s?", "", row[0].lower().strip().replace(' ', '_')) - output_key = output_key.strip("_") - # output_var is the lot number - try: - output_var = convert_nans_to_nones(str(row[5].upper())) - except AttributeError: - logger.debug(f"Couldn't upperize {row[5]}, must be a number") - output_var = convert_nans_to_nones(str(row[5])) - if check_not_nan(row[7]): - try: - expiry = row[7].date() - except AttributeError: - expiry = date.today() - else: - expiry = date.today() - logger.debug(f"Expiry date for {output_key}: {expiry} of type {type(expiry)}") - # self.sub[f"lot_{output_key}"] = {'lot':output_var, 'exp':expiry} - # self.sub['reagents'].append(dict(type=output_key, lot=output_var, exp=expiry)) - reagent = PydReagent(type=output_key, lot=output_var, exp=expiry) - logger.debug(f"Here is the created reagent: {reagent}") - self.sub['reagents'].append(reagent) + # Args: + # df (pd.DataFrame): input sub dataframe + # """ + # # iterate through sub-df rows + # for ii, row in df.iterrows(): + # # logger.debug(f"Parsing this row for reagents: {row}") + # if check_not_nan(row[5]): + # # must be prefixed with 'lot_' to be recognized by gui + # # regex below will remove 80% from 80% ethanol in the Wastewater kit. + # output_key = re.sub(r"^\d{1,3}%\s?", "", row[0].lower().strip().replace(' ', '_')) + # output_key = output_key.strip("_") + # # output_var is the lot number + # try: + # output_var = convert_nans_to_nones(str(row[5].upper())) + # except AttributeError: + # logger.debug(f"Couldn't upperize {row[5]}, must be a number") + # output_var = convert_nans_to_nones(str(row[5])) + # if check_not_nan(row[7]): + # try: + # expiry = row[7].date() + # except AttributeError: + # expiry = date.today() + # else: + # expiry = date.today() + # logger.debug(f"Expiry date for {output_key}: {expiry} of type {type(expiry)}") + # # self.sub[f"lot_{output_key}"] = {'lot':output_var, 'exp':expiry} + # # self.sub['reagents'].append(dict(type=output_key, lot=output_var, exp=expiry)) + # reagent = PydReagent(type=output_key, lot=output_var, exp=expiry) + # logger.debug(f"Here is the created reagent: {reagent}") + # self.sub['reagents'].append(reagent) # parse submission sheet submission_info = self.parse_generic("WW Submissions (ENTER HERE)") # parse enrichment sheet @@ -274,41 +261,42 @@ class SheetParser(object): """ pulls info specific to wastewater_arctic submission type """ - self.sub['submission_type'] = dict(value=self.sub['submission_type'], parsed=True) - def parse_reagents(df:pd.DataFrame): - logger.debug(df) - for ii, row in df.iterrows(): - if check_not_nan(row[1]): - try: - output_key = re.sub(r"\(.+?\)", "", row[0].lower().strip().replace(' ', '_')) - except AttributeError: - continue - output_key = output_key.strip("_") - output_key = massage_common_reagents(output_key) - try: - output_var = convert_nans_to_nones(str(row[1].upper())) - except AttributeError: - logger.debug(f"Couldn't upperize {row[1]}, must be a number") - output_var = convert_nans_to_nones(str(row[1])) - logger.debug(f"Output variable is {output_var}") - logger.debug(f"Expiry date for imported reagent: {row[2]}") - if check_not_nan(row[2]): - try: - expiry = row[2].date() - except AttributeError as e: - try: - expiry = datetime.strptime(row[2], "%Y-%m-%d") - except TypeError as e: - expiry = datetime.fromordinal(datetime(1900, 1, 1).toordinal() + row[2] - 2) - except ValueError as e: - continue - else: - logger.debug(f"Date: {row[2]}") - expiry = date.today() - # self.sub['reagents'].append(dict(type=output_key, lot=output_var, exp=expiry)) - self.sub['reagents'].append(PydReagent(type=output_key, lot=output_var, exp=expiry)) - else: - continue + if isinstance(self.sub['submission_type'], str): + self.sub['submission_type'] = dict(value=self.sub['submission_type'], parsed=True) + # def parse_reagents(df:pd.DataFrame): + # logger.debug(df) + # for ii, row in df.iterrows(): + # if check_not_nan(row[1]): + # try: + # output_key = re.sub(r"\(.+?\)", "", row[0].lower().strip().replace(' ', '_')) + # except AttributeError: + # continue + # output_key = output_key.strip("_") + # output_key = massage_common_reagents(output_key) + # try: + # output_var = convert_nans_to_nones(str(row[1].upper())) + # except AttributeError: + # logger.debug(f"Couldn't upperize {row[1]}, must be a number") + # output_var = convert_nans_to_nones(str(row[1])) + # logger.debug(f"Output variable is {output_var}") + # logger.debug(f"Expiry date for imported reagent: {row[2]}") + # if check_not_nan(row[2]): + # try: + # expiry = row[2].date() + # except AttributeError as e: + # try: + # expiry = datetime.strptime(row[2], "%Y-%m-%d") + # except TypeError as e: + # expiry = datetime.fromordinal(datetime(1900, 1, 1).toordinal() + row[2] - 2) + # except ValueError as e: + # continue + # else: + # logger.debug(f"Date: {row[2]}") + # expiry = date.today() + # # self.sub['reagents'].append(dict(type=output_key, lot=output_var, exp=expiry)) + # self.sub['reagents'].append(PydReagent(type=output_key, lot=output_var, exp=expiry)) + # else: + # continue def massage_samples(df:pd.DataFrame, lookup_table:pd.DataFrame) -> pd.DataFrame: """ Takes sample info from Artic sheet format and converts to regular formate @@ -376,30 +364,30 @@ class SheetParser(object): sample_parse = getattr(sample_parser, f"parse_{self.sub['submission_type']['value'].lower()}_samples") self.sample_result, self.sub['samples'] = sample_parse() - def parse_reagents(self): - ext_kit = lookup_kittype_by_name(ctx=self.ctx, name=self.sub['extraction_kit']) - if ext_kit != None: - logger.debug(f"Querying extraction kit: {self.sub['submission_type']}") - reagent_map = ext_kit.construct_xl_map_for_use(use=self.sub['submission_type']['value']) - logger.debug(f"Reagent map: {pprint.pformat(reagent_map)}") - else: - raise AttributeError("No extraction kit found, unable to parse reagents") - for sheet in self.xl.sheet_names: - df = self.xl.parse(sheet) - relevant = {k:v for k,v in reagent_map.items() if sheet in reagent_map[k]['sheet']} - logger.debug(f"relevant map for {sheet}: {pprint.pformat(relevant)}") - if relevant == {}: - continue - for item in reagent_map: - try: - # role = item - name = df.iat[relevant[item]['name']['row']-2, relevant[item]['name']['column']-1] - lot = df.iat[relevant[item]['lot']['row']-2, relevant[item]['lot']['column']-1] - expiry = df.iat[relevant[item]['expiry']['row']-2, relevant[item]['expiry']['column']-1] - except (KeyError, IndexError): - continue - # self.sub['reagents'].append(dict(name=name, lot=lot, expiry=expiry, role=role)) - self.sub['reagents'].append(PydReagent(type=item, lot=lot, exp=expiry, name=name)) + # def parse_reagents(self): + # ext_kit = lookup_kittype_by_name(ctx=self.ctx, name=self.sub['extraction_kit']) + # if ext_kit != None: + # logger.debug(f"Querying extraction kit: {self.sub['submission_type']}") + # reagent_map = ext_kit.construct_xl_map_for_use(use=self.sub['submission_type']['value']) + # logger.debug(f"Reagent map: {pprint.pformat(reagent_map)}") + # else: + # raise AttributeError("No extraction kit found, unable to parse reagents") + # for sheet in self.xl.sheet_names: + # df = self.xl.parse(sheet) + # relevant = {k:v for k,v in reagent_map.items() if sheet in reagent_map[k]['sheet']} + # logger.debug(f"relevant map for {sheet}: {pprint.pformat(relevant)}") + # if relevant == {}: + # continue + # for item in relevant: + # try: + # # role = item + # name = df.iat[relevant[item]['name']['row']-2, relevant[item]['name']['column']-1] + # lot = df.iat[relevant[item]['lot']['row']-2, relevant[item]['lot']['column']-1] + # expiry = df.iat[relevant[item]['expiry']['row']-2, relevant[item]['expiry']['column']-1] + # except (KeyError, IndexError): + # continue + # # self.sub['reagents'].append(dict(name=name, lot=lot, expiry=expiry, role=role)) + # self.sub['reagents'].append(PydReagent(type=item, lot=lot, exp=expiry, name=name)) def import_kit_validation_check(self): @@ -420,7 +408,8 @@ class SheetParser(object): else: raise ValueError("Extraction kit needed.") else: - self.sub['extraction_kit'] = dict(value=self.sub['extraction_kit'], parsed=False) + if isinstance(self.sub['extraction_kit'], str): + self.sub['extraction_kit'] = dict(value=self.sub['extraction_kit'], parsed=False) # logger.debug(f"Here is the validated parser dictionary:\n\n{pprint.pformat(self.sub)}\n\n") # return parser_sub @@ -430,7 +419,8 @@ class SheetParser(object): Enforce that only allowed reagents get into the Pydantic Model """ allowed_reagents = [item.name for item in get_reagents_in_extkit(ctx=self.ctx, kit_name=self.sub['extraction_kit']['value'])] - self.sub['reagents'] = [reagent for reagent in self.sub['reagents'] if reagent.type in allowed_reagents] + logger.debug(f"List of reagents for comparison with allowed_reagents: {pprint.pformat(self.sub['reagents'])}") + self.sub['reagents'] = [reagent for reagent in self.sub['reagents'] if reagent['value'].type in allowed_reagents] def to_pydantic(self) -> PydSubmission: """ @@ -444,6 +434,96 @@ class SheetParser(object): delattr(psm, "filepath") return psm +class InfoParser(object): + + def __init__(self, ctx:Settings, xl:pd.ExcelFile, submission_type:str): + self.ctx = ctx + # self.submission_type = submission_type + # self.extraction_kit = extraction_kit + self.map = self.fetch_submission_info_map(submission_type=submission_type) + self.xl = xl + logger.debug(f"Info map for InfoParser: {pprint.pformat(self.map)}") + + def fetch_submission_info_map(self, submission_type:dict) -> dict: + logger.debug(f"Looking up submission type: {submission_type['value']}") + submission_type = lookup_submissiontype_by_name(ctx=self.ctx, type_name=submission_type['value']) + info_map = submission_type.info_map + try: + del info_map['samples'] + except KeyError: + pass + return info_map + + def parse_info(self) -> dict: + dicto = {} + for sheet in self.xl.sheet_names: + df = self.xl.parse(sheet, header=None) + relevant = {} + for k, v in self.map.items(): + if k == "samples": + continue + if sheet in self.map[k]['sheets']: + relevant[k] = v + # relevant = {k:v for k,v in self.map.items() if sheet in self.map[k]['sheets']} + logger.debug(f"relevant map for {sheet}: {pprint.pformat(relevant)}") + if relevant == {}: + continue + for item in relevant: + value = df.iat[relevant[item]['row']-1, relevant[item]['column']-1] + logger.debug(f"Setting {item} on {sheet} to {value}") + if check_not_nan(value): + try: + dicto[item] = dict(value=value, parsed=True) + except (KeyError, IndexError): + continue + else: + dicto[item] = dict(value=convert_nans_to_nones(value), parsed=False) + if "submitter_plate_num" not in dicto.keys(): + dicto['submitter_plate_num'] = dict(value=None, parsed=False) + return dicto + +class ReagentParser(object): + + def __init__(self, ctx:Settings, xl:pd.ExcelFile, submission_type:str, extraction_kit:str): + self.ctx = ctx + self.map = self.fetch_kit_info_map(extraction_kit=extraction_kit, submission_type=submission_type) + self.xl = xl + + def fetch_kit_info_map(self, extraction_kit:dict, submission_type:str): + kit = lookup_kittype_by_name(ctx=self.ctx, name=extraction_kit['value']) + if isinstance(submission_type, dict): + submission_type = submission_type['value'] + reagent_map = kit.construct_xl_map_for_use(submission_type.title()) + del reagent_map['info'] + return reagent_map + + def parse_reagents(self) -> list: + listo = [] + for sheet in self.xl.sheet_names: + df = self.xl.parse(sheet, header=None, dtype=object) + relevant = {k.strip():v for k,v in self.map.items() if sheet in self.map[k]['sheet']} + logger.debug(f"relevant map for {sheet}: {pprint.pformat(relevant)}") + if relevant == {}: + continue + for item in relevant: + logger.debug(f"Attempting to scrape: {item}") + try: + # role = item + name = df.iat[relevant[item]['name']['row']-1, relevant[item]['name']['column']-1] + lot = df.iat[relevant[item]['lot']['row']-1, relevant[item]['lot']['column']-1] + expiry = df.iat[relevant[item]['expiry']['row']-1, relevant[item]['expiry']['column']-1] + except (KeyError, IndexError): + listo.append(dict(value=PydReagent(type=item.strip(), lot=None, exp=None, name=None), parsed=False)) + continue + if check_not_nan(lot): + parsed = True + else: + parsed = False + # self.sub['reagents'].append(dict(name=name, lot=lot, expiry=expiry, role=role)) + logger.debug(f"Got lot for {item}-{name}: {lot} as {type(lot)}") + lot = str(lot) + listo.append(dict(value=PydReagent(type=item.strip(), lot=lot, exp=expiry, name=name), parsed=parsed)) + return listo class SampleParser(object): @@ -451,7 +531,7 @@ class SampleParser(object): object to pull data for samples in excel sheet and construct individual sample objects """ - def __init__(self, ctx:Settings, df:pd.DataFrame, elution_map:pd.DataFrame|None=None) -> None: + def __init__(self, ctx:Settings, xl:pd.ExcelFile, submission_type:str) -> None: """ convert sample sub-dataframe to dictionary of records @@ -460,12 +540,122 @@ class SampleParser(object): df (pd.DataFrame): input sample dataframe elution_map (pd.DataFrame | None, optional): optional map of elution plate. Defaults to None. """ + self.samples = [] self.ctx = ctx - self.samples = df.to_dict("records") - self.elution_map = elution_map + self.xl = xl + self.submission_type = submission_type + sample_info_map = self.fetch_sample_info_map(submission_type=submission_type) + self.plate_map = self.construct_plate_map(plate_map_location=sample_info_map['plate_map']) + self.lookup_table = self.construct_lookup_table(lookup_table_location=sample_info_map['lookup_table']) + self.excel_to_db_map = sample_info_map['xl_db_translation'] + self.create_basic_dictionaries_from_plate_map() + self.parse_lookup_table() + + def fetch_sample_info_map(self, submission_type:dict) -> dict: + logger.debug(f"Looking up submission type: {submission_type}") + submission_type = lookup_submissiontype_by_name(ctx=self.ctx, type_name=submission_type) + sample_info_map = submission_type.info_map['samples'] + return sample_info_map + + def construct_plate_map(self, plate_map_location:dict) -> pd.DataFrame: + df = self.xl.parse(plate_map_location['sheet'], header=None, dtype=object) + df = df.iloc[plate_map_location['start_row']-1:plate_map_location['end_row'], plate_map_location['start_column']-1:plate_map_location['end_column']] + # logger.debug(f"Input dataframe for plate map: {df}") + df = pd.DataFrame(df.values[1:], columns=df.iloc[0]) + df = df.set_index(df.columns[0]) + # logger.debug(f"Output dataframe for plate map: {df}") + return df + + def construct_lookup_table(self, lookup_table_location) -> pd.DataFrame: + df = self.xl.parse(lookup_table_location['sheet'], header=None, dtype=object) + df = df.iloc[lookup_table_location['start_row']-1:lookup_table_location['end_row']] + df = pd.DataFrame(df.values[1:], columns=df.iloc[0]) + df = df.reset_index(drop=True) + # logger.debug(f"Dataframe for lookup table: {df}") + return df + + def create_basic_dictionaries_from_plate_map(self): + new_df = self.plate_map.dropna(axis=1, how='all') + columns = new_df.columns.tolist() + for _, iii in new_df.iterrows(): + for c in columns: + # logger.debug(f"Checking sample {iii[c]}") + if check_not_nan(iii[c]): + id = iii[c] + logger.debug(f"Adding sample {iii[c]}") + try: + c = self.plate_map.columns.get_loc(c) + 1 + except Exception as e: + logger.error(f"Unable to get column index of {c} due to {e}") + self.samples.append(dict(submitter_id=id, row=row_keys[iii._name], column=c)) + + def parse_lookup_table(self): + def determine_if_date(input_str) -> str|date: + # logger.debug(f"Looks like we have a str: {input_str}") + regex = re.compile(r"\d{4}-?\d{2}-?\d{2}") + if bool(regex.search(input_str)): + try: + return parse(input_str) + except ParserError: + return None + else: + return input_str + for sample in self.samples: + addition = self.lookup_table[self.lookup_table.isin([sample['submitter_id']]).any(axis=1)].squeeze().to_dict() + for k,v in addition.items(): + # logger.debug(f"Checking {k} in lookup table.") + if check_not_nan(k) and isinstance(k, str): + if k.lower() not in sample: + k = k.replace(" ", "_").replace("#","num").lower() + # logger.debug(f"Adding {type(v)} - {k}, {v} to the lookuptable output dict") + match v: + case pd.Timestamp(): + sample[k] = v.date() + case str(): + sample[k] = determine_if_date(v) + case _: + sample[k] = v + logger.debug(f"Output sample dict: {sample}") + + def parse_samples(self) -> List[dict]: + result = None + new_samples = [] + for sample in self.samples: + translated_dict = {} + for k, v in sample.items(): + match v: + case dict(): + v = None + case float(): + v = convert_nans_to_nones(v) + case _: + v = v + try: + translated_dict[self.excel_to_db_map[k]] = v + except KeyError: + translated_dict[k] = convert_nans_to_nones(v) + # translated_dict['sample_type'] = f"{self.submission_type.replace(' ', '_').lower()}_sample" + translated_dict['sample_type'] = f"{self.submission_type} Sample" + # logger.debug(f"New sample dictionary going into object creation:\n{translated_dict}") + new_samples.append(self.generate_sample_object(translated_dict)) + return result, new_samples + + def generate_sample_object(self, input_dict) -> models.BasicSample: + # query = input_dict['sample_type'].replace('_sample', '').replace("_", " ").title().replace(" ", "") + query = input_dict['sample_type'].replace(" ", "") + database_obj = getattr(models, query) + instance = lookup_sample_by_submitter_id(ctx=self.ctx, submitter_id=input_dict['submitter_id']) + if instance == None: + instance = database_obj() + for k,v in input_dict.items(): + try: + setattr(instance, k, v) + except Exception as e: + logger.error(f"Failed to set {k} due to {type(e).__name__}: {e}") + return dict(sample=instance, row=input_dict['row'], column=input_dict['column']) - def parse_bacterial_culture_samples(self) -> Tuple[str|None, list[dict]]: + # def parse_bacterial_culture_samples(self) -> Tuple[str|None, list[dict]]: """ construct bacterial culture specific sample objects @@ -493,8 +683,7 @@ class SampleParser(object): new_list.append(dict(sample=instance, row=row, column=column)) return None, new_list - - def parse_wastewater_samples(self) -> Tuple[str|None, list[dict]]: + # def parse_wastewater_samples(self) -> Tuple[str|None, list[dict]]: """ construct wastewater specific sample objects @@ -590,7 +779,7 @@ class SampleParser(object): new_list.append(dict(sample=instance, row=row, column=column)) return return_val, new_list - def parse_wastewater_artic_samples(self) -> Tuple[str|None, list[WastewaterSample]]: + # def parse_wastewater_artic_samples(self) -> Tuple[str|None, list[WastewaterSample]]: """ The artic samples are the wastewater samples that are to be sequenced So we will need to lookup existing ww samples and append Artic well # and plate relation @@ -751,5 +940,4 @@ class PCRParser(object): self.samples.append(sample_obj) - - + diff --git a/src/submissions/backend/pydant/__init__.py b/src/submissions/backend/pydant/__init__.py index c1913b3..9f3a5cb 100644 --- a/src/submissions/backend/pydant/__init__.py +++ b/src/submissions/backend/pydant/__init__.py @@ -48,34 +48,65 @@ class PydReagent(BaseModel): if value != None: return convert_nans_to_nones(str(value)) return value + + @field_validator("name", mode="before") + @classmethod + def enforce_name(cls, value, values): + if value != None: + return convert_nans_to_nones(str(value)) + else: + return values.data['type'] class PydSubmission(BaseModel, extra=Extra.allow): ctx: Settings filepath: Path - submission_type: str|dict|None - submitter_plate_num: str|None - rsl_plate_num: str|dict|None - submitted_date: date|dict - submitting_lab: str|None - sample_count: int - extraction_kit: str|dict|None - technician: str|dict|None - reagents: List[PydReagent] = [] + submission_type: dict|None + submitter_plate_num: dict|None + rsl_plate_num: dict|None + submitted_date: dict|None + submitting_lab: dict|None + sample_count: dict|None + extraction_kit: dict|None + technician: dict|None + reagents: List[dict] = [] samples: List[Any] # missing_fields: List[str] = [] + @field_validator("submitter_plate_num") + @classmethod + def rescue_submitter_id(cls, value): + if value == None: + return dict(value=None, parsed=False) + return value + + @field_validator("submitter_plate_num") + @classmethod + def enforce_with_uuid(cls, value): + logger.debug(f"submitter plate id: {value}") + if value['value'] == None: + return dict(value=uuid.uuid4().hex.upper(), parsed=False) + else: + return value + @field_validator("submitted_date", mode="before") @classmethod + def rescue_date(cls, value): + if value == None: + return dict(value=date.today(), parsed=False) + return value + + @field_validator("submitted_date") + @classmethod def strip_datetime_string(cls, value): - if not check_not_nan(value): - value = date.today() - if isinstance(value, datetime): - return dict(value=value, parsed=True) - if isinstance(value, date): + if isinstance(value['value'], datetime): return value - string = re.sub(r"(_|-)\d$", "", value) + if isinstance(value['value'], date): + return value + if isinstance(value['value'], int): + return dict(value=datetime.fromordinal(datetime(1900, 1, 1).toordinal() + value['value'] - 2).date(), parsed=False) + string = re.sub(r"(_|-)\d$", "", value['value']) try: output = dict(value=parse(string).date(), parsed=False) except ParserError as e: @@ -85,31 +116,32 @@ class PydSubmission(BaseModel, extra=Extra.allow): except Exception as e: logger.error(f"Problem with parse fallback: {e}") return output - - @field_validator("submitter_plate_num") - @classmethod - def enforce_with_uuid(cls, value): - if value == None or value == "" or value == "None": - return uuid.uuid4().hex.upper() - else: - return value @field_validator("submitting_lab", mode="before") @classmethod - def transform_nan(cls, value): - return convert_nans_to_nones(value) + def rescue_submitting_lab(cls, value): + if value == None: + return dict(value=None, parsed=False) + return value @field_validator("rsl_plate_num", mode='before') @classmethod + def rescue_rsl_number(cls, value): + if value == None: + return dict(value=None, parsed=False) + return value + + @field_validator("rsl_plate_num") + @classmethod def rsl_from_file(cls, value, values): - logger.debug(f"RSL-plate initial value: {value}") - if isinstance(values.data['submission_type'], dict): - sub_type = values.data['submission_type']['value'] - elif isinstance(values.data['submission_type'], str): - sub_type = values.data['submission_type'] - if check_not_nan(value): - if lookup_submission_by_rsl_num(ctx=values.data['ctx'], rsl_num=value) == None: - return dict(value=value, parsed=True) + logger.debug(f"RSL-plate initial value: {value['value']}") + # if isinstance(values.data['submission_type'], dict): + # sub_type = values.data['submission_type']['value'] + # elif isinstance(values.data['submission_type'], str): + sub_type = values.data['submission_type']['value'] + if check_not_nan(value['value']): + if lookup_submission_by_rsl_num(ctx=values.data['ctx'], rsl_num=value['value']) == None: + return dict(value=value['value'], parsed=True) else: logger.warning(f"Submission number {value} already exists in DB, attempting salvage with filepath") output = RSLNamer(ctx=values.data['ctx'], instr=values.data['filepath'].__str__(), sub_type=sub_type).parsed_name @@ -120,58 +152,60 @@ class PydSubmission(BaseModel, extra=Extra.allow): @field_validator("technician", mode="before") @classmethod + def rescue_tech(cls, value): + if value == None: + return dict(value=None, parsed=False) + return value + + @field_validator("technician") + @classmethod def enforce_tech(cls, value): - if check_not_nan(value): - if isinstance(value, dict): - value['value'] = re.sub(r"\: \d", "", value['value']) - return value - else: - return dict(value=re.sub(r"\: \d", "", value), parsed=True) + if check_not_nan(value['value']): + value['value'] = re.sub(r"\: \d", "", value['value']) + return value else: - return dict(value="Unnamed", parsed=False) + return dict(value=convert_nans_to_nones(value['value']), parsed=False) return value - @field_validator("reagents") - @classmethod - def remove_atcc(cls, value): - return_val = [] - for reagent in value: - logger.debug(f"Pydantic reagent: {reagent}") - if reagent.type == None: - continue - else: - return_val.append(reagent) - return return_val + # @field_validator("reagents") + # @classmethod + # def remove_atcc(cls, value): + # return_val = [] + # for reagent in value: + # logger.debug(f"Pydantic reagent: {reagent}") + # if reagent['value'].type == None: + # continue + # else: + # return_val.append(reagent) + # return return_val @field_validator("sample_count", mode='before') @classmethod - def enforce_sample_count(cls, value): - if check_not_nan(value): - return int(value) - else: - return convert_nans_to_nones(value) + def rescue_sample_count(cls, value): + if value == None: + return dict(value=None, parsed=False) + return value @field_validator("extraction_kit", mode='before') @classmethod - def get_kit_if_none(cls, value): + def rescue_kit(cls, value): # from frontend.custom_widgets.pop_ups import KitSelector - if check_not_nan(value): - if isinstance(value, str): - return dict(value=value, parsed=True) - elif isinstance(value, dict): - return value - else: - raise ValueError(f"No extraction kit found.") + # if check_not_nan(value): + # if isinstance(value, str): + # return dict(value=value, parsed=True) + # elif isinstance(value, dict): + # return value + # else: + # raise ValueError(f"No extraction kit found.") + if value == None: + return dict(value=None, parsed=False) + return value - @field_validator("submission_type", mode='before') @classmethod def make_submission_type(cls, value, values): - if check_not_nan(value): - if isinstance(value, dict): - value['value'] = value['value'].title() - return value - elif isinstance(value, str): - return dict(value=value.title(), parsed=False) + if check_not_nan(value['value']): + value = value['value'].title() + return dict(value=value, parsed=True) else: return dict(value=RSLNamer(ctx=values.data['ctx'], instr=values.data['filepath'].__str__()).submission_type.title(), parsed=False) diff --git a/src/submissions/frontend/__init__.py b/src/submissions/frontend/__init__.py index 2a68924..0c4c25d 100644 --- a/src/submissions/frontend/__init__.py +++ b/src/submissions/frontend/__init__.py @@ -14,7 +14,8 @@ from pathlib import Path from backend.db import ( construct_reagent, get_all_Control_Types_names, get_all_available_modes, store_reagent ) -from .main_window_functions import * +# from .main_window_functions import * +from .all_window_functions import extract_form_info from tools import check_if_app from frontend.custom_widgets import SubmissionsSheet, AlertPop, AddReagentForm, KitAdder, ControlsDatePicker import logging @@ -27,6 +28,7 @@ logger.info("Hello, I am a logger") class App(QMainWindow): def __init__(self, ctx: dict = {}): + super().__init__() self.ctx = ctx # indicate version and connected database in title bar @@ -154,6 +156,7 @@ class App(QMainWindow): """ import submission from excel sheet into form """ + from .main_window_functions import import_submission_function self, result = import_submission_function(self) logger.debug(f"Import result: {result}") self.result_reporter(result) @@ -162,6 +165,7 @@ class App(QMainWindow): """ Removes all reagents from form before running kit integrity completion. """ + from .main_window_functions import kit_reload_function self, result = kit_reload_function(self) self.result_reporter(result) @@ -171,6 +175,7 @@ class App(QMainWindow): NOTE: this will not change self.reagents which should be fine since it's only used when looking up """ + from .main_window_functions import kit_integrity_completion_function self, result = kit_integrity_completion_function(self) self.result_reporter(result) @@ -178,10 +183,11 @@ class App(QMainWindow): """ Attempt to add sample to database when 'submit' button clicked """ + from .main_window_functions import submit_new_sample_function self, result = submit_new_sample_function(self) self.result_reporter(result) - def add_reagent(self, reagent_lot:str|None=None, reagent_type:str|None=None, expiry:date|None=None): + def add_reagent(self, reagent_lot:str|None=None, reagent_type:str|None=None, expiry:date|None=None, name:str|None=None): """ Action to create new reagent in DB. @@ -195,7 +201,7 @@ class App(QMainWindow): if isinstance(reagent_lot, bool): reagent_lot = "" # create form - dlg = AddReagentForm(ctx=self.ctx, reagent_lot=reagent_lot, reagent_type=reagent_type, expiry=expiry) + dlg = AddReagentForm(ctx=self.ctx, reagent_lot=reagent_lot, reagent_type=reagent_type, expiry=expiry, reagent_name=name) if dlg.exec(): # extract form info info = extract_form_info(dlg) @@ -212,6 +218,7 @@ class App(QMainWindow): """ Action to create a summary of sheet data per client """ + from .main_window_functions import generate_report_function self, result = generate_report_function(self) self.result_reporter(result) @@ -219,6 +226,7 @@ class App(QMainWindow): """ Constructs new kit from yaml and adds to DB. """ + from .main_window_functions import add_kit_function self, result = add_kit_function(self) self.result_reporter(result) @@ -226,6 +234,7 @@ class App(QMainWindow): """ Constructs new kit from yaml and adds to DB. """ + from .main_window_functions import add_org_function self, result = add_org_function(self) self.result_reporter(result) @@ -233,6 +242,7 @@ class App(QMainWindow): """ Lookup controls from database and send to chartmaker """ + from .main_window_functions import controls_getter_function self, result = controls_getter_function(self) self.result_reporter(result) @@ -240,6 +250,7 @@ class App(QMainWindow): """ Creates plotly charts for webview """ + from .main_window_functions import chart_maker_function self, result = chart_maker_function(self) self.result_reporter(result) @@ -247,6 +258,7 @@ class App(QMainWindow): """ Adds controls pulled from irida to relevant submissions """ + from .main_window_functions import link_controls_function self, result = link_controls_function(self) self.result_reporter(result) @@ -254,6 +266,7 @@ class App(QMainWindow): """ Links extraction logs from .csv files to relevant submissions. """ + from .main_window_functions import link_extractions_function self, result = link_extractions_function(self) self.result_reporter(result) @@ -261,6 +274,7 @@ class App(QMainWindow): """ Links PCR logs from .csv files to relevant submissions. """ + from .main_window_functions import link_pcr_function self, result = link_pcr_function(self) self.result_reporter(result) @@ -268,6 +282,7 @@ class App(QMainWindow): """ Imports results exported from Design and Analysis .eds files """ + from .main_window_functions import import_pcr_results_function self, result = import_pcr_results_function(self) self.result_reporter(result) diff --git a/src/submissions/frontend/custom_widgets/misc.py b/src/submissions/frontend/custom_widgets/misc.py index eca8921..47a1777 100644 --- a/src/submissions/frontend/custom_widgets/misc.py +++ b/src/submissions/frontend/custom_widgets/misc.py @@ -60,7 +60,7 @@ class AddReagentForm(QDialog): # widget to get reagent type info self.type_input = QComboBox() self.type_input.setObjectName('type') - self.type_input.addItems([item.replace("_", " ").title() for item in get_all_reagenttype_names(ctx=ctx)]) + self.type_input.addItems([item for item in get_all_reagenttype_names(ctx=ctx)]) logger.debug(f"Trying to find index of {reagent_type}") # convert input to user friendly string? try: @@ -311,3 +311,20 @@ class ImportReagent(QComboBox): logger.debug(f"New relevant reagents: {relevant_reagents}") self.setObjectName(f"lot_{reagent.type}") self.addItems(relevant_reagents) + +class ParsedQLabel(QLabel): + + def __init__(self, input_object, field_name, title:bool=True): + super().__init__() + try: + check = input_object['parsed'] + except: + return + if title: + output = field_name.replace('_', ' ').title() + else: + output = field_name.replace('_', ' ') + if check: + self.setText(f"Parsed {output}") + else: + self.setText(f"MISSING {output}") diff --git a/src/submissions/frontend/main_window_functions.py b/src/submissions/frontend/main_window_functions.py index b4f2a6b..b945370 100644 --- a/src/submissions/frontend/main_window_functions.py +++ b/src/submissions/frontend/main_window_functions.py @@ -34,7 +34,7 @@ from backend.pydant import PydReagent from tools import check_not_nan from .custom_widgets.pop_ups import AlertPop, KitSelector, QuestionAsker from .custom_widgets import ReportDatePicker -from .custom_widgets.misc import ImportReagent +from .custom_widgets.misc import ImportReagent, ParsedQLabel from .visualizations.control_charts import create_charts, construct_html logger = logging.getLogger(f"submissions.{__name__}") @@ -54,8 +54,9 @@ def import_submission_function(obj:QMainWindow) -> Tuple[QMainWindow, dict|None] logger.debug(obj.ctx) # initialize samples obj.samples = [] - obj.reagents = {} + obj.reagents = [] obj.missing_reagents = [] + obj.missing_info = [] # set file dialog fname = select_open_file(obj, file_extension="xlsx") @@ -72,7 +73,7 @@ def import_submission_function(obj:QMainWindow) -> Tuple[QMainWindow, dict|None] # prsr.sub = import_validation_check(ctx=obj.ctx, parser_sub=prsr.sub) # obj.column_count = prsr.column_count try: - logger.debug(f"Submission dictionary: {prsr.sub}") + logger.debug(f"Submission dictionary:\n{pprint.pformat(prsr.sub)}") pyd = prsr.to_pydantic() logger.debug(f"Pydantic result: \n\n{pprint.pformat(pyd)}\n\n") except Exception as e: @@ -85,7 +86,7 @@ def import_submission_function(obj:QMainWindow) -> Tuple[QMainWindow, dict|None] # if hasattr(sample, "elution_well"): # logger.debug(f"Sample from import: {sample.elution_well}") # I don't remember why this is here. - missing_info = [k for k,v in pyd if v == None] + obj.current_submission_type = pyd.submission_type['value'] # destroy any widgets from previous imports for item in obj.table_widget.formlayout.parentWidget().findChildren(QWidget): @@ -101,51 +102,55 @@ def import_submission_function(obj:QMainWindow) -> Tuple[QMainWindow, dict|None] if isinstance(value, dict) and field != 'ctx': logger.debug(f"The field {field} is a dictionary: {value}") if not value['parsed']: - missing_info.append(field) - value = value['value'] + obj.missing_info.append(field) + label = ParsedQLabel(value, field) match field: case 'submitting_lab': # create label - label = QLabel(field.replace("_", " ").title()) - logger.debug(f"{field}: {value}") + # label = QLabel(field.replace("_", " ").title()) + # label = ParsedQLabel(value, field) + logger.debug(f"{field}: {value['value']}") # create combobox to hold looked up submitting labs add_widget = QComboBox() labs = [item.__str__() for item in lookup_all_orgs(ctx=obj.ctx)] # try to set closest match to top of list try: - labs = difflib.get_close_matches(value, labs, len(labs), 0) + labs = difflib.get_close_matches(value['value'], labs, len(labs), 0) except (TypeError, ValueError): pass # set combobox values to lookedup values add_widget.addItems(labs) case 'extraction_kit': # create label - label = QLabel(field.replace("_", " ").title()) + # label = QLabel(field.replace("_", " ").title()) + # if extraction kit not available, all other values fail - if not check_not_nan(value): + if not check_not_nan(value['value']): msg = AlertPop(message="Make sure to check your extraction kit in the excel sheet!", status="warning") msg.exec() # create combobox to hold looked up kits add_widget = QComboBox() # lookup existing kits by 'submission_type' decided on by sheetparser - uses = [item.__str__() for item in lookup_kittype_by_use(ctx=obj.ctx, used_by=pyd.submission_type['value'].lower())] - logger.debug(f"Kits received for {pyd.submission_type}: {uses}") - if check_not_nan(value): - logger.debug(f"The extraction kit in parser was: {value}") - uses.insert(0, uses.pop(uses.index(value))) - obj.ext_kit = value + # uses = [item.__str__() for item in lookup_kittype_by_use(ctx=obj.ctx, used_by=pyd.submission_type['value'].lower())] + logger.debug(f"Looking up kits used for {pyd.submission_type['value']}") + uses = [item.__str__() for item in lookup_kittype_by_use(ctx=obj.ctx, used_by=pyd.submission_type['value'])] + logger.debug(f"Kits received for {pyd.submission_type['value']}: {uses}") + if check_not_nan(value['value']): + logger.debug(f"The extraction kit in parser was: {value['value']}") + uses.insert(0, uses.pop(uses.index(value['value']))) + obj.ext_kit = value['value'] else: logger.error(f"Couldn't find {prsr.sub['extraction_kit']}") obj.ext_kit = uses[0] add_widget.addItems(uses) case 'submitted_date': # create label - label = QLabel(field.replace("_", " ").title()) + # label = QLabel(field.replace("_", " ").title()) # uses base calendar add_widget = QDateEdit(calendarPopup=True) # sets submitted date based on date found in excel sheet try: - add_widget.setDate(value) + add_widget.setDate(value['value']) # if not found, use today except: add_widget.setDate(date.today()) @@ -160,19 +165,28 @@ def import_submission_function(obj:QMainWindow) -> Tuple[QMainWindow, dict|None] for reagent in value: # create label # reg_label = QLabel(reagent['type'].replace("_", " ").title()) - try: - reg_label = QLabel(reagent.type.replace("_", " ").title()) - except AttributeError: + reg_label = ParsedQLabel(reagent, reagent['value'].type, title=False) + if reagent['parsed']: + # try: + # reg_label = QLabel(f"Parsed Lot: {reagent['value'].type}") + obj.reagents.append(reagent['value']) + # except AttributeError: + # continue + else: + # try: + # reg_label = QLabel(f"MISSING Lot: {reagent['value'].type}") + obj.missing_reagents.append(reagent['value'].type) continue + # except AttributeError: + # continue # reg_label.setObjectName(f"lot_{reagent['type']}_label") - reg_label.setObjectName(f"lot_{reagent.type}_label") + reg_label.setObjectName(f"lot_{reagent['value'].type}_label") # create reagent choice widget - add_widget = ImportReagent(ctx=obj.ctx, reagent=reagent) - add_widget.setObjectName(f"lot_{reagent.type}") + add_widget = ImportReagent(ctx=obj.ctx, reagent=reagent['value']) + add_widget.setObjectName(f"lot_{reagent['value'].type}") logger.debug(f"Widget name set to: {add_widget.objectName()}") obj.table_widget.formlayout.addWidget(reg_label) obj.table_widget.formlayout.addWidget(add_widget) - obj.reagents[reagent.type] = reagent continue # case "rsl_plate_num": # label = QLabel(field.replace("_", " ").title()) @@ -181,10 +195,10 @@ def import_submission_function(obj:QMainWindow) -> Tuple[QMainWindow, dict|None] # add_widget.setText(str(value['value']).replace("_", " ")) case _: # anything else gets added in as a line edit - label = QLabel(field.replace("_", " ").title()) + # label = QLabel(field.replace("_", " ").title()) add_widget = QLineEdit() - logger.debug(f"Setting widget text to {str(value).replace('_', ' ')}") - add_widget.setText(str(value).replace("_", " ")) + logger.debug(f"Setting widget text to {str(value['value']).replace('_', ' ')}") + add_widget.setText(str(value['value']).replace("_", " ")) try: add_widget.setObjectName(field) logger.debug(f"Widget name set to: {add_widget.objectName()}") @@ -195,7 +209,7 @@ def import_submission_function(obj:QMainWindow) -> Tuple[QMainWindow, dict|None] # compare obj.reagents with expected reagents in kit if hasattr(obj, 'ext_kit'): obj.kit_integrity_completion() - obj.missing_reagents = obj.missing_reagents + missing_info + # obj.missing_reagents = obj.missing_reagents + missing_info logger.debug(f"Imported reagents: {obj.reagents}") if prsr.sample_result != None: msg = AlertPop(message=prsr.sample_result, status="WARNING") @@ -247,21 +261,24 @@ def kit_integrity_completion_function(obj:QMainWindow) -> Tuple[QMainWindow, dic obj.ext_kit = kit_widget.currentText() logger.debug(f"Checking integrity of {obj.ext_kit}") # get the kit from database using current kit info - kit = lookup_kittype_by_name(ctx=obj.ctx, name=obj.ext_kit) + # kit = lookup_kittype_by_name(ctx=obj.ctx, name=obj.ext_kit) # get all reagents stored in the QWindow object - reagents_to_lookup = [item.replace("lot_", "") for item in obj.reagents] - logger.debug(f"Reagents for lookup for {kit.name}: {reagents_to_lookup}") + # reagents_to_lookup = [item.name for item in obj.missing_reagents] + # logger.debug(f"Reagents for lookup for {kit.name}: {reagents_to_lookup}") # make sure kit contains all necessary info - kit_integrity = check_kit_integrity(kit, reagents_to_lookup) + # kit_integrity = check_kit_integrity(kit, reagents_to_lookup) # if kit integrity comes back with an error, make widgets with missing reagents using default info - if kit_integrity != None: - result = dict(message=kit_integrity['message'], status="Warning") - obj.missing_reagents = kit_integrity['missing'] - for item in kit_integrity['missing']: - obj.table_widget.formlayout.addWidget(QLabel(f"Lot {item.replace('_', ' ').title()}")) - reagent = dict(type=item, lot=None, exp=None, name=None) - add_widget = ImportReagent(ctx=obj.ctx, reagent=PydReagent(**reagent))#item=item) - obj.table_widget.formlayout.addWidget(add_widget) + # if kit_integrity != None: + # result = dict(message=kit_integrity['message'], status="Warning") + # obj.missing_reagents = kit_integrity['missing'] + # for item in kit_integrity['missing']: + if len(obj.missing_reagents) > 0: + result = dict(message=f"The submission you are importing is missing some reagents expected by the kit.\n\nIt looks like you are missing: {[item.upper() for item in obj.missing_reagents]}\n\nAlternatively, you may have set the wrong extraction kit.\n\nThe program will populate lists using existing reagents.\n\nPlease make sure you check the lots carefully!", status="Warning") + for item in obj.missing_reagents: + obj.table_widget.formlayout.addWidget(ParsedQLabel({'parsed':False}, item, title=False)) + reagent = dict(type=item, lot=None, exp=None, name=None) + add_widget = ImportReagent(ctx=obj.ctx, reagent=PydReagent(**reagent))#item=item) + obj.table_widget.formlayout.addWidget(add_widget) submit_btn = QPushButton("Submit") submit_btn.setObjectName("lot_submit_btn") obj.table_widget.formlayout.addWidget(submit_btn) @@ -297,11 +314,13 @@ def submit_new_sample_function(obj:QMainWindow) -> Tuple[QMainWindow, dict]: # if reagent not found offer to add to database if wanted_reagent == None: r_lot = reagents[reagent] - dlg = QuestionAsker(title=f"Add {r_lot}?", message=f"Couldn't find reagent type {reagent.replace('_', ' ').title().strip('Lot')}: {r_lot} in the database.\n\nWould you like to add it?") + dlg = QuestionAsker(title=f"Add {r_lot}?", message=f"Couldn't find reagent type {reagent.strip('Lot')}: {r_lot} in the database.\n\nWould you like to add it?") if dlg.exec(): - logger.debug(f"checking reagent: {reagent} in obj.reagents. Result: {obj.reagents[reagent]}") - expiry_date = obj.reagents[reagent].exp - wanted_reagent = obj.add_reagent(reagent_lot=r_lot, reagent_type=reagent.replace("lot_", ""), expiry=expiry_date) + logger.debug(f"Looking through {obj.reagents} for reagent {reagent}") + picked_reagent = [item for item in obj.reagents if item.type == reagent][0] + logger.debug(f"checking reagent: {reagent} in obj.reagents. Result: {picked_reagent}") + expiry_date = picked_reagent.exp + wanted_reagent = obj.add_reagent(reagent_lot=r_lot, reagent_type=reagent.replace("lot_", ""), expiry=expiry_date, name=picked_reagent.name) else: # In this case we will have an empty reagent and the submission will fail kit integrity check logger.debug("Will not add reagent.") @@ -357,10 +376,10 @@ def submit_new_sample_function(obj:QMainWindow) -> Tuple[QMainWindow, dict]: logger.debug(f"Extraction kit map:\n\n{extraction_kit.used_for[obj.current_submission_type.replace('_', ' ')]}") # TODO replace below with function in KitType object. Update Kittype associations. # excel_map = extraction_kit.used_for[obj.current_submission_type.replace('_', ' ')] - excel_map = extraction_kit.construct_xl_map_for_use(obj.current_submission_type.replace('_', ' ').title()) - excel_map.update(extraction_kit.used_for[obj.current_submission_type.replace('_', ' ').title()]) + excel_map = extraction_kit.construct_xl_map_for_use(obj.current_submission_type) + # excel_map.update(extraction_kit.used_for[obj.current_submission_type.replace('_', ' ').title()]) input_reagents = [item.to_reagent_dict() for item in parsed_reagents] - autofill_excel(obj=obj, xl_map=excel_map, reagents=input_reagents, missing_reagents=obj.missing_reagents, info=info) + autofill_excel(obj=obj, xl_map=excel_map, reagents=input_reagents, missing_reagents=obj.missing_reagents, info=info, missing_info=obj.missing_info) if hasattr(obj, 'csv'): dlg = QuestionAsker("Export CSV?", "Would you like to export the csv file?") if dlg.exec(): @@ -827,29 +846,31 @@ def import_pcr_results_function(obj:QMainWindow) -> Tuple[QMainWindow, dict]: result = dict(message=f"We added PCR info to {sub.rsl_plate_num}.", status='information') return obj, result -def autofill_excel(obj:QMainWindow, xl_map:dict, reagents:List[dict], missing_reagents:List[str], info:dict): +def autofill_excel(obj:QMainWindow, xl_map:dict, reagents:List[dict], missing_reagents:List[str], info:dict, missing_info:List[str]): """ Automatically fills in excel cells with submission info. Args: obj (QMainWindow): Original main app window - xl_map (dict): Map of where each reagent goes in the excel workbook. - reagents (List[dict]): All reagents used in the kit. + xl_map (dict): Map of where each item goes in the excel workbook. + reagents (List[dict]): All reagents placed in the submission form. missing_reagents (List[str]): Reagents that are required for the kit that were not present. info (dict): Dictionary of misc info from submission + missing_info (List[str]): Plate info missing from the excel sheet. """ # logger.debug(reagents) logger.debug(f"Here is the info dict coming in:\n{pprint.pformat(info)}") logger.debug(f"Here are the missing reagents:\n{missing_reagents}") + logger.debug(f"Here are the missing info:\n{missing_info}") # pare down the xl map to only the missing data. relevant_map = {k:v for k,v in xl_map.items() if k in missing_reagents} # pare down reagents to only what's missing relevant_reagents = [item for item in reagents if item['type'] in missing_reagents] # hacky manipulation of submission type so it looks better. - info['submission_type'] = info['submission_type'].replace("_", " ").title() + # info['submission_type'] = info['submission_type'].replace("_", " ").title() # pare down info to just what's missing - relevant_info = {k:v for k,v in info.items() if k in missing_reagents} + relevant_info = {k:v for k,v in info.items() if k in missing_info} logger.debug(f"Here is the relevant info: {pprint.pformat(relevant_info)}") # construct new objects to put into excel sheets: new_reagents = [] diff --git a/src/submissions/tools/__init__.py b/src/submissions/tools/__init__.py index 2dd7d05..f733cb9 100644 --- a/src/submissions/tools/__init__.py +++ b/src/submissions/tools/__init__.py @@ -80,9 +80,9 @@ def convert_nans_to_nones(input_str) -> str|None: Returns: str: _description_ """ - if not check_not_nan(input_str): - return None - return input_str + if check_not_nan(input_str): + return input_str + return None def check_is_power_user(ctx:dict) -> bool: """ @@ -172,7 +172,7 @@ class RSLNamer(object): self.submission_type = sub_type self.retrieve_rsl_number(in_str=instr) if self.submission_type != None: - parser = getattr(self, f"enforce_{self.submission_type.lower()}") + parser = getattr(self, f"enforce_{self.submission_type.replace(' ', '_').lower()}") parser() self.parsed_name = self.parsed_name.replace("_", "-")