diff --git a/TODO.md b/TODO.md index b5510fd..33c910a 100644 --- a/TODO.md +++ b/TODO.md @@ -1,3 +1,7 @@ +- [ ] Make reporting better. +- [ ] Build master query method? + - Obviously there will need to be extensions, but I feel the attr method I have in Submissions could work. +- [x] Fix Artic RSLNamer - [x] Put "Not applicable" reagents in to_dict() method. - Currently in to_pydantic(). - [x] Critical: Convert Json lits to dicts so I can have them update properly without using crashy Sqlalchemy-json diff --git a/src/submissions/backend/db/models/__init__.py b/src/submissions/backend/db/models/__init__.py index 21b3418..9be93dc 100644 --- a/src/submissions/backend/db/models/__init__.py +++ b/src/submissions/backend/db/models/__init__.py @@ -2,8 +2,10 @@ Contains all models for sqlalchemy ''' import sys -from sqlalchemy.orm import DeclarativeMeta, declarative_base, Query +from sqlalchemy.orm import DeclarativeMeta, declarative_base, Query, Session from sqlalchemy.ext.declarative import declared_attr +from typing import Any, List +from pathlib import Path # Load testing environment if 'pytest' in sys.modules: from pathlib import Path @@ -11,28 +13,32 @@ if 'pytest' in sys.modules: Base: DeclarativeMeta = declarative_base() + class BaseClass(Base): """ Abstract class to pass ctx values to all SQLAlchemy objects. - - Args: - Base (DeclarativeMeta): Declarative base for metadata. """ - __abstract__ = True + __abstract__ = True #: Will not be added to DB - __table_args__ = {'extend_existing': True} + __table_args__ = {'extend_existing': True} #: Will only add new columns @declared_attr - def __tablename__(cls): + def __tablename__(cls) -> str: """ - Set tablename to lowercase class name + Sets table name to lower case class name. + + Returns: + str: lower case class name """ return f"_{cls.__name__.lower()}" @declared_attr - def __database_session__(cls): + def __database_session__(cls) -> Session: """ - Pull db session from ctx + Pull db session from ctx to be used in operations + + Returns: + Session: DB session from ctx settings. """ if not 'pytest' in sys.modules: from tools import ctx @@ -41,9 +47,12 @@ class BaseClass(Base): return ctx.database_session @declared_attr - def __directory_path__(cls): + def __directory_path__(cls) -> Path: """ - Pull submission directory from ctx + Pull directory path from ctx to be used in operations. + + Returns: + Path: Location of the Submissions directory in Settings object """ if not 'pytest' in sys.modules: from tools import ctx @@ -52,27 +61,31 @@ class BaseClass(Base): return ctx.directory_path @declared_attr - def __backup_path__(cls): + def __backup_path__(cls) -> Path: """ - Pull backup directory from ctx + Pull backup directory path from ctx to be used in operations. + + Returns: + Path: Location of the Submissions backup directory in Settings object """ if not 'pytest' in sys.modules: from tools import ctx else: from test_settings import ctx return ctx.backup_path - - def query_return(query:Query, limit:int=0): + + @classmethod + def execute_query(cls, query: Query, limit: int = 0) -> Any | List[Any]: """ Execute sqlalchemy query. Args: - query (Query): Query object - limit (int, optional): Maximum number of results to return (0 = all). Defaults to 0. + query (Query): input query object + limit (int): Maximum number of results. (0 = all) Returns: - _type_: Query result. - """ + Any | List[Any]: Single result if limit = 1 or List if other. + """ with query.session.no_autoflush: match limit: case 0: @@ -81,11 +94,11 @@ class BaseClass(Base): return query.first() case _: return query.limit(limit).all() - + def save(self): """ Add the object to the database and commit - """ + """ # logger.debug(f"Saving object: {pformat(self.__dict__)}") try: self.__database_session__.add(self) @@ -94,6 +107,7 @@ class BaseClass(Base): logger.critical(f"Problem saving object: {e}") self.__database_session__.rollback() + from .controls import * # import order must go: orgs, kit, subs due to circular import issues from .organizations import * diff --git a/src/submissions/backend/db/models/controls.py b/src/submissions/backend/db/models/controls.py index 78a43d9..f5afca0 100644 --- a/src/submissions/backend/db/models/controls.py +++ b/src/submissions/backend/db/models/controls.py @@ -1,59 +1,59 @@ -''' +""" All control related models. -''' +""" from __future__ import annotations from sqlalchemy import Column, String, TIMESTAMP, JSON, INTEGER, ForeignKey from sqlalchemy.orm import relationship, Query -import logging, re, sys +import logging, re from operator import itemgetter from . import BaseClass from tools import setup_lookup from datetime import date, datetime from typing import List from dateutil.parser import parse - +from re import Pattern logger = logging.getLogger(f"submissions.{__name__}") + class ControlType(BaseClass): """ Base class of a control archetype. - """ - - id = Column(INTEGER, primary_key=True) #: primary key - name = Column(String(255), unique=True) #: controltype name (e.g. MCS) - targets = Column(JSON) #: organisms checked for - instances = relationship("Control", back_populates="controltype") #: control samples created of this type. + """ + id = Column(INTEGER, primary_key=True) #: primary key + name = Column(String(255), unique=True) #: controltype name (e.g. MCS) + targets = Column(JSON) #: organisms checked for + instances = relationship("Control", back_populates="controltype") #: control samples created of this type. def __repr__(self) -> str: return f"" @classmethod @setup_lookup - def query(cls, - name:str=None, - limit:int=0 - ) -> ControlType|List[ControlType]: + def query(cls, + name: str = None, + limit: int = 0 + ) -> ControlType | List[ControlType]: """ Lookup control archetypes in the database Args: - name (str, optional): Control type name (limits results to 1). Defaults to None. - limit (int, optional): Maximum number of results to return. Defaults to 0. + name (str, optional): Name of the desired controltype. Defaults to None. + limit (int, optional): Maximum number of results to return (0 = all). Defaults to 0. Returns: - models.ControlType|List[models.ControlType]: ControlType(s) of interest. - """ + ControlType | List[ControlType]: Single result if the limit = 1, else a list. + """ query = cls.__database_session__.query(cls) match name: case str(): - query = query.filter(cls.name==name) + query = query.filter(cls.name == name) limit = 1 case _: pass - return cls.query_return(query=query, limit=limit) - - def get_subtypes(self, mode:str) -> List[str]: + return cls.execute_query(query=query, limit=limit) + + def get_subtypes(self, mode: str) -> List[str]: """ Get subtypes associated with this controltype @@ -62,56 +62,68 @@ class ControlType(BaseClass): Returns: List[str]: list of subtypes available - """ + """ # Get first instance since all should have same subtypes - # outs = self.instances[0] # Get mode of instance - # jsoner = json.loads(getattr(outs, mode)) jsoner = getattr(self.instances[0], mode) - logger.debug(f"JSON out: {jsoner.keys()}") + # logger.debug(f"JSON out: {jsoner.keys()}") try: # Pick genera (all should have same subtypes) genera = list(jsoner.keys())[0] except IndexError: return [] + # remove items that don't have relevant data subtypes = [item for item in jsoner[genera] if "_hashes" not in item and "_ratio" not in item] return subtypes - + @classmethod - def get_positive_control_types(cls): + def get_positive_control_types(cls) -> List[ControlType]: + """ + Gets list of Control types if they have targets + + Returns: + List[ControlType]: Control types that have targets + """ return [item for item in cls.query() if item.targets != []] - + @classmethod - def build_positive_regex(cls): + def build_positive_regex(cls) -> Pattern: + """ + Creates a re.Pattern that will look for positive control types + + Returns: + Pattern: Constructed pattern + """ strings = list(set([item.name.split("-")[0] for item in cls.get_positive_control_types()])) return re.compile(rf"(^{'|^'.join(strings)})-.*", flags=re.IGNORECASE) + class Control(BaseClass): """ Base class of a control sample. - """ - - id = Column(INTEGER, primary_key=True) #: primary key - parent_id = Column(String, ForeignKey("_controltype.id", name="fk_control_parent_id")) #: primary key of control type - controltype = relationship("ControlType", back_populates="instances", foreign_keys=[parent_id]) #: reference to parent control type - name = Column(String(255), unique=True) #: Sample ID - submitted_date = Column(TIMESTAMP) #: Date submitted to Robotics - contains = Column(JSON) #: unstructured hashes in contains.tsv for each organism - matches = Column(JSON) #: unstructured hashes in matches.tsv for each organism - kraken = Column(JSON) #: unstructured output from kraken_report - submission_id = Column(INTEGER, ForeignKey("_basicsubmission.id")) #: parent submission id - submission = relationship("BacterialCulture", back_populates="controls", foreign_keys=[submission_id]) #: parent submission - refseq_version = Column(String(16)) #: version of refseq used in fastq parsing - kraken2_version = Column(String(16)) #: version of kraken2 used in fastq parsing - kraken2_db_version = Column(String(32)) #: folder name of kraken2 db - sample = relationship("BacterialCultureSample", back_populates="control") #: This control's submission sample - sample_id = Column(INTEGER, ForeignKey("_basicsample.id", ondelete="SET NULL", name="cont_BCS_id")) #: sample id key + """ + + id = Column(INTEGER, primary_key=True) #: primary key + parent_id = Column(String, + ForeignKey("_controltype.id", name="fk_control_parent_id")) #: primary key of control type + controltype = relationship("ControlType", back_populates="instances", + foreign_keys=[parent_id]) #: reference to parent control type + name = Column(String(255), unique=True) #: Sample ID + submitted_date = Column(TIMESTAMP) #: Date submitted to Robotics + contains = Column(JSON) #: unstructured hashes in contains.tsv for each organism + matches = Column(JSON) #: unstructured hashes in matches.tsv for each organism + kraken = Column(JSON) #: unstructured output from kraken_report + submission_id = Column(INTEGER, ForeignKey("_basicsubmission.id")) #: parent submission id + submission = relationship("BacterialCulture", back_populates="controls", + foreign_keys=[submission_id]) #: parent submission + refseq_version = Column(String(16)) #: version of refseq used in fastq parsing + kraken2_version = Column(String(16)) #: version of kraken2 used in fastq parsing + kraken2_db_version = Column(String(32)) #: folder name of kraken2 db + sample = relationship("BacterialCultureSample", back_populates="control") #: This control's submission sample + sample_id = Column(INTEGER, + ForeignKey("_basicsample.id", ondelete="SET NULL", name="cont_BCS_id")) #: sample id key def __repr__(self) -> str: - """ - Returns: - str: Representation of self - """ return f"" def to_sub_dict(self) -> dict: @@ -120,7 +132,7 @@ class Control(BaseClass): Returns: dict: output dictionary containing: Name, Type, Targets, Top Kraken results - """ + """ # logger.debug("loading json string into dict") try: # kraken = json.loads(self.kraken) @@ -133,7 +145,8 @@ class Control(BaseClass): for item in kraken: # logger.debug("calculating kraken percent (overwrites what's already been scraped)") kraken_percent = kraken[item]['kraken_count'] / kraken_cnt_total - new_kraken.append({'name': item, 'kraken_count':kraken[item]['kraken_count'], 'kraken_percent':"{0:.0%}".format(kraken_percent)}) + new_kraken.append({'name': item, 'kraken_count': kraken[item]['kraken_count'], + 'kraken_percent': "{0:.0%}".format(kraken_percent)}) new_kraken = sorted(new_kraken, key=itemgetter('kraken_count'), reverse=True) # logger.debug("setting targets") if self.controltype.targets == []: @@ -142,14 +155,14 @@ class Control(BaseClass): targets = self.controltype.targets # logger.debug("constructing output dictionary") output = { - "name" : self.name, - "type" : self.controltype.name, - "targets" : ", ".join(targets), - "kraken" : new_kraken[0:5] + "name": self.name, + "type": self.controltype.name, + "targets": ", ".join(targets), + "kraken": new_kraken[0:5] } return output - def convert_by_mode(self, mode:str) -> list[dict]: + def convert_by_mode(self, mode: str) -> list[dict]: """ split this instance into analysis types for controls graphs @@ -158,7 +171,7 @@ class Control(BaseClass): Returns: list[dict]: list of records - """ + """ output = [] # logger.debug("load json string for mode (i.e. contains, matches, kraken2)") try: @@ -191,7 +204,7 @@ class Control(BaseClass): Returns: List[str]: List of control mode names. - """ + """ try: # logger.debug("Creating a list of JSON columns in _controls table") cols = [item.name for item in list(cls.__table__.columns) if isinstance(item.type, JSON)] @@ -202,13 +215,13 @@ class Control(BaseClass): @classmethod @setup_lookup - def query(cls, - control_type:ControlType|str|None=None, - start_date:date|str|int|None=None, - end_date:date|str|int|None=None, - control_name:str|None=None, - limit:int=0 - ) -> Control|List[Control]: + def query(cls, + control_type: ControlType | str | None = None, + start_date: date | str | int | None = None, + end_date: date | str | int | None = None, + control_name: str | None = None, + limit: int = 0 + ) -> Control | List[Control]: """ Lookup control objects in the database based on a number of parameters. @@ -221,16 +234,16 @@ class Control(BaseClass): Returns: models.Control|List[models.Control]: Control object of interest. - """ + """ query: Query = cls.__database_session__.query(cls) # by control type match control_type: case ControlType(): # logger.debug(f"Looking up control by control type: {control_type}") - query = query.filter(cls.controltype==control_type) + query = query.filter(cls.controltype == control_type) case str(): # logger.debug(f"Looking up control by control type: {control_type}") - query = query.join(ControlType).filter(ControlType.name==control_type) + query = query.join(ControlType).filter(ControlType.name == control_type) case _: pass # by date range @@ -247,7 +260,8 @@ class Control(BaseClass): start_date = start_date.strftime("%Y-%m-%d") case int(): # logger.debug(f"Lookup control by ordinal start date {start_date}") - start_date = datetime.fromordinal(datetime(1900, 1, 1).toordinal() + start_date - 2).date().strftime("%Y-%m-%d") + start_date = datetime.fromordinal( + datetime(1900, 1, 1).toordinal() + start_date - 2).date().strftime("%Y-%m-%d") case _: # logger.debug(f"Lookup control with parsed start date {start_date}") start_date = parse(start_date).strftime("%Y-%m-%d") @@ -257,7 +271,8 @@ class Control(BaseClass): end_date = end_date.strftime("%Y-%m-%d") case int(): # logger.debug(f"Lookup control by ordinal end date {end_date}") - end_date = datetime.fromordinal(datetime(1900, 1, 1).toordinal() + end_date - 2).date().strftime("%Y-%m-%d") + end_date = datetime.fromordinal(datetime(1900, 1, 1).toordinal() + end_date - 2).date().strftime( + "%Y-%m-%d") case _: # logger.debug(f"Lookup control with parsed end date {end_date}") end_date = parse(end_date).strftime("%Y-%m-%d") @@ -270,5 +285,4 @@ class Control(BaseClass): limit = 1 case _: pass - return cls.query_return(query=query, limit=limit) - \ No newline at end of file + return cls.execute_query(query=query, limit=limit) diff --git a/src/submissions/backend/db/models/kits.py b/src/submissions/backend/db/models/kits.py index e479224..4b54fb9 100644 --- a/src/submissions/backend/db/models/kits.py +++ b/src/submissions/backend/db/models/kits.py @@ -214,7 +214,7 @@ class KitType(BaseClass): limit = 1 case _: pass - return cls.query_return(query=query, limit=limit) + return cls.execute_query(query=query, limit=limit) @check_authorization def save(self): @@ -303,7 +303,7 @@ class ReagentType(BaseClass): limit = 1 case _: pass - return cls.query_return(query=query, limit=limit) + return cls.execute_query(query=query, limit=limit) def to_pydantic(self) -> "PydReagent": """ @@ -464,7 +464,7 @@ class Reagent(BaseClass): limit = 1 case _: pass - return cls.query_return(query=query, limit=limit) + return cls.execute_query(query=query, limit=limit) class Discount(BaseClass): """ @@ -533,7 +533,7 @@ class Discount(BaseClass): case _: # raise ValueError(f"Invalid value for kit type: {kit_type}") pass - return cls.query_return(query=query) + return cls.execute_query(query=query) @check_authorization def save(self): @@ -702,7 +702,7 @@ class SubmissionType(BaseClass): query = query.filter(cls.info_map.op('->')(key)!=None) case _: pass - return cls.query_return(query=query, limit=limit) + return cls.execute_query(query=query, limit=limit) @check_authorization def save(self): @@ -781,7 +781,7 @@ class SubmissionTypeKitTypeAssociation(BaseClass): # logger.debug(f"Looking up {cls.__name__} by id {kit_type}") query = query.join(KitType).filter(KitType.id==kit_type) limit = query.count() - return cls.query_return(query=query, limit=limit) + return cls.execute_query(query=query, limit=limit) class KitTypeReagentTypeAssociation(BaseClass): """ @@ -889,7 +889,7 @@ class KitTypeReagentTypeAssociation(BaseClass): pass if kit_type != None and reagent_type != None: limit = 1 - return cls.query_return(query=query, limit=limit) + return cls.execute_query(query=query, limit=limit) class SubmissionReagentAssociation(BaseClass): """ @@ -956,7 +956,7 @@ class SubmissionReagentAssociation(BaseClass): query = query.join(BasicSubmission).filter(BasicSubmission.id==submission) case _: pass - return cls.query_return(query=query, limit=limit) + return cls.execute_query(query=query, limit=limit) def to_sub_dict(self, extraction_kit) -> dict: """ @@ -1083,7 +1083,7 @@ class Equipment(BaseClass): limit = 1 case _: pass - return cls.query_return(query=query, limit=limit) + return cls.execute_query(query=query, limit=limit) def to_pydantic(self, submission_type:SubmissionType, extraction_kit:str|KitType|None=None) -> "PydEquipment": """ @@ -1206,7 +1206,7 @@ class EquipmentRole(BaseClass): limit = 1 case _: pass - return cls.query_return(query=query, limit=limit) + return cls.execute_query(query=query, limit=limit) def get_processes(self, submission_type:str|SubmissionType|None, extraction_kit:str|KitType|None=None) -> List[Process]: """ @@ -1382,5 +1382,5 @@ class Process(BaseClass): limit = 1 case _: pass - return cls.query_return(query=query, limit=limit) + return cls.execute_query(query=query, limit=limit) diff --git a/src/submissions/backend/db/models/organizations.py b/src/submissions/backend/db/models/organizations.py index d66a39b..f389b1e 100644 --- a/src/submissions/backend/db/models/organizations.py +++ b/src/submissions/backend/db/models/organizations.py @@ -33,10 +33,6 @@ class Organization(BaseClass): contacts = relationship("Contact", back_populates="organization", secondary=orgs_contacts) #: contacts involved with this org def __repr__(self) -> str: - """ - Returns: - str: Representation of this Organization - """ return f"" @classmethod @@ -70,7 +66,7 @@ class Organization(BaseClass): limit = 1 case _: pass - return cls.query_return(query=query, limit=limit) + return cls.execute_query(query=query, limit=limit) @check_authorization def save(self): @@ -137,5 +133,5 @@ class Contact(BaseClass): limit = 1 case _: pass - return cls.query_return(query=query, limit=limit) + return cls.execute_query(query=query, limit=limit) \ No newline at end of file diff --git a/src/submissions/backend/db/models/submissions.py b/src/submissions/backend/db/models/submissions.py index 449ca97..8da30ad 100644 --- a/src/submissions/backend/db/models/submissions.py +++ b/src/submissions/backend/db/models/submissions.py @@ -47,7 +47,6 @@ class BasicSubmission(BaseClass): submission_type_name = Column(String, ForeignKey("_submissiontype.name", ondelete="SET NULL", name="fk_BS_subtype_name")) #: name of joined submission type technician = Column(String(64)) #: initials of processing tech(s) # Move this into custom types? - # reagents = relationship("Reagent", back_populates="submissions", secondary=reagents_submissions) #: relationship to reagents reagents_id = Column(String, ForeignKey("_reagent.id", ondelete="SET NULL", name="fk_BS_reagents_id")) #: id of used reagents extraction_info = Column(JSON) #: unstructured output from the extraction table logger. run_cost = Column(FLOAT(2)) #: total cost of running the plate. Set from constant and mutable kit costs at time of creation. @@ -127,13 +126,13 @@ class BasicSubmission(BaseClass): output = {} for k,v in dicto.items(): if len(args) > 0 and k not in args: - logger.debug(f"Don't want {k}") + # logger.debug(f"Don't want {k}") continue else: output[k] = v for k,v in st.defaults.items(): if len(args) > 0 and k not in args: - logger.debug(f"Don't want {k}") + # logger.debug(f"Don't want {k}") continue else: match v: @@ -410,7 +409,7 @@ class BasicSubmission(BaseClass): case item if item in self.jsons(): logger.debug(f"Setting JSON attribute.") existing = self.__getattribute__(key) - if value == "" or value is None or value == 'null': + if value is None or value in ['', 'null']: logger.error(f"No value given, not setting.") return if existing is None: @@ -422,7 +421,8 @@ class BasicSubmission(BaseClass): if isinstance(value, list): existing += value else: - existing.append(value) + if value is not None: + existing.append(value) self.__setattr__(key, existing) flag_modified(self, key) return @@ -890,7 +890,7 @@ class BasicSubmission(BaseClass): # limit = 1 if chronologic: query.order_by(cls.submitted_date) - return cls.query_return(query=query, limit=limit) + return cls.execute_query(query=query, limit=limit) @classmethod def query_or_create(cls, submission_type:str|SubmissionType|None=None, **kwargs) -> BasicSubmission: @@ -1421,7 +1421,7 @@ class WastewaterArtic(BasicSubmission): return input_dict @classmethod - def enforce_name(cls, instr:str, data:dict|None={}) -> str: + def enforce_name(cls, instr:str, data:dict={}) -> str: """ Extends parent """ @@ -1430,16 +1430,12 @@ class WastewaterArtic(BasicSubmission): instr = re.sub(r"Artic", "", instr, flags=re.IGNORECASE) except (AttributeError, TypeError) as e: logger.error(f"Problem using regex: {e}") - # try: - # check = instr.startswith("RSL") - # except AttributeError: - # check = False - # if not check: - # try: - # instr = "RSL" + instr - # except TypeError: - # instr = "RSL" + # logger.debug(f"Before RSL addition: {instr}") + instr = instr.replace("-", "") + instr = re.sub(r"^(\d{6})", f"RSL-AR-\\1", instr) + # logger.debug(f"name coming out of Artic namer: {instr}") outstr = super().enforce_name(instr=instr, data=data) + return outstr @classmethod @@ -1922,7 +1918,7 @@ class BasicSample(BaseClass): query = query.filter(attr==v) if len(kwargs) > 0: limit = 1 - return cls.query_return(query=query, limit=limit) + return cls.execute_query(query=query, limit=limit) @classmethod def query_or_create(cls, sample_type:str|None=None, **kwargs) -> BasicSample: @@ -2259,7 +2255,7 @@ class SubmissionSampleAssociation(BaseClass): query = query.order_by(BasicSubmission.submitted_date.desc()) else: query = query.order_by(BasicSubmission.submitted_date) - return cls.query_return(query=query, limit=limit) + return cls.execute_query(query=query, limit=limit) @classmethod def query_or_create(cls, diff --git a/src/submissions/tools.py b/src/submissions/tools.py index b279ef5..9cea7c7 100644 --- a/src/submissions/tools.py +++ b/src/submissions/tools.py @@ -482,12 +482,16 @@ def setup_lookup(func): func (_type_): _description_ """ def wrapper(*args, **kwargs): - for k, v in locals().items(): - if k == "kwargs": - continue + sanitized_kwargs = {} + for k, v in locals()['kwargs'].items(): if isinstance(v, dict): - raise ValueError("Cannot use dictionary in query. Make sure you parse it first.") - return func(*args, **kwargs) + try: + sanitized_kwargs[k] = v['value'] + except KeyError: + raise ValueError("Could not sanitize dictionary in query. Make sure you parse it first.") + elif v is not None: + sanitized_kwargs[k] = v + return func(*args, **sanitized_kwargs) return wrapper class Result(BaseModel):