Missing sample message after Artic parsing.

2023-06-16 13:58:28 -05:00
parent a7132cd1b4
commit 0bdcad0eee
11 changed files with 199 additions and 45 deletions
--- a/src/submissions/backend/db/functions.py
+++ b/src/submissions/backend/db/functions.py
@@ -31,6 +31,7 @@ def set_sqlite_pragma(dbapi_connection, connection_record):
    cursor.execute("PRAGMA foreign_keys=ON")
    cursor.close()

+
 def store_submission(ctx:dict, base_submission:models.BasicSubmission) -> None|dict:
    """
    Upserts submissions into database
@@ -799,9 +800,21 @@ def lookup_discounts_by_org_and_kit(ctx:dict, kit_id:int, lab_id:int):
        )).all()

 def hitpick_plate(submission:models.BasicSubmission, plate_number:int=0) -> list:
+    """
+    Creates a list of sample positions and statuses to be used by plate mapping and csv output to biomek software.
+
+    Args:
+        submission (models.BasicSubmission): Input submission
+        plate_number (int, optional): plate position in the series of selected plates. Defaults to 0.
+
+    Returns:
+        list: list of sample dictionaries.
+    """    
    plate_dicto = []
    for sample in submission.samples:
        # have sample report back its info if it's positive, otherwise, None
+        method_list = [func for func in dir(sample) if callable(getattr(sample, func))]
+        logger.debug(f"Method list of sample: {method_list}")
        samp = sample.to_hitpick()
        if samp == None:
            continue
@@ -811,6 +824,43 @@ def hitpick_plate(submission:models.BasicSubmission, plate_number:int=0) -> list
            # if len(dicto) < 88:
            this_sample = dict(
                plate_number = plate_number,
+                sample_name = samp['name'],
+                column = samp['col'],
+                row = samp['row'],
+                positive = samp['positive'],
+                plate_name = submission.rsl_plate_num
+            )
+            # append to plate samples
+            plate_dicto.append(this_sample)
+            # append to all samples
+    # image = make_plate_map(plate_dicto)
+    return plate_dicto
+
+def platemap_plate(submission:models.BasicSubmission) -> list:
+    """
+    Deprecated. Replaced by new functionality in hitpick_plate
+
+    Args:
+        submission (models.BasicSubmission): Input submission
+
+    Returns:
+        list: list of sample dictionaries
+    """    
+    plate_dicto = []
+    for sample in submission.samples:
+        # have sample report back its info if it's positive, otherwise, None
+        
+        try:
+            samp = sample.to_platemap()
+        except AttributeError:
+            continue
+        if samp == None:
+            continue
+        else:
+            logger.debug(f"Item name: {samp['name']}")
+            # plate can handle 88 samples to leave column for controls
+            # if len(dicto) < 88:
+            this_sample = dict(
                sample_name = samp['name'],
                column = samp['col'],
                row = samp['row'],
--- a/src/submissions/backend/db/models/samples.py
+++ b/src/submissions/backend/db/models/samples.py
@@ -62,8 +62,10 @@ class WWSample(Base):
        # if well_col > 4:
        #     well
        if self.ct_n1 != None and self.ct_n2 != None:
+            # logger.debug(f"Using well info in name.")
            name = f"{self.ww_sample_full_id}\n\t- ct N1: {'{:.2f}'.format(self.ct_n1)} ({self.n1_status})\n\t- ct N2: {'{:.2f}'.format(self.ct_n2)} ({self.n2_status})"
        else:
+            # logger.debug(f"NOT using well info in name for: {self.ww_sample_full_id}")
            name = self.ww_sample_full_id
        return {
            "well": self.well_number,
@@ -85,18 +87,23 @@ class WWSample(Base):
        except TypeError as e:
            logger.error(f"Couldn't check positives for {self.rsl_number}. Looks like there isn't PCR data.")
            return None
-        if positive:
-            try:
-                # The first character of the elution well is the row
-                well_row = row_dict[self.elution_well[0]]
-                # The remaining charagers are the columns
-                well_col = self.elution_well[1:]
-            except TypeError as e:
-                logger.error(f"This sample doesn't have elution plate info.")
-                return None
-            return dict(name=self.ww_sample_full_id, row=well_row, col=well_col)
-        else:
-            return None
+        well_row = row_dict[self.elution_well[0]]
+        well_col = self.elution_well[1:]
+        # if positive:
+        #     try:
+        #         # The first character of the elution well is the row
+        #         well_row = row_dict[self.elution_well[0]]
+        #         # The remaining charagers are the columns
+        #         well_col = self.elution_well[1:]
+        #     except TypeError as e:
+        #         logger.error(f"This sample doesn't have elution plate info.")
+        #         return None
+        return dict(name=self.ww_sample_full_id, 
+                    row=well_row, 
+                    col=well_col, 
+                    positive=positive)
+        # else:
+        #     return None


 class BCSample(Base):
@@ -134,7 +141,24 @@ class BCSample(Base):
            "name": f"{self.sample_id} - ({self.organism})",
        }

+    def to_hitpick(self) -> dict|None:
+        """
+        Outputs a dictionary of locations

+        Returns:
+            dict|None: sample id, row number, column and positive flag for this sample's well, or None if the sample has no usable well number
+        """
+        # dictionary to translate row letters into numbers
+        row_dict = dict(A=1, B=2, C=3, D=4, E=5, F=6, G=7, H=8)
+        try:
+            # The first character of the well number is the row
+            well_row = row_dict[self.well_number[0]]
+            # The remaining characters are the columns
+            well_col = self.well_number[1:]
+        except (TypeError, IndexError) as e:
+            # well_number is None or empty: return None so hitpick_plate skips this sample
+            logger.error(f"Sample {self.sample_id} doesn't have a usable well number: {e}")
+            return None
+        # positive is hard-coded to False for this sample type
+        return dict(name=self.sample_id,
+                    row=well_row,
+                    col=well_col,
+                    positive=False)
+        
 # class ArticSample(Base):
 #     """
 #     base of artic sample
--- a/src/submissions/backend/db/models/submissions.py
+++ b/src/submissions/backend/db/models/submissions.py
@@ -1,6 +1,7 @@
 '''
 Models for the main submission types.
 '''
+import math
 from . import Base
 from sqlalchemy import Column, String, TIMESTAMP, INTEGER, ForeignKey, Table, JSON, FLOAT
 from sqlalchemy.orm import relationship
@@ -246,5 +247,24 @@ class WastewaterArtic(BasicSubmission):
    derivative submission type for artic wastewater
    """    
    samples = relationship("WWSample", back_populates="artic_rsl_plate", uselist=True)
-    # Can in use the pcr_info from the wastewater? Cause I can't define pcr_info here due to conflicts with that
-    __mapper_args__ = {"polymorphic_identity": "wastewater_artic", "polymorphic_load": "inline"}
+    # Can it use the pcr_info from the wastewater? Cause I can't define pcr_info here due to conflicts with that
+    # Not necessary because we don't get any results for this procedure.
+    __mapper_args__ = {"polymorphic_identity": "wastewater_artic", "polymorphic_load": "inline"}
+
+    def calculate_base_cost(self):
+        """
+        Calculates the run cost for this submission and stores it in self.run_cost.
+
+        This method overrides parent method due to multiple output plates from a single submission:
+        the kit's constant cost is multiplied by the number of output plates.
+
+        Raises:
+            TypeError | ValueError: if self.sample_count can't be converted to an integer.
+        """
+        logger.debug(f"Hello from calculate base cost in WWArtic")
+        try:
+            sample_count = int(self.sample_count)
+        except (TypeError, ValueError) as e:
+            logger.error(f"Column count error: {e}")
+            raise
+        # 8 samples per column; math.ceil is used because only the math module is imported here (a bare ceil may not be in scope).
+        cols_count_96 = math.ceil(sample_count / 8)
+        # Since we have multiple output plates per submission form, the constant cost will have to reflect this.
+        # NOTE(review): 16 is presumably the number of samples per output plate - confirm.
+        output_plate_count = math.ceil(sample_count / 16)
+        logger.debug(f"Looks like we have {output_plate_count} output plates.")
+        const_cost = self.extraction_kit.constant_cost * output_plate_count
+        try:
+            self.run_cost = const_cost + (self.extraction_kit.mutable_cost_column * cols_count_96) + (self.extraction_kit.mutable_cost_sample * sample_count)
+        except Exception as e:
+            # best effort: a failed calculation is logged and run_cost is left unchanged
+            logger.error(f"Calculation error: {e}")
--- a/src/submissions/backend/excel/parser.py
+++ b/src/submissions/backend/excel/parser.py
@@ -3,6 +3,7 @@ contains parser object for pulling values from client generated submission sheet
 '''
 from getpass import getuser
 import math
+import pprint
 from typing import Tuple
 import pandas as pd
 from pathlib import Path
@@ -160,14 +161,19 @@ class SheetParser(object):
        sample_parser = SampleParser(self.ctx, submission_info.iloc[16:112])
        sample_parse = getattr(sample_parser, f"parse_{self.sub['submission_type'].lower()}_samples")
        logger.debug(f"Parser result: {self.sub}")
-        self.sub['samples'] = sample_parse()
+        self.sample_result, self.sub['samples'] = sample_parse()


    def parse_wastewater(self) -> None:
        """
        pulls info specific to wastewater sample type
        """        
-
+        def retrieve_elution_map():
+            """
+            Cuts the elution map out of the "Extraction Worksheet" sheet
+
+            Returns:
+                pd.DataFrame: rows 9-17, columns 5 onward of the sheet, indexed by the first of those columns and labelled with the values of its first row
+            """
+            worksheet = self.xl.parse("Extraction Worksheet")
+            region = worksheet.iloc[9:18, 5:]
+            # the first column of the region becomes the index
+            mapping = region.set_index(region.columns[0])
+            # the first row supplies the column labels; the row itself stays in the frame
+            mapping.columns = mapping.iloc[0]
+            return mapping
        def parse_reagents(df:pd.DataFrame) -> None:
            """
            Pulls reagents from the bacterial sub-dataframe
@@ -216,9 +222,9 @@ class SheetParser(object):
        parse_reagents(ext_reagent_range)
        parse_reagents(pcr_reagent_range)
        # parse samples
-        sample_parser = SampleParser(self.ctx, submission_info.iloc[16:])
+        sample_parser = SampleParser(self.ctx, submission_info.iloc[16:], elution_map=retrieve_elution_map())
        sample_parse = getattr(sample_parser, f"parse_{self.sub['submission_type'].lower()}_samples")
-        self.sub['samples'] = sample_parse()
+        self.sample_result, self.sub['samples'] = sample_parse()
        self.sub['csv'] = self.xl.parse("Copy to import file", dtype=object)


@@ -272,7 +278,7 @@ class SheetParser(object):
                        return_list.append(dict(sample_name=re.sub(r"\s?\(.*\)", "", df.loc[ii.name, int(c)]), \
                                                well=f"{ii.name}{c}",
                                                artic_plate=self.sub['rsl_plate_num']))
-            logger.debug(f"massaged sample list for {self.sub['rsl_plate_num']}: {return_list}")
+            logger.debug(f"massaged sample list for {self.sub['rsl_plate_num']}: {pprint.pprint(return_list)}")
            return return_list
        submission_info = self.xl.parse("First Strand", dtype=object)
        biomek_info = self.xl.parse("ArticV4 Biomek", dtype=object)
@@ -280,7 +286,7 @@ class SheetParser(object):
        biomek_reagent_range = biomek_info.iloc[60:, 0:3].dropna(how='all')
        self.sub['submitter_plate_num'] = ""
        self.sub['rsl_plate_num'] =  RSLNamer(self.filepath.__str__()).parsed_name
-        self.sub['submitted_date'] = submission_info.iloc[0][2]
+        self.sub['submitted_date'] = biomek_info.iloc[1][1]
        self.sub['submitting_lab'] = "Enterics Wastewater Genomics"
        self.sub['sample_count'] = submission_info.iloc[4][6]
        self.sub['extraction_kit'] = "ArticV4.1"
@@ -290,7 +296,7 @@ class SheetParser(object):
        samples = massage_samples(biomek_info.iloc[22:31, 0:])
        sample_parser = SampleParser(self.ctx, pd.DataFrame.from_records(samples))
        sample_parse = getattr(sample_parser, f"parse_{self.sub['submission_type'].lower()}_samples")
-        self.sub['samples'] = sample_parse()
+        self.sample_result, self.sub['samples'] = sample_parse()
        


@@ -299,18 +305,21 @@ class SampleParser(object):
    object to pull data for samples in excel sheet and construct individual sample objects
    """

-    def __init__(self, ctx:dict, df:pd.DataFrame) -> None:
+    def __init__(self, ctx:dict, df:pd.DataFrame, elution_map:pd.DataFrame|None=None) -> None:
        """
        convert sample sub-dataframe to dictionary of records

        Args:
+            ctx (dict): setting passed down from gui
            df (pd.DataFrame): input sample dataframe
+            elution_map (pd.DataFrame | None, optional): optional map of elution plate. Defaults to None.
        """        
        self.ctx = ctx
        self.samples = df.to_dict("records")
+        self.elution_map = elution_map


-    def parse_bacterial_culture_samples(self) -> list[BCSample]:
+    def parse_bacterial_culture_samples(self) -> Tuple[str|None, list[BCSample]]:
        """
        construct bacterial culture specific sample objects

@@ -334,16 +343,28 @@ class SampleParser(object):
                not_a_nan = True
            if not_a_nan:
                new_list.append(new)
-        return new_list
+        return None, new_list


-    def parse_wastewater_samples(self) -> list[WWSample]:
+    def parse_wastewater_samples(self) -> Tuple[str|None, list[WWSample]]:
        """
        construct wastewater specific sample objects

        Returns:
            list[WWSample]: list of sample objects
        """        
+        def search_df_for_sample(sample_rsl:str) -> str|None:
+            """
+            Finds the first cell of the elution map equal to the sample and blanks that cell
+
+            Args:
+                sample_rsl (str): value to look for in the elution map
+
+            Returns:
+                str|None: row label followed by column label of the matching cell, or None if there is no map or no match
+            """
+            if self.elution_map is None:
+                logger.error(f"No elution map available, can't find sample {sample_rsl}")
+                return None
+            logger.debug(f"Attempting to find sample {sample_rsl} in \n {self.elution_map}")
+            # positions of every matching cell, in row-major order
+            hits = np.argwhere((self.elution_map == sample_rsl).to_numpy())
+            if len(hits) == 0:
+                logger.error(f"Couldn't find sample {sample_rsl} in elution map.")
+                return None
+            row_pos, col_pos = int(hits[0][0]), int(hits[0][1])
+            row_label = self.elution_map.index[row_pos]
+            col_label = self.elution_map.columns[col_pos]
+            # blank the matched cell, presumably so a repeated sample resolves to its next well
+            self.elution_map.iat[row_pos, col_pos] = np.nan
+            try:
+                col = str(int(col_label))
+            except (ValueError, TypeError):
+                # label isn't numeric, use it as is
+                col = str(col_label)
+            return f"{row_label}{col}"
        new_list = []
        for sample in self.samples:
            new = WWSample()
@@ -368,10 +389,11 @@ class SampleParser(object):
            # new.site_status = sample['Unnamed: 7']
            new.notes = str(sample['Unnamed: 6']) # previously Unnamed: 8
            new.well_number = sample['Unnamed: 1']
+            new.elution_well = search_df_for_sample(new.rsl_number)
            new_list.append(new)
-        return new_list
+        return None, new_list
    
-    def parse_wastewater_artic_samples(self) -> list[WWSample]:
+    def parse_wastewater_artic_samples(self) -> Tuple[str|None, list[WWSample]]:
        """
        The artic samples are the wastewater samples that are to be sequenced
        So we will need to lookup existing ww samples and append Artic well # and plate relation
@@ -380,17 +402,20 @@ class SampleParser(object):
            list[WWSample]: list of wastewater samples to be updated
        """        
        new_list = []
+        missed_samples = []
        for sample in self.samples:
            with self.ctx['database_session'].no_autoflush:
                instance = lookup_ww_sample_by_ww_sample_num(ctx=self.ctx, sample_number=sample['sample_name'])
            logger.debug(f"Checking: {sample['sample_name']}")
            if instance == None:
                logger.error(f"Unable to find match for: {sample['sample_name']}")
+                missed_samples.append(sample['sample_name'])
                continue
            logger.debug(f"Got instance: {instance.ww_sample_full_id}")
            instance.artic_well_number = sample['well']
            new_list.append(instance)
-        return new_list
+        missed_str = "\n\t".join(missed_samples)
+        return f"Could not find matches for the following samples:\n\t {missed_str}", new_list
            


@@ -472,6 +497,7 @@ class PCRParser(object):
        df = self.parse_general(sheet_name="Results")
        column_names = ["Well", "Well Position", "Omit","Sample","Target","Task"," Reporter","Quencher","Amp Status","Amp Score","Curve Quality","Result Quality Issues","Cq","Cq Confidence","Cq Mean","Cq SD","Auto Threshold","Threshold", "Auto Baseline", "Baseline Start", "Baseline End"]
        self.samples_df = df.iloc[23:][0:]
+        logger.debug(f"Dataframe of PCR results:\n\t{self.samples_df}")
        self.samples_df.columns = column_names
        logger.debug(f"Samples columns: {self.samples_df.columns}")
        well_call_df = self.xl.parse(sheet_name="Well Call").iloc[24:][0:].iloc[:,-1:]
@@ -488,7 +514,7 @@ class PCRParser(object):
                sample_obj = dict(
                    sample = row['Sample'],
                    plate_rsl = self.plate_num,
-                    elution_well = row['Well Position']
+                    # elution_well = row['Well Position']
                )
            logger.debug(f"Got sample obj: {sample_obj}") 
            # logger.debug(f"row: {row}")