Debugged upgrades.

This commit is contained in:
lwark
2024-05-13 07:44:06 -05:00
parent f30f6403d6
commit 84fac23890
15 changed files with 447 additions and 487 deletions

View File

@@ -45,7 +45,7 @@ class SheetParser(object):
raise ValueError("No filepath given.")
try:
# self.xl = pd.ExcelFile(filepath)
self.xl = load_workbook(filepath, read_only=True, data_only=True)
self.xl = load_workbook(filepath, data_only=True)
except ValueError as e:
logger.error(f"Incorrect value: {e}")
raise FileNotFoundError(f"Couldn't parse file {self.filepath}")
@@ -53,6 +53,8 @@ class SheetParser(object):
# make decision about type of sample we have
self.sub['submission_type'] = dict(value=RSLNamer.retrieve_submission_type(filename=self.filepath),
missing=True)
self.submission_type = SubmissionType.query(name=self.sub['submission_type'])
self.sub_object = BasicSubmission.find_polymorphic_subclass(polymorphic_identity=self.submission_type)
# grab the info map from the submission type in database
self.parse_info()
self.import_kit_validation_check()
@@ -67,7 +69,7 @@ class SheetParser(object):
"""
Pulls basic information from the excel sheet
"""
parser = InfoParser(xl=self.xl, submission_type=self.sub['submission_type']['value'])
parser = InfoParser(xl=self.xl, submission_type=self.submission_type, sub_object=self.sub_object)
info = parser.parse_info()
self.info_map = parser.map
# exclude_from_info = BasicSubmission.find_polymorphic_subclass(polymorphic_identity=self.sub['submission_type']).exclude_from_info_parser()
@@ -87,21 +89,21 @@ class SheetParser(object):
extraction_kit (str | None, optional): Relevant extraction kit for reagent map. Defaults to None.
"""
if extraction_kit == None:
extraction_kit = extraction_kit = self.sub['extraction_kit']
extraction_kit = self.sub['extraction_kit']
# logger.debug(f"Parsing reagents for {extraction_kit}")
self.sub['reagents'] = ReagentParser(xl=self.xl, submission_type=self.sub['submission_type'],
self.sub['reagents'] = ReagentParser(xl=self.xl, submission_type=self.submission_type,
extraction_kit=extraction_kit).parse_reagents()
def parse_samples(self):
"""
Pulls sample info from the excel sheet
"""
parser = SampleParser(xl=self.xl, submission_type=self.sub['submission_type']['value'])
parser = SampleParser(xl=self.xl, submission_type=self.submission_type)
self.sub['samples'] = parser.reconcile_samples()
# self.plate_map = parser.plate_map
def parse_equipment(self):
parser = EquipmentParser(xl=self.xl, submission_type=self.sub['submission_type']['value'])
parser = EquipmentParser(xl=self.xl, submission_type=self.submission_type)
self.sub['equipment'] = parser.parse_equipment()
def import_kit_validation_check(self):
@@ -120,22 +122,13 @@ class SheetParser(object):
if isinstance(self.sub['extraction_kit'], str):
self.sub['extraction_kit'] = dict(value=self.sub['extraction_kit'], missing=True)
def import_reagent_validation_check(self):
"""
Enforce that only allowed reagents get into the Pydantic Model
"""
kit = KitType.query(name=self.sub['extraction_kit']['value'])
allowed_reagents = [item.name for item in kit.get_reagents()]
# logger.debug(f"List of reagents for comparison with allowed_reagents: {pformat(self.sub['reagents'])}")
self.sub['reagents'] = [reagent for reagent in self.sub['reagents'] if reagent.type in allowed_reagents]
def finalize_parse(self):
"""
Run custom final validations of data for submission subclasses.
"""
finisher = BasicSubmission.find_polymorphic_subclass(
polymorphic_identity=self.sub['submission_type']).finalize_parse
self.sub = finisher(input_dict=self.sub, xl=self.xl, info_map=self.info_map)
# finisher = BasicSubmission.find_polymorphic_subclass(
# polymorphic_identity=self.sub['submission_type']).finalize_parse
self.sub = self.sub_object.finalize_parse(input_dict=self.sub, xl=self.xl, info_map=self.info_map)
def to_pydantic(self) -> PydSubmission:
"""
@@ -163,9 +156,14 @@ class SheetParser(object):
class InfoParser(object):
def __init__(self, xl: Workbook, submission_type: str):
def __init__(self, xl: Workbook, submission_type: str|SubmissionType, sub_object: BasicSubmission|None=None):
logger.info(f"\n\Hello from InfoParser!\n\n")
self.submission_type = submission_type
if isinstance(submission_type, str):
submission_type = SubmissionType.query(name=submission_type)
if sub_object is None:
sub_object = BasicSubmission.find_polymorphic_subclass(polymorphic_identity=submission_type.name)
self.submission_type_obj = submission_type
self.sub_object = sub_object
self.map = self.fetch_submission_info_map()
self.xl = xl
logger.debug(f"Info map for InfoParser: {pformat(self.map)}")
@@ -180,16 +178,14 @@ class InfoParser(object):
Returns:
dict: Location map of all info for this submission type
"""
if isinstance(self.submission_type, str):
self.submission_type = dict(value=self.submission_type, missing=True)
self.submission_type = dict(value=self.submission_type_obj.name, missing=True)
logger.debug(f"Looking up submission type: {self.submission_type['value']}")
# submission_type = SubmissionType.query(name=self.submission_type['value'])
# info_map = submission_type.info_map
self.sub_object: BasicSubmission = \
BasicSubmission.find_polymorphic_subclass(polymorphic_identity=self.submission_type['value'])
# self.sub_object: BasicSubmission = \
# BasicSubmission.find_polymorphic_subclass(polymorphic_identity=self.submission_type['value'])
info_map = self.sub_object.construct_info_map("read")
# Get the parse_info method from the submission type specified
return info_map
def parse_info(self) -> dict:
@@ -199,8 +195,8 @@ class InfoParser(object):
Returns:
dict: key:value of basic info
"""
if isinstance(self.submission_type, str):
self.submission_type = dict(value=self.submission_type, missing=True)
# if isinstance(self.submission_type, str):
# self.submission_type = dict(value=self.submission_type, missing=True)
dicto = {}
# exclude_from_generic = BasicSubmission.find_polymorphic_subclass(polymorphic_identity=self.submission_type['value']).get_default_info("parser_ignore")
# This loop parses generic info
@@ -224,7 +220,13 @@ class InfoParser(object):
# if check:
# relevant[k] = v
for location in v:
if location['sheet'] == sheet:
try:
check = location['sheet'] == sheet
except TypeError:
logger.warning(f"Location is likely a string, skipping")
dicto[k] = dict(value=location, missing=False)
check = False
if check:
new = location
new['name'] = k
relevant.append(new)
@@ -257,13 +259,18 @@ class InfoParser(object):
dicto[item['name']] = dict(value=value, missing=missing)
except (KeyError, IndexError):
continue
return self.sub_object.parse_info(input_dict=dicto, xl=self.xl)
return self.sub_object.custom_info_parser(input_dict=dicto, xl=self.xl)
class ReagentParser(object):
def __init__(self, xl: Workbook, submission_type: str, extraction_kit: str):
def __init__(self, xl: Workbook, submission_type: str, extraction_kit: str, sub_object:BasicSubmission|None=None):
logger.debug("\n\nHello from ReagentParser!\n\n")
self.submission_type_obj = submission_type
self.sub_object = sub_object
if isinstance(extraction_kit, dict):
extraction_kit = extraction_kit['value']
self.kit_object = KitType.query(name=extraction_kit)
self.map = self.fetch_kit_info_map(extraction_kit=extraction_kit, submission_type=submission_type)
logger.debug(f"Reagent Parser map: {self.map}")
self.xl = xl
@@ -279,13 +286,14 @@ class ReagentParser(object):
Returns:
dict: locations of reagent info for the kit.
"""
if isinstance(extraction_kit, dict):
extraction_kit = extraction_kit['value']
kit = KitType.query(name=extraction_kit)
if isinstance(submission_type, dict):
submission_type = submission_type['value']
reagent_map = kit.construct_xl_map_for_use(submission_type.title())
del reagent_map['info']
reagent_map = self.kit_object.construct_xl_map_for_use(submission_type)
try:
del reagent_map['info']
except KeyError:
pass
return reagent_map
def parse_reagents(self) -> List[PydReagent]:
@@ -348,7 +356,7 @@ class SampleParser(object):
object to pull data for samples in excel sheet and construct individual sample objects
"""
def __init__(self, xl: Workbook, submission_type: str, sample_map: dict | None = None) -> None:
def __init__(self, xl: Workbook, submission_type: SubmissionType, sample_map: dict | None = None, sub_object:BasicSubmission|None=None) -> None:
"""
convert sample sub-dataframe to dictionary of records
@@ -359,7 +367,11 @@ class SampleParser(object):
logger.debug("\n\nHello from SampleParser!\n\n")
self.samples = []
self.xl = xl
self.submission_type = submission_type
self.submission_type = submission_type.name
self.submission_type_obj = submission_type
if sub_object is None:
sub_object = BasicSubmission.find_polymorphic_subclass(polymorphic_identity=self.submission_type_obj.name)
self.sub_object = sub_object
self.sample_info_map = self.fetch_sample_info_map(submission_type=submission_type, sample_map=sample_map)
logger.debug(f"sample_info_map: {self.sample_info_map}")
# self.plate_map = self.construct_plate_map(plate_map_location=sample_info_map['plate_map'])
@@ -385,9 +397,10 @@ class SampleParser(object):
"""
logger.debug(f"Looking up submission type: {submission_type}")
# submission_type = SubmissionType.query(name=submission_type)
self.sub_object = BasicSubmission.find_polymorphic_subclass(polymorphic_identity=submission_type)
# self.sub_object = BasicSubmission.find_polymorphic_subclass(polymorphic_identity=submission_type)
# self.custom_sub_parser = .parse_samples
self.samp_object = BasicSample.find_polymorphic_subclass(polymorphic_identity=f"{submission_type} Sample")
self.sample_type = self.sub_object.get_default_info("sample_type")
self.samp_object = BasicSample.find_polymorphic_subclass(polymorphic_identity=self.sample_type)
logger.debug(f"Got sample class: {self.samp_object.__name__}")
# self.custom_sample_parser = .parse_sample
# logger.debug(f"info_map: {pformat(se)}")
@@ -398,46 +411,46 @@ class SampleParser(object):
sample_info_map = sample_map
return sample_info_map
def construct_plate_map(self, plate_map_location: dict) -> pd.DataFrame:
"""
Gets location of samples from plate map grid in excel sheet.
Args:
plate_map_location (dict): sheet name, start/end row/column
Returns:
pd.DataFrame: Plate map grid
"""
logger.debug(f"Plate map location: {plate_map_location}")
df = self.xl.parse(plate_map_location['sheet'], header=None, dtype=object)
df = df.iloc[plate_map_location['start_row'] - 1:plate_map_location['end_row'],
plate_map_location['start_column'] - 1:plate_map_location['end_column']]
df = pd.DataFrame(df.values[1:], columns=df.iloc[0])
df = df.set_index(df.columns[0])
logger.debug(f"Vanilla platemap: {df}")
# custom_mapper = BasicSubmission.find_polymorphic_subclass(polymorphic_identity=self.submission_type)
df = self.sub_object.custom_platemap(self.xl, df)
# logger.debug(f"Custom platemap:\n{df}")
return df
def construct_lookup_table(self, lookup_table_location: dict) -> pd.DataFrame:
"""
Gets table of misc information from excel book
Args:
lookup_table_location (dict): sheet name, start/end row
Returns:
pd.DataFrame: _description_
"""
try:
df = self.xl.parse(lookup_table_location['sheet'], header=None, dtype=object)
except KeyError:
return None
df = df.iloc[lookup_table_location['start_row'] - 1:lookup_table_location['end_row']]
df = pd.DataFrame(df.values[1:], columns=df.iloc[0])
df = df.reset_index(drop=True)
return df
# def construct_plate_map(self, plate_map_location: dict) -> pd.DataFrame:
# """
# Gets location of samples from plate map grid in excel sheet.
#
# Args:
# plate_map_location (dict): sheet name, start/end row/column
#
# Returns:
# pd.DataFrame: Plate map grid
# """
# logger.debug(f"Plate map location: {plate_map_location}")
# df = self.xl.parse(plate_map_location['sheet'], header=None, dtype=object)
# df = df.iloc[plate_map_location['start_row'] - 1:plate_map_location['end_row'],
# plate_map_location['start_column'] - 1:plate_map_location['end_column']]
# df = pd.DataFrame(df.values[1:], columns=df.iloc[0])
# df = df.set_index(df.columns[0])
# logger.debug(f"Vanilla platemap: {df}")
# # custom_mapper = BasicSubmission.find_polymorphic_subclass(polymorphic_identity=self.submission_type)
# df = self.sub_object.custom_platemap(self.xl, df)
# # logger.debug(f"Custom platemap:\n{df}")
# return df
#
# def construct_lookup_table(self, lookup_table_location: dict) -> pd.DataFrame:
# """
# Gets table of misc information from excel book
#
# Args:
# lookup_table_location (dict): sheet name, start/end row
#
# Returns:
# pd.DataFrame: _description_
# """
# try:
# df = self.xl.parse(lookup_table_location['sheet'], header=None, dtype=object)
# except KeyError:
# return None
# df = df.iloc[lookup_table_location['start_row'] - 1:lookup_table_location['end_row']]
# df = pd.DataFrame(df.values[1:], columns=df.iloc[0])
# df = df.reset_index(drop=True)
# return df
def parse_plate_map(self):
"""
@@ -471,7 +484,7 @@ class SampleParser(object):
if check_not_nan(id):
if id not in invalids:
sample_dict = dict(id=id, row=ii, column=jj)
sample_dict['sample_type'] = f"{self.submission_type} Sample"
sample_dict['sample_type'] = self.sample_type
plate_map_samples.append(sample_dict)
else:
# logger.error(f"Sample cell ({row}, {column}) has invalid value: {id}.")
@@ -524,7 +537,7 @@ class SampleParser(object):
row_dict[lmap['merge_on_id']] = str(row_dict[lmap['merge_on_id']])
except KeyError:
pass
row_dict['sample_type'] = f"{self.submission_type} Sample"
row_dict['sample_type'] = self.sample_type
row_dict['submission_rank'] = ii
try:
check = check_not_nan(row_dict[lmap['merge_on_id']])
@@ -567,22 +580,22 @@ class SampleParser(object):
new_samples.append(PydSample(**translated_dict))
return result, new_samples
def grab_plates(self) -> List[str]:
"""
Parse plate names from
Returns:
List[str]: list of plate names.
"""
plates = []
for plate in self.plates:
df = self.xl.parse(plate['sheet'], header=None)
if isinstance(df.iat[plate['row'] - 1, plate['column'] - 1], str):
output = RSLNamer.retrieve_rsl_number(filename=df.iat[plate['row'] - 1, plate['column'] - 1])
else:
continue
plates.append(output)
return plates
# def grab_plates(self) -> List[str]:
# """
# Parse plate names from
#
# Returns:
# List[str]: list of plate names.
# """
# plates = []
# for plate in self.plates:
# df = self.xl.parse(plate['sheet'], header=None)
# if isinstance(df.iat[plate['row'] - 1, plate['column'] - 1], str):
# output = RSLNamer.retrieve_rsl_number(filename=df.iat[plate['row'] - 1, plate['column'] - 1])
# else:
# continue
# plates.append(output)
# return plates
def reconcile_samples(self):
# TODO: Move to pydantic validator?
@@ -630,20 +643,24 @@ class SampleParser(object):
else:
new = psample
# samples.append(psample)
new['sample_type'] = f"{self.submission_type} Sample"
# new['sample_type'] = f"{self.submission_type} Sample"
try:
check = new['submitter_id'] is None
except KeyError:
check = True
if check:
new['submitter_id'] = psample['id']
new = self.sub_object.parse_samples(new)
samples.append(new)
samples = remove_key_from_list_of_dicts(samples, "id")
return sorted(samples, key=lambda k: (k['row'], k['column']))
class EquipmentParser(object):
def __init__(self, xl: Workbook, submission_type: str) -> None:
def __init__(self, xl: Workbook, submission_type: str|SubmissionType) -> None:
if isinstance(submission_type, str):
submission_type = SubmissionType.query(name=submission_type)
self.submission_type = submission_type
self.xl = xl
self.map = self.fetch_equipment_map()
@@ -655,8 +672,8 @@ class EquipmentParser(object):
Returns:
List[dict]: List of locations
"""
submission_type = SubmissionType.query(name=self.submission_type)
return submission_type.construct_equipment_map()
# submission_type = SubmissionType.query(name=self.submission_type)
return self.submission_type.construct_equipment_map()
def get_asset_number(self, input: str) -> str:
"""

View File

@@ -1,11 +1,12 @@
import logging
from copy import copy
from pathlib import Path
from pprint import pformat
from typing import List
from openpyxl import load_workbook, Workbook
from tools import row_keys
from backend.db.models import SubmissionType, KitType
from backend.db.models import SubmissionType, KitType, BasicSample, BasicSubmission
from backend.validators.pydant import PydSubmission
from io import BytesIO
from collections import OrderedDict
@@ -32,6 +33,7 @@ class SheetWriter(object):
# self.__setattr__('submission_type', submission.submission_type['value'])
self.sub[k] = v['value']
self.submission_type = SubmissionType.query(name=v['value'])
self.sub_object = BasicSubmission.find_polymorphic_subclass(polymorphic_identity=self.submission_type)
case _:
if isinstance(v, dict):
self.sub[k] = v['value']
@@ -82,13 +84,17 @@ class SheetWriter(object):
class InfoWriter(object):
def __init__(self, xl: Workbook, submission_type: SubmissionType | str, info_dict: dict):
def __init__(self, xl: Workbook, submission_type: SubmissionType | str, info_dict: dict, sub_object:BasicSubmission|None=None):
if isinstance(submission_type, str):
submission_type = SubmissionType.query(name=submission_type)
if sub_object is None:
sub_object = BasicSubmission.find_polymorphic_subclass(polymorphic_identity=submission_type.name)
self.submission_type = submission_type
self.sub_object = sub_object
self.xl = xl
map = submission_type.construct_info_map(mode='write')
self.info = self.reconcile_map(info_dict, map)
logger.debug(pformat(self.info))
def reconcile_map(self, info_dict: dict, map: dict) -> dict:
output = {}
@@ -99,7 +105,8 @@ class InfoWriter(object):
try:
dicto['locations'] = map[k]
except KeyError:
continue
# continue
pass
dicto['value'] = v
if len(dicto) > 0:
output[k] = dicto
@@ -113,10 +120,11 @@ class InfoWriter(object):
logger.error(f"No locations for {k}, skipping")
continue
for loc in locations:
logger.debug(f"Writing {k} to {loc['sheet']}, row: {loc['row']}, column: {loc['column']}")
sheet = self.xl[loc['sheet']]
sheet.cell(row=loc['row'], column=loc['column'], value=v['value'])
return self.xl
return self.sub_object.custom_info_writer(self.xl, info=self.info)
class ReagentWriter(object):
@@ -143,7 +151,7 @@ class ReagentWriter(object):
try:
dicto = dict(value=v, row=mp_info[k]['row'], column=mp_info[k]['column'])
except KeyError as e:
logger.error(f"Keyerror: {e}")
# logger.error(f"Keyerror: {e}")
dicto = v
placeholder[k] = dicto
placeholder['sheet'] = mp_info['sheet']
@@ -156,8 +164,8 @@ class ReagentWriter(object):
for k, v in reagent.items():
if not isinstance(v, dict):
continue
logger.debug(
f"Writing {reagent['type']} {k} to {reagent['sheet']}, row: {v['row']}, column: {v['column']}")
# logger.debug(
# f"Writing {reagent['type']} {k} to {reagent['sheet']}, row: {v['row']}, column: {v['column']}")
sheet.cell(row=v['row'], column=v['column'], value=v['value'])
return self.xl
@@ -214,18 +222,27 @@ class EquipmentWriter(object):
output = []
for ii, equipment in enumerate(equipment_list, start=1):
mp_info = map[equipment['role']]
# logger.debug(f"{equipment['role']} map: {mp_info}")
placeholder = copy(equipment)
for jj, (k, v) in enumerate(equipment.items(), start=1):
try:
dicto = dict(value=v, row=mp_info[k]['row'], column=mp_info[k]['column'])
except KeyError as e:
logger.error(f"Keyerror: {e}")
if mp_info == {}:
for jj, (k, v) in enumerate(equipment.items(), start=1):
dicto = dict(value=v, row=ii, column=jj)
placeholder[k] = dicto
try:
placeholder['sheet'] = mp_info['sheet']
except KeyError:
placeholder['sheet'] = "Equipment"
placeholder[k] = dicto
# output.append(placeholder)
else:
for jj, (k, v) in enumerate(equipment.items(), start=1):
try:
dicto = dict(value=v, row=mp_info[k]['row'], column=mp_info[k]['column'])
except KeyError as e:
# logger.error(f"Keyerror: {e}")
continue
placeholder[k] = dicto
try:
placeholder['sheet'] = mp_info['sheet']
except KeyError:
placeholder['sheet'] = "Equipment"
# logger.debug(f"Final output of {equipment['role']} : {placeholder}")
output.append(placeholder)
return output
@@ -241,8 +258,12 @@ class EquipmentWriter(object):
if not isinstance(v, dict):
continue
logger.debug(
f"Writing {equipment['role']} {k} to {equipment['sheet']}, row: {v['row']}, column: {v['column']}")
f"Writing {k}: {v['value']} to {equipment['sheet']}, row: {v['row']}, column: {v['column']}")
if isinstance(v['value'], list):
v['value'] = v['value'][0]
sheet.cell(row=v['row'], column=v['column'], value=v['value'])
try:
sheet.cell(row=v['row'], column=v['column'], value=v['value'])
except AttributeError as e:
logger.error(f"Couldn't write to {equipment['sheet']}, row: {v['row']}, column: {v['column']}")
logger.error(e)
return self.xl