Code cleanup, dependency update, various bug fixes
@@ -543,7 +543,6 @@ class EquipmentParser(object):
    def __init__(self, xl: Workbook, submission_type: str|SubmissionType) -> None:
        if isinstance(submission_type, str):
            submission_type = SubmissionType.query(name=submission_type)

        self.submission_type = submission_type
        self.xl = xl
        self.map = self.fetch_equipment_map()
@@ -555,7 +554,6 @@ class EquipmentParser(object):
        Returns:
            List[dict]: List of locations
        """
        # submission_type = SubmissionType.query(name=self.submission_type)
        return self.submission_type.construct_equipment_map()

    def get_asset_number(self, input: str) -> str:
@@ -569,7 +567,7 @@ class EquipmentParser(object):
            str: asset number
        """
        regex = Equipment.get_regex()
        logger.debug(f"Using equipment regex: {regex} on {input}")
        # logger.debug(f"Using equipment regex: {regex} on {input}")
        try:
            return regex.search(input).group().strip("-")
        except AttributeError:
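As an aside, the pattern used here (regex search, then letting the AttributeError from a missed match fall through) can be sketched standalone. The pattern below is a hypothetical stand-in for whatever Equipment.get_regex() actually returns:

import re

# Hypothetical asset-number pattern; the real one comes from Equipment.get_regex().
ASSET_REGEX = re.compile(r"ABC-\d{4}")

def get_asset_number(text: str) -> str | None:
    """Pull the first asset number out of free text, or None when absent."""
    try:
        # re.search returns None on a miss, so .group() raises AttributeError.
        return ASSET_REGEX.search(text).group().strip("-")
    except AttributeError:
        return None

assert get_asset_number("Extractor ABC-1234, bench 2") == "ABC-1234"
assert get_asset_number("no asset here") is None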
@@ -582,11 +580,10 @@ class EquipmentParser(object):
        Returns:
            List[PydEquipment]: list of equipment
        """
        logger.debug(f"Equipment parser going into parsing: {pformat(self.__dict__)}")
        # logger.debug(f"Equipment parser going into parsing: {pformat(self.__dict__)}")
        output = []
        # logger.debug(f"Sheets: {sheets}")
        for sheet in self.xl.sheetnames:
            # df = self.xl.parse(sheet, header=None, dtype=object)
            ws = self.xl[sheet]
            try:
                relevant = [item for item in self.map if item['sheet'] == sheet]
@@ -595,7 +592,6 @@ class EquipmentParser(object):
                # logger.debug(f"Relevant equipment: {pformat(relevant)}")
                previous_asset = ""
                for equipment in relevant:
                    # asset = df.iat[equipment['name']['row']-1, equipment['name']['column']-1]
                    asset = ws.cell(equipment['name']['row'], equipment['name']['column'])
                    if not check_not_nan(asset):
                        asset = previous_asset
@@ -603,7 +599,6 @@ class EquipmentParser(object):
                    previous_asset = asset
                    asset = self.get_asset_number(input=asset)
                    eq = Equipment.query(asset_number=asset)
                    # process = df.iat[equipment['process']['row']-1, equipment['process']['column']-1]
                    process = ws.cell(row=equipment['process']['row'], column=equipment['process']['column'])
                    try:
                        output.append(
@@ -614,72 +609,6 @@ class EquipmentParser(object):
            # logger.debug(f"Here is the output so far: {pformat(output)}")
            return output

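The loop in parse() carries previous_asset forward whenever a cell reads as blank (merged cells yield empty values in openpyxl). A minimal sketch of that forward-fill idea, with a plain list standing in for the worksheet and check_not_nan:

def forward_fill(cells: list[str | None]) -> list[str]:
    """Replace blank cells with the most recent non-blank value, as parse() does."""
    filled, previous = [], ""
    for value in cells:
        if value is None or str(value).strip() == "":
            value = previous  # blank cell: inherit the asset read above it
        else:
            previous = value
        filled.append(value)
    return filled

assert forward_fill(["ABC-1234", None, "ABC-5678", ""]) == ["ABC-1234", "ABC-1234", "ABC-5678", "ABC-5678"]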
# class PCRParser(object):
# """
# Object to pull data from Design and Analysis PCR export file.
# """
#
# def __init__(self, filepath: Path | None = None) -> None:
# """
# Initializes object.
#
# Args:
# filepath (Path | None, optional): file to parse. Defaults to None.
# """
# logger.debug(f"Parsing {filepath.__str__()}")
# if filepath == None:
# logger.error(f"No filepath given.")
# self.xl = None
# else:
# try:
# self.xl = pd.ExcelFile(filepath.__str__())
# except ValueError as e:
# logger.error(f"Incorrect value: {e}")
# self.xl = None
# except PermissionError:
# logger.error(f"Couldn't get permissions for {filepath.__str__()}. Operation might have been cancelled.")
# return
# self.parse_general(sheet_name="Results")
# namer = RSLNamer(filename=filepath.__str__())
# self.plate_num = namer.parsed_name
# self.submission_type = namer.submission_type
# logger.debug(f"Set plate number to {self.plate_num} and type to {self.submission_type}")
# parser = BasicSubmission.find_polymorphic_subclass(polymorphic_identity=self.submission_type)
# self.samples = parser.parse_pcr(xl=self.xl, rsl_number=self.plate_num)
#
# def parse_general(self, sheet_name: str):
# """
# Parse general info rows for all types of PCR results
#
# Args:
# sheet_name (str): Name of sheet in excel workbook that holds info.
# """
# self.pcr = {}
# df = self.xl.parse(sheet_name=sheet_name, dtype=object).fillna("")
# self.pcr['comment'] = df.iloc[0][1]
# self.pcr['operator'] = df.iloc[1][1]
# self.pcr['barcode'] = df.iloc[2][1]
# self.pcr['instrument'] = df.iloc[3][1]
# self.pcr['block_type'] = df.iloc[4][1]
# self.pcr['instrument_name'] = df.iloc[5][1]
# self.pcr['instrument_serial'] = df.iloc[6][1]
# self.pcr['heated_cover_serial'] = df.iloc[7][1]
# self.pcr['block_serial'] = df.iloc[8][1]
# self.pcr['run-start'] = df.iloc[9][1]
# self.pcr['run_end'] = df.iloc[10][1]
# self.pcr['run_duration'] = df.iloc[11][1]
# self.pcr['sample_volume'] = df.iloc[12][1]
# self.pcr['cover_temp'] = df.iloc[13][1]
# self.pcr['passive_ref'] = df.iloc[14][1]
# self.pcr['pcr_step'] = df.iloc[15][1]
# self.pcr['quant_cycle_method'] = df.iloc[16][1]
# self.pcr['analysis_time'] = df.iloc[17][1]
# self.pcr['software'] = df.iloc[18][1]
# self.pcr['plugin'] = df.iloc[19][1]
# self.pcr['exported_on'] = df.iloc[20][1]
# self.pcr['imported_by'] = getuser()

class PCRParser(object):
    """Object to pull data from Design and Analysis PCR export file."""

@@ -690,7 +619,7 @@ class PCRParser(object):
        Args:
            filepath (Path | None, optional): file to parse. Defaults to None.
        """
        logger.debug(f'Parsing {filepath.__str__()}')
        # logger.debug(f'Parsing {filepath.__str__()}')
        if filepath is None:
            logger.error('No filepath given.')
            self.xl = None

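Both the removed comment block and the surviving __init__ follow the same defensive-load pattern: bail out quietly on a missing path, an unreadable file, or a locked file. A minimal sketch, assuming only the standard pandas ExcelFile API:

from pathlib import Path

import pandas as pd

def load_export(filepath: Path | None) -> pd.ExcelFile | None:
    """Open a PCR export, returning None instead of raising on common failures."""
    if filepath is None:
        return None
    try:
        return pd.ExcelFile(str(filepath))
    except ValueError:
        return None  # not a readable Excel file
    except PermissionError:
        return None  # file locked, or the open dialog was cancelled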
@@ -27,7 +27,7 @@ def make_report_xlsx(records:list[dict]) -> Tuple[DataFrame, DataFrame]:
    # aggregate cost and sample count columns
    df2 = df.groupby(["Submitting Lab", "Extraction Kit"]).agg({'Extraction Kit':'count', 'Cost': 'sum', 'Sample Count':'sum'})
    df2 = df2.rename(columns={"Extraction Kit": 'Run Count'})
    logger.debug(f"Output dataframe for xlsx: {df2.columns}")
    # logger.debug(f"Output dataframe for xlsx: {df2.columns}")
    df = df.drop('id', axis=1)
    df = df.sort_values(['Submitting Lab', "Submitted Date"])
    return df, df2
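For reference, the aggregation above can be reproduced on toy data with pandas named aggregation, which sidesteps reusing a grouping column as an aggregation target; the records here are illustrative, not the real submission data:

import pandas as pd

df = pd.DataFrame({
    "Submitting Lab": ["A", "A", "B"],
    "Extraction Kit": ["K1", "K1", "K2"],
    "Cost": [10.0, 12.0, 8.0],
    "Sample Count": [24, 24, 48],
})
# One summary row per (lab, kit): count runs, sum cost and samples.
df2 = df.groupby(["Submitting Lab", "Extraction Kit"]).agg(**{
    "Run Count": ("Cost", "count"),
    "Cost": ("Cost", "sum"),
    "Sample Count": ("Sample Count", "sum"),
})
print(df2)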
@@ -47,13 +47,13 @@ def make_report_html(df:DataFrame, start_date:date, end_date:date) -> str:
    """
    old_lab = ""
    output = []
    logger.debug(f"Report DataFrame: {df}")
    # logger.debug(f"Report DataFrame: {df}")
    for ii, row in enumerate(df.iterrows()):
        logger.debug(f"Row {ii}: {row}")
        # logger.debug(f"Row {ii}: {row}")
        lab = row[0][0]
        logger.debug(type(row))
        logger.debug(f"Old lab: {old_lab}, Current lab: {lab}")
        logger.debug(f"Name: {row[0][1]}")
        # logger.debug(type(row))
        # logger.debug(f"Old lab: {old_lab}, Current lab: {lab}")
        # logger.debug(f"Name: {row[0][1]}")
        data = [item for item in row[1]]
        kit = dict(name=row[0][1], cost=data[1], run_count=int(data[0]), sample_count=int(data[2]))
        # if this is the same lab as before add together
@@ -67,7 +67,7 @@ def make_report_html(df:DataFrame, start_date:date, end_date:date) -> str:
        adder = dict(lab=lab, kits=[kit], total_cost=kit['cost'], total_samples=kit['sample_count'], total_runs=kit['run_count'])
        output.append(adder)
        old_lab = lab
    logger.debug(output)
    # logger.debug(output)
    dicto = {'start_date':start_date, 'end_date':end_date, 'labs':output}#, "table":table}
    temp = env.get_template('summary_report.html')
    html = temp.render(input=dicto)
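A self-contained sketch of the render step, with an inline template standing in for the project's summary_report.html and hypothetical totals:

from jinja2 import Template

labs = [
    {"lab": "Lab A", "total_cost": 22.0, "total_runs": 2, "total_samples": 48},
    {"lab": "Lab B", "total_cost": 8.0, "total_runs": 1, "total_samples": 48},
]
# Hypothetical stand-in for env.get_template('summary_report.html').
temp = Template("{% for lab in input.labs %}{{ lab.lab }}: ${{ lab.total_cost }}\n{% endfor %}")
html = temp.render(input={"start_date": "2024-01-01", "end_date": "2024-01-31", "labs": labs})
print(html)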
@@ -91,14 +91,14 @@ def convert_data_list_to_df(input:list[dict], subtype:str|None=None) -> DataFrame:
    for column in df.columns:
        if "percent" in column:
            count_col = [item for item in df.columns if "count" in item][0]
            # The actual percentage from kraken was off due to exclusion of NaN, recalculating.
            # NOTE: The actual percentage from kraken was off due to exclusion of NaN, recalculating.
            df[column] = 100 * df[count_col] / df.groupby('name')[count_col].transform('sum')
        if column not in safe:
            if subtype != None and column != subtype:
                del df[column]
    # move date of sample submitted on same date as previous ahead one.
    # NOTE: move date of sample submitted on same date as previous ahead one.
    df = displace_date(df)
    # ad hoc method to make data labels more accurate.
    # NOTE: ad hoc method to make data labels more accurate.
    df = df_column_renamer(df=df)
    return df

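The recalculated percentage uses groupby(...).transform('sum'), which broadcasts each group's total back onto its rows so the division stays row-aligned. A worked toy example (column names hypothetical):

import pandas as pd

df = pd.DataFrame({"name": ["ctrl1", "ctrl1", "ctrl2"], "kraken_count": [30, 70, 50]})
# transform('sum') returns a Series the same length as df, holding each row's group total.
df["kraken_percent"] = 100 * df["kraken_count"] / df.groupby("name")["kraken_count"].transform("sum")
assert df["kraken_percent"].tolist() == [30.0, 70.0, 100.0]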
@@ -131,8 +131,8 @@ def displace_date(df:DataFrame) -> DataFrame:
    Returns:
        DataFrame: output dataframe with dates incremented.
    """
    logger.debug(f"Unique items: {df['name'].unique()}")
    # get submitted dates for each control
    # logger.debug(f"Unique items: {df['name'].unique()}")
    # NOTE: get submitted dates for each control
    dict_list = [dict(name=item, date=df[df.name == item].iloc[0]['submitted_date']) for item in sorted(df['name'].unique())]
    previous_dates = []
    for _, item in enumerate(dict_list):
@@ -157,10 +157,10 @@ def check_date(df:DataFrame, item:dict, previous_dates:list) -> Tuple[DataFrame, list]:
        check = False
    previous_dates.append(item['date'])
    if check:
        logger.debug(f"We found one! Increment date!\n\t{item['date']} to {item['date'] + timedelta(days=1)}")
        # get df locations where name == item name
        # logger.debug(f"We found one! Increment date!\n\t{item['date']} to {item['date'] + timedelta(days=1)}")
        # NOTE: get df locations where name == item name
        mask = df['name'] == item['name']
        # increment date in dataframe
        # NOTE: increment date in dataframe
        df.loc[mask, 'submitted_date'] = df.loc[mask, 'submitted_date'].apply(lambda x: x + timedelta(days=1))
        item['date'] += timedelta(days=1)
        passed = False
@@ -170,9 +170,9 @@ def check_date(df:DataFrame, item:dict, previous_dates:list) -> Tuple[DataFrame, list]:
    # logger.debug(f"DF: {type(df)}, previous_dates: {type(previous_dates)}")
    # if run didn't lead to changed date, return values
    if passed:
        logger.debug(f"Date check passed, returning.")
        # logger.debug(f"Date check passed, returning.")
        return df, previous_dates
    # if date was changed, rerun with new date
    # NOTE: if date was changed, rerun with new date
    else:
        logger.warning(f"Date check failed, running recursion")
        df, previous_dates = check_date(df, item, previous_dates)

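displace_date() and check_date() together implement "push colliding dates forward one day and re-check": the recursion bottoms out once a control's date no longer matches any previously seen date. A simplified, DataFrame-free sketch of the same rule:

from datetime import date, timedelta

def displace(dates: list[date]) -> list[date]:
    """Nudge colliding dates forward one day at a time until all are unique."""
    seen: set[date] = set()
    out = []
    for d in sorted(dates):
        while d in seen:  # same collision test check_date() performs, minus the recursion
            d += timedelta(days=1)
        seen.add(d)
        out.append(d)
    return out

assert displace([date(2023, 1, 1), date(2023, 1, 1)]) == [date(2023, 1, 1), date(2023, 1, 2)]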
@@ -31,7 +31,6 @@ class SheetWriter(object):
                case 'filepath':
                    self.__setattr__(k, v)
                case 'submission_type':
                    # self.__setattr__('submission_type', submission.submission_type['value'])
                    self.sub[k] = v['value']
                    self.submission_type = SubmissionType.query(name=v['value'])
                    self.sub_object = BasicSubmission.find_polymorphic_subclass(polymorphic_identity=self.submission_type)
@@ -40,7 +39,7 @@ class SheetWriter(object):
                    self.sub[k] = v['value']
                else:
                    self.sub[k] = v
        logger.debug(f"\n\nWriting to {submission.filepath.__str__()}\n\n")
        # logger.debug(f"\n\nWriting to {submission.filepath.__str__()}\n\n")

        if self.filepath.stem.startswith("tmp"):
            template = self.submission_type.template_file
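A minimal sketch of the match-based field dispatch in the constructor above, with hypothetical fields and the database lookups elided:

def sort_fields(submission: dict) -> tuple[dict, dict]:
    """Split incoming fields into writer attributes and plain submission values."""
    attrs, sub = {}, {}
    for k, v in submission.items():
        match k:
            case "filepath":
                attrs[k] = v  # kept as an attribute, not written to the sheet
            case "submission_type":
                sub[k] = v["value"]  # the real code also queries SubmissionType here
            case _:
                sub[k] = v["value"] if isinstance(v, dict) else v
    return attrs, sub

attrs, sub = sort_fields({"filepath": "tmp1.xlsx", "submission_type": {"value": "Example Type"}, "comment": "ok"})
assert sub == {"submission_type": "Example Type", "comment": "ok"}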
@@ -95,7 +94,7 @@ class InfoWriter(object):
        self.xl = xl
        map = submission_type.construct_info_map(mode='write')
        self.info = self.reconcile_map(info_dict, map)
        logger.debug(pformat(self.info))
        # logger.debug(pformat(self.info))

    def reconcile_map(self, info_dict: dict, map: dict) -> dict:
        output = {}
@@ -121,8 +120,7 @@ class InfoWriter(object):
                logger.error(f"No locations for {k}, skipping")
                continue
            for loc in locations:

                logger.debug(f"Writing {k} to {loc['sheet']}, row: {loc['row']}, column: {loc['column']}")
                # logger.debug(f"Writing {k} to {loc['sheet']}, row: {loc['row']}, column: {loc['column']}")
                sheet = self.xl[loc['sheet']]
                sheet.cell(row=loc['row'], column=loc['column'], value=v['value'])
        return self.sub_object.custom_info_writer(self.xl, info=self.info)
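A self-contained sketch of the map-driven write above, using openpyxl directly; the map and value are hypothetical:

from openpyxl import Workbook

# Hypothetical reconciled info map: each field carries a value plus its cell locations.
info = {"submitter": {"value": "Lab A", "locations": [{"sheet": "Sheet", "row": 2, "column": 3}]}}

wb = Workbook()  # a fresh workbook starts with one sheet named "Sheet"
for field, entry in info.items():
    for loc in entry["locations"]:
        wb[loc["sheet"]].cell(row=loc["row"], column=loc["column"], value=entry["value"])

assert wb["Sheet"].cell(row=2, column=3).value == "Lab A"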
@@ -152,7 +150,7 @@ class ReagentWriter(object):
            try:
                dicto = dict(value=v, row=mp_info[k]['row'], column=mp_info[k]['column'])
            except KeyError as e:
                # logger.error(f"Keyerror: {e}")
                logger.error(f"KeyError: {e}")
                dicto = v
            placeholder[k] = dicto
        placeholder['sheet'] = mp_info['sheet']
@@ -197,7 +195,6 @@ class SampleWriter(object):
    def write_samples(self):
        sheet = self.xl[self.map['sheet']]
        columns = self.map['sample_columns']
        # rows = range(self.map['start_row'], self.map['end_row']+1)
        for ii, sample in enumerate(self.samples):
            row = self.map['start_row'] + (sample['submission_rank'] - 1)
            for k, v in sample.items():
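The row arithmetic above places each sample at start_row offset by its 1-based submission_rank, so rank gaps leave blank rows rather than compacting the list. A small sketch with hypothetical samples:

samples = [
    {"sample_id": "S1", "submission_rank": 1},
    {"sample_id": "S2", "submission_rank": 3},  # rank gap -> row 11 stays blank
]
start_row = 10
for sample in samples:
    row = start_row + (sample["submission_rank"] - 1)
    print(f"{sample['sample_id']} -> row {row}")  # S1 -> row 10, S2 -> row 12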
@@ -229,8 +226,6 @@ class EquipmentWriter(object):
                for jj, (k, v) in enumerate(equipment.items(), start=1):
                    dicto = dict(value=v, row=ii, column=jj)
                    placeholder[k] = dicto

                # output.append(placeholder)
            else:
                for jj, (k, v) in enumerate(equipment.items(), start=1):
                    try:
@@ -258,8 +253,8 @@ class EquipmentWriter(object):
            for k, v in equipment.items():
                if not isinstance(v, dict):
                    continue
                logger.debug(
                    f"Writing {k}: {v['value']} to {equipment['sheet']}, row: {v['row']}, column: {v['column']}")
                # logger.debug(
                #     f"Writing {k}: {v['value']} to {equipment['sheet']}, row: {v['row']}, column: {v['column']}")
                if isinstance(v['value'], list):
                    v['value'] = v['value'][0]
                try:
