Pre code clean-up

lwark
2024-10-17 13:09:35 -05:00
parent c3a4aac68b
commit 495d1a5a7f
15 changed files with 322 additions and 263 deletions

View File

@@ -135,7 +135,7 @@ class BaseClass(Base):
singles = model.get_default_info('singles')
logger.info(f"Querying: {model}, with kwargs: {kwargs}")
for k, v in kwargs.items():
logger.debug(f"Using key: {k} with value: {v}")
logger.info(f"Using key: {k} with value: {v}")
try:
attr = getattr(model, k)
query = query.filter(attr == v)
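For context, the loop above builds a query dynamically from keyword arguments: each key is resolved to a mapped column with getattr and chained into an equality filter. A minimal self-contained sketch of that pattern, assuming SQLAlchemy 1.4+ (the Sample model, its columns, and the usage line are hypothetical, not from this repo):

from sqlalchemy import Column, Integer, String, create_engine
from sqlalchemy.orm import Session, declarative_base

Base = declarative_base()

class Sample(Base):  # hypothetical model, for illustration only
    __tablename__ = "sample"
    id = Column(Integer, primary_key=True)
    submitter_id = Column(String)

def query_with_kwargs(session: Session, model, **kwargs):
    # Resolve each keyword to a mapped column and chain equality filters,
    # mirroring the getattr/filter loop shown above.
    query = session.query(model)
    for k, v in kwargs.items():
        attr = getattr(model, k)  # raises AttributeError on an unknown column
        query = query.filter(attr == v)
    return query.all()

# Usage sketch: query_with_kwargs(Session(create_engine("sqlite://")), Sample, submitter_id="RSL-BC-1")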

View File

@@ -12,7 +12,7 @@ import logging, re
from operator import itemgetter
from . import BaseClass
from tools import setup_lookup, report_result, Result, Report, Settings, get_unique_values_in_df_column
from tools import setup_lookup, report_result, Result, Report, Settings, get_unique_values_in_df_column, super_splitter
from datetime import date, datetime, timedelta
from typing import List, Literal, Tuple, Generator
from dateutil.parser import parse
@@ -81,31 +81,33 @@ class ControlType(BaseClass):
return []
# NOTE: remove items that don't have relevant data
subtypes = [item for item in jsoner[genera] if "_hashes" not in item and "_ratio" not in item]
logger.debug(f"subtypes out: {pformat(subtypes)}")
# logger.debug(f"subtypes out: {pformat(subtypes)}")
return subtypes
def get_instance_class(self):
return Control.find_polymorphic_subclass(polymorphic_identity=self.name)
@classmethod
def get_positive_control_types(cls) -> Generator[ControlType, None, None]:
def get_positive_control_types(cls, control_type: str) -> Generator[str, None, None]:
"""
Gets the names of targets set for a given control type
Args:
control_type (str): Name of the control type to query
Returns:
Generator[str, None, None]: Names of targets that are set for the control type
"""
return (item for item in cls.query() if item.targets)
ct = cls.query(name=control_type).targets
return (item for item in ct.keys() if ct[item])
@classmethod
def build_positive_regex(cls) -> Pattern:
def build_positive_regex(cls, control_type: str) -> Pattern:
"""
Creates a re.Pattern that will look for positive control types
Args:
control_type (str): Name of the control type whose targets seed the pattern
Returns:
Pattern: Constructed pattern
"""
strings = list(set([item.name.split("-")[0] for item in cls.get_positive_control_types()]))
# strings = list(set([item.name.split("-")[0] for item in cls.get_positive_control_types()]))
strings = list(set([super_splitter(item, "-", 0) for item in cls.get_positive_control_types(control_type)]))
return re.compile(rf"(^{'|^'.join(strings)})-.*", flags=re.IGNORECASE)
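The rebuilt regex ORs together the first dash-delimited token of each target name, so targets such as EN1-xxx and MCS-xxx produce (^EN1|^MCS)-.*. A minimal sketch of the construction and matching, assuming super_splitter(value, delimiter, index) simply splits on the delimiter and returns the indexed field (the target names below are made up):

import re

def super_splitter(value: str, delimiter: str, index: int) -> str:
    # Assumed behaviour: split on the delimiter and return the indexed field.
    return value.split(delimiter)[index]

targets = ["EN1-20241017", "MCS-20241017"]  # hypothetical control target names
strings = list(set(super_splitter(item, "-", 0) for item in targets))
pattern = re.compile(rf"(^{'|^'.join(strings)})-.*", flags=re.IGNORECASE)

assert pattern.match("EN1-20241017-1 (A1)")   # positive control hit
assert pattern.match("mcs-20241017")          # case-insensitive
assert pattern.match("ATCC49226-1") is None   # ordinary sample, no match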
@@ -298,7 +300,8 @@ class PCRControl(Control):
parent.mode_typer.clear()
parent.mode_typer.setEnabled(False)
report = Report()
controls = cls.query(sub_type=chart_settings['sub_type'], start_date=chart_settings['start_date'], end_date=chart_settings['end_date'])
controls = cls.query(sub_type=chart_settings['sub_type'], start_date=chart_settings['start_date'],
end_date=chart_settings['end_date'])
data = [control.to_sub_dict() for control in controls]
df = DataFrame.from_records(data)
try:

View File

@@ -198,7 +198,7 @@ class KitType(BaseClass):
# logger.debug("Get all KitTypeReagentTypeAssociation for SubmissionType")
for assoc in assocs:
try:
logger.debug(f"Yielding: {assoc.reagent_role.name}, {assoc.uses}")
# logger.debug(f"Yielding: {assoc.reagent_role.name}, {assoc.uses}")
yield assoc.reagent_role.name, assoc.uses
except TypeError:
continue
@@ -1156,7 +1156,7 @@ class KitTypeReagentRoleAssociation(BaseClass):
base_dict[k] = v
return base_dict
def get_all_relevant_reagents(self) -> Generator[Reagent, None, None]:
def get_all_relevant_reagents(self, override: Reagent | None = None) -> Generator[Reagent, None, None]:
"""
Creates a generator that will resolve into a list of reagents filling the role associated with this object.

View File

@@ -102,7 +102,6 @@ class Organization(BaseClass):
else:
raise Exception(f"Filetype {filepath.suffix} not supported.")
data = import_dict['orgs']
logger.debug(pformat(import_dict))
for org in data:
organ = Organization.query(name=org['name'])
if organ is None:

View File

@@ -235,7 +235,7 @@ class BasicSubmission(BaseClass):
Returns:
SubmissionType: SubmissionType with name equal to this polymorphic identity
"""
logger.debug(f"Running search for {sub_type}")
# logger.debug(f"Running search for {sub_type}")
if isinstance(sub_type, dict):
try:
sub_type = sub_type['value']
@@ -521,7 +521,7 @@ class BasicSubmission(BaseClass):
Returns:
pd.DataFrame: Pandas DataFrame of all relevant submissions
"""
logger.debug(f"Querying Type: {submission_type}")
# logger.debug(f"Querying Type: {submission_type}")
# logger.debug(f"Using limit: {limit}")
# NOTE: use lookup function to create list of dicts
subs = [item.to_dict() for item in
@@ -827,7 +827,7 @@ class BasicSubmission(BaseClass):
return input_dict
@classmethod
def custom_validation(cls, pyd: "PydSubmission") -> dict:
def custom_validation(cls, pyd: "PydSubmission") -> "PydSubmission":
"""
Performs any final custom parsing of the Excel file.
@@ -1412,14 +1412,16 @@ class BacterialCulture(BasicSubmission):
dict: Updated dictionary.
"""
from . import ControlType
# logger.debug(f"\n\nHello from BacterialCulture custom_validation")
pyd = super().custom_validation(pyd)
# NOTE: build regex for all control types that have targets
regex = ControlType.build_positive_regex()
regex = ControlType.build_positive_regex(control_type="Irida Control")
logger.debug(regex)
# NOTE: search samples for match
for sample in pyd.samples:
matched = regex.match(sample.submitter_id)
if bool(matched):
# logger.debug(f"Control match found: {sample['submitter_id']}")
# logger.debug(f"Control match found: {sample.submitter_id}")
new_lot = matched.group()
try:
pos_control_reg = \
@@ -1429,6 +1431,7 @@ class BacterialCulture(BasicSubmission):
return pyd
pos_control_reg.lot = new_lot
pos_control_reg.missing = False
# logger.debug(f"Got positive control: {pos_control_reg}")
return pyd
@classmethod
@@ -1785,6 +1788,7 @@ class WastewaterArtic(BasicSubmission):
ii in
range(source_plates_section['start_row'], source_plates_section['end_row'] + 1)]
for datum in data:
logger.debug(f"Datum: {datum}")
if datum['plate'] in ["None", None, ""]:
continue
else:
@@ -1869,7 +1873,13 @@ class WastewaterArtic(BasicSubmission):
dict: Updated sample dictionary
"""
input_dict = super().parse_samples(input_dict)
logger.debug(f"WWA input dict: {pformat(input_dict)}")
input_dict['sample_type'] = "Wastewater Sample"
# NOTE: Stopgap solution because WW is sloppy with their naming schemes
try:
input_dict['source_plate'] = input_dict['source_plate'].replace("WW20", "WW-20")
except KeyError:
pass
# NOTE: Because generate_sample_object needs the submitter_id and the artic has the "({origin well})"
# at the end, this has to be done here. No moving to sqlalchemy object :(
input_dict['submitter_id'] = re.sub(r"\s\(.+\)\s?$", "", str(input_dict['submitter_id'])).strip()
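The re.sub above strips a trailing " ({origin well})" annotation so the bare submitter_id can be matched downstream. A quick sanity-check sketch (the sample IDs are invented):

import re

def strip_origin_well(submitter_id: str) -> str:
    # Remove a trailing " (origin well)" annotation, e.g. " (A1)".
    return re.sub(r"\s\(.+\)\s?$", "", str(submitter_id)).strip()

assert strip_origin_well("RSL-WW-20241017-12 (A1)") == "RSL-WW-20241017-12"
assert strip_origin_well("RSL-WW-20241017-12") == "RSL-WW-20241017-12"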

View File

@@ -260,7 +260,7 @@ class ReagentParser(object):
extraction_kit (str): Extraction kit used.
sub_object (BasicSubmission | None, optional): Submission object holding methods. Defaults to None.
"""
logger.debug("\n\nHello from ReagentParser!\n\n")
logger.info("\n\nHello from ReagentParser!\n\n")
if isinstance(submission_type, str):
submission_type = SubmissionType.query(name=submission_type)
self.submission_type_obj = submission_type
@@ -303,11 +303,11 @@ class ReagentParser(object):
for sheet in self.xl.sheetnames:
ws = self.xl[sheet]
relevant = {k.strip(): v for k, v in self.map.items() if sheet in self.map[k]['sheet']}
logger.debug(f"relevant map for {sheet}: {pformat(relevant)}")
# logger.debug(f"relevant map for {sheet}: {pformat(relevant)}")
if relevant == {}:
continue
for item in relevant:
logger.debug(f"Attempting to scrape: {item}")
# logger.debug(f"Attempting to scrape: {item}")
try:
reagent = relevant[item]
name = ws.cell(row=reagent['name']['row'], column=reagent['name']['column']).value
@@ -325,10 +325,10 @@ class ReagentParser(object):
missing = False
else:
missing = True
logger.debug(f"Got lot for {item}-{name}: {lot} as {type(lot)}")
# logger.debug(f"Got lot for {item}-{name}: {lot} as {type(lot)}")
lot = str(lot)
logger.debug(
f"Going into pydantic: name: {name}, lot: {lot}, expiry: {expiry}, type: {item.strip()}, comment: {comment}")
# logger.debug(
# f"Going into pydantic: name: {name}, lot: {lot}, expiry: {expiry}, type: {item.strip()}, comment: {comment}")
try:
check = name.lower() != "not applicable"
except AttributeError:
@@ -353,12 +353,12 @@ class SampleParser(object):
sample_map (dict | None, optional): Locations in database where samples are found. Defaults to None.
sub_object (BasicSubmission | None, optional): Submission object holding methods. Defaults to None.
"""
logger.debug("\n\nHello from SampleParser!\n\n")
logger.info("\n\nHello from SampleParser!\n\n")
self.samples = []
self.xl = xl
if isinstance(submission_type, str):
submission_type = SubmissionType.query(name=submission_type)
logger.debug(f"Sample parser is using submission type: {submission_type}")
# logger.debug(f"Sample parser is using submission type: {submission_type}")
self.submission_type = submission_type.name
self.submission_type_obj = submission_type
if sub_object is None:
@@ -456,40 +456,49 @@ class SampleParser(object):
List[dict]: Reconciled samples
"""
if not self.plate_map_samples or not self.lookup_samples:
logger.error(f"No separate samples, returning")
self.samples = self.lookup_samples or self.plate_map_samples
return
merge_on_id = self.sample_info_map['lookup_table']['merge_on_id']
plate_map_samples = sorted(copy(self.plate_map_samples), key=lambda d: d['id'])
lookup_samples = sorted(copy(self.lookup_samples), key=lambda d: d[merge_on_id])
for ii, psample in enumerate(plate_map_samples):
# NOTE: See if we can do this the easy way and just use the same list index.
try:
check = psample['id'] == lookup_samples[ii][merge_on_id]
except (KeyError, IndexError):
check = False
if check:
# logger.debug(f"Direct match found for {psample['id']}")
new = lookup_samples[ii] | psample
lookup_samples[ii] = {}
else:
logger.warning(f"Match for {psample['id']} not direct, running search.")
for jj, lsample in enumerate(lookup_samples):
try:
check = lsample[merge_on_id] == psample['id']
except KeyError:
check = False
if check:
new = lsample | psample
lookup_samples[jj] = {}
break
else:
new = psample
if not check_key_or_attr(key='submitter_id', interest=new, check_none=True):
new['submitter_id'] = psample['id']
new = self.sub_object.parse_samples(new)
del new['id']
yield new
logger.warning(f"No separate samples")
samples = self.lookup_samples or self.plate_map_samples
for new in samples:
if not check_key_or_attr(key='submitter_id', interest=new, check_none=True):
new['submitter_id'] = new['id']
new = self.sub_object.parse_samples(new)
try:
del new['id']
except KeyError:
pass
yield new
else:
merge_on_id = self.sample_info_map['lookup_table']['merge_on_id']
plate_map_samples = sorted(copy(self.plate_map_samples), key=lambda d: d['id'])
lookup_samples = sorted(copy(self.lookup_samples), key=lambda d: d[merge_on_id])
for ii, psample in enumerate(plate_map_samples):
# NOTE: See if we can do this the easy way and just use the same list index.
try:
check = psample['id'] == lookup_samples[ii][merge_on_id]
except (KeyError, IndexError):
check = False
if check:
# logger.debug(f"Direct match found for {psample['id']}")
new = lookup_samples[ii] | psample
lookup_samples[ii] = {}
else:
logger.warning(f"Match for {psample['id']} not direct, running search.")
for jj, lsample in enumerate(lookup_samples):
try:
check = lsample[merge_on_id] == psample['id']
except KeyError:
check = False
if check:
new = lsample | psample
lookup_samples[jj] = {}
break
else:
new = psample
if not check_key_or_attr(key='submitter_id', interest=new, check_none=True):
new['submitter_id'] = psample['id']
new = self.sub_object.parse_samples(new)
del new['id']
yield new
class EquipmentParser(object):
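In both branches above, a lookup-table row and a plate-map row merge via the dict-union operator, where the right-hand operand wins on key collisions, so plate-map values (psample) override lookup-table values (lsample). A minimal illustration of that precedence (field names invented, Python 3.9+):

lsample = {"sample_id": "S-001", "row": 1, "column": 1, "concentration": 4.2}
psample = {"id": "S-001", "row": 2, "column": 3}  # plate-map row

merged = lsample | psample  # right-hand dict wins on shared keys
assert merged["row"] == 2 and merged["column"] == 3
assert merged["concentration"] == 4.2  # lookup-only fields survive the merge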

View File

@@ -49,12 +49,13 @@ class RSLNamer(object):
str: parsed submission type
"""
def st_from_path(filename: Path) -> str:
logger.debug(f"Using path method for {filename}.")
logger.info(f"Using path method for {filename}.")
if filename.exists():
wb = load_workbook(filename)
try:
# NOTE: Gets first category in the metadata.
submission_type = next(item.strip().title() for item in wb.properties.category.split(";"))
categories = wb.properties.category.split(";")
submission_type = next(item.strip().title() for item in categories)
except (StopIteration, AttributeError):
sts = {item.name: item.get_template_file_sheets() for item in SubmissionType.query()}
try:
@@ -67,12 +68,12 @@ class RSLNamer(object):
return submission_type
def st_from_str(filename: str) -> str:
regex = BasicSubmission.construct_regex()
logger.debug(f"Using string method for {filename}.")
logger.debug(f"Using regex: {regex}")
logger.info(f"Using string method for {filename}.")
# logger.debug(f"Using regex: {regex}")
m = regex.search(filename)
try:
submission_type = m.lastgroup
logger.debug(f"Got submission type: {submission_type}")
# logger.debug(f"Got submission type: {submission_type}")
except AttributeError as e:
submission_type = None
logger.critical(f"No submission type found!: {e}")
@@ -118,7 +119,7 @@ class RSLNamer(object):
regex = re.compile(rf'{regex}', re.IGNORECASE | re.VERBOSE)
except re.error as e:
regex = BasicSubmission.construct_regex()
logger.debug(f"Using regex: {regex}")
logger.info(f"Using regex: {regex}")
match filename:
case Path():
m = regex.search(filename.stem)
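st_from_str relies on re.Match.lastgroup: when the master regex is built from one named alternative per submission type, the name of the group that matched identifies the type. A small sketch of that trick (the group names, patterns, and filename are hypothetical, not what construct_regex actually emits):

import re

regex = re.compile(
    r"(?P<bacterial_culture>RSL-BC-\d{8})|(?P<wastewater>RSL-WW-\d{8})",
    flags=re.IGNORECASE,
)

m = regex.search("RSL-WW-20241017_run3.xlsx")
assert m is not None and m.lastgroup == "wastewater"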

View File

@@ -57,14 +57,14 @@ class PydReagent(BaseModel):
@classmethod
def rescue_lot_string(cls, value):
if value is not None:
return convert_nans_to_nones(str(value))
return convert_nans_to_nones(str(value).strip())
return value
@field_validator("lot")
@classmethod
def enforce_lot_string(cls, value):
if value is not None:
return value.upper()
return value.upper().strip()
return value
@field_validator("expiry", mode="before")
@@ -97,9 +97,9 @@ class PydReagent(BaseModel):
@classmethod
def enforce_name(cls, value, values):
if value is not None:
return convert_nans_to_nones(str(value))
return convert_nans_to_nones(str(value).strip())
else:
return values.data['role']
return values.data['role'].strip()
def improved_dict(self) -> dict:
"""
@@ -210,6 +210,18 @@ class PydSample(BaseModel, extra='allow'):
def int_to_str(cls, value):
return str(value)
@field_validator("submitter_id")
@classmethod
def strip_sub_id(cls, value):
match value:
case dict():
value['value'] = value['value'].strip().upper()
case str():
value = value.strip().upper()
case _:
pass
return value
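The new strip_sub_id validator uses structural pattern matching to normalize submitter_id whether it arrives as a bare string or wrapped in a {'value': ...} dict; anything else passes through untouched. A standalone sketch of the behaviour with invented inputs (Python 3.10+):

def strip_sub_id(value):
    # Upper-case and trim submitter IDs in both the wrapped-dict
    # and bare-string shapes; other types pass through unchanged.
    match value:
        case dict():
            value['value'] = value['value'].strip().upper()
        case str():
            value = value.strip().upper()
        case _:
            pass
    return value

assert strip_sub_id("  rsl-bc-20241017 ") == "RSL-BC-20241017"
assert strip_sub_id({"value": " en1-lot1 "})["value"] == "EN1-LOT1"
assert strip_sub_id(None) is None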
def improved_dict(self) -> dict:
"""
Constructs a dictionary consisting of model.fields and model.extras
@@ -439,6 +451,7 @@ class PydSubmission(BaseModel, extra='allow'):
if value['value'] in [None, "None"]:
return dict(value=uuid.uuid4().hex.upper(), missing=True)
else:
value['value'] = value['value'].strip()
return value
@field_validator("submitted_date", mode="before")
@@ -523,6 +536,7 @@ class PydSubmission(BaseModel, extra='allow'):
# logger.debug(f"RSL-plate initial value: {value['value']} and other values: {values.data}")
sub_type = values.data['submission_type']['value']
if check_not_nan(value['value']):
value['value'] = value['value'].strip()
return value
else:
# logger.debug("Constructing plate sub_type.")
@@ -808,6 +822,7 @@ class PydSubmission(BaseModel, extra='allow'):
for sample in self.samples:
sample, associations, _ = sample.toSQL(submission=instance)
# logger.debug(f"Sample SQL object to be added to submission: {sample.__dict__}")
logger.debug(associations)
for assoc in associations:
if assoc is not None and assoc not in instance.submission_sample_associations:
instance.submission_sample_associations.append(assoc)