From a851116a6daf9763c568883b50958d007ab070cb Mon Sep 17 00:00:00 2001 From: Daniel K Lyons <dlyons@nrao.edu> Date: Tue, 30 Mar 2021 10:14:37 -0600 Subject: [PATCH] Reinsert get_locations_file and copying utilities.py into the datafetcher/test area --- .../datafetcher/test/df_pytest_utils.py | 15 +- .../executables/datafetcher/test/utilities.py | 140 ++++++++++++++++++ 2 files changed, 154 insertions(+), 1 deletion(-) create mode 100644 apps/cli/executables/datafetcher/test/utilities.py diff --git a/apps/cli/executables/datafetcher/test/df_pytest_utils.py b/apps/cli/executables/datafetcher/test/df_pytest_utils.py index 060e06526..b9287c8f3 100644 --- a/apps/cli/executables/datafetcher/test/df_pytest_utils.py +++ b/apps/cli/executables/datafetcher/test/df_pytest_utils.py @@ -36,7 +36,7 @@ from pycapo import CapoConfig # pylint: disable=C0115, C0116, C0200, R0902, R0903, R0914, R1721, W0212, W0613, W0621, W0703, W1203 sys.path.insert(0, str(get_project_root())) -from shared.workspaces.test.test_data.utilities import ( +from .utilities import ( get_locations_report, get_test_data_dir, ) @@ -95,6 +95,19 @@ LOCATION_REPORTS = { } +def get_locations_file(key: str): + """ + Return location report file specified by key + :param key: location report name + :return: + + """ + + report_spec = LOCATION_REPORTS[key] + filename = report_spec["filename"] + return Path(get_test_data_dir(), filename) + + def write_locations_file(destination: Path, locations_report: LocationsReport): """ diff --git a/apps/cli/executables/datafetcher/test/utilities.py b/apps/cli/executables/datafetcher/test/utilities.py new file mode 100644 index 000000000..e59be444c --- /dev/null +++ b/apps/cli/executables/datafetcher/test/utilities.py @@ -0,0 +1,140 @@ +""" Helper functions for download product testing """ + +# NB: this is a copy of the file in shared/workspaces/test/test_data +# Directly accessing that file seems to be troublesome for the test suite. -DKL + +import json +import sys +from enum import Enum +from pathlib import Path + + +DATETIME_FORMAT = "%Y-%m-%d %H:%M:%S.%f" +DATE_FORMAT = "%Y-%m-%d" + + +def get_report_file(basename: str): + """Get a locations file from our collection in test_data, + given a basename (.json filename w/o extension) + + """ + + test_data_dir = get_test_data_dir() + + for file in test_data_dir.glob(basename.upper() + ".json"): + # (there should be only one for this basename) + return file + + return None + + +def get_exec_block_details_from_loc_report(prefix: str, exec_blocks: list): + """ + Fetch and read locations report indicated by basename; + for filegroup IDs of exec blocks, return file info dict + and total size of files + + :param prefix: + :param exec_blocks: + :return: + + """ + file_info = dict() + total_size = 0 + for exec_block in exec_blocks: + basename = prefix + str(exec_block.filegroup_id) + locations_report = None + try: + locations_report = get_locations_report(basename) + except FileNotFoundError: + # special case: GBT product + if basename.startswith("AGBT17B_044"): + locations_report = get_locations_report("AGBT17B_044_02") + + total_size += locations_report["aggregate_size"] + for file_spec in locations_report["files"]: + filename = file_spec["ngas_file_id"] + size = file_spec["size"] + file_info[filename] = size + + return file_info, total_size + + +def get_test_data_dir(): + """ where's our test data? """ + top_level_subdirs = sys.path + shared_ws_src = None + for pathname in top_level_subdirs: + if "shared/workspaces" in pathname: + shared_ws_src = pathname + break + shared_wksp = Path(shared_ws_src).parent + + # test data will be a few levels under shared_wksp + for item in shared_wksp.rglob("location_files"): + assert item.is_dir() + return item + + return None + + +def get_file_info_from_json_file(json_filename: str) -> dict: + """ Pluck file information from a .json location file """ + + to_read = None + for file in Path.cwd().rglob(json_filename): + to_read = file + break + assert to_read is not None + with open(to_read, "r") as content: + file_info = json.loads(content.read()) + + return file_info + + +def get_file_info_from_loc_report(locations_report: dict) -> tuple: + file_info = dict() + total_size = 0 + total_size += locations_report["aggregate_size"] + for file_spec in locations_report["files"]: + filename = file_spec["ngas_file_id"] + size = file_spec["size"] + file_info[filename] = size + return file_info, total_size + + +def get_locations_report(basename: str): + """ Get a locations report from a file in test_data """ + + report_path = get_report_file(basename) + + if report_path is not None: + with open(report_path, "r") as content: + locations_report = json.loads(content.read()) + return locations_report + + raise FileNotFoundError(f'{basename.upper() + ".json"} not found') + + +class Deliverable(Enum): + SDM = "SDM" + BDF = "BDF" + MS = "MS" + CMS = "CMS" + IMG = "IMG" + TAR = "TAR" + # VLBA + IDIFITS = "IDIFITS" + + @staticmethod + def from_str(key: str): + for dtype in Deliverable: + if dtype.value == key: + return dtype + return None + + +class DeliverableProduct: + def __init__(self, dtype: Deliverable, file_info: dict): + self.type = dtype + self.file_info = file_info -- GitLab