From a851116a6daf9763c568883b50958d007ab070cb Mon Sep 17 00:00:00 2001
From: Daniel K Lyons <dlyons@nrao.edu>
Date: Tue, 30 Mar 2021 10:14:37 -0600
Subject: [PATCH] Reinsert get_locations_file and copying utilities.py into the
 datafetcher/test area

---
 .../datafetcher/test/df_pytest_utils.py       |  15 +-
 .../executables/datafetcher/test/utilities.py | 140 ++++++++++++++++++
 2 files changed, 154 insertions(+), 1 deletion(-)
 create mode 100644 apps/cli/executables/datafetcher/test/utilities.py

diff --git a/apps/cli/executables/datafetcher/test/df_pytest_utils.py b/apps/cli/executables/datafetcher/test/df_pytest_utils.py
index 060e06526..b9287c8f3 100644
--- a/apps/cli/executables/datafetcher/test/df_pytest_utils.py
+++ b/apps/cli/executables/datafetcher/test/df_pytest_utils.py
@@ -36,7 +36,7 @@ from pycapo import CapoConfig
 
 # pylint: disable=C0115, C0116, C0200, R0902, R0903, R0914, R1721, W0212, W0613, W0621, W0703, W1203
 sys.path.insert(0, str(get_project_root()))
-from shared.workspaces.test.test_data.utilities import (
+from .utilities import (
     get_locations_report,
     get_test_data_dir,
 )
@@ -95,6 +95,19 @@ LOCATION_REPORTS = {
 }
 
 
+def get_locations_file(key: str):
+    """
+    Build the path of the location report file registered under key
+
+    :param key: location report name, a key of LOCATION_REPORTS
+    :return: Path of the report's "filename" entry joined onto the
+        test data directory
+    """
+    # the filename lookup runs first, so an unknown key fails before any search
+    report_filename = LOCATION_REPORTS[key]["filename"]
+    return Path(get_test_data_dir(), report_filename)
+
+
 def write_locations_file(destination: Path, locations_report: LocationsReport):
     """
 
diff --git a/apps/cli/executables/datafetcher/test/utilities.py b/apps/cli/executables/datafetcher/test/utilities.py
new file mode 100644
index 000000000..e59be444c
--- /dev/null
+++ b/apps/cli/executables/datafetcher/test/utilities.py
@@ -0,0 +1,140 @@
+""" Helper functions for download product testing """
+
+# NB: this is a copy of the file in shared/workspaces/test/test_data
+# Directly accessing that file seems to be troublesome for the test suite. -DKL
+
+import json
+import sys
+from enum import Enum
+from pathlib import Path
+
+
+DATETIME_FORMAT = "%Y-%m-%d %H:%M:%S.%f"
+DATE_FORMAT = "%Y-%m-%d"
+
+
+def get_report_file(basename: str):
+    """Find a locations file in our test_data collection.
+
+    The file sought is the upper-cased basename plus ".json"
+    (basename being the .json filename w/o extension).
+    Returns the first match, or None when nothing matches.
+    """
+
+    data_dir = get_test_data_dir()
+
+    # (there should be only one for this basename)
+    matches = data_dir.glob(basename.upper() + ".json")
+    return next(matches, None)
+
+
+def get_exec_block_details_from_loc_report(prefix: str, exec_blocks: list):
+    """
+    Fetch and read the locations report for each exec block, named by
+    prefix + the block's filegroup_id; return file info dict
+    and total size of files
+
+    :param prefix: leading part of each locations report basename
+    :param exec_blocks: objects exposing a filegroup_id attribute
+    :return: ({ngas_file_id: size}, sum of the reports' aggregate_size)
+    :raises FileNotFoundError: if a report is missing and has no fallback
+    """
+    file_info = dict()
+    total_size = 0
+    for exec_block in exec_blocks:
+        basename = prefix + str(exec_block.filegroup_id)
+        try:
+            locations_report = get_locations_report(basename)
+        except FileNotFoundError:
+            # special case: GBT product; anything else stays a missing file
+            if not basename.startswith("AGBT17B_044"):
+                raise
+            locations_report = get_locations_report("AGBT17B_044_02")
+
+        total_size += locations_report["aggregate_size"]
+        for file_spec in locations_report["files"]:
+            filename = file_spec["ngas_file_id"]
+            size = file_spec["size"]
+            file_info[filename] = size
+
+    return file_info, total_size
+
+
+def get_test_data_dir():
+    """Return the first "location_files" directory found beneath the parent
+    of the first sys.path entry containing "shared/workspaces", or None."""
+    shared_ws_src = next(
+        (entry for entry in sys.path if "shared/workspaces" in entry),
+        None,
+    )
+    if shared_ws_src is None:
+        # no shared/workspaces entry on sys.path: nowhere to search
+        return None
+
+    # test data will be a few levels under the parent of shared_ws_src
+    for item in Path(shared_ws_src).parent.rglob("location_files"):
+        if item.is_dir():
+            return item
+    return None
+
+
+def get_file_info_from_json_file(json_filename: str) -> dict:
+    """Parse the first file named json_filename found under the cwd.
+
+    The search is recursive; the parsed JSON content is returned.
+    """
+    # only the first match is ever used
+    first_match = next(Path.cwd().rglob(json_filename), None)
+    assert first_match is not None
+
+    with open(first_match, "r") as json_file:
+        parsed = json.load(json_file)
+    return parsed
+
+
+def get_file_info_from_loc_report(locations_report: dict) -> tuple:
+    """Return ({ngas_file_id: size}, aggregate_size) for a locations report."""
+    total_size = 0
+    total_size += locations_report["aggregate_size"]
+    file_info = {
+        spec["ngas_file_id"]: spec["size"]
+        for spec in locations_report["files"]
+    }
+    return file_info, total_size
+
+
+def get_locations_report(basename: str):
+    """Load the locations report that get_report_file finds for basename.
+
+    Raises FileNotFoundError when no such report file is found.
+    """
+    report_path = get_report_file(basename)
+    if report_path is None:
+        raise FileNotFoundError(f'{basename.upper() + ".json"} not found')
+
+    with open(report_path, "r") as content:
+        return json.loads(content.read())
+
+
+class Deliverable(Enum):
+    """Deliverable types; each member's value is the same as its name."""
+
+    SDM = "SDM"
+    BDF = "BDF"
+    MS = "MS"
+    CMS = "CMS"
+    IMG = "IMG"
+    TAR = "TAR"
+    # VLBA
+    IDIFITS = "IDIFITS"
+
+    @staticmethod
+    def from_str(key: str):
+        """Return the member whose value equals key, or None if none does."""
+        return next((kind for kind in Deliverable if kind.value == key), None)
+
+
+class DeliverableProduct:
+    """Pairs a Deliverable type (type) with a file info dict (file_info)."""
+    def __init__(self, dtype: Deliverable, file_info: dict):
+        self.type, self.file_info = dtype, file_info
-- 
GitLab