WS-543: Addressed issues found in calibration ingestion testing:

* hyphens in manifest text are all changed to underscores
* InputScienceProduct is simplified
* weblog type is now `pipeline_weblog`
* manifest filename is just `ingestion_manifest.json`

TODO: additional tests to ensure manifest JSON is in the form of the 16B-069 example manifest

WS-543: Addressed issues found in calibration ingestion testing:
5c7a47c0 · Janet Goldstein · Charlotte Hausman · 0eb41c17 · 5c7a47c0 · 5c7a47c0
Commit 5c7a47c0 authored 3 years ago by Janet Goldstein Committed by Charlotte Hausman 3 years ago
--- a/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/ingestion_manifest.py
+++ b/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/ingestion_manifest.py
@@ -14,11 +14,9 @@ import pendulum
 from pendulum import DateTime

 from ingest_envoy.manifest_components import (
-    MANIFEST_NAME_BASE,
-    MANIFEST_NAME_EXT,
    ARTIFACT_NAME,
-    ARTIFACT_EXT,
-    WEBLOG,
+    TARFILE_EXT,
+    WEBLOG_FILENAME,
    JSON,
    IngestionManifestKey,
    ManifestComponentIF,
@@ -29,6 +27,7 @@ from ingest_envoy.manifest_components import (
    AncillaryProduct,
    OutputGroup,
    SCIENCE_PRODUCT_PATTERN,
+    MANIFEST_FILENAME,
 )
 from ingest_envoy.utilities import (
    ScienceProductType,
@@ -152,7 +151,7 @@ class IngestionManifestBuilder:
        # N.B. this is sufficient for most types of ingestion,
        # but ALMA CALs will have multiple EB SPs, identified only by locator,
        # and VLBAs have no input group at all.
-        sp_in = InputScienceProduct(sp_type=self.sp_type, locator=self.locator)
+        sp_in = InputScienceProduct(locator=self.locator)

        return InputGroup([sp_in])

@@ -188,7 +187,7 @@ class IngestionManifestBuilder:
        """
        current_time = pendulum.now()
        timestamp = format_timestamp(current_time)
-        return f"{ARTIFACT_NAME}{timestamp}{ARTIFACT_EXT}"
+        return f"{ARTIFACT_NAME}{timestamp}{TARFILE_EXT}"

    def write_ingestion_artifacts_tar(self) -> Path:
        """
@@ -220,11 +219,11 @@ class IngestionManifestBuilder:

        ancillary_products = []
        # if there's a weblog in here, grab it
-        maybe_weblogs = [file for file in self.files_found if file.name.endswith(WEBLOG)]
+        maybe_weblogs = [file for file in self.files_found if file.name == WEBLOG_FILENAME]
        if len(maybe_weblogs) > 0:
            weblog = maybe_weblogs[0]
            weblog_ap = AncillaryProduct(
-                type=AncillaryProductType.PIPELINE_WEBLOG, filename=weblog.name
+                type=AncillaryProductType.PIPELINE_WEBLOG_TYPE, filename=weblog.name
            )
            ancillary_products.append(weblog_ap)

@@ -281,7 +280,7 @@ class IngestionManifest(ManifestIF):
        :return:
        """

-        output_path = self.staging_source_dir / build_manifest_filename()
+        output_path = self.staging_source_dir / MANIFEST_FILENAME

        to_write = json.dumps(self.to_json(), indent=4)
        with open(output_path, "w") as out:
@@ -336,17 +335,6 @@ def format_timestamp(datetime: DateTime) -> str:
    return datetime.format("YYYY_MM_DDThh_mm_ss.SSS")


-def build_manifest_filename() -> str:
-    """
-    Build unique manifest filename in standard format.
-
-    :return: the filename
-    """
-    current_time = pendulum.now()
-    timestamp = format_timestamp(current_time)
-    return f"{MANIFEST_NAME_BASE}{timestamp}{MANIFEST_NAME_EXT}"
-
-
 def find_manifest(ingestion_path: Path) -> Path:
    """
    Find the ingestion manifest at this ingestion path.
@@ -354,8 +342,7 @@ def find_manifest(ingestion_path: Path) -> Path:
    :param ingestion_path: home of ingestion files
    :return:
    """
-    for file in ingestion_path.iterdir():
-        if file.name.startswith(MANIFEST_NAME_BASE) and file.name.endswith(MANIFEST_NAME_EXT):
-            return file
+    for json_file in ingestion_path.glob(MANIFEST_FILENAME):
+        return json_file

    raise FileNotFoundError(f"No ingestion manifest found at {ingestion_path}")
--- a/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/manifest_components.py
+++ b/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/manifest_components.py
@@ -8,11 +8,10 @@ from typing import Union, List, Dict

 from ingest_envoy.utilities import ScienceProductType, Telescope, AncillaryProductType

-MANIFEST_NAME_BASE = "ingestion_manifest_"
-MANIFEST_NAME_EXT = ".json"
+MANIFEST_FILENAME = "ingestion_manifest.json"
 ARTIFACT_NAME = "ingestion_artifacts_"
-ARTIFACT_EXT = ".tar"
-WEBLOG = "weblog.tgz"
+TARFILE_EXT = ".tar"
+WEBLOG_FILENAME = "weblog.tgz"
 SCIENCE_PRODUCT_PATTERN = re.compile("[a-zA-Z0-9._\\-+]*\\.tar")
 JSON = Union[int, float, str, List["JSON"], Dict[str, "JSON"]]

@@ -57,17 +56,14 @@ class ManifestComponentIF(abc.ABC):


 class InputScienceProduct(ManifestComponentIF):
-    """Represents a science product in the "input-group" section of the ingestion manifest."""
+    """Simplest type of science product: has only a locator"""

-    def __init__(self, locator: str, sp_type: ScienceProductType = None):
-        self.type = sp_type
+    def __init__(self, locator: str):
        self.locator = locator

    def __eq__(self, other):
        if isinstance(other, InputScienceProduct):
-            return other.type == self.type and other.locator == self.locator
-
-        return False
+            return other.locator == self.locator

    def to_json(self) -> JSON:
        """
@@ -75,8 +71,6 @@ class InputScienceProduct(ManifestComponentIF):

        :return: dicty-me
        """
-        if self.type:
-            return {"type": str(self.type), "locator": self.locator}
        return {"locator": self.locator}



--- a/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/utilities.py
+++ b/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/utilities.py
@@ -34,8 +34,8 @@ class AncillaryProductType(Enum):

    INGESTION_ARTIFACTS = "ingestion_artifacts"
    PIPELINE_ARTIFACTS = "pipeline_artifacts"
-    PIPELINE_WEBLOG = "pipeline_weblog"
-    LOG = "log_file"
+    PIPELINE_WEBLOG_TYPE = "pipeline_weblog"
+    LOG_TYPE = "log_file"

    ### Images ###


--- a/apps/cli/executables/pexable/ingest_envoy/test/conftest.py
+++ b/apps/cli/executables/pexable/ingest_envoy/test/conftest.py
@@ -7,9 +7,9 @@ from typing import List

 import pytest

-from ingest_envoy.manifest_components import WEBLOG
+from ingest_envoy.manifest_components import WEBLOG_FILENAME

-WANTED_FILENAMES = ["my_science_products.tar", WEBLOG]
+WANTED_FILENAMES = ["my_science_products.tar", WEBLOG_FILENAME]
 UNWANTED = ["ignore_me.fits", "just_a_lotta_nothing", "uninteresting_metadata.xml"]



--- a/apps/cli/executables/pexable/ingest_envoy/test/examples/_16B_069_cal_manifest.json
+++ b/apps/cli/executables/pexable/ingest_envoy/test/examples/_16B_069_cal_manifest.json
+{
+  "parameters": {
+    "reingest": "false",
+    "ngas_ingest": "false",
+    "calibrate": "false",
+    "ingestion_path": "/lustre/aoc/cluster/pipeline/dsoc-dev/workspaces/staging/cal_test6",
+    "telescope": "EVLA"
+  },
+  "input_group": {
+    "science_products": [
+      {
+        "locator": "uid://evla/execblock/48ba4c9d-d7c7-4a8f-9803-1115cd52459b"
+      }
+    ]
+  },
+  "output_group": {
+    "science_products": [
+      {
+        "type": "calibration",
+        "filename": "16B-069_sb32814386_1_001.57685.66193635417.testdate.caltables.tar"
+      }
+    ],
+    "ancillary_products": [
+      {
+        "type": "pipeline_weblog",
+        "filename": "weblog.tgz"
+      }
+    ]
+  }
+}
--- a/apps/cli/executables/pexable/ingest_envoy/test/examples/image_set_manifest.json
+++ b/apps/cli/executables/pexable/ingest_envoy/test/examples/image_set_manifest.json
 {
  "parameters": {
    "reingest": "false",
-    "ngas-ingest": "false",
+    "ngas_ingest": "false",
    "calibrate": "false",
    "ingestion_path": "/lustre/.."
  },
-  "input-group": {
+  "input_group": {
    "science_products": [
      {
        "type": "calibration",
@@ -13,7 +13,7 @@
      }
    ]
  },
-  "output-group": {
+  "output_group": {
    "science_products": [
      {
        "type": "image",

--- a/apps/cli/executables/pexable/ingest_envoy/test/examples/vlass_catalog_manifest.json
+++ b/apps/cli/executables/pexable/ingest_envoy/test/examples/vlass_catalog_manifest.json
 {
  "parameters": {
    "reingest": "false",
-    "ngas-ingest": "false",
+    "ngas_ingest": "false",
    "calibrate": "false",
    "ingestion_path": "/lustre/...../"
  },

--- a/apps/cli/executables/pexable/ingest_envoy/test/test_manifest_if.py
+++ b/apps/cli/executables/pexable/ingest_envoy/test/test_manifest_if.py
@@ -5,7 +5,6 @@

 import json
 import logging
-import re
 import shutil
 import sys
 from pathlib import Path
@@ -16,12 +15,9 @@ import pytest

 from ingest_envoy.ingestion_manifest import (
    IngestionManifestBuilder,
-    build_manifest_filename,
    find_manifest,
 )
 from ingest_envoy.manifest_components import (
-    MANIFEST_NAME_BASE,
-    MANIFEST_NAME_EXT,
    IngestionManifestKey,
    ParamsKey,
    InputScienceProduct,
@@ -30,8 +26,9 @@ from ingest_envoy.manifest_components import (
    OutputScienceProduct,
    AncillaryProduct,
    OutputGroup,
-    ARTIFACT_EXT,
+    TARFILE_EXT,
    ARTIFACT_NAME,
+    WEBLOG_FILENAME,
 )
 from ingest_envoy.utilities import (
    ScienceProductType,
@@ -69,16 +66,16 @@ def test_manifest_is_complete(ingest_path: Path):
        staging_source_dir=ingest_path,
    )

-    sp1 = InputScienceProduct(
-        sp_type=ScienceProductType.EVLA_CAL,
-        locator=FAKE_LOCATOR,
-    )
+    sp1 = InputScienceProduct(locator=FAKE_LOCATOR)

    ig_in = InputGroup(science_products=[sp1])
    osp_in = OutputScienceProduct(
        type=ScienceProductType.EVLA_CAL, filename="my_science_products.tar"
    )
-    ap_in = AncillaryProduct(type=AncillaryProductType.PIPELINE_WEBLOG, filename="weblog.tgz")
+    ap_in = AncillaryProduct(
+        type=AncillaryProductType.PIPELINE_WEBLOG_TYPE,
+        filename=WEBLOG_FILENAME,
+    )

    manifest, _ = IngestionManifestBuilder(
        staging_source_dir=ingest_path,
@@ -95,36 +92,13 @@ def test_manifest_is_complete(ingest_path: Path):
    af_tar_candidates = [
        file
        for file in ingest_path.iterdir()
-        if file.name.startswith(ARTIFACT_NAME) and file.name.endswith(ARTIFACT_EXT)
+        if file.name.startswith(ARTIFACT_NAME) and file.name.endswith(TARFILE_EXT)
    ]
    assert len(af_tar_candidates) == 1

    shutil.rmtree(ingest_path)


-def test_builds_expected_manifest_filename():
-    """
-    We expect the manifest to be named like
-
-    ingestion_manifest_2019_07_30_T13_03_00.936.json
-
-    :return:
-    """
-    filename = build_manifest_filename()
-
-    assert filename.startswith(MANIFEST_NAME_BASE)
-    assert filename.endswith(MANIFEST_NAME_EXT)
-
-    filename_parts = filename.split("_")
-    assert len(filename_parts) == 7
-
-    # get just the timestamp
-    timestamp = filename.replace(MANIFEST_NAME_BASE, "").replace(MANIFEST_NAME_EXT, "")
-
-    # we should have gotten year, month, day, hours, minutes, seconds to 3 decimal places
-    assert re.match(r"\d{4}_\d{2}_\d{2}T\d{2}_\d{2}_\d{2}\.\d{0,3}", timestamp)
-
-
 def test_filters_cal_input_files(ingest_path: Path):
    """
    We'll be getting calibration/image/eb, etc. science products from a directory under
@@ -152,8 +126,6 @@ def test_filters_cal_input_files(ingest_path: Path):

    input_group = manifest.input_group
    assert len(input_group.science_products) == 1
-    sp_in = input_group.science_products[0]
-    assert sp_in.type == ScienceProductType.EVLA_CAL

    output_group = manifest.output_group
    assert len(output_group.science_products) == 1
@@ -161,7 +133,7 @@ def test_filters_cal_input_files(ingest_path: Path):
    for product in output_group.ancillary_products:
        if product.filename not in WANTED_FILENAMES:
            assert product.filename.startswith(ARTIFACT_NAME) and product.filename.endswith(
-                ARTIFACT_EXT
+                TARFILE_EXT
            )
        assert product.filename not in UNWANTED

@@ -195,13 +167,9 @@ def test_writes_expected_output_files(ingest_path: Path):

    # at a minimum, we expect the manifest, the ingestion artifact, and the science products tar
    assert len(ingestion_files) >= 3
-    mf_json = [
-        file
-        for file in ingestion_files
-        if file.name.startswith(MANIFEST_NAME_BASE) and file.name.endswith(MANIFEST_NAME_EXT)
-    ][0]
+    mf_json = find_manifest(ingest_path)
    assert mf_json
-    tars = [file for file in ingestion_files if file.name.endswith(".tar")]
+    tars = [file for file in ingestion_files if file.name.endswith(TARFILE_EXT)]
    assert len(tars) >= 2

    shutil.rmtree(ingest_path)
@@ -259,11 +227,10 @@ def test_input_sp_well_formed():
    locator = "uid://evla/calibration/vanilla_heath_bar_crunch_1a23e"
    # single science product
    sp_dict = {
-        "type": ScienceProductType.EVLA_CAL.value,
        "locator": locator,
    }

-    sp_in = InputScienceProduct(sp_type=ScienceProductType.EVLA_CAL.value, locator=locator)
+    sp_in = InputScienceProduct(locator=locator)
    assert sp_in.to_json() == sp_dict


@@ -273,15 +240,10 @@ def test_input_group_well_formed():

    :return:
    """
-    sp1 = InputScienceProduct(
-        sp_type=ScienceProductType.EXEC_BLOCK.value,
-        locator="uid://evla/execblock/coffee_heath_bar_crunch_7a23f",
-    )
+    sp1 = InputScienceProduct(locator="uid://evla/execblock/coffee_heath_bar_crunch_7a23f")
    sp1_json = sp1.to_json()

-    sp2 = InputScienceProduct(
-        sp_type=ScienceProductType.EVLA_CAL.value, locator="uid://evla/execblock/mint_oreo_omg_omg"
-    )
+    sp2 = InputScienceProduct(locator="uid://evla/execblock/mint_oreo_omg_omg")
    sp2_json = sp2.to_json()

    expected = {
@@ -318,8 +280,8 @@ def test_ancillary_product_well_formed():

    :return:
    """
-    ap1 = AncillaryProduct(type=AncillaryProductType.LOG, filename="without_feathers.tar")
-    expected = {"type": AncillaryProductType.LOG.value, "filename": "without_feathers.tar"}
+    ap1 = AncillaryProduct(type=AncillaryProductType.LOG_TYPE, filename="without_feathers.tar")
+    expected = {"type": AncillaryProductType.LOG_TYPE.value, "filename": "without_feathers.tar"}
    actual = ap1.to_json()

    assert actual == expected
@@ -361,19 +323,15 @@ def test_input_group_properly_formatted():

    :return:
    """
-    sp1 = InputScienceProduct(
-        sp_type=ScienceProductType.EXEC_BLOCK.value,
-        locator="uid://evla/execblock/coffee_heath_bar_crunch_7a23f",
-    )
+    sp1 = InputScienceProduct(locator="uid://evla/execblock/coffee_heath_bar_crunch_7a23f")

    ingroup = InputGroup(science_products=[sp1])
    ig_dict = json.loads(ingroup.to_json())
    ig_text = json.dumps(ig_dict, indent=4)
    expected = """
-  "input-group": {
+  "input_group": {
    "science_products": [
      {
-        "type": "calibration",
        "locator": "uid://evla/execblock/coffee_heath_bar_crunch_7a23f"
      }
    ]
@@ -441,6 +399,16 @@ def test_builds_cal_manifest_as_expected(ingest_path: Path):
    shutil.rmtree(ingest_path)


+@pytest.mark.skip("TODO before merge TODAY 2021-07-22")
+def test_manifest_filename_is_correct():
+    """
+    Calibration ingestion manifest should always be named simply "ingestion_manifest.json"
+
+    :return:
+    """
+    # TODO
+
+
 def populate_ingest_path_for_manifest_evla_cal_example(ingestion_path: Path):
    """
    Create fake input files to match EVLA CAL manifest example
@@ -448,7 +416,7 @@ def populate_ingest_path_for_manifest_evla_cal_example(ingestion_path: Path):
    :param ingestion_path:
    :return:
    """
-    weblog_file = ingestion_path / "qrs.weblog.tgz"
+    weblog_file = ingestion_path / "weblog.tgz"
    weblog_file.touch()
    cal_file = ingestion_path / "XYZ-abc+TMN.O00.tar"
    cal_file.touch()
--- a/apps/cli/executables/pexable/ingest_envoy/test/test_miscellaneous_manifests.py
+++ b/apps/cli/executables/pexable/ingest_envoy/test/test_miscellaneous_manifests.py
@@ -11,12 +11,11 @@ import pytest
 from ingest_envoy.ingestion_manifest import (
    IngestionManifest,
    IngestionManifestBuilder,
+    find_manifest,
 )
 from ingest_envoy.manifest_components import (
-    MANIFEST_NAME_BASE,
-    MANIFEST_NAME_EXT,
    ARTIFACT_NAME,
-    ARTIFACT_EXT,
+    TARFILE_EXT,
 )
 from ingest_envoy.utilities import ScienceProductType, Telescope
 from .conftest import ingest_path, populate_fake_evla_cal_ingest_path
@@ -48,19 +47,15 @@ def test_entry_point_for_evla_cal(ingest_path: Path):
    ingestion_files = [file for file in ingest_path.iterdir()]

    # there should be one ingestion manifest....
-    mf_jsons = [
-        file
-        for file in ingestion_files
-        if file.name.startswith(MANIFEST_NAME_BASE) and file.name.endswith(MANIFEST_NAME_EXT)
-    ]
-    assert len(mf_jsons) == 1
+    manifest_file = find_manifest(ingest_path)
+    assert manifest_file

    # ...and an artifacts tar, and the science products tar we started with
    assert sp_tar in ingestion_files
    artifact_tars = [
        file
        for file in ingestion_files
-        if file.name.startswith(ARTIFACT_NAME) and file.name.endswith(ARTIFACT_EXT)
+        if file.name.startswith(ARTIFACT_NAME) and file.name.endswith(TARFILE_EXT)
    ]
    assert len(artifact_tars) == 1