From 926ef6727c1a09f819014961fbfb427bf8e02403 Mon Sep 17 00:00:00 2001
From: Janet Goldstein <jgoldste@nrao.edu>
Date: Fri, 30 Jul 2021 15:37:30 +0000
Subject: [PATCH] WS-601: Rewrite/refactor ingestion manifest builder to pass
 disabled tests, part two

---
 .../ingest_envoy/ingestion_manifest.py        | 13 ++---
 .../ingest_envoy/std_img_manifest_utils.py    | 48 +++++++++++------
 .../pexable/ingest_envoy/test/conftest.py     | 54 ++++++++++++++++++-
 .../ingest_envoy/test/test_image_manifest.py  | 49 ++---------------
 .../ingest_envoy/test/test_manifest_if.py     | 14 ++---
 .../test/test_miscellaneous_manifests.py      | 51 ++++++++++++------
 6 files changed, 136 insertions(+), 93 deletions(-)

diff --git a/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/ingestion_manifest.py b/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/ingestion_manifest.py
index 10c22b4a5..cd364c5b5 100644
--- a/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/ingestion_manifest.py
+++ b/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/ingestion_manifest.py
@@ -7,7 +7,7 @@ import tarfile
 from pathlib import Path
 
 # pylint: disable=E0401, R0903, R1721
-from typing import Tuple, List
+from typing import Tuple
 
 import pendulum
 from pendulum import DateTime
@@ -15,7 +15,6 @@ from pendulum import DateTime
 from ingest_envoy.manifest_components import (
     INGESTION_ARTIFACTS_NAME,
     TARFILE_EXT,
-    WEBLOG_FILENAME,
     JSON,
     IngestionManifestKey,
     ManifestComponentIF,
@@ -29,6 +28,7 @@ from ingest_envoy.manifest_components import (
     ParamsKey,
 )
 from ingest_envoy.schema import AbstractTextFile
+from ingest_envoy.std_img_manifest_utils import ImageIngestionProductsFinder
 from ingest_envoy.utilities import (
     ScienceProductType,
     Telescope,
@@ -36,7 +36,6 @@ from ingest_envoy.utilities import (
     AncillaryProductType,
     find_output_tars,
 )
-from ingest_envoy.std_img_manifest_utils import ImageIngestionProductsFinder
 
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)
@@ -188,11 +187,13 @@ class IngestionManifestBuilder:
             input_group=self._build_input_group(),
             output_group=self._build_evla_cal_output_group(),
         )
+        manifest_file = manifest.write()
         artifacts_tar = self.write_ingestion_artifacts_tar()
-        manifest.output_group.ancillary_products.append(
-            AncillaryProduct(AncillaryProductType.INGESTION_ARTIFACTS, filename=str(artifacts_tar))
+        artifacts_ap = AncillaryProduct(
+            AncillaryProductType.INGESTION_ARTIFACTS, filename=str(artifacts_tar)
         )
-        manifest_file = manifest.write()
+
+        manifest.output_group.ancillary_products = [artifacts_ap]
 
         return manifest, manifest_file
 
diff --git a/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/std_img_manifest_utils.py b/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/std_img_manifest_utils.py
index faf853946..0f1a1972a 100644
--- a/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/std_img_manifest_utils.py
+++ b/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/std_img_manifest_utils.py
@@ -1,10 +1,11 @@
+""" Helper for building image ingestion manifest """
 from pathlib import Path
 from typing import List
 
 from ingest_envoy.manifest_components import OutputScienceProduct, AncillaryProduct, WEBLOG_FILENAME
 from ingest_envoy.utilities import AncillaryProductType
 
-
+# pylint: disable=R1721
 class ImageIngestionProductsFinder:
     """Finds ancillary science products and other ancillary products needed for image ingestion"""
 
@@ -33,7 +34,7 @@ class ImageIngestionProductsFinder:
             if file.name.endswith(".fits") and "rms" not in file.name
         ][0]
         image_files = [
-            file for file in self.files_found if "rms" in file.name or file.name.endswith(".png")
+            file for file in self.files_found if ("rms" in file.name or file.name.endswith(".png"))
         ]
 
         sp_aps = []
@@ -51,7 +52,6 @@ class ImageIngestionProductsFinder:
 
     def _find_other_ancillary_products(self) -> List[AncillaryProduct]:
         """
-        TODO
         Find the "other" ancillary image products in the staging dir: there should be a weblog
         and a pipeline artifacts tar. (The ingestion artifacts tar will be produced during the
         building of the manifest.)
@@ -67,8 +67,7 @@ class ImageIngestionProductsFinder:
                     type=AncillaryProductType.PIPELINE_WEBLOG_TYPE, filename=str(weblog)
                 )
             )
-        except Exception as exc:
-            # TODO which exception will this be?
+        except ValueError as exc:
             raise FileNotFoundError(f"No weblog found in {self.staging_source_dir}") from exc
 
         try:
@@ -83,8 +82,7 @@ class ImageIngestionProductsFinder:
                     filename=str(pipeline_artifacts_tar),
                 )
             )
-        except Exception as exc:
-            # TODO which exception will this be?
+        except ValueError as exc:
             raise FileNotFoundError(
                 f"No pipeline artifacts found in {self.staging_source_dir}"
             ) from exc
@@ -103,12 +101,30 @@ class ImageIngestionProductsFinder:
         if "image" in file.name:
             if file.name.endswith(".png"):
                 return AncillaryProduct(type=AncillaryProductType.THUMBNAIL_IMG, filename=filename)
-            elif file.name.endswith(".fits"):
-                if "rms" in file.name:
-                    return AncillaryProduct(
-                        type=AncillaryProductType.QUICKLOOK_RMS_IMAGE, filename=filename
-                    )
-                else:
-                    return AncillaryProduct(
-                        type=AncillaryProductType.QUICKLOOK_IMAGE, filename=filename
-                    )
+            if file.name.endswith(".fits") and "rms" in file.name:
+                return AncillaryProduct(
+                    type=AncillaryProductType.QUICKLOOK_RMS_IMAGE, filename=filename
+                )
+            return AncillaryProduct(type=AncillaryProductType.QUICKLOOK_IMAGE, filename=filename)
+
+    @staticmethod
+    def is_casa_product(file: Path) -> bool:
+        """
+        Was this file created by CASA? If so, and it's not the weblog,
+        we'll include it in the pipeline artifacts tar.
+
+        :param file: some file found in the ingestion staging dir
+        :return: whether it's a CASA byproduct
+
+        """
+        if file.name.startswith("casa_"):
+            return True
+
+        if "_band" in file.name and file.name.endswith(".fits"):
+            return True
+
+        return file.name in [
+            "pipeline_aquareport.xml",
+            "unknown.auxproducts.tgz",
+            "unknown.pipeline_manifest.xml",
+        ]
diff --git a/apps/cli/executables/pexable/ingest_envoy/test/conftest.py b/apps/cli/executables/pexable/ingest_envoy/test/conftest.py
index 32de44099..2c7c51381 100644
--- a/apps/cli/executables/pexable/ingest_envoy/test/conftest.py
+++ b/apps/cli/executables/pexable/ingest_envoy/test/conftest.py
@@ -9,9 +9,37 @@ import pytest
 
 from ingest_envoy.manifest_components import WEBLOG_FILENAME
 
-WANTED_FILENAMES = ["my_science_products.tar", WEBLOG_FILENAME]
+EVLA_CAL_WANTED_FILENAMES = ["my_science_products.tar", WEBLOG_FILENAME]
 UNWANTED = ["ignore_me.fits", "just_a_lotta_nothing", "uninteresting_metadata.xml"]
 
+IMG_MANIFEST_INPUT_FILENAMES = [
+    # additional metadata
+    "image_metadata_2021_05_21_T10_17_19.180.json",
+    # quicklook image
+    "VLASS2.1.ql.T08t09.J055438-113000.10.2048.v1.I.iter1.image.pbcor.tt0.subim.fits",
+    # quicklook RMS image
+    "VLASS2.1.ql.T08t09.J055438-113000.10.2048.v1.I.iter1.image.pbcor.tt0.rms.subim.fits",
+    # thumbnail
+    "VLASS2.1.ql.T08t09.J055438_113000.10.2048.v1.I.iter1.image.pbcor.tt0.subim.png",
+    # weblog
+    WEBLOG_FILENAME,
+    # ingestion artifacts tar -- to be created as side effect of ingestion manifest creation
+    # pipeline artifacts tar
+    "VLASS2.1.ql.T08t09.J055438-113000.10.2048.v1.tar",
+]
+CASA_BYPRODUCTS = [
+    "unknown.pipeline_manifest.xml",
+    "unknown.aux_products.tgz",
+    "casa_commands.log",
+    "casa_pipescript.py",
+]
+
+# this file gets created during construction of image ingestion manifest
+IMG_ARTIFACTS_FILENAME = "ingestion_artifacts_2021_05_21_T10_17_19.275.tar"
+
+# just to make things interesting
+RANDOM_TAR = "totally_random_tar.tar"
+
 
 @pytest.fixture(scope="function")
 def ingest_path(tmpdir: Path) -> Path:
@@ -52,7 +80,7 @@ def populate_fake_evla_cal_ingest_path(staging_dir: Path) -> List[Path]:
     """
 
     files = []
-    filenames = [filename for filename in WANTED_FILENAMES]
+    filenames = [filename for filename in EVLA_CAL_WANTED_FILENAMES]
     for filename in UNWANTED:
         filenames.append(filename)
 
@@ -62,3 +90,25 @@ def populate_fake_evla_cal_ingest_path(staging_dir: Path) -> List[Path]:
         files.append(path)
 
     return files
+
+
+def populate_fake_image_ingest_path(staging_dir: Path) -> List[Path]:
+    """
+    Create a directory containing fake image products, plus other stuff
+    that we -don't- want to ingest.
+
+    :param staging_dir: our temporary dir
+    :return:
+    """
+    for filename in IMG_MANIFEST_INPUT_FILENAMES:
+        file = staging_dir / filename
+        file.touch()
+
+    for filename in CASA_BYPRODUCTS:
+        file = staging_dir / filename
+        file.touch()
+
+    file = staging_dir / RANDOM_TAR
+    file.touch()
+
+    return [file for file in staging_dir.iterdir()]
diff --git a/apps/cli/executables/pexable/ingest_envoy/test/test_image_manifest.py b/apps/cli/executables/pexable/ingest_envoy/test/test_image_manifest.py
index e1b127689..925dc1f05 100644
--- a/apps/cli/executables/pexable/ingest_envoy/test/test_image_manifest.py
+++ b/apps/cli/executables/pexable/ingest_envoy/test/test_image_manifest.py
@@ -12,8 +12,8 @@ from ingest_envoy.ingestion_manifest import IngestionManifestBuilder
 from ingest_envoy.schema import AbstractTextFile
 from ingest_envoy.utilities import Telescope, AncillaryProductType, ScienceProductType
 
-# NOT unused. IJ is dumb. KEEP THIS.
-from .conftest import ingest_path
+# ingest_path is NOT unused. IJ is dumb.
+from .conftest import ingest_path, populate_fake_image_ingest_path
 
 from ingest_envoy.manifest_components import (
     WEBLOG_FILENAME,
@@ -26,30 +26,6 @@ from ingest_envoy.manifest_components import (
     InputScienceProduct,
 )
 
-IMG_MANIFEST_FILENAMES = [
-    # additional metadata
-    "image_metadata_2021_05_21_T10_17_19.180.json",
-    # quicklook image
-    "VLASS2.1.ql.T08t09.J055438-113000.10.2048.v1.I.iter1.image.pbcor.tt0.subim.fits",
-    # quicklook RMS image
-    "VLASS2.1.ql.T08t09.J055438-113000.10.2048.v1.I.iter1.image.pbcor.tt0.rms.subim.fits",
-    # thumbnail
-    "VLASS2.1.ql.T08t09.J055438_113000.10.2048.v1.I.iter1.image.pbcor.tt0.subim.png",
-    # weblog
-    WEBLOG_FILENAME,
-    # ingestion artifacts tar
-    "ingestion_artifacts_2021_05_21_T10_17_19.275.tar",
-    # pipeline artifacts tar
-    "VLASS2.1.ql.T08t09.J055438-113000.10.2048.v1.tar",
-]
-OTHER_FILENAMES = [
-    "unknown.pipeline_manifest.xml",
-    "unknown.aux_products.tgz",
-    "casa_commands.log",
-    "casa_pipescript.py",
-    "totally_random_tar.tar",
-]
-
 
 def test_parameters_json_well_formed():
     """
@@ -143,7 +119,7 @@ def test_creates_expected_manifest(ingest_path: Path):
     """
 
     # fill the ingestion path with fake files
-    populate_fake_ingest_path(ingest_path)
+    populate_fake_image_ingest_path(ingest_path)
 
     locator = "uid://evla/calibration/3dfa528b-9870-46c9-a200-131dbac701cc"
     addl_md = AbstractTextFile(filename="image_metadata_2021_05_21_T10_17_19.180.json", content="")
@@ -325,22 +301,3 @@ def build_output_group() -> OutputGroup:
     ap_list = other_aps
 
     return OutputGroup(science_products=[osp], ancillary_products=ap_list)
-
-
-def populate_fake_ingest_path(staging_dir: Path) -> List[Path]:
-    """
-    Create a directory containing fake image products, plus other stuff
-    that we -don't- want to ingest.
-
-    :param staging_dir: our temporary dir
-    :return:
-    """
-    for filename in IMG_MANIFEST_FILENAMES:
-        # ingestion artifacts tar is produced during manifest creation
-        if not filename.startswith("ingestion_artifacts"):
-            file = staging_dir / filename
-            file.touch()
-    for filename in OTHER_FILENAMES:
-        file = staging_dir / filename
-        file.touch()
-    return [file for file in staging_dir.iterdir()]
diff --git a/apps/cli/executables/pexable/ingest_envoy/test/test_manifest_if.py b/apps/cli/executables/pexable/ingest_envoy/test/test_manifest_if.py
index 75737c138..5273f8aff 100644
--- a/apps/cli/executables/pexable/ingest_envoy/test/test_manifest_if.py
+++ b/apps/cli/executables/pexable/ingest_envoy/test/test_manifest_if.py
@@ -37,7 +37,7 @@ from ingest_envoy.utilities import (
 from .conftest import (
     ingest_path,
     populate_fake_evla_cal_ingest_path,
-    WANTED_FILENAMES,
+    EVLA_CAL_WANTED_FILENAMES,
     UNWANTED,
     find_example_manifest,
 )
@@ -63,7 +63,7 @@ def test_filters_cal_input_files(ingest_path: Path):
     populate_fake_evla_cal_ingest_path(ingest_path)
     locator = "uid://evla/calibration/twinkle-twinkle-little-quasar"
     manifest, _ = IngestionManifestBuilder(
-        telescope=Telescope.EVLA,
+        telescope=Telescope.EVLA.value,
         staging_source_dir=ingest_path,
         sp_type=ScienceProductType.EVLA_CAL.value,
         locator=locator,
@@ -82,7 +82,7 @@ def test_filters_cal_input_files(ingest_path: Path):
     assert len(output_group.science_products) == 1
     assert len(output_group.ancillary_products) == 1
     for product in output_group.ancillary_products:
-        if product.filename not in WANTED_FILENAMES:
+        if product.filename not in EVLA_CAL_WANTED_FILENAMES:
             assert product.filename.startswith(
                 INGESTION_ARTIFACTS_NAME
             ) and product.filename.endswith(TARFILE_EXT)
@@ -91,7 +91,7 @@ def test_filters_cal_input_files(ingest_path: Path):
     sp_out = output_group.science_products[0]
     assert sp_out.type == ScienceProductType.EVLA_CAL
 
-    assert sp_out.filename in WANTED_FILENAMES
+    assert sp_out.filename in EVLA_CAL_WANTED_FILENAMES
     assert sp_out.filename not in UNWANTED
 
     shutil.rmtree(ingest_path)
@@ -107,7 +107,7 @@ def test_writes_expected_output_files(ingest_path: Path):
     """
     populate_fake_evla_cal_ingest_path(ingest_path)
     manifest_file, manifest = IngestionManifestBuilder(
-        telescope=Telescope.EVLA,
+        telescope=Telescope.EVLA.value,
         staging_source_dir=ingest_path,
         locator="uid://evla/calibration/fee-fi-fo-fum-acdf23",
         sp_type=ScienceProductType.EVLA_CAL.value,
@@ -243,8 +243,8 @@ def test_evla_cal_manifest_matches_example(ingest_path: Path):
 
     builder = IngestionManifestBuilder(
         staging_source_dir=ingest_path,
-        telescope=Telescope.EVLA,
-        sp_type=ScienceProductType.EVLA_CAL,
+        telescope=Telescope.EVLA.value,
+        sp_type=ScienceProductType.EVLA_CAL.value,
         locator="uid://evla/execblock/48ba4c9d-d7c7-4a8f-9803-1115cd52459b",
     )
     _, manifest_file = builder.build()
diff --git a/apps/cli/executables/pexable/ingest_envoy/test/test_miscellaneous_manifests.py b/apps/cli/executables/pexable/ingest_envoy/test/test_miscellaneous_manifests.py
index 4018d402c..65fd4118b 100644
--- a/apps/cli/executables/pexable/ingest_envoy/test/test_miscellaneous_manifests.py
+++ b/apps/cli/executables/pexable/ingest_envoy/test/test_miscellaneous_manifests.py
@@ -6,8 +6,6 @@ from pathlib import Path
 
 # pylint: disable=E0401, E0402, R1721, W0611, W0621
 
-import pytest
-
 from ingest_envoy.ingestion_manifest import (
     IngestionManifest,
     IngestionManifestBuilder,
@@ -18,14 +16,19 @@ from ingest_envoy.manifest_components import (
     TARFILE_EXT,
 )
 from ingest_envoy.utilities import ScienceProductType, Telescope
-from .conftest import ingest_path, populate_fake_evla_cal_ingest_path
+from .conftest import (
+    ingest_path,
+    populate_fake_evla_cal_ingest_path,
+    populate_fake_image_ingest_path,
+    IMG_MANIFEST_INPUT_FILENAMES,
+    CASA_BYPRODUCTS,
+)
 
 logger = logging.getLogger(IngestionManifest.__name__)
 logger.setLevel(logging.INFO)
 logger.addHandler(logging.StreamHandler(sys.stdout))
 
 
-@pytest.mark.skip("TODO: broken temporarily, pending fix to output group creation")
 def test_entry_point_for_evla_cal(ingest_path: Path):
     """
     Confirm that the ingestion launcher entrypoint kicks off production of ingestion manifest
@@ -39,7 +42,7 @@ def test_entry_point_for_evla_cal(ingest_path: Path):
     assert sp_tar
 
     builder = IngestionManifestBuilder(
-        telescope=Telescope.EVLA,
+        telescope=Telescope.EVLA.value,
         locator=locator,
         sp_type=ScienceProductType.EVLA_CAL.value,
         staging_source_dir=ingest_path,
@@ -61,20 +64,36 @@ def test_entry_point_for_evla_cal(ingest_path: Path):
     assert len(artifact_tars) == 1
 
 
-@pytest.mark.skip("TODO: test_builds_image_manifest")
-def test_builds_image_manifest(ingest_path: Path):
+def test_entry_point_for_image(ingest_path: Path):
     """
-    TODO WS-600
+    Confirm that the ingestion launcher entrypoint kicks off production
+    of standard imaging manifest
 
+    :param ingest_path: staging directory
     :return:
     """
-    raise NotImplementedError
 
+    locator = "uid://evla/calibration/mmm-NY-style-pizza-Giovanni-ABQ-12345"
+    populate_fake_image_ingest_path(ingest_path)
 
-@pytest.mark.skip("TODO: test_entry_point_for_image")
-def test_entry_point_for_image(ingest_path: Path):
-    """
-    TODO
-    :return:
-    """
-    raise NotImplementedError
+    # we should be starting out with various image manifest input files
+    # and CASA byproducts, a random file, and -not- the image ingestion
+    # manifest yet to be created
+    expected_file_count_before = len(IMG_MANIFEST_INPUT_FILENAMES) + len(CASA_BYPRODUCTS) + 1
+    ingestion_files_before = [file for file in ingest_path.iterdir()]
+    assert len(ingestion_files_before) == expected_file_count_before
+
+    IngestionManifestBuilder(
+        telescope=Telescope.EVLA.value,
+        locator=locator,
+        sp_type=ScienceProductType.IMAGE.value,
+        staging_source_dir=ingest_path,
+    ).build()
+
+    # there should be one ingestion manifest....
+    manifest_file = find_manifest(ingest_path)
+    assert manifest_file
+
+    # ...and everything we started with, plus a new ingestion artifacts tar
+    ingestion_files_after = [file for file in ingest_path.iterdir()]
+    assert len(ingestion_files_after) == expected_file_count_before + 2
-- 
GitLab