From 926ef6727c1a09f819014961fbfb427bf8e02403 Mon Sep 17 00:00:00 2001 From: Janet Goldstein <jgoldste@nrao.edu> Date: Fri, 30 Jul 2021 15:37:30 +0000 Subject: [PATCH] WS-601: Rewrite/refactor ingestion manifest builder to pass disabled tests, part two --- .../ingest_envoy/ingestion_manifest.py | 13 ++--- .../ingest_envoy/std_img_manifest_utils.py | 48 +++++++++++------ .../pexable/ingest_envoy/test/conftest.py | 54 ++++++++++++++++++- .../ingest_envoy/test/test_image_manifest.py | 49 ++--------------- .../ingest_envoy/test/test_manifest_if.py | 14 ++--- .../test/test_miscellaneous_manifests.py | 51 ++++++++++++------ 6 files changed, 136 insertions(+), 93 deletions(-) diff --git a/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/ingestion_manifest.py b/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/ingestion_manifest.py index 10c22b4a5..cd364c5b5 100644 --- a/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/ingestion_manifest.py +++ b/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/ingestion_manifest.py @@ -7,7 +7,7 @@ import tarfile from pathlib import Path # pylint: disable=E0401, R0903, R1721 -from typing import Tuple, List +from typing import Tuple import pendulum from pendulum import DateTime @@ -15,7 +15,6 @@ from pendulum import DateTime from ingest_envoy.manifest_components import ( INGESTION_ARTIFACTS_NAME, TARFILE_EXT, - WEBLOG_FILENAME, JSON, IngestionManifestKey, ManifestComponentIF, @@ -29,6 +28,7 @@ from ingest_envoy.manifest_components import ( ParamsKey, ) from ingest_envoy.schema import AbstractTextFile +from ingest_envoy.std_img_manifest_utils import ImageIngestionProductsFinder from ingest_envoy.utilities import ( ScienceProductType, Telescope, @@ -36,7 +36,6 @@ from ingest_envoy.utilities import ( AncillaryProductType, find_output_tars, ) -from ingest_envoy.std_img_manifest_utils import ImageIngestionProductsFinder logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) @@ -188,11 +187,13 @@ class IngestionManifestBuilder: input_group=self._build_input_group(), output_group=self._build_evla_cal_output_group(), ) + manifest_file = manifest.write() artifacts_tar = self.write_ingestion_artifacts_tar() - manifest.output_group.ancillary_products.append( - AncillaryProduct(AncillaryProductType.INGESTION_ARTIFACTS, filename=str(artifacts_tar)) + artifacts_ap = AncillaryProduct( + AncillaryProductType.INGESTION_ARTIFACTS, filename=str(artifacts_tar) ) - manifest_file = manifest.write() + + manifest.output_group.ancillary_products = [artifacts_ap] return manifest, manifest_file diff --git a/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/std_img_manifest_utils.py b/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/std_img_manifest_utils.py index faf853946..0f1a1972a 100644 --- a/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/std_img_manifest_utils.py +++ b/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/std_img_manifest_utils.py @@ -1,10 +1,11 @@ +""" Helper for building image ingestion manifest """ from pathlib import Path from typing import List from ingest_envoy.manifest_components import OutputScienceProduct, AncillaryProduct, WEBLOG_FILENAME from ingest_envoy.utilities import AncillaryProductType - +# pylint: disable=R1721 class ImageIngestionProductsFinder: """Finds ancillary science products and other ancillary products needed for image ingestion""" @@ -33,7 +34,7 @@ class ImageIngestionProductsFinder: if file.name.endswith(".fits") and "rms" not in file.name ][0] image_files = [ - file for file in self.files_found if "rms" in file.name or file.name.endswith(".png") + file for file in self.files_found if ("rms" in file.name or file.name.endswith(".png")) ] sp_aps = [] @@ -51,7 +52,6 @@ class ImageIngestionProductsFinder: def _find_other_ancillary_products(self) -> List[AncillaryProduct]: """ - TODO Find the "other" ancillary image products in the staging dir: there should be a weblog and a pipeline artifacts tar. (The ingestion artifacts tar will be produced during the building of the manifest.) @@ -67,8 +67,7 @@ class ImageIngestionProductsFinder: type=AncillaryProductType.PIPELINE_WEBLOG_TYPE, filename=str(weblog) ) ) - except Exception as exc: - # TODO which exception will this be? + except ValueError as exc: raise FileNotFoundError(f"No weblog found in {self.staging_source_dir}") from exc try: @@ -83,8 +82,7 @@ class ImageIngestionProductsFinder: filename=str(pipeline_artifacts_tar), ) ) - except Exception as exc: - # TODO which exception will this be? + except ValueError as exc: raise FileNotFoundError( f"No pipeline artifacts found in {self.staging_source_dir}" ) from exc @@ -103,12 +101,30 @@ class ImageIngestionProductsFinder: if "image" in file.name: if file.name.endswith(".png"): return AncillaryProduct(type=AncillaryProductType.THUMBNAIL_IMG, filename=filename) - elif file.name.endswith(".fits"): - if "rms" in file.name: - return AncillaryProduct( - type=AncillaryProductType.QUICKLOOK_RMS_IMAGE, filename=filename - ) - else: - return AncillaryProduct( - type=AncillaryProductType.QUICKLOOK_IMAGE, filename=filename - ) + if file.name.endswith(".fits") and "rms" in file.name: + return AncillaryProduct( + type=AncillaryProductType.QUICKLOOK_RMS_IMAGE, filename=filename + ) + return AncillaryProduct(type=AncillaryProductType.QUICKLOOK_IMAGE, filename=filename) + + @staticmethod + def is_casa_product(file: Path) -> bool: + """ + Was this file created by CASA? If so, and it's not the weblog, + we'll include it in the pipeline artifacts tar. + + :param file: some file found in the ingestion staging dir + :return: whether it's a CASA byproduct + + """ + if file.name.startswith("casa_"): + return True + + if "_band" in file.name and file.name.endswith(".fits"): + return True + + return file.name in [ + "pipeline_aquareport.xml", + "unknown.auxproducts.tgz", + "unknown.pipeline_manifest.xml", + ] diff --git a/apps/cli/executables/pexable/ingest_envoy/test/conftest.py b/apps/cli/executables/pexable/ingest_envoy/test/conftest.py index 32de44099..2c7c51381 100644 --- a/apps/cli/executables/pexable/ingest_envoy/test/conftest.py +++ b/apps/cli/executables/pexable/ingest_envoy/test/conftest.py @@ -9,9 +9,37 @@ import pytest from ingest_envoy.manifest_components import WEBLOG_FILENAME -WANTED_FILENAMES = ["my_science_products.tar", WEBLOG_FILENAME] +EVLA_CAL_WANTED_FILENAMES = ["my_science_products.tar", WEBLOG_FILENAME] UNWANTED = ["ignore_me.fits", "just_a_lotta_nothing", "uninteresting_metadata.xml"] +IMG_MANIFEST_INPUT_FILENAMES = [ + # additional metadata + "image_metadata_2021_05_21_T10_17_19.180.json", + # quicklook image + "VLASS2.1.ql.T08t09.J055438-113000.10.2048.v1.I.iter1.image.pbcor.tt0.subim.fits", + # quicklook RMS image + "VLASS2.1.ql.T08t09.J055438-113000.10.2048.v1.I.iter1.image.pbcor.tt0.rms.subim.fits", + # thumbnail + "VLASS2.1.ql.T08t09.J055438_113000.10.2048.v1.I.iter1.image.pbcor.tt0.subim.png", + # weblog + WEBLOG_FILENAME, + # ingestion artifacts tar -- to be created as side effect of ingestion manifest creation + # pipeline artifacts tar + "VLASS2.1.ql.T08t09.J055438-113000.10.2048.v1.tar", +] +CASA_BYPRODUCTS = [ + "unknown.pipeline_manifest.xml", + "unknown.aux_products.tgz", + "casa_commands.log", + "casa_pipescript.py", +] + +# this file gets created during construction of image ingestion manifest +IMG_ARTIFACTS_FILENAME = "ingestion_artifacts_2021_05_21_T10_17_19.275.tar" + +# just to make things interesting +RANDOM_TAR = "totally_random_tar.tar" + @pytest.fixture(scope="function") def ingest_path(tmpdir: Path) -> Path: @@ -52,7 +80,7 @@ def populate_fake_evla_cal_ingest_path(staging_dir: Path) -> List[Path]: """ files = [] - filenames = [filename for filename in WANTED_FILENAMES] + filenames = [filename for filename in EVLA_CAL_WANTED_FILENAMES] for filename in UNWANTED: filenames.append(filename) @@ -62,3 +90,25 @@ def populate_fake_evla_cal_ingest_path(staging_dir: Path) -> List[Path]: files.append(path) return files + + +def populate_fake_image_ingest_path(staging_dir: Path) -> List[Path]: + """ + Create a directory containing fake image products, plus other stuff + that we -don't- want to ingest. + + :param staging_dir: our temporary dir + :return: + """ + for filename in IMG_MANIFEST_INPUT_FILENAMES: + file = staging_dir / filename + file.touch() + + for filename in CASA_BYPRODUCTS: + file = staging_dir / filename + file.touch() + + file = staging_dir / RANDOM_TAR + file.touch() + + return [file for file in staging_dir.iterdir()] diff --git a/apps/cli/executables/pexable/ingest_envoy/test/test_image_manifest.py b/apps/cli/executables/pexable/ingest_envoy/test/test_image_manifest.py index e1b127689..925dc1f05 100644 --- a/apps/cli/executables/pexable/ingest_envoy/test/test_image_manifest.py +++ b/apps/cli/executables/pexable/ingest_envoy/test/test_image_manifest.py @@ -12,8 +12,8 @@ from ingest_envoy.ingestion_manifest import IngestionManifestBuilder from ingest_envoy.schema import AbstractTextFile from ingest_envoy.utilities import Telescope, AncillaryProductType, ScienceProductType -# NOT unused. IJ is dumb. KEEP THIS. -from .conftest import ingest_path +# ingest_path is NOT unused. IJ is dumb. +from .conftest import ingest_path, populate_fake_image_ingest_path from ingest_envoy.manifest_components import ( WEBLOG_FILENAME, @@ -26,30 +26,6 @@ from ingest_envoy.manifest_components import ( InputScienceProduct, ) -IMG_MANIFEST_FILENAMES = [ - # additional metadata - "image_metadata_2021_05_21_T10_17_19.180.json", - # quicklook image - "VLASS2.1.ql.T08t09.J055438-113000.10.2048.v1.I.iter1.image.pbcor.tt0.subim.fits", - # quicklook RMS image - "VLASS2.1.ql.T08t09.J055438-113000.10.2048.v1.I.iter1.image.pbcor.tt0.rms.subim.fits", - # thumbnail - "VLASS2.1.ql.T08t09.J055438_113000.10.2048.v1.I.iter1.image.pbcor.tt0.subim.png", - # weblog - WEBLOG_FILENAME, - # ingestion artifacts tar - "ingestion_artifacts_2021_05_21_T10_17_19.275.tar", - # pipeline artifacts tar - "VLASS2.1.ql.T08t09.J055438-113000.10.2048.v1.tar", -] -OTHER_FILENAMES = [ - "unknown.pipeline_manifest.xml", - "unknown.aux_products.tgz", - "casa_commands.log", - "casa_pipescript.py", - "totally_random_tar.tar", -] - def test_parameters_json_well_formed(): """ @@ -143,7 +119,7 @@ def test_creates_expected_manifest(ingest_path: Path): """ # fill the ingestion path with fake files - populate_fake_ingest_path(ingest_path) + populate_fake_image_ingest_path(ingest_path) locator = "uid://evla/calibration/3dfa528b-9870-46c9-a200-131dbac701cc" addl_md = AbstractTextFile(filename="image_metadata_2021_05_21_T10_17_19.180.json", content="") @@ -325,22 +301,3 @@ def build_output_group() -> OutputGroup: ap_list = other_aps return OutputGroup(science_products=[osp], ancillary_products=ap_list) - - -def populate_fake_ingest_path(staging_dir: Path) -> List[Path]: - """ - Create a directory containing fake image products, plus other stuff - that we -don't- want to ingest. - - :param staging_dir: our temporary dir - :return: - """ - for filename in IMG_MANIFEST_FILENAMES: - # ingestion artifacts tar is produced during manifest creation - if not filename.startswith("ingestion_artifacts"): - file = staging_dir / filename - file.touch() - for filename in OTHER_FILENAMES: - file = staging_dir / filename - file.touch() - return [file for file in staging_dir.iterdir()] diff --git a/apps/cli/executables/pexable/ingest_envoy/test/test_manifest_if.py b/apps/cli/executables/pexable/ingest_envoy/test/test_manifest_if.py index 75737c138..5273f8aff 100644 --- a/apps/cli/executables/pexable/ingest_envoy/test/test_manifest_if.py +++ b/apps/cli/executables/pexable/ingest_envoy/test/test_manifest_if.py @@ -37,7 +37,7 @@ from ingest_envoy.utilities import ( from .conftest import ( ingest_path, populate_fake_evla_cal_ingest_path, - WANTED_FILENAMES, + EVLA_CAL_WANTED_FILENAMES, UNWANTED, find_example_manifest, ) @@ -63,7 +63,7 @@ def test_filters_cal_input_files(ingest_path: Path): populate_fake_evla_cal_ingest_path(ingest_path) locator = "uid://evla/calibration/twinkle-twinkle-little-quasar" manifest, _ = IngestionManifestBuilder( - telescope=Telescope.EVLA, + telescope=Telescope.EVLA.value, staging_source_dir=ingest_path, sp_type=ScienceProductType.EVLA_CAL.value, locator=locator, @@ -82,7 +82,7 @@ def test_filters_cal_input_files(ingest_path: Path): assert len(output_group.science_products) == 1 assert len(output_group.ancillary_products) == 1 for product in output_group.ancillary_products: - if product.filename not in WANTED_FILENAMES: + if product.filename not in EVLA_CAL_WANTED_FILENAMES: assert product.filename.startswith( INGESTION_ARTIFACTS_NAME ) and product.filename.endswith(TARFILE_EXT) @@ -91,7 +91,7 @@ def test_filters_cal_input_files(ingest_path: Path): sp_out = output_group.science_products[0] assert sp_out.type == ScienceProductType.EVLA_CAL - assert sp_out.filename in WANTED_FILENAMES + assert sp_out.filename in EVLA_CAL_WANTED_FILENAMES assert sp_out.filename not in UNWANTED shutil.rmtree(ingest_path) @@ -107,7 +107,7 @@ def test_writes_expected_output_files(ingest_path: Path): """ populate_fake_evla_cal_ingest_path(ingest_path) manifest_file, manifest = IngestionManifestBuilder( - telescope=Telescope.EVLA, + telescope=Telescope.EVLA.value, staging_source_dir=ingest_path, locator="uid://evla/calibration/fee-fi-fo-fum-acdf23", sp_type=ScienceProductType.EVLA_CAL.value, @@ -243,8 +243,8 @@ def test_evla_cal_manifest_matches_example(ingest_path: Path): builder = IngestionManifestBuilder( staging_source_dir=ingest_path, - telescope=Telescope.EVLA, - sp_type=ScienceProductType.EVLA_CAL, + telescope=Telescope.EVLA.value, + sp_type=ScienceProductType.EVLA_CAL.value, locator="uid://evla/execblock/48ba4c9d-d7c7-4a8f-9803-1115cd52459b", ) _, manifest_file = builder.build() diff --git a/apps/cli/executables/pexable/ingest_envoy/test/test_miscellaneous_manifests.py b/apps/cli/executables/pexable/ingest_envoy/test/test_miscellaneous_manifests.py index 4018d402c..65fd4118b 100644 --- a/apps/cli/executables/pexable/ingest_envoy/test/test_miscellaneous_manifests.py +++ b/apps/cli/executables/pexable/ingest_envoy/test/test_miscellaneous_manifests.py @@ -6,8 +6,6 @@ from pathlib import Path # pylint: disable=E0401, E0402, R1721, W0611, W0621 -import pytest - from ingest_envoy.ingestion_manifest import ( IngestionManifest, IngestionManifestBuilder, @@ -18,14 +16,19 @@ from ingest_envoy.manifest_components import ( TARFILE_EXT, ) from ingest_envoy.utilities import ScienceProductType, Telescope -from .conftest import ingest_path, populate_fake_evla_cal_ingest_path +from .conftest import ( + ingest_path, + populate_fake_evla_cal_ingest_path, + populate_fake_image_ingest_path, + IMG_MANIFEST_INPUT_FILENAMES, + CASA_BYPRODUCTS, +) logger = logging.getLogger(IngestionManifest.__name__) logger.setLevel(logging.INFO) logger.addHandler(logging.StreamHandler(sys.stdout)) -@pytest.mark.skip("TODO: broken temporarily, pending fix to output group creation") def test_entry_point_for_evla_cal(ingest_path: Path): """ Confirm that the ingestion launcher entrypoint kicks off production of ingestion manifest @@ -39,7 +42,7 @@ def test_entry_point_for_evla_cal(ingest_path: Path): assert sp_tar builder = IngestionManifestBuilder( - telescope=Telescope.EVLA, + telescope=Telescope.EVLA.value, locator=locator, sp_type=ScienceProductType.EVLA_CAL.value, staging_source_dir=ingest_path, @@ -61,20 +64,36 @@ def test_entry_point_for_evla_cal(ingest_path: Path): assert len(artifact_tars) == 1 -@pytest.mark.skip("TODO: test_builds_image_manifest") -def test_builds_image_manifest(ingest_path: Path): +def test_entry_point_for_image(ingest_path: Path): """ - TODO WS-600 + Confirm that the ingestion launcher entrypoint kicks off production + of standard imaging manifest + :param ingest_path: staging directory :return: """ - raise NotImplementedError + locator = "uid://evla/calibration/mmm-NY-style-pizza-Giovanni-ABQ-12345" + populate_fake_image_ingest_path(ingest_path) -@pytest.mark.skip("TODO: test_entry_point_for_image") -def test_entry_point_for_image(ingest_path: Path): - """ - TODO - :return: - """ - raise NotImplementedError + # we should be starting out with various image manifest input files + # and CASA byproducts, a random file, and -not- the image ingestion + # manifest yet to be created + expected_file_count_before = len(IMG_MANIFEST_INPUT_FILENAMES) + len(CASA_BYPRODUCTS) + 1 + ingestion_files_before = [file for file in ingest_path.iterdir()] + assert len(ingestion_files_before) == expected_file_count_before + + IngestionManifestBuilder( + telescope=Telescope.EVLA.value, + locator=locator, + sp_type=ScienceProductType.IMAGE.value, + staging_source_dir=ingest_path, + ).build() + + # there should be one ingestion manifest.... + manifest_file = find_manifest(ingest_path) + assert manifest_file + + # ...and everything we started with, plus a new ingestion artifacts tar + ingestion_files_after = [file for file in ingest_path.iterdir()] + assert len(ingestion_files_after) == expected_file_count_before + 2 -- GitLab