diff --git a/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/ingestion_manifest.py b/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/ingestion_manifest.py index b187e89191d5f2db6eb3a31020558c539f6ce3c6..5a3da2dbfbfa451a3f8557fcc0826353cdea584b 100644 --- a/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/ingestion_manifest.py +++ b/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/ingestion_manifest.py @@ -14,11 +14,9 @@ import pendulum from pendulum import DateTime from ingest_envoy.manifest_components import ( - MANIFEST_NAME_BASE, - MANIFEST_NAME_EXT, ARTIFACT_NAME, - ARTIFACT_EXT, - WEBLOG, + TARFILE_EXT, + WEBLOG_FILENAME, JSON, IngestionManifestKey, ManifestComponentIF, @@ -29,6 +27,7 @@ from ingest_envoy.manifest_components import ( AncillaryProduct, OutputGroup, SCIENCE_PRODUCT_PATTERN, + MANIFEST_FILENAME, ) from ingest_envoy.utilities import ( ScienceProductType, @@ -161,7 +160,7 @@ class IngestionManifestBuilder: # N.B. this is sufficient for most types of ingestion, # but ALMA CALs will have multiple EB SPs, identified only by locator, # and VLBAs have no input group at all. - sp_in = InputScienceProduct(sp_type=self.sp_type, locator=self.locator) + sp_in = InputScienceProduct(locator=self.locator) return InputGroup([sp_in]) @@ -197,7 +196,7 @@ class IngestionManifestBuilder: """ current_time = pendulum.now() timestamp = format_timestamp(current_time) - return f"{ARTIFACT_NAME}{timestamp}{ARTIFACT_EXT}" + return f"{ARTIFACT_NAME}{timestamp}{TARFILE_EXT}" def write_ingestion_artifacts_tar(self) -> Path: """ @@ -229,11 +228,11 @@ class IngestionManifestBuilder: ancillary_products = [] # if there's a weblog in here, grab it - maybe_weblogs = [file for file in self.files_found if file.name.endswith(WEBLOG)] + maybe_weblogs = [file for file in self.files_found if file.name == WEBLOG_FILENAME] if len(maybe_weblogs) > 0: weblog = maybe_weblogs[0] weblog_ap = AncillaryProduct( - type=AncillaryProductType.PIPELINE_WEBLOG, filename=weblog.name + type=AncillaryProductType.PIPELINE_WEBLOG_TYPE, filename=weblog.name ) ancillary_products.append(weblog_ap) @@ -287,7 +286,7 @@ class IngestionManifest(ManifestIF): # N.B. this is sufficient for most types of ingestion, # but ALMA CALs will have multiple EB SPs, identified only by locator, # and VLBAs have no input group at all. - sp_in = InputScienceProduct(sp_type=self.sp_type, locator=self.locator) + sp_in = InputScienceProduct(locator=self.locator) return InputGroup([sp_in]) def _build_output_group(self) -> OutputGroup: @@ -301,13 +300,12 @@ class IngestionManifest(ManifestIF): # find ancillary products, if any ancillary_products = self._find_ancillary_products() - weblog = Path(self.ingestion_path / WEBLOG) + weblog = Path(self.ingestion_path / WEBLOG_FILENAME) if weblog.exists(): - ancillary_products.append(AncillaryProduct(type=WEBLOG, filename=str(weblog))) + ancillary_products.append(AncillaryProduct(type=WEBLOG_FILENAME, filename=str(weblog))) return OutputGroup(sps_out) - # @property def ingestion_path(self) -> Path: return self.parameters.ingestion_path @@ -318,7 +316,7 @@ class IngestionManifest(ManifestIF): :return: """ - output_path = self.staging_source_dir / build_manifest_filename() + output_path = self.staging_source_dir / MANIFEST_FILENAME to_write = json.dumps(self.to_json(), indent=4) with open(output_path, "w") as out: @@ -394,17 +392,6 @@ def format_timestamp(datetime: DateTime) -> str: return datetime.format("YYYY_MM_DDThh_mm_ss.SSS") -def build_manifest_filename() -> str: - """ - Build unique manifest filename in standard format. - - :return: the filename - """ - current_time = pendulum.now() - timestamp = format_timestamp(current_time) - return f"{MANIFEST_NAME_BASE}{timestamp}{MANIFEST_NAME_EXT}" - - def find_manifest(ingestion_path: Path) -> Path: """ Find the ingestion manifest at this ingestion path. @@ -412,8 +399,7 @@ def find_manifest(ingestion_path: Path) -> Path: :param ingestion_path: home of ingestion files :return: """ - for file in ingestion_path.iterdir(): - if file.name.startswith(MANIFEST_NAME_BASE) and file.name.endswith(MANIFEST_NAME_EXT): - return file + for json_file in ingestion_path.glob(MANIFEST_FILENAME): + return json_file raise FileNotFoundError(f"No ingestion manifest found at {ingestion_path}") diff --git a/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/manifest_components.py b/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/manifest_components.py index acdcd8ff3fe2a2c5cf20e04099daea60fe7fc63b..f514907362f71616833879f08190babdcd0adb87 100644 --- a/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/manifest_components.py +++ b/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/manifest_components.py @@ -8,11 +8,10 @@ from typing import Union, List, Dict from ingest_envoy.utilities import ScienceProductType, Telescope, AncillaryProductType -MANIFEST_NAME_BASE = "ingestion_manifest_" -MANIFEST_NAME_EXT = ".json" +MANIFEST_FILENAME = "ingestion_manifest.json" ARTIFACT_NAME = "ingestion_artifacts_" -ARTIFACT_EXT = ".tar" -WEBLOG = "weblog.tgz" +TARFILE_EXT = ".tar" +WEBLOG_FILENAME = "weblog.tgz" SCIENCE_PRODUCT_PATTERN = re.compile("[a-zA-Z0-9._\\-+]*\\.tar") JSON = Union[int, float, str, List["JSON"], Dict[str, "JSON"]] @@ -57,17 +56,14 @@ class ManifestComponentIF(abc.ABC): class InputScienceProduct(ManifestComponentIF): - """Represents a science product in the "input-group" section of the ingestion manifest.""" + """Simplest type of science product: has only a locator""" - def __init__(self, locator: str, sp_type: ScienceProductType = None): - self.type = sp_type + def __init__(self, locator: str): self.locator = locator def __eq__(self, other): if isinstance(other, InputScienceProduct): - return other.type == self.type and other.locator == self.locator - - return False + return other.locator == self.locator def to_json(self) -> JSON: """ @@ -75,8 +71,6 @@ class InputScienceProduct(ManifestComponentIF): :return: dicty-me """ - if self.type: - return {"type": str(self.type), "locator": self.locator} return {"locator": self.locator} diff --git a/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/utilities.py b/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/utilities.py index 5dd4ca39be25d9edaf2dc84dfaa6692956eb8bf7..4e4f578f3742c747508c7d6e6168862fec0f8174 100644 --- a/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/utilities.py +++ b/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/utilities.py @@ -34,8 +34,8 @@ class AncillaryProductType(Enum): INGESTION_ARTIFACTS = "ingestion_artifacts" PIPELINE_ARTIFACTS = "pipeline_artifacts" - PIPELINE_WEBLOG = "pipeline_weblog" - LOG = "log_file" + PIPELINE_WEBLOG_TYPE = "pipeline_weblog" + LOG_TYPE = "log_file" ### Images ### diff --git a/apps/cli/executables/pexable/ingest_envoy/test/conftest.py b/apps/cli/executables/pexable/ingest_envoy/test/conftest.py index 800194aedd12bf16faf860c0adc0ebf8aa5703cd..32de44099dcb74adee09c2fe4afe299331d6c6a2 100644 --- a/apps/cli/executables/pexable/ingest_envoy/test/conftest.py +++ b/apps/cli/executables/pexable/ingest_envoy/test/conftest.py @@ -7,9 +7,9 @@ from typing import List import pytest -from ingest_envoy.manifest_components import WEBLOG +from ingest_envoy.manifest_components import WEBLOG_FILENAME -WANTED_FILENAMES = ["my_science_products.tar", WEBLOG] +WANTED_FILENAMES = ["my_science_products.tar", WEBLOG_FILENAME] UNWANTED = ["ignore_me.fits", "just_a_lotta_nothing", "uninteresting_metadata.xml"] diff --git a/apps/cli/executables/pexable/ingest_envoy/test/examples/_16B_069_cal_manifest.json b/apps/cli/executables/pexable/ingest_envoy/test/examples/_16B_069_cal_manifest.json new file mode 100644 index 0000000000000000000000000000000000000000..849c8021972d718e1f4ffa9ba85fb5d74c61181f --- /dev/null +++ b/apps/cli/executables/pexable/ingest_envoy/test/examples/_16B_069_cal_manifest.json @@ -0,0 +1,30 @@ +{ + "parameters": { + "reingest": "false", + "ngas_ingest": "false", + "calibrate": "false", + "ingestion_path": "/lustre/aoc/cluster/pipeline/dsoc-dev/workspaces/staging/cal_test6", + "telescope": "EVLA" + }, + "input_group": { + "science_products": [ + { + "locator": "uid://evla/execblock/48ba4c9d-d7c7-4a8f-9803-1115cd52459b" + } + ] + }, + "output_group": { + "science_products": [ + { + "type": "calibration", + "filename": "16B-069_sb32814386_1_001.57685.66193635417.testdate.caltables.tar" + } + ], + "ancillary_products": [ + { + "type": "pipeline_weblog", + "filename": "weblog.tgz" + } + ] + } +} diff --git a/apps/cli/executables/pexable/ingest_envoy/test/examples/image_set_manifest.json b/apps/cli/executables/pexable/ingest_envoy/test/examples/image_set_manifest.json index 8d27af401d24ac614dd618a8b48996fe2d39972d..23a812756b2eb27ffae5b2326780af7172bc2e06 100644 --- a/apps/cli/executables/pexable/ingest_envoy/test/examples/image_set_manifest.json +++ b/apps/cli/executables/pexable/ingest_envoy/test/examples/image_set_manifest.json @@ -1,11 +1,11 @@ { "parameters": { "reingest": "false", - "ngas-ingest": "false", + "ngas_ingest": "false", "calibrate": "false", "ingestion_path": "/lustre/.." }, - "input-group": { + "input_group": { "science_products": [ { "type": "calibration", @@ -13,7 +13,7 @@ } ] }, - "output-group": { + "output_group": { "science_products": [ { "type": "image", diff --git a/apps/cli/executables/pexable/ingest_envoy/test/examples/vlass_catalog_manifest.json b/apps/cli/executables/pexable/ingest_envoy/test/examples/vlass_catalog_manifest.json index 5306a3d5dfa029e145e453d55a0cbed3a49c0835..86d44f8e03c597661576a2cd15e75f92495f69ed 100644 --- a/apps/cli/executables/pexable/ingest_envoy/test/examples/vlass_catalog_manifest.json +++ b/apps/cli/executables/pexable/ingest_envoy/test/examples/vlass_catalog_manifest.json @@ -1,7 +1,7 @@ { "parameters": { "reingest": "false", - "ngas-ingest": "false", + "ngas_ingest": "false", "calibrate": "false", "ingestion_path": "/lustre/...../" }, diff --git a/apps/cli/executables/pexable/ingest_envoy/test/test_manifest_if.py b/apps/cli/executables/pexable/ingest_envoy/test/test_manifest_if.py index b7dff537a6cf4d56024a9743e6daec8c4e397081..be77e70f18838d3be87e39acdd5a650fdad4a460 100644 --- a/apps/cli/executables/pexable/ingest_envoy/test/test_manifest_if.py +++ b/apps/cli/executables/pexable/ingest_envoy/test/test_manifest_if.py @@ -5,7 +5,6 @@ import json import logging -import re import shutil import sys from pathlib import Path @@ -16,12 +15,9 @@ import pytest from ingest_envoy.ingestion_manifest import ( IngestionManifestBuilder, - build_manifest_filename, find_manifest, ) from ingest_envoy.manifest_components import ( - MANIFEST_NAME_BASE, - MANIFEST_NAME_EXT, IngestionManifestKey, ParamsKey, InputScienceProduct, @@ -30,8 +26,9 @@ from ingest_envoy.manifest_components import ( OutputScienceProduct, AncillaryProduct, OutputGroup, - ARTIFACT_EXT, + TARFILE_EXT, ARTIFACT_NAME, + WEBLOG_FILENAME, ) from ingest_envoy.utilities import ( ScienceProductType, @@ -69,16 +66,16 @@ def test_manifest_is_complete(ingest_path: Path): staging_source_dir=ingest_path, ) - sp1 = InputScienceProduct( - sp_type=ScienceProductType.EVLA_CAL, - locator=FAKE_LOCATOR, - ) + sp1 = InputScienceProduct(locator=FAKE_LOCATOR) ig_in = InputGroup(science_products=[sp1]) osp_in = OutputScienceProduct( type=ScienceProductType.EVLA_CAL, filename="my_science_products.tar" ) - ap_in = AncillaryProduct(type=AncillaryProductType.PIPELINE_WEBLOG, filename="weblog.tgz") + ap_in = AncillaryProduct( + type=AncillaryProductType.PIPELINE_WEBLOG_TYPE, + filename=WEBLOG_FILENAME, + ) manifest, _ = IngestionManifestBuilder( staging_source_dir=ingest_path, @@ -95,36 +92,13 @@ def test_manifest_is_complete(ingest_path: Path): af_tar_candidates = [ file for file in ingest_path.iterdir() - if file.name.startswith(ARTIFACT_NAME) and file.name.endswith(ARTIFACT_EXT) + if file.name.startswith(ARTIFACT_NAME) and file.name.endswith(TARFILE_EXT) ] assert len(af_tar_candidates) == 1 shutil.rmtree(ingest_path) -def test_builds_expected_manifest_filename(): - """ - We expect the manifest to be named like - - ingestion_manifest_2019_07_30_T13_03_00.936.json - - :return: - """ - filename = build_manifest_filename() - - assert filename.startswith(MANIFEST_NAME_BASE) - assert filename.endswith(MANIFEST_NAME_EXT) - - filename_parts = filename.split("_") - assert len(filename_parts) == 7 - - # get just the timestamp - timestamp = filename.replace(MANIFEST_NAME_BASE, "").replace(MANIFEST_NAME_EXT, "") - - # we should have gotten year, month, day, hours, minutes, seconds to 3 decimal places - assert re.match(r"\d{4}_\d{2}_\d{2}T\d{2}_\d{2}_\d{2}\.\d{0,3}", timestamp) - - def test_filters_cal_input_files(ingest_path: Path): """ We'll be getting calibration/image/eb, etc. science products from a directory under @@ -152,8 +126,6 @@ def test_filters_cal_input_files(ingest_path: Path): input_group = manifest.input_group assert len(input_group.science_products) == 1 - sp_in = input_group.science_products[0] - assert sp_in.type == ScienceProductType.EVLA_CAL output_group = manifest.output_group assert len(output_group.science_products) == 1 @@ -161,7 +133,7 @@ def test_filters_cal_input_files(ingest_path: Path): for product in output_group.ancillary_products: if product.filename not in WANTED_FILENAMES: assert product.filename.startswith(ARTIFACT_NAME) and product.filename.endswith( - ARTIFACT_EXT + TARFILE_EXT ) assert product.filename not in UNWANTED @@ -195,13 +167,9 @@ def test_writes_expected_output_files(ingest_path: Path): # at a minimum, we expect the manifest, the ingestion artifact, and the science products tar assert len(ingestion_files) >= 3 - mf_json = [ - file - for file in ingestion_files - if file.name.startswith(MANIFEST_NAME_BASE) and file.name.endswith(MANIFEST_NAME_EXT) - ][0] + mf_json = find_manifest(ingest_path) assert mf_json - tars = [file for file in ingestion_files if file.name.endswith(".tar")] + tars = [file for file in ingestion_files if file.name.endswith(TARFILE_EXT)] assert len(tars) >= 2 shutil.rmtree(ingest_path) @@ -259,11 +227,10 @@ def test_input_sp_well_formed(): locator = "uid://evla/calibration/vanilla_heath_bar_crunch_1a23e" # single science product sp_dict = { - "type": ScienceProductType.EVLA_CAL.value, "locator": locator, } - sp_in = InputScienceProduct(sp_type=ScienceProductType.EVLA_CAL.value, locator=locator) + sp_in = InputScienceProduct(locator=locator) assert sp_in.to_json() == sp_dict @@ -273,15 +240,10 @@ def test_input_group_well_formed(): :return: """ - sp1 = InputScienceProduct( - sp_type=ScienceProductType.EXEC_BLOCK.value, - locator="uid://evla/execblock/coffee_heath_bar_crunch_7a23f", - ) + sp1 = InputScienceProduct(locator="uid://evla/execblock/coffee_heath_bar_crunch_7a23f") sp1_json = sp1.to_json() - sp2 = InputScienceProduct( - sp_type=ScienceProductType.EVLA_CAL.value, locator="uid://evla/execblock/mint_oreo_omg_omg" - ) + sp2 = InputScienceProduct(locator="uid://evla/execblock/mint_oreo_omg_omg") sp2_json = sp2.to_json() expected = { @@ -318,8 +280,8 @@ def test_ancillary_product_well_formed(): :return: """ - ap1 = AncillaryProduct(type=AncillaryProductType.LOG, filename="without_feathers.tar") - expected = {"type": AncillaryProductType.LOG.value, "filename": "without_feathers.tar"} + ap1 = AncillaryProduct(type=AncillaryProductType.LOG_TYPE, filename="without_feathers.tar") + expected = {"type": AncillaryProductType.LOG_TYPE.value, "filename": "without_feathers.tar"} actual = ap1.to_json() assert actual == expected @@ -361,19 +323,15 @@ def test_input_group_properly_formatted(): :return: """ - sp1 = InputScienceProduct( - sp_type=ScienceProductType.EXEC_BLOCK.value, - locator="uid://evla/execblock/coffee_heath_bar_crunch_7a23f", - ) + sp1 = InputScienceProduct(locator="uid://evla/execblock/coffee_heath_bar_crunch_7a23f") ingroup = InputGroup(science_products=[sp1]) ig_dict = json.loads(ingroup.to_json()) ig_text = json.dumps(ig_dict, indent=4) expected = """ - "input-group": { + "input_group": { "science_products": [ { - "type": "calibration", "locator": "uid://evla/execblock/coffee_heath_bar_crunch_7a23f" } ] @@ -441,6 +399,16 @@ def test_builds_cal_manifest_as_expected(ingest_path: Path): shutil.rmtree(ingest_path) +@pytest.mark.skip("TODO before merge TODAY 2021-07-22") +def test_manifest_filename_is_correct(): + """ + Calibration ingestion manifest should always be named simply "ingestion_manifest.json" + + :return: + """ + # TODO + + def populate_ingest_path_for_manifest_evla_cal_example(ingestion_path: Path): """ Create fake input files to match EVLA CAL manifest example @@ -448,7 +416,7 @@ def populate_ingest_path_for_manifest_evla_cal_example(ingestion_path: Path): :param ingestion_path: :return: """ - weblog_file = ingestion_path / "qrs.weblog.tgz" + weblog_file = ingestion_path / "weblog.tgz" weblog_file.touch() cal_file = ingestion_path / "XYZ-abc+TMN.O00.tar" cal_file.touch() diff --git a/apps/cli/executables/pexable/ingest_envoy/test/test_miscellaneous_manifests.py b/apps/cli/executables/pexable/ingest_envoy/test/test_miscellaneous_manifests.py index 4c85e801e40326dd8ba9b64874be8543d93e8aa5..b67975dbf3b1c3e7b866ce868085d0b34e54065d 100644 --- a/apps/cli/executables/pexable/ingest_envoy/test/test_miscellaneous_manifests.py +++ b/apps/cli/executables/pexable/ingest_envoy/test/test_miscellaneous_manifests.py @@ -11,12 +11,11 @@ import pytest from ingest_envoy.ingestion_manifest import ( IngestionManifest, IngestionManifestBuilder, + find_manifest, ) from ingest_envoy.manifest_components import ( - MANIFEST_NAME_BASE, - MANIFEST_NAME_EXT, ARTIFACT_NAME, - ARTIFACT_EXT, + TARFILE_EXT, ) from ingest_envoy.utilities import ScienceProductType, Telescope from .conftest import ingest_path, populate_fake_evla_cal_ingest_path @@ -48,19 +47,15 @@ def test_entry_point_for_evla_cal(ingest_path: Path): ingestion_files = [file for file in ingest_path.iterdir()] # there should be one ingestion manifest.... - mf_jsons = [ - file - for file in ingestion_files - if file.name.startswith(MANIFEST_NAME_BASE) and file.name.endswith(MANIFEST_NAME_EXT) - ] - assert len(mf_jsons) == 1 + manifest_file = find_manifest(ingest_path) + assert manifest_file # ...and an artifacts tar, and the science products tar we started with assert sp_tar in ingestion_files artifact_tars = [ file for file in ingestion_files - if file.name.startswith(ARTIFACT_NAME) and file.name.endswith(ARTIFACT_EXT) + if file.name.startswith(ARTIFACT_NAME) and file.name.endswith(TARFILE_EXT) ] assert len(artifact_tars) == 1