diff --git a/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/ingestion_manifest.py b/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/ingestion_manifest.py index 19cb92a225b67866f0c1b100a1bd69a95dac96a8..6947dec671cf42c7bb7f470caedb58f306e7bf73 100644 --- a/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/ingestion_manifest.py +++ b/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/ingestion_manifest.py @@ -28,6 +28,7 @@ from typing import Tuple import pendulum from ingest_envoy.manifest_components import ( INGESTION_ARTIFACTS_NAME, + INIT_WEBLOG_FILENAME, JSON, MANIFEST_FILENAME, TARFILE_EXT, @@ -46,9 +47,11 @@ from ingest_envoy.std_img_manifest_utils import ImageIngestionProductsFinder from ingest_envoy.utilities import ( AncillaryProductType, IngestionManifestException, + NoScienceProductException, ScienceProductType, Telescope, find_output_tars, + find_weblogs, ) from pendulum import DateTime @@ -239,12 +242,20 @@ class IngestionManifestBuilder: # (required for ingestion, evidently) artifacts_filename = self._build_artifacts_filename() artifacts_ap = AncillaryProduct(AncillaryProductType.INGESTION_ARTIFACTS, filename=artifacts_filename) + manifest.output_group.ancillary_products.append(artifacts_ap) if not manifest.output_group.ancillary_products: manifest.output_group.ancillary_products = [] weblog_ap = AncillaryProduct(type=AncillaryProductType.PIPELINE_WEBLOG_TYPE, filename=WEBLOG_FILENAME) - manifest.output_group.ancillary_products.append(weblog_ap) - manifest.output_group.ancillary_products.append(artifacts_ap) + if weblog_ap not in manifest.output_group.ancillary_products: + manifest.output_group.ancillary_products.append(weblog_ap) + + # # If this isn't version 1, there should be an initial weblog from v1 + # init_weblog = self._find_init_weblog_if_any() + # if init_weblog: + # init_weblog_ap = AncillaryProduct(type=AncillaryProductType.PIPELINE_WEBLOG_TYPE, + # filename=INIT_WEBLOG_FILENAME) + # 
manifest.output_group.ancillary_products.append(init_weblog_ap) manifest_file = manifest.write() artifacts_file = self.staging_source_dir / artifacts_filename @@ -252,6 +263,19 @@ class IngestionManifestBuilder: return manifest, manifest_file + def _find_init_weblog_if_any(self): + """ + Is there an initial weblog in the staging source dir? + (If so, this calibration is v2 or higher) + + :return: initial weblog, if any + """ + for file in [file for file in self.staging_source_dir.iterdir()]: + if file.name == INIT_WEBLOG_FILENAME: + return file + + return None + def _build_image_manifest(self): """ Image manifest has additional_metadata, and output group is way more complicated @@ -293,13 +317,13 @@ class IngestionManifestBuilder: def _build_evla_cal_output_group(self): """ - Create imaging manifest output group using the parameters + Create EVLA standard calibration manifest output group using the parameters and the contents of the staging dir. - :return: + :return: an output group, if a science product is found """ - # find science product (we expect just one for this SP type) + # find science product (we expect just one for this SP type) tars_found = find_output_tars(self.files_found, self.staging_source_dir) sci_prod = None @@ -308,7 +332,16 @@ class IngestionManifestBuilder: break if sci_prod: - return OutputGroup(science_products=[sci_prod]) + weblog_files = find_weblogs(self.files_found, self.staging_source_dir) + weblogs = [] + for file in weblog_files: + ap = AncillaryProduct(type=AncillaryProductType.PIPELINE_WEBLOG_TYPE, filename=file.name) + if ap not in weblogs: + weblogs.append(ap) + else: + raise NoScienceProductException(f">>> NO SCIENCE PRODUCT FOUND in {self.staging_source_dir}") + + return OutputGroup(science_products=[sci_prod], ancillary_products=weblogs) def _build_imaging_output_group(self) -> OutputGroup: """ diff --git a/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/manifest_components.py 
b/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/manifest_components.py index 551992378a2a9f14664bedb6b6d3d49c18b4bb77..5dae014cbf13212e7a87215a6175ee6e22ef85e7 100644 --- a/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/manifest_components.py +++ b/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/manifest_components.py @@ -204,13 +204,8 @@ class AncillaryProduct(ManifestComponentIF): self.group_with = group_with def __eq__(self, other): - if isinstance(other, AncillaryProduct): - return ( - other.type == self.type - and other.filename == self.filename - and other.group_with == self.group_with - and other.science_associate == self.science_associate - ) + if isinstance(other, AncillaryProduct) and other.type == self.type and other.filename == self.filename: + return other.group_with == self.group_with and other.science_associate == self.science_associate return False @@ -304,18 +299,3 @@ class OutputGroup(ManifestComponentIF): me_dict[IngestionManifestKey.ANCILLARY_PRODUCTS.value] = ap_jsons return me_dict - - -class Weblog: - """Represents a weblog.tgz as an ancillary product""" - - def __init__(self, weblog_path: Path): - self.ancillary_product = {"type": "weblog", "filename": str(weblog_path)} - - def to_json(self) -> JSON: - """ - JSON-ify this object - - :return: json.load()-able string - """ - return dict(self.__dict__) diff --git a/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/utilities.py b/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/utilities.py index a2f98b913c59c3c1275fdc768e3e3f8d83db54b0..e1c20cfdcf87a3e2206183102fb8f4f02e75cc9e 100644 --- a/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/utilities.py +++ b/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/utilities.py @@ -77,6 +77,10 @@ class IngestionManifestException(Exception): """Throw this if we're unable to construct an ingestion manifest using supplied inputs""" +class NoScienceProductException(Exception): + """Throw this if no science 
product is found in the staging source directory""" + + def find_output_tars(files_found, staging_source_dir) -> List[Path]: """ Round up the output science products associated with this SP type. @@ -89,3 +93,19 @@ def find_output_tars(files_found, staging_source_dir) -> List[Path]: raise IngestionManifestException(f"No output science products found at {staging_source_dir}") return tar_files + + +def find_weblogs(files_found, staging_source_dir) -> List[Path]: + """ + Get the weblog in the ingest source directory. + If there's an initial weblog (i.e., this calibration is v2 or higher), + get that too. + + :return: + """ + weblogs = [file for file in files_found if file.name.endswith(".tgz") and "weblog" in file.name] + + if len(weblogs) == 0: + raise IngestionManifestException(f"No weblogs found at {staging_source_dir}") + + return weblogs diff --git a/apps/cli/executables/pexable/ingest_envoy/test/conftest.py b/apps/cli/executables/pexable/ingest_envoy/test/conftest.py index a9d8ba3141a0f27e9647f359f6efea77402e1b81..a995154f2fbda27283b7d0f39ac8910ede7b3e33 100644 --- a/apps/cli/executables/pexable/ingest_envoy/test/conftest.py +++ b/apps/cli/executables/pexable/ingest_envoy/test/conftest.py @@ -99,6 +99,7 @@ def populate_fake_final_evla_cal_ingest_path(staging_dir: Path) -> List[Path]: """ files = populate_fake_evla_cal_ingest_path(staging_dir) init_weblog = staging_dir / "initial_weblog.tgz" + init_weblog.touch() files.append(init_weblog) return files @@ -147,7 +148,8 @@ STAGING_DIR_FILES = [ def populate_fake_tmpx_ratuqh_ingest_path(staging_source_dir: Path, is_final: bool = False) -> List[Path]: """ - make a bunch of fake files that should result in the example manifest + Make a bunch of fake files that should result in the example manifest. + If this is version 2 or later of a standard calibration, include the initial weblog. 
:return: """ diff --git a/apps/cli/executables/pexable/ingest_envoy/test/test_evla_cal_manifest.py b/apps/cli/executables/pexable/ingest_envoy/test/test_evla_cal_manifest.py index f02063915c7868e863210ffb532ebc840d2fe5ea..35824e830bec1a31c86b8fb063b6e57983388842 100644 --- a/apps/cli/executables/pexable/ingest_envoy/test/test_evla_cal_manifest.py +++ b/apps/cli/executables/pexable/ingest_envoy/test/test_evla_cal_manifest.py @@ -24,15 +24,18 @@ import sys import tarfile from pathlib import Path -# pylint: disable=E0401, E0402, R1721, W0621 +# pylint: disable=C0103, C0301, E0401, E0402, R0914, R1721, W0621, W1514 + from unittest.mock import patch import pytest from ingest_envoy.ingestion_manifest import IngestionManifestBuilder, find_manifest from ingest_envoy.manifest_components import ( INGESTION_ARTIFACTS_NAME, + INIT_WEBLOG_FILENAME, MANIFEST_FILENAME, TARFILE_EXT, + WEBLOG_FILENAME, AncillaryProduct, IngestionManifestKey, InputGroup, @@ -48,7 +51,6 @@ from .conftest import ( EVLA_CAL_INPUT_FILENAMES, UNWANTED, find_example_manifest, - ingest_path, populate_fake_evla_cal_ingest_path, populate_fake_final_evla_cal_ingest_path, ) @@ -73,7 +75,7 @@ def test_filters_cal_input_files(ingest_path: Path): :param ingest_path: our temporary dir """ - populate_fake_evla_cal_ingest_path(ingest_path) + populate_fake_evla_cal_ingest_path(staging_dir=ingest_path) locator = "uid://evla/calibration/twinkle-twinkle-little-quasar" manifest, _ = IngestionManifestBuilder( telescope=Telescope.EVLA.value, @@ -93,7 +95,6 @@ def test_filters_cal_input_files(ingest_path: Path): assert len(input_group.science_products) == 1 output_group = manifest.output_group - assert len(output_group.science_products) == 1 assert len(output_group.ancillary_products) == 2 for product in output_group.ancillary_products: @@ -348,18 +349,21 @@ def test_evla_cal_manifest_matches_example(ingest_path: Path): assert len(actual_osp) == len(expected_osp) expected_aps = expected_outgroup["ancillary_products"] + 
assert len(expected_aps) == 2 actual_aps = manifest.output_group.ancillary_products - assert len(actual_aps) == len(expected_aps) == 2 + logger.info(">>> ACTUAL ANCILLARY PRODUCTS") + for ap in actual_aps: + logger.info(f"{ap.filename}: {ap.type}, group with {ap.group_with}") + assert len(actual_aps) == len(expected_aps) shutil.rmtree(ingest_path) -@pytest.mark.skip("TODO") def test_evla_cal_final_manifest_matches_example(ingest_path: Path): """ Given the correct parameters, manifest that matches evla_cal_final_manifest - should be generated - TODO: make this test pass + should be generated; i.e., ancillary_products should contain initial_weblog.tgz + in addition to weblog.tgz :return: """ @@ -400,14 +404,24 @@ def test_evla_cal_final_manifest_matches_example(ingest_path: Path): expected_osp = expected_outgroup["science_products"] actual_osp = manifest.output_group.science_products assert len(actual_osp) == len(expected_osp) + logger.info(">>> SCIENCE PRODUCTS:") + for sp in actual_osp: + logger.info(f"{sp.filename}") + if sp.ancillary_products is not None: + for ap in sp.ancillary_products: + logger.info(f"{ap.filename}") expected_aps = expected_outgroup["ancillary_products"] + assert len(expected_aps) == 3 actual_aps = manifest.output_group.ancillary_products - assert len(actual_aps) == len(expected_aps) == 3 + logger.info(">>> ACTUAL ANCILLARY PRODUCTS") + for ap in actual_aps: + logger.info(f"{ap.filename}: {ap.type}, group with {ap.group_with}") + assert len(actual_aps) == len(expected_aps) found_count = 0 for ap in actual_aps: - if ap.filename == "weblog.tgz" or ap.filename == "initial_weblog.tgz": + if ap.filename == WEBLOG_FILENAME or ap.filename == INIT_WEBLOG_FILENAME: found_count += 1 assert found_count == 2