diff --git a/apps/cli/executables/pexable/ingest_envoy/test/conftest.py b/apps/cli/executables/pexable/ingest_envoy/test/conftest.py index 2ea3c70bb31a239aa94a26e5aae59cd89ff2ae45..87db6d0eef97458ef8089bac6ad9759699f72d60 100644 --- a/apps/cli/executables/pexable/ingest_envoy/test/conftest.py +++ b/apps/cli/executables/pexable/ingest_envoy/test/conftest.py @@ -43,10 +43,10 @@ UNWANTED = ["ignore_me.fits", "just_a_lotta_nothing", "uninteresting_metadata.xm @pytest.fixture(scope="function") def ingest_path(tmp_path: Path) -> Path: """ - Make an "ingestion path" for tests + Make a directory to use as the ingestion staging dir, or curation source - :param tmp_path: built-in pytest fixture; temporary home for ingestion location - :return: + :param tmp_path: built-in pytest fixture, Pytest cleans them up periodically + :return: Path to new directory """ # cast is necessary because otherwise we get a LocalPath, which doesn't work @@ -57,6 +57,12 @@ def ingest_path(tmp_path: Path) -> Path: @pytest.fixture def alternate_manifest_destination(tmp_path: Path) -> Path: + """ + Make an alternative directory to ingest_path for tests to put their manifests in + + :param tmp_path: built-in pytest fixture, Pytest cleans them up periodically + :return: Path to new directory + """ alternate_manifest_destination = tmp_path / "manifest_destination" alternate_manifest_destination.mkdir() return alternate_manifest_destination diff --git a/apps/cli/executables/pexable/ingest_envoy/test/test_curator_manifest.py b/apps/cli/executables/pexable/ingest_envoy/test/test_curator_manifest.py index c2d95a3639d8115c4306355784611aa4e28dc4f0..3a1f0196be547754c63efb1b37c82b2e9d16f15e 100644 --- a/apps/cli/executables/pexable/ingest_envoy/test/test_curator_manifest.py +++ b/apps/cli/executables/pexable/ingest_envoy/test/test_curator_manifest.py @@ -16,7 +16,6 @@ # along with Workspaces. If not, see <https://www.gnu.org/licenses/>. # import json -import shutil import tarfile from operator import itemgetter from pathlib import Path @@ -70,86 +69,82 @@ def test_manifest_full_curation_eb_manifest( manifest_destination_dir = ingest_path if use_alternate_manifest_destination: manifest_destination_dir = alternate_manifest_destination - try: - assert len(eb_files) == 1 - actual_manifest, actual_manifest_file = IngestionManifestBuilder( - telescope=Telescope.EVLA.value, - staging_source_dir=None, - sp_type=ScienceProductType.EXEC_BLOCK.value, - locator=EVLA_EB_LOCATOR, - filename=EVLA_EB_NAME, - curate=(CuratorType.FULL, str(ingest_path), None), - manifest_destination_dir=manifest_destination_dir, - ).build() - assert actual_manifest_file.name == CURATOR_MANIFEST_FILENAME - assert actual_manifest_file.parent == manifest_destination_dir - - # Check metadata - params = actual_manifest.parameters - assert params.ngas_ingest is False - assert params.telescope == Telescope.EVLA - assert params.staging_source_dir == ingest_path - assert not hasattr(params, "calibrate") - - assert actual_manifest.input_group - assert not actual_manifest.input_group.science_products - - output_group = actual_manifest.output_group - assert output_group - assert len(output_group.science_products) == 1 - assert output_group.science_products[0].filename == EVLA_EB_NAME - assert len(output_group.ancillary_products) == 1 - assert INGESTION_ARTIFACTS_NAME in output_group.ancillary_products[0].filename - - reingest = actual_manifest.reingest - assert reingest - assert reingest.targets == None - assert reingest.locator == EVLA_EB_LOCATOR - assert reingest.product_type == ScienceProductType.EXEC_BLOCK - - # Check actual file - with open(actual_manifest_file) as f: - actual_manifest_deser = json.load(f) - assert actual_manifest.to_dict() == actual_manifest_deser - - # Check against expected manifest - expected_manifest_file = find_example_manifest("full_curation_evla_eb_manifest") - with open(expected_manifest_file) as f: - expected_manifest = json.load(f) - - # ingestion_path depends on the ingest_path fixture, so ignore it - expected_params = expected_manifest["parameters"] - actual_params = actual_manifest_deser["parameters"] - assert actual_params["ngas_ingest"] == expected_params["ngas_ingest"] - assert actual_manifest_deser["reingest"] == expected_manifest["reingest"] - assert actual_manifest_deser.get("input_group") == expected_manifest.get("input_group") - - expected_outgroup = expected_manifest["output_group"] - expected_osp = expected_outgroup["science_products"] - actual_outgroup = actual_manifest_deser["output_group"] - actual_osp = actual_outgroup["science_products"] - assert actual_osp == expected_osp - - # the ingestion_artifacts' filename depends on current time, so ignore it - expected_aps = expected_outgroup["ancillary_products"] - actual_aps = actual_outgroup["ancillary_products"] - assert len(expected_aps) == len(actual_aps) - for expected_ap, actual_ap in zip(expected_aps, actual_aps): - assert expected_ap["type"] == actual_ap["type"] - - # get ingestion artifacts - artifacts_file = [file for file in manifest_destination_dir.glob("ingestion_artifacts*.tar")][0] - with tarfile.open(artifacts_file, "r") as tar: - # confirm that contains has as many files as we expect... - members = tar.getmembers() - - assert len(members) == 1 - member = members[0] - mf_path = Path(member.name) - assert mf_path.name == CURATOR_MANIFEST_FILENAME - finally: - # Clean up - shutil.rmtree(ingest_path) + assert len(eb_files) == 1 + actual_manifest, actual_manifest_file = IngestionManifestBuilder( + telescope=Telescope.EVLA.value, + staging_source_dir=None, + sp_type=ScienceProductType.EXEC_BLOCK.value, + locator=EVLA_EB_LOCATOR, + filename=EVLA_EB_NAME, + curate=(CuratorType.FULL, str(ingest_path), None), + manifest_destination_dir=manifest_destination_dir, + ).build() + assert actual_manifest_file.name == CURATOR_MANIFEST_FILENAME + assert actual_manifest_file.parent == manifest_destination_dir + + # Check metadata + params = actual_manifest.parameters + assert params.ngas_ingest is False + assert params.telescope == Telescope.EVLA + assert params.staging_source_dir == ingest_path + assert not hasattr(params, "calibrate") + + assert actual_manifest.input_group + assert not actual_manifest.input_group.science_products + + output_group = actual_manifest.output_group + assert output_group + assert len(output_group.science_products) == 1 + assert output_group.science_products[0].filename == EVLA_EB_NAME + assert len(output_group.ancillary_products) == 1 + assert INGESTION_ARTIFACTS_NAME in output_group.ancillary_products[0].filename + + reingest = actual_manifest.reingest + assert reingest + assert reingest.targets == None + assert reingest.locator == EVLA_EB_LOCATOR + assert reingest.product_type == ScienceProductType.EXEC_BLOCK + + # Check actual file + with open(actual_manifest_file) as f: + actual_manifest_deser = json.load(f) + assert actual_manifest.to_dict() == actual_manifest_deser + + # Check against expected manifest + expected_manifest_file = find_example_manifest("full_curation_evla_eb_manifest") + with open(expected_manifest_file) as f: + expected_manifest = json.load(f) + + # ingestion_path depends on the ingest_path fixture, so ignore it + expected_params = expected_manifest["parameters"] + actual_params = actual_manifest_deser["parameters"] + assert actual_params["ngas_ingest"] == expected_params["ngas_ingest"] + assert actual_manifest_deser["reingest"] == expected_manifest["reingest"] + assert actual_manifest_deser.get("input_group") == expected_manifest.get("input_group") + + expected_outgroup = expected_manifest["output_group"] + expected_osp = expected_outgroup["science_products"] + actual_outgroup = actual_manifest_deser["output_group"] + actual_osp = actual_outgroup["science_products"] + assert actual_osp == expected_osp + + # the ingestion_artifacts' filename depends on current time, so ignore it + expected_aps = expected_outgroup["ancillary_products"] + actual_aps = actual_outgroup["ancillary_products"] + assert len(expected_aps) == len(actual_aps) + for expected_ap, actual_ap in zip(expected_aps, actual_aps): + assert expected_ap["type"] == actual_ap["type"] + + # get ingestion artifacts + artifacts_file = [file for file in manifest_destination_dir.glob("ingestion_artifacts*.tar")][0] + with tarfile.open(artifacts_file, "r") as tar: + # confirm that contains has as many files as we expect... + members = tar.getmembers() + + assert len(members) == 1 + member = members[0] + mf_path = Path(member.name) + assert mf_path.name == CURATOR_MANIFEST_FILENAME @pytest.mark.parametrize("has_curation_source", [True, False]) @@ -164,55 +159,50 @@ def test_manifest_partial_curation_eb(has_curation_source: bool, ingest_path: Pa curation_source = None eb_files = populate_fake_evla_eb_curator_source_path(ingest_path) - try: - assert len(eb_files) == 1 - actual_manifest, actual_manifest_file = IngestionManifestBuilder( - telescope=Telescope.EVLA.value, - staging_source_dir=None, - sp_type=ScienceProductType.EXEC_BLOCK.value, - locator=EVLA_EB_LOCATOR, - filename=EVLA_EB_NAME, - curate=(CuratorType.PARTIAL, curation_source, ["subscans.dec"]), - manifest_destination_dir=ingest_path, - ).build() - - # Check metadata - params = actual_manifest.parameters - assert params.ngas_ingest is False - assert params.telescope == Telescope.EVLA - if has_curation_source: - assert params.staging_source_dir == Path(curation_source) - else: - assert not hasattr(params, "staging_source_dir") - assert not hasattr(params, "calibrate") - assert not actual_manifest.input_group - assert not actual_manifest.output_group - reingest = actual_manifest.reingest - assert reingest - assert reingest.targets is not None - assert reingest.locator == EVLA_EB_LOCATOR - assert reingest.product_type == ScienceProductType.EXEC_BLOCK - - # Check that manifest file exists on disk - with open(actual_manifest_file) as f: - actual_manifest_deser = json.load(f) - assert actual_manifest.to_dict() == actual_manifest_deser - - # Check against expected manifest - expected_manifest_file = find_example_manifest(expected_manifest_name) - with open(expected_manifest_file) as f: - expected_manifest = json.load(f) - assert actual_manifest_deser.get("input_group") == expected_manifest.get("input_group") - assert actual_manifest_deser.get("output_group") == expected_manifest.get("output_group") - assert actual_manifest_deser["reingest"] == expected_manifest["reingest"] - - # Make sure there are no artifacts - artifacts_file = [file for file in ingest_path.glob("ingestion_artifacts*.tar")] - assert not artifacts_file - - finally: - # Clean up - shutil.rmtree(ingest_path) + assert len(eb_files) == 1 + actual_manifest, actual_manifest_file = IngestionManifestBuilder( + telescope=Telescope.EVLA.value, + staging_source_dir=None, + sp_type=ScienceProductType.EXEC_BLOCK.value, + locator=EVLA_EB_LOCATOR, + filename=EVLA_EB_NAME, + curate=(CuratorType.PARTIAL, curation_source, ["subscans.dec"]), + manifest_destination_dir=ingest_path, + ).build() + + # Check metadata + params = actual_manifest.parameters + assert params.ngas_ingest is False + assert params.telescope == Telescope.EVLA + if has_curation_source: + assert params.staging_source_dir == Path(curation_source) + else: + assert not hasattr(params, "staging_source_dir") + assert not hasattr(params, "calibrate") + assert not actual_manifest.input_group + assert not actual_manifest.output_group + reingest = actual_manifest.reingest + assert reingest + assert reingest.targets is not None + assert reingest.locator == EVLA_EB_LOCATOR + assert reingest.product_type == ScienceProductType.EXEC_BLOCK + + # Check that manifest file exists on disk + with open(actual_manifest_file) as f: + actual_manifest_deser = json.load(f) + assert actual_manifest.to_dict() == actual_manifest_deser + + # Check against expected manifest + expected_manifest_file = find_example_manifest(expected_manifest_name) + with open(expected_manifest_file) as f: + expected_manifest = json.load(f) + assert actual_manifest_deser.get("input_group") == expected_manifest.get("input_group") + assert actual_manifest_deser.get("output_group") == expected_manifest.get("output_group") + assert actual_manifest_deser["reingest"] == expected_manifest["reingest"] + + # Make sure there are no artifacts + artifacts_file = [file for file in ingest_path.glob("ingestion_artifacts*.tar")] + assert not artifacts_file @pytest.mark.parametrize("is_full_curation", [True, False]) @@ -223,19 +213,16 @@ def test_curation_manifest_bad_no_manifest_destination_directory(is_full_curatio curator_type = CuratorType.FULL target_list = None populate_fake_evla_eb_curator_source_path(ingest_path) - try: - with pytest.raises(IngestionManifestException, match="directory to house the manifest"): - IngestionManifestBuilder( - telescope=Telescope.EVLA.value, - staging_source_dir=None, - sp_type=ScienceProductType.EXEC_BLOCK.value, - locator=EVLA_EB_LOCATOR, - filename=EVLA_EB_NAME, - curate=(curator_type, ingest_path, target_list), - manifest_destination_dir=None, - ) - finally: - shutil.rmtree(ingest_path) + with pytest.raises(IngestionManifestException, match="directory to house the manifest"): + IngestionManifestBuilder( + telescope=Telescope.EVLA.value, + staging_source_dir=None, + sp_type=ScienceProductType.EXEC_BLOCK.value, + locator=EVLA_EB_LOCATOR, + filename=EVLA_EB_NAME, + curate=(curator_type, ingest_path, target_list), + manifest_destination_dir=None, + ) def test_manifest_full_curation_image(ingest_path: Path): @@ -244,157 +231,154 @@ def test_manifest_full_curation_image(ingest_path: Path): """ populate_fake_tmpx_ratuqh_ingest_path(ingest_path) additional_metadata = AbstractTextFile(filename=ADDITIONAL_METADATA_FILENAME, content="") - try: - actual_manifest, actual_manifest_file = IngestionManifestBuilder( - staging_source_dir=None, - additional_metadata=additional_metadata, - telescope=Telescope.EVLA.value, - sp_type=ScienceProductType.IMAGE.value, - locator=IMAGE_LOCATOR, - curate=(CuratorType.FULL, str(ingest_path), None), - manifest_destination_dir=ingest_path, - ).build() - assert actual_manifest_file.name == CURATOR_MANIFEST_FILENAME - - # Check metadata - mf_json = actual_manifest.to_dict() - keys = ["parameters", "input_group", "output_group", "reingest"] - assert len(mf_json) == len(keys) - for key in keys: - assert key in mf_json.keys() - params = actual_manifest.parameters - assert params.ngas_ingest is False - assert params.telescope == Telescope.EVLA - assert params.staging_source_dir == ingest_path - assert params.calibrate is False - reingest = actual_manifest.reingest - assert reingest - assert reingest.targets == None - assert reingest.locator == IMAGE_LOCATOR - assert reingest.product_type == ScienceProductType.IMAGE - assert actual_manifest.input_group - assert len(actual_manifest.input_group.science_products) == 1 - - """ - The ancillary_products section of the manifest we build - should match the one in the example. - """ - output_group = actual_manifest.output_group - assert output_group - aps = output_group.ancillary_products - assert aps - assert len(aps) == len(ANCILLARY_PRODUCTS) - - # Check contents of ancillary_products - weblog_candidates = [ap for ap in aps if ap.filename == WEBLOG_FILENAME] - assert len(weblog_candidates) == 1 - assert weblog_candidates[0].type == AncillaryProductType.PIPELINE_WEBLOG - - ingest_artifacts_tar = find_ingestion_artifacts_tar(ingest_path) - ingest_artifacts_candidates = [ap for ap in aps if ap.filename == ingest_artifacts_tar.name] - assert len(ingest_artifacts_candidates) == 1 - assert ingest_artifacts_candidates[0].type == AncillaryProductType.INGESTION_ARTIFACTS - - maybe_pips = [file for file in ingest_path.glob(AncillaryProductType.PIPELINE_ARTIFACTS.value + "*.tar")] - assert len(maybe_pips) == 1 - pip_artie = maybe_pips[0] - pipeline_artifacts_candidates = [ap for ap in aps if ap.filename == pip_artie.name] - assert len(pipeline_artifacts_candidates) == 1 - assert pipeline_artifacts_candidates[0].type == AncillaryProductType.PIPELINE_ARTIFACTS - - # Inspect the manifest's JSON dict - """ - The output_group section of the manifest we build - should match the one in the example: - * a "science_products" section containing two science products comprising "type", "filename", - and six ancillary products on each science product - * an "ancillary products" section comprising three ancillary products - """ - mf_json = actual_manifest.to_dict() - og_json = mf_json[IngestionManifestKey.OUTPUT_GROUP.value] - assert len(og_json[IngestionManifestKey.SCIENCE_PRODUCTS.value]) == 2 - - for sp_json in og_json[IngestionManifestKey.SCIENCE_PRODUCTS.value]: - assert len(sp_json) == 3 - assert set(sp_json.keys()) == {IngestionManifestKey.ANCILLARY_PRODUCTS.value, "type", "filename"} - # and ancillary products belonging to the first science product... - sp_ap_jsons = sp_json[IngestionManifestKey.ANCILLARY_PRODUCTS.value] - assert len(sp_ap_jsons) == 6 - - # ... and ancillary products twisting in the wind all by themselves - ap_jsons = og_json[IngestionManifestKey.ANCILLARY_PRODUCTS.value] - assert len(ap_jsons) == 3 - - # there should be a weblog, the ingestion artifacts tar, and the pipeline artifacts tar - filenames_found = [] - for ap_json in ap_jsons: - filename = ap_json["filename"] - if filename.endswith(TARFILE_EXT): - if filename.startswith(AncillaryProductType.INGESTION_ARTIFACTS.value) or filename.startswith( - AncillaryProductType.PIPELINE_ARTIFACTS.value - ): - filenames_found.append(filename) - elif filename == WEBLOG_FILENAME: + actual_manifest, actual_manifest_file = IngestionManifestBuilder( + staging_source_dir=None, + additional_metadata=additional_metadata, + telescope=Telescope.EVLA.value, + sp_type=ScienceProductType.IMAGE.value, + locator=IMAGE_LOCATOR, + curate=(CuratorType.FULL, str(ingest_path), None), + manifest_destination_dir=ingest_path, + ).build() + assert actual_manifest_file.name == CURATOR_MANIFEST_FILENAME + + # Check metadata + mf_json = actual_manifest.to_dict() + keys = ["parameters", "input_group", "output_group", "reingest"] + assert len(mf_json) == len(keys) + for key in keys: + assert key in mf_json.keys() + params = actual_manifest.parameters + assert params.ngas_ingest is False + assert params.telescope == Telescope.EVLA + assert params.staging_source_dir == ingest_path + assert params.calibrate is False + reingest = actual_manifest.reingest + assert reingest + assert reingest.targets == None + assert reingest.locator == IMAGE_LOCATOR + assert reingest.product_type == ScienceProductType.IMAGE + assert actual_manifest.input_group + assert len(actual_manifest.input_group.science_products) == 1 + + """ + The ancillary_products section of the manifest we build + should match the one in the example. + """ + output_group = actual_manifest.output_group + assert output_group + aps = output_group.ancillary_products + assert aps + assert len(aps) == len(ANCILLARY_PRODUCTS) + + # Check contents of ancillary_products + weblog_candidates = [ap for ap in aps if ap.filename == WEBLOG_FILENAME] + assert len(weblog_candidates) == 1 + assert weblog_candidates[0].type == AncillaryProductType.PIPELINE_WEBLOG + + ingest_artifacts_tar = find_ingestion_artifacts_tar(ingest_path) + ingest_artifacts_candidates = [ap for ap in aps if ap.filename == ingest_artifacts_tar.name] + assert len(ingest_artifacts_candidates) == 1 + assert ingest_artifacts_candidates[0].type == AncillaryProductType.INGESTION_ARTIFACTS + + maybe_pips = [file for file in ingest_path.glob(AncillaryProductType.PIPELINE_ARTIFACTS.value + "*.tar")] + assert len(maybe_pips) == 1 + pip_artie = maybe_pips[0] + pipeline_artifacts_candidates = [ap for ap in aps if ap.filename == pip_artie.name] + assert len(pipeline_artifacts_candidates) == 1 + assert pipeline_artifacts_candidates[0].type == AncillaryProductType.PIPELINE_ARTIFACTS + + # Inspect the manifest's JSON dict + """ + The output_group section of the manifest we build + should match the one in the example: + * a "science_products" section containing two science products comprising "type", "filename", + and six ancillary products on each science product + * an "ancillary products" section comprising three ancillary products + """ + mf_json = actual_manifest.to_dict() + og_json = mf_json[IngestionManifestKey.OUTPUT_GROUP.value] + assert len(og_json[IngestionManifestKey.SCIENCE_PRODUCTS.value]) == 2 + + for sp_json in og_json[IngestionManifestKey.SCIENCE_PRODUCTS.value]: + assert len(sp_json) == 3 + assert set(sp_json.keys()) == {IngestionManifestKey.ANCILLARY_PRODUCTS.value, "type", "filename"} + # and ancillary products belonging to the first science product... + sp_ap_jsons = sp_json[IngestionManifestKey.ANCILLARY_PRODUCTS.value] + assert len(sp_ap_jsons) == 6 + + # ... and ancillary products twisting in the wind all by themselves + ap_jsons = og_json[IngestionManifestKey.ANCILLARY_PRODUCTS.value] + assert len(ap_jsons) == 3 + + # there should be a weblog, the ingestion artifacts tar, and the pipeline artifacts tar + filenames_found = [] + for ap_json in ap_jsons: + filename = ap_json["filename"] + if filename.endswith(TARFILE_EXT): + if filename.startswith(AncillaryProductType.INGESTION_ARTIFACTS.value) or filename.startswith( + AncillaryProductType.PIPELINE_ARTIFACTS.value + ): filenames_found.append(filename) + elif filename == WEBLOG_FILENAME: + filenames_found.append(filename) - assert len(filenames_found) == 3 - - """ - The ingestion_artifacts tar should contain ONLY - the ingestion manifest and the additional metadata file - """ - addl_md = actual_manifest.parameters.additional_metadata - assert addl_md - ing_arties_tar = find_ingestion_artifacts_tar(ingest_path) - with tarfile.open(ing_arties_tar, "r") as tar: - members = tar.getmembers() - assert len(members) == 2 - for member in members: - assert member.name.endswith(CURATOR_MANIFEST_FILENAME) or member.name.endswith(addl_md.filename) - - # Check that manifest file exists on disk - with open(actual_manifest_file) as f: - actual_manifest_deser = json.load(f) - assert actual_manifest.to_dict() == actual_manifest_deser - - # Check against expected manifest - expected_manifest_file = find_example_manifest("full_curation_evla_image_manifest") - with open(expected_manifest_file) as f: - expected_manifest = json.load(f) - - assert actual_manifest_deser.keys() == expected_manifest.keys() - # ingestion_path depends on the ingest_path fixture, so ignore it - expected_params = expected_manifest["parameters"] - actual_params = actual_manifest_deser["parameters"] - assert actual_params["ngas_ingest"] == expected_params["ngas_ingest"] - assert expected_manifest["input_group"] == actual_manifest_deser["input_group"] - - expected_outgroup = expected_manifest["output_group"] - expected_osps = expected_outgroup["science_products"] - actual_outgroup = actual_manifest_deser["output_group"] - actual_osps = actual_outgroup["science_products"] - assert len(actual_osps) == len(expected_osps) - for actual_osp, expected_osp in zip( - sorted(actual_osps, key=PRODUCT_SORT_KEY), sorted(expected_osps, key=PRODUCT_SORT_KEY) - ): - assert actual_osp["type"] == expected_osp["type"] - assert actual_osp["filename"] == expected_osp["filename"] - assert sorted(actual_osp["ancillary_products"], key=PRODUCT_SORT_KEY) == sorted( - expected_osp["ancillary_products"], key=PRODUCT_SORT_KEY - ) - - # the ingestion_artifacts' filename depends on current time, so ignore it - expected_aps = expected_outgroup["ancillary_products"] - actual_aps = actual_outgroup["ancillary_products"] - assert len(expected_aps) == len(actual_aps) - for expected_ap, actual_ap in zip( - sorted(expected_aps, key=PRODUCT_SORT_KEY), sorted(actual_aps, key=PRODUCT_SORT_KEY) - ): - assert expected_ap["type"] == actual_ap["type"] - if expected_ap["type"] != AncillaryProductType.INGESTION_ARTIFACTS.value: - assert expected_ap["filename"] == actual_ap["filename"] - finally: - shutil.rmtree(ingest_path) + assert len(filenames_found) == 3 + + """ + The ingestion_artifacts tar should contain ONLY + the ingestion manifest and the additional metadata file + """ + addl_md = actual_manifest.parameters.additional_metadata + assert addl_md + ing_arties_tar = find_ingestion_artifacts_tar(ingest_path) + with tarfile.open(ing_arties_tar, "r") as tar: + members = tar.getmembers() + assert len(members) == 2 + for member in members: + assert member.name.endswith(CURATOR_MANIFEST_FILENAME) or member.name.endswith(addl_md.filename) + + # Check that manifest file exists on disk + with open(actual_manifest_file) as f: + actual_manifest_deser = json.load(f) + assert actual_manifest.to_dict() == actual_manifest_deser + + # Check against expected manifest + expected_manifest_file = find_example_manifest("full_curation_evla_image_manifest") + with open(expected_manifest_file) as f: + expected_manifest = json.load(f) + + assert actual_manifest_deser.keys() == expected_manifest.keys() + # ingestion_path depends on the ingest_path fixture, so ignore it + expected_params = expected_manifest["parameters"] + actual_params = actual_manifest_deser["parameters"] + assert actual_params["ngas_ingest"] == expected_params["ngas_ingest"] + assert expected_manifest["input_group"] == actual_manifest_deser["input_group"] + + expected_outgroup = expected_manifest["output_group"] + expected_osps = expected_outgroup["science_products"] + actual_outgroup = actual_manifest_deser["output_group"] + actual_osps = actual_outgroup["science_products"] + assert len(actual_osps) == len(expected_osps) + for actual_osp, expected_osp in zip( + sorted(actual_osps, key=PRODUCT_SORT_KEY), sorted(expected_osps, key=PRODUCT_SORT_KEY) + ): + assert actual_osp["type"] == expected_osp["type"] + assert actual_osp["filename"] == expected_osp["filename"] + assert sorted(actual_osp["ancillary_products"], key=PRODUCT_SORT_KEY) == sorted( + expected_osp["ancillary_products"], key=PRODUCT_SORT_KEY + ) + + # the ingestion_artifacts' filename depends on current time, so ignore it + expected_aps = expected_outgroup["ancillary_products"] + actual_aps = actual_outgroup["ancillary_products"] + assert len(expected_aps) == len(actual_aps) + for expected_ap, actual_ap in zip( + sorted(expected_aps, key=PRODUCT_SORT_KEY), sorted(actual_aps, key=PRODUCT_SORT_KEY) + ): + assert expected_ap["type"] == actual_ap["type"] + if expected_ap["type"] != AncillaryProductType.INGESTION_ARTIFACTS.value: + assert expected_ap["filename"] == actual_ap["filename"] def test_reingest_block_json_well_formed_with_targets(): diff --git a/apps/cli/executables/pexable/ingest_envoy/test/test_evla_cal_manifest.py b/apps/cli/executables/pexable/ingest_envoy/test/test_evla_cal_manifest.py index f588fc210520e4b46b03749ad7b2f3920ca454c1..2e1d5ac14b6b461fdfa93561a8a209b8b3e560ab 100644 --- a/apps/cli/executables/pexable/ingest_envoy/test/test_evla_cal_manifest.py +++ b/apps/cli/executables/pexable/ingest_envoy/test/test_evla_cal_manifest.py @@ -19,7 +19,6 @@ import json import logging -import shutil import sys import tarfile from pathlib import Path @@ -109,8 +108,6 @@ def test_filters_cal_input_files(ingest_path: Path): assert sp_out.filename in EVLA_CAL_INPUT_FILENAMES assert sp_out.filename not in UNWANTED - shutil.rmtree(ingest_path) - def test_writes_expected_evla_cal_output_files(ingest_path: Path): """ @@ -138,8 +135,6 @@ def test_writes_expected_evla_cal_output_files(ingest_path: Path): tars = [file for file in ingestion_files if file.name.endswith(TARFILE_EXT)] assert len(tars) >= 2 - shutil.rmtree(ingest_path) - @pytest.mark.skip("TODO") def test_writes_final_evla_cal_output(ingest_path: Path): @@ -269,8 +264,6 @@ def test_ingestion_artifacts_tar_filename_built_just_once(ingest_path: Path): ).build() mock.assert_called_once() - shutil.rmtree(ingest_path) - def test_ingestion_artifacts_tar_correct(ingest_path: Path): """ @@ -301,8 +294,6 @@ def test_ingestion_artifacts_tar_correct(ingest_path: Path): mf_path = Path(member.name) assert mf_path.name == MANIFEST_FILENAME - shutil.rmtree(ingest_path) - def test_evla_cal_manifest_matches_example(ingest_path: Path): """ @@ -355,8 +346,6 @@ def test_evla_cal_manifest_matches_example(ingest_path: Path): logger.info(f"{ap.filename}: {ap.type}, group with {ap.group_with}") assert len(actual_aps) == len(expected_aps) - shutil.rmtree(ingest_path) - def test_evla_cal_final_manifest_finds_init_weblog(ingest_path: Path): """ @@ -468,5 +457,3 @@ def test_evla_cal_final_manifest_matches_example(ingest_path: Path): if ap.filename == WEBLOG_FILENAME or ap.filename == INIT_WEBLOG_FILENAME: found_count += 1 assert found_count == 2 - - shutil.rmtree(ingest_path) diff --git a/apps/cli/executables/pexable/ingest_envoy/test/test_img_manifest_example.py b/apps/cli/executables/pexable/ingest_envoy/test/test_img_manifest_example.py index 1e986703361e6d74b1376fd4edc2d43dad68982d..9e5ac3959297be331de6aa6bea3a6525d0a34a2b 100644 --- a/apps/cli/executables/pexable/ingest_envoy/test/test_img_manifest_example.py +++ b/apps/cli/executables/pexable/ingest_envoy/test/test_img_manifest_example.py @@ -19,7 +19,6 @@ /lustre/aoc/cluster/pipeline/dsoc-dev/workspaces/staging/tmpx_ratuqh""" import json -import shutil import tarfile from pathlib import Path @@ -52,95 +51,92 @@ def test_image_manifest_creation(ingest_path: Path): """Test manifest creation for an image""" populate_fake_tmpx_ratuqh_ingest_path(ingest_path) - try: - manifest, manifest_file = build_tmpx_ratuqh_image_manifest(ingest_path) - - # Check that top-level groups match expectations - mf_json = manifest.to_dict() - keys = ["parameters", "input_group", "output_group"] - assert len(mf_json) == len(keys) - for key in keys: - assert key in mf_json.keys() - - """ - The ancillary_products section of the manifest we build - should match the one in the example. - """ - output_group = manifest.output_group - aps = output_group.ancillary_products - assert len(aps) == len(ANCILLARY_PRODUCTS) - - # Check contents of ancillary_products - weblog_candidates = [ap for ap in manifest.output_group.ancillary_products if ap.filename == WEBLOG_FILENAME] - assert len(weblog_candidates) == 1 - assert weblog_candidates[0].type == AncillaryProductType.PIPELINE_WEBLOG - - ingest_artifacts_tar = find_ingestion_artifacts_tar(ingest_path) - ingest_artifacts_candidates = [ - ap for ap in manifest.output_group.ancillary_products if ap.filename == ingest_artifacts_tar.name - ] - assert len(ingest_artifacts_candidates) == 1 - assert ingest_artifacts_candidates[0].type == AncillaryProductType.INGESTION_ARTIFACTS - - maybe_pips = [file for file in ingest_path.glob(AncillaryProductType.PIPELINE_ARTIFACTS.value + "*.tar")] - assert len(maybe_pips) == 1 - pip_artie = maybe_pips[0] - pipeline_artifacts_candidates = [ - ap for ap in manifest.output_group.ancillary_products if ap.filename == pip_artie.name - ] - assert len(pipeline_artifacts_candidates) == 1 - assert pipeline_artifacts_candidates[0].type == AncillaryProductType.PIPELINE_ARTIFACTS - - # Inspect the manifest's JSON dict - """ - The output_group section of the manifest we build - should match the one in the example: - * a "science_products" section containing two science products comprising "type", "filename", - and six ancillary products on each science product - * an "ancillary products" section comprising three ancillary products - """ - mf_json = manifest.to_dict() - og_json = mf_json[IngestionManifestKey.OUTPUT_GROUP.value] - assert len(og_json[IngestionManifestKey.SCIENCE_PRODUCTS.value]) == 2 - - for sp_json in og_json[IngestionManifestKey.SCIENCE_PRODUCTS.value]: - assert len(sp_json) == 3 - assert set(sp_json.keys()) == {IngestionManifestKey.ANCILLARY_PRODUCTS.value, "type", "filename"} - # and ancillary products belonging to the first science product... - sp_ap_jsons = sp_json[IngestionManifestKey.ANCILLARY_PRODUCTS.value] - assert len(sp_ap_jsons) == 6 - - # ... and ancillary products twisting in the wind all by themselves - ap_jsons = og_json[IngestionManifestKey.ANCILLARY_PRODUCTS.value] - assert len(ap_jsons) == 3 - - # there should be a weblog, the ingestion artifacts tar, and the pipeline artifacts tar - filenames_found = [] - for ap_json in ap_jsons: - filename = ap_json["filename"] - if filename.endswith(TARFILE_EXT): - if filename.startswith(AncillaryProductType.INGESTION_ARTIFACTS.value) or filename.startswith( - AncillaryProductType.PIPELINE_ARTIFACTS.value - ): - filenames_found.append(filename) - elif filename == WEBLOG_FILENAME: + manifest, manifest_file = build_tmpx_ratuqh_image_manifest(ingest_path) + + # Check that top-level groups match expectations + mf_json = manifest.to_dict() + keys = ["parameters", "input_group", "output_group"] + assert len(mf_json) == len(keys) + for key in keys: + assert key in mf_json.keys() + + """ + The ancillary_products section of the manifest we build + should match the one in the example. + """ + output_group = manifest.output_group + aps = output_group.ancillary_products + assert len(aps) == len(ANCILLARY_PRODUCTS) + + # Check contents of ancillary_products + weblog_candidates = [ap for ap in manifest.output_group.ancillary_products if ap.filename == WEBLOG_FILENAME] + assert len(weblog_candidates) == 1 + assert weblog_candidates[0].type == AncillaryProductType.PIPELINE_WEBLOG + + ingest_artifacts_tar = find_ingestion_artifacts_tar(ingest_path) + ingest_artifacts_candidates = [ + ap for ap in manifest.output_group.ancillary_products if ap.filename == ingest_artifacts_tar.name + ] + assert len(ingest_artifacts_candidates) == 1 + assert ingest_artifacts_candidates[0].type == AncillaryProductType.INGESTION_ARTIFACTS + + maybe_pips = [file for file in ingest_path.glob(AncillaryProductType.PIPELINE_ARTIFACTS.value + "*.tar")] + assert len(maybe_pips) == 1 + pip_artie = maybe_pips[0] + pipeline_artifacts_candidates = [ + ap for ap in manifest.output_group.ancillary_products if ap.filename == pip_artie.name + ] + assert len(pipeline_artifacts_candidates) == 1 + assert pipeline_artifacts_candidates[0].type == AncillaryProductType.PIPELINE_ARTIFACTS + + # Inspect the manifest's JSON dict + """ + The output_group section of the manifest we build + should match the one in the example: + * a "science_products" section containing two science products comprising "type", "filename", + and six ancillary products on each science product + * an "ancillary products" section comprising three ancillary products + """ + mf_json = manifest.to_dict() + og_json = mf_json[IngestionManifestKey.OUTPUT_GROUP.value] + assert len(og_json[IngestionManifestKey.SCIENCE_PRODUCTS.value]) == 2 + + for sp_json in og_json[IngestionManifestKey.SCIENCE_PRODUCTS.value]: + assert len(sp_json) == 3 + assert set(sp_json.keys()) == {IngestionManifestKey.ANCILLARY_PRODUCTS.value, "type", "filename"} + # and ancillary products belonging to the first science product... + sp_ap_jsons = sp_json[IngestionManifestKey.ANCILLARY_PRODUCTS.value] + assert len(sp_ap_jsons) == 6 + + # ... and ancillary products twisting in the wind all by themselves + ap_jsons = og_json[IngestionManifestKey.ANCILLARY_PRODUCTS.value] + assert len(ap_jsons) == 3 + + # there should be a weblog, the ingestion artifacts tar, and the pipeline artifacts tar + filenames_found = [] + for ap_json in ap_jsons: + filename = ap_json["filename"] + if filename.endswith(TARFILE_EXT): + if filename.startswith(AncillaryProductType.INGESTION_ARTIFACTS.value) or filename.startswith( + AncillaryProductType.PIPELINE_ARTIFACTS.value + ): filenames_found.append(filename) + elif filename == WEBLOG_FILENAME: + filenames_found.append(filename) + + assert len(filenames_found) == 3 - assert len(filenames_found) == 3 - - """ - The ingestion_artifacts tar should contain ONLY - the ingestion manifest and the additional metadata file - """ - addl_md = manifest.parameters.additional_metadata - ing_arties_tar = find_ingestion_artifacts_tar(ingest_path) - with tarfile.open(ing_arties_tar, "r") as tar: - members = tar.getmembers() - assert len(members) == 2 - for member in members: - assert member.name.endswith(MANIFEST_FILENAME) or member.name.endswith(addl_md.filename) - finally: - shutil.rmtree(ingest_path) + """ + The ingestion_artifacts tar should contain ONLY + the ingestion manifest and the additional metadata file + """ + addl_md = manifest.parameters.additional_metadata + ing_arties_tar = find_ingestion_artifacts_tar(ingest_path) + with tarfile.open(ing_arties_tar, "r") as tar: + members = tar.getmembers() + assert len(members) == 2 + for member in members: + assert member.name.endswith(MANIFEST_FILENAME) or member.name.endswith(addl_md.filename) def test_catches_invalid_sp_type(): diff --git a/apps/cli/executables/pexable/ingest_envoy/test/test_manifest_builder_entry_points.py b/apps/cli/executables/pexable/ingest_envoy/test/test_manifest_builder_entry_points.py index 41d0aa3cb2dd4813f10a7f46709c14a9db37809e..7b8d156bd64365e165829518d86a5efbdafc5262 100644 --- a/apps/cli/executables/pexable/ingest_envoy/test/test_manifest_builder_entry_points.py +++ b/apps/cli/executables/pexable/ingest_envoy/test/test_manifest_builder_entry_points.py @@ -20,18 +20,9 @@ import logging # pylint: disable=E0401, E0402, R1721, W0611, W0621 -import shutil import sys from pathlib import Path -from ingest_envoy.ingestion_manifest import ( - IngestionManifest, - IngestionManifestBuilder, - find_manifest, -) -from ingest_envoy.manifest_components import INGESTION_ARTIFACTS_NAME, TARFILE_EXT -from ingest_envoy.utilities import ScienceProductType, Telescope - from conftest import ( ANCILLARY_PRODUCTS, OUTPUT_GROUP_SCIENCE_PRODUCT_ANCILLARIES_K, @@ -40,6 +31,13 @@ from conftest import ( populate_fake_evla_cal_ingest_path, populate_fake_tmpx_ratuqh_ingest_path, ) +from ingest_envoy.ingestion_manifest import ( + IngestionManifest, + IngestionManifestBuilder, + find_manifest, +) +from ingest_envoy.manifest_components import INGESTION_ARTIFACTS_NAME, TARFILE_EXT +from ingest_envoy.utilities import ScienceProductType, Telescope logger = logging.getLogger(IngestionManifest.__name__) logger.setLevel(logging.INFO) @@ -96,8 +94,12 @@ def test_entry_point_for_image(ingest_path: Path): # we should be starting out with various image manifest input files # and CASA byproducts, a random file, and -not- the image ingestion # manifest yet to be created - expected_file_count_before = len(ANCILLARY_PRODUCTS) + len(OUTPUT_GROUP_SCIENCE_PRODUCT_ANCILLARIES_K) +\ - len(OUTPUT_GROUP_SCIENCE_PRODUCT_ANCILLARIES_X) + 2 + expected_file_count_before = ( + len(ANCILLARY_PRODUCTS) + + len(OUTPUT_GROUP_SCIENCE_PRODUCT_ANCILLARIES_K) + + len(OUTPUT_GROUP_SCIENCE_PRODUCT_ANCILLARIES_X) + + 2 + ) ingestion_files_before = [file for file in ingest_path.iterdir()] assert len(ingestion_files_before) == expected_file_count_before @@ -123,4 +125,3 @@ def test_entry_point_for_image(ingest_path: Path): print(f">>> {file.name} present after manifest build") assert len(ingestion_files_after) == expected_file_count_before + 2 - shutil.rmtree(ingest_path)