Skip to content
Snippets Groups Projects

Implemented manifest generation for EB full curation in ingest_envoy

Merged Sam Kagan requested to merge teach-ingest_envoy-full-curation into 2.8.2.1-DEVELOMENT
5 unresolved threads
@@ -24,13 +24,6 @@ import tarfile
from pathlib import Path
import pytest
from conftest import (
ADDITIONAL_METADATA_FILENAME,
ANCILLARY_PRODUCTS,
WEBLOG_ANCILLARY,
ingest_path,
populate_fake_tmpx_ratuqh_ingest_path,
)
from ingest_envoy.ingestion_manifest import IngestionManifestBuilder
from ingest_envoy.manifest_components import (
MANIFEST_FILENAME,
@@ -41,105 +34,83 @@ from ingest_envoy.manifest_components import (
from ingest_envoy.schema import AbstractTextFile
from ingest_envoy.utilities import AncillaryProductType, ScienceProductType, Telescope
from conftest import (
ADDITIONAL_METADATA_FILENAME,
ANCILLARY_PRODUCTS,
ingest_path,
populate_fake_tmpx_ratuqh_ingest_path,
)
# pylint: disable=E0401, E0402, W0621
# pylint: disable=R1721
def test_image_manifest_creation(ingest_path: Path):
"""Test manifest creation for an image"""
def test_manifest_picks_up_ing_artifact(ingest_path: Path):
"""
There's an ingestion artifacts tar in the ingest path,
and it should show up among the ancillary products.
:return:
"""
populate_fake_tmpx_ratuqh_ingest_path(ingest_path)
try:
manifest, manifest_file = build_tmpx_ratuqh_image_manifest(ingest_path)
# Check that top-level groups match expectations
mf_json = manifest.to_dict()
keys = ["parameters", "input_group", "output_group"]
assert len(mf_json) == len(keys)
for key in keys:
assert key in mf_json.keys()
"""
The ancillary_products section of the manifest we build
should match the one in the example.
"""
output_group = manifest.output_group
aps = output_group.ancillary_products
assert len(aps) == len(ANCILLARY_PRODUCTS)
# Check contents of ancillary_products
weblog_candidates = [ap for ap in manifest.output_group.ancillary_products if ap.filename == WEBLOG_FILENAME]
assert len(weblog_candidates) == 1
assert weblog_candidates[0].type == AncillaryProductType.PIPELINE_WEBLOG
ingest_artifacts_tar = find_ingestion_artifacts_tar(ingest_path)
ingest_artifacts_candidates = [
ap for ap in manifest.output_group.ancillary_products if ap.filename == ingest_artifacts_tar.name
]
assert len(ingest_artifacts_candidates) == 1
assert ingest_artifacts_candidates[0].type == AncillaryProductType.INGESTION_ARTIFACTS
maybe_pips = [file for file in ingest_path.glob(AncillaryProductType.PIPELINE_ARTIFACTS.value + "*.tar")]
assert len(maybe_pips) == 1
pip_artie = maybe_pips[0]
pipeline_artifacts_candidates = [
ap for ap in manifest.output_group.ancillary_products if ap.filename == pip_artie.name
]
assert len(pipeline_artifacts_candidates) == 1
assert pipeline_artifacts_candidates[0].type == AncillaryProductType.PIPELINE_ARTIFACTS
# Inspect the manifest's JSON dict
"""
The output_group section of the manifest we build
should match the one in the example:
* a "science_products" section containing two science products comprising "type", "filename",
and six ancillary products on each science product
* an "ancillary products" section comprising three ancillary products
"""
mf_json = manifest.to_dict()
og_json = mf_json[IngestionManifestKey.OUTPUT_GROUP.value]
assert len(og_json[IngestionManifestKey.SCIENCE_PRODUCTS.value]) == 2
for sp_json in og_json[IngestionManifestKey.SCIENCE_PRODUCTS.value]:
assert len(sp_json) == 3
assert set(sp_json.keys()) == {IngestionManifestKey.ANCILLARY_PRODUCTS.value, "type", "filename"}
# and ancillary products belonging to the first science product...
sp_ap_jsons = sp_json[IngestionManifestKey.ANCILLARY_PRODUCTS.value]
assert len(sp_ap_jsons) == 6
# ... and ancillary products twisting in the wind all by themselves
ap_jsons = og_json[IngestionManifestKey.ANCILLARY_PRODUCTS.value]
assert len(ap_jsons) == 3
# there should be a weblog, the ingestion artifacts tar, and the pipeline artifacts tar
filenames_found = []
for ap_json in ap_jsons:
filename = ap_json["filename"]
if filename.endswith(TARFILE_EXT):
if filename.startswith(AncillaryProductType.INGESTION_ARTIFACTS.value) or filename.startswith(
AncillaryProductType.PIPELINE_ARTIFACTS.value
):
filenames_found.append(filename)
elif filename == WEBLOG_FILENAME:
filenames_found.append(filename)
assert len(filenames_found) == 3
_, manifest_file = build_tmpx_ratuqh_image_manifest(ingest_path)
# make sure that tar really is there
candidates = [file for file in ingest_path.glob("ingestion_artifacts*.tar")]
assert len(candidates) > 0
ing_arties = candidates[0]
with open(manifest_file, "r") as infile:
manifest_contents = infile.read()
assert ing_arties.name in manifest_contents
shutil.rmtree(ingest_path)
def test_addl_metadata_not_at_bottom_of_manifest(ingest_path: Path):
    """
    Manifest should have only parameters, input group, and output group
    as its top-level sections.

    :param ingest_path: staging source dir
    :return:
    """
    populate_fake_tmpx_ratuqh_ingest_path(ingest_path)
    try:
        manifest, _ = build_tmpx_ratuqh_image_manifest(ingest_path)
        mf_json = manifest.to_dict()

        # Exactly these top-level sections, nothing more and nothing less
        expected_keys = {"parameters", "input_group", "output_group"}
        assert set(mf_json.keys()) == expected_keys
    finally:
        # Remove the populated ingest path even when an assertion fails
        shutil.rmtree(ingest_path)
def test_manifest_picks_up_pip_artifact(ingest_path: Path):
"""
There's a pipeline artifacts tar in the ingest path,
and it should show up among the ancillary products.
:return:
"""
populate_fake_tmpx_ratuqh_ingest_path(ingest_path)
manifest, _ = build_tmpx_ratuqh_image_manifest(ingest_path)
maybe_pips = [file for file in ingest_path.glob(AncillaryProductType.PIPELINE_ARTIFACTS.value + "*.tar")]
pip_artie = maybe_pips[0]
mf_json = manifest.to_dict()
og_json = mf_json[IngestionManifestKey.OUTPUT_GROUP.value]
"""
The ingestion_artifacts tar should contain ONLY
the ingestion manifest and the additional metadata file
"""
addl_md = manifest.parameters.additional_metadata
ing_arties_tar = find_ingestion_artifacts_tar(ingest_path)
with tarfile.open(ing_arties_tar, "r") as tar:
members = tar.getmembers()
assert len(members) == 2
for member in members:
assert member.name.endswith(MANIFEST_FILENAME) or member.name.endswith(addl_md.filename)
finally:
shutil.rmtree(ingest_path)
ap_jsons = og_json[IngestionManifestKey.ANCILLARY_PRODUCTS.value]
found = False
for ap_json in ap_jsons:
if ap_json["filename"] == pip_artie.name:
found = True
break
assert found
shutil.rmtree(ingest_path)
def test_catches_invalid_sp_type():
@@ -200,6 +171,128 @@ def test_catches_invalid_telescope():
)
def test_ancillary_products_rendered_correctly(ingest_path: Path):
    """
    The ancillary_products section of the manifest we build
    should match the one in the example.

    :param ingest_path: staging source dir
    :return:
    """
    populate_fake_tmpx_ratuqh_ingest_path(ingest_path)
    try:
        manifest, _ = build_tmpx_ratuqh_image_manifest(ingest_path)
        aps = manifest.output_group.ancillary_products
        assert len(aps) == len(ANCILLARY_PRODUCTS)

        ap_data = [{"filename": ap.filename, "json": ap.to_dict()} for ap in aps]

        # make sure all the ancillary products were created...
        filename_count = 0
        for product in ANCILLARY_PRODUCTS:
            # (It won't find the ingest artifact tar yet, because we didn't populate
            # the ingest path with it; it's produced during manifest creation.)
            # ...and that each one's JSON is well formed.
            for properties in ap_data:
                if properties["filename"] == product.filename:
                    filename_count += 1
                    # If there's a class `repr` in there rather than JSON-serializable text,
                    # dump will fail
                    json.dumps(properties["json"])

        # The ingestion artifacts tar is counted separately, once we know it exists
        ingest_artifacts_tar = find_ingestion_artifacts_tar(ingest_path)
        assert ingest_artifacts_tar
        filename_count += 1

        assert filename_count == len(ANCILLARY_PRODUCTS)
    finally:
        # Remove the populated ingest path even when an assertion fails
        shutil.rmtree(ingest_path)
def test_output_science_products_rendered_correctly(ingest_path: Path):
    """
    The output_group section of the manifest we build
    should match the one in the example:
    * a "science_products" section containing two science products comprising "type", "filename",
    and six ancillary products on each science product
    * an "ancillary products" section comprising three ancillary products

    :param ingest_path: staging source dir
    :return:
    """
    populate_fake_tmpx_ratuqh_ingest_path(ingest_path)
    try:
        manifest, _ = build_tmpx_ratuqh_image_manifest(ingest_path)
        mf_json = manifest.to_dict()
        og_json = mf_json[IngestionManifestKey.OUTPUT_GROUP.value]

        # there should be two science products, each shaped the same way...
        sp_jsons = og_json[IngestionManifestKey.SCIENCE_PRODUCTS.value]
        assert len(sp_jsons) == 2
        expected_sp_keys = {IngestionManifestKey.ANCILLARY_PRODUCTS.value, "type", "filename"}
        for sp_json in sp_jsons:
            assert set(sp_json.keys()) == expected_sp_keys
            # ...with ancillary products belonging to that science product...
            sp_ap_jsons = sp_json[IngestionManifestKey.ANCILLARY_PRODUCTS.value]
            assert len(sp_ap_jsons) == 6

        # ... and ancillary products twisting in the wind all by themselves
        ap_jsons = og_json[IngestionManifestKey.ANCILLARY_PRODUCTS.value]
        assert len(ap_jsons) == 3

        # there should be a weblog, the ingestion artifacts tar, and the pipeline artifacts tar
        artifact_prefixes = (
            AncillaryProductType.INGESTION_ARTIFACTS.value,
            AncillaryProductType.PIPELINE_ARTIFACTS.value,
        )
        filenames_found = []
        for ap_json in ap_jsons:
            filename = ap_json["filename"]
            is_artifacts_tar = filename.endswith(TARFILE_EXT) and filename.startswith(artifact_prefixes)
            if is_artifacts_tar or filename == WEBLOG_FILENAME:
                filenames_found.append(filename)
        assert len(filenames_found) == 3
    finally:
        # Remove the populated ingest path even when an assertion fails
        shutil.rmtree(ingest_path)
def test_ing_artifacts_tar_has_only_what_it_should(ingest_path: Path):
    """
    The ingestion_artifacts tar should contain ONLY
    the ingestion manifest and the additional metadata file

    :param ingest_path: staging source dir
    :return:
    """
    populate_fake_tmpx_ratuqh_ingest_path(ingest_path)
    try:
        manifest, _ = build_tmpx_ratuqh_image_manifest(ingest_path)
        addl_md = manifest.parameters.additional_metadata

        ing_arties_tar = find_ingestion_artifacts_tar(ingest_path)
        with tarfile.open(ing_arties_tar, "r") as tar:
            members = tar.getmembers()

        # Two members, each either the manifest or the additional metadata file
        assert len(members) == 2
        allowed_suffixes = (MANIFEST_FILENAME, addl_md.filename)
        for member in members:
            assert member.name.endswith(allowed_suffixes)
    finally:
        # Remove the populated ingest path even when an assertion fails
        shutil.rmtree(ingest_path)
# -----------------------------
# U T I L I T I E S
# -----------------------------
Loading