Skip to content
Snippets Groups Projects
Commit 926ef672 authored by Janet Goldstein's avatar Janet Goldstein
Browse files

WS-601: Rewrite/refactor ingestion manifest builder to pass disabled tests, part two

parent 668f9658
No related branches found
No related tags found
1 merge request!386WS-601: Rewrite/refactor ingestion manifest builder to pass disabled tests, part two
Pipeline #2367 passed
...@@ -7,7 +7,7 @@ import tarfile ...@@ -7,7 +7,7 @@ import tarfile
from pathlib import Path from pathlib import Path
# pylint: disable=E0401, R0903, R1721 # pylint: disable=E0401, R0903, R1721
from typing import Tuple, List from typing import Tuple
import pendulum import pendulum
from pendulum import DateTime from pendulum import DateTime
...@@ -15,7 +15,6 @@ from pendulum import DateTime ...@@ -15,7 +15,6 @@ from pendulum import DateTime
from ingest_envoy.manifest_components import ( from ingest_envoy.manifest_components import (
INGESTION_ARTIFACTS_NAME, INGESTION_ARTIFACTS_NAME,
TARFILE_EXT, TARFILE_EXT,
WEBLOG_FILENAME,
JSON, JSON,
IngestionManifestKey, IngestionManifestKey,
ManifestComponentIF, ManifestComponentIF,
...@@ -29,6 +28,7 @@ from ingest_envoy.manifest_components import ( ...@@ -29,6 +28,7 @@ from ingest_envoy.manifest_components import (
ParamsKey, ParamsKey,
) )
from ingest_envoy.schema import AbstractTextFile from ingest_envoy.schema import AbstractTextFile
from ingest_envoy.std_img_manifest_utils import ImageIngestionProductsFinder
from ingest_envoy.utilities import ( from ingest_envoy.utilities import (
ScienceProductType, ScienceProductType,
Telescope, Telescope,
...@@ -36,7 +36,6 @@ from ingest_envoy.utilities import ( ...@@ -36,7 +36,6 @@ from ingest_envoy.utilities import (
AncillaryProductType, AncillaryProductType,
find_output_tars, find_output_tars,
) )
from ingest_envoy.std_img_manifest_utils import ImageIngestionProductsFinder
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO) logger.setLevel(logging.INFO)
...@@ -188,11 +187,13 @@ class IngestionManifestBuilder: ...@@ -188,11 +187,13 @@ class IngestionManifestBuilder:
input_group=self._build_input_group(), input_group=self._build_input_group(),
output_group=self._build_evla_cal_output_group(), output_group=self._build_evla_cal_output_group(),
) )
manifest_file = manifest.write()
artifacts_tar = self.write_ingestion_artifacts_tar() artifacts_tar = self.write_ingestion_artifacts_tar()
manifest.output_group.ancillary_products.append( artifacts_ap = AncillaryProduct(
AncillaryProduct(AncillaryProductType.INGESTION_ARTIFACTS, filename=str(artifacts_tar)) AncillaryProductType.INGESTION_ARTIFACTS, filename=str(artifacts_tar)
) )
manifest_file = manifest.write()
manifest.output_group.ancillary_products = [artifacts_ap]
return manifest, manifest_file return manifest, manifest_file
......
""" Helper for building image ingestion manifest """
from pathlib import Path from pathlib import Path
from typing import List from typing import List
from ingest_envoy.manifest_components import OutputScienceProduct, AncillaryProduct, WEBLOG_FILENAME from ingest_envoy.manifest_components import OutputScienceProduct, AncillaryProduct, WEBLOG_FILENAME
from ingest_envoy.utilities import AncillaryProductType from ingest_envoy.utilities import AncillaryProductType
# pylint: disable=R1721
class ImageIngestionProductsFinder: class ImageIngestionProductsFinder:
"""Finds ancillary science products and other ancillary products needed for image ingestion""" """Finds ancillary science products and other ancillary products needed for image ingestion"""
...@@ -33,7 +34,7 @@ class ImageIngestionProductsFinder: ...@@ -33,7 +34,7 @@ class ImageIngestionProductsFinder:
if file.name.endswith(".fits") and "rms" not in file.name if file.name.endswith(".fits") and "rms" not in file.name
][0] ][0]
image_files = [ image_files = [
file for file in self.files_found if "rms" in file.name or file.name.endswith(".png") file for file in self.files_found if ("rms" in file.name or file.name.endswith(".png"))
] ]
sp_aps = [] sp_aps = []
...@@ -51,7 +52,6 @@ class ImageIngestionProductsFinder: ...@@ -51,7 +52,6 @@ class ImageIngestionProductsFinder:
def _find_other_ancillary_products(self) -> List[AncillaryProduct]: def _find_other_ancillary_products(self) -> List[AncillaryProduct]:
""" """
TODO
Find the "other" ancillary image products in the staging dir: there should be a weblog Find the "other" ancillary image products in the staging dir: there should be a weblog
and a pipeline artifacts tar. (The ingestion artifacts tar will be produced during the and a pipeline artifacts tar. (The ingestion artifacts tar will be produced during the
building of the manifest.) building of the manifest.)
...@@ -67,8 +67,7 @@ class ImageIngestionProductsFinder: ...@@ -67,8 +67,7 @@ class ImageIngestionProductsFinder:
type=AncillaryProductType.PIPELINE_WEBLOG_TYPE, filename=str(weblog) type=AncillaryProductType.PIPELINE_WEBLOG_TYPE, filename=str(weblog)
) )
) )
except Exception as exc: except ValueError as exc:
# TODO which exception will this be?
raise FileNotFoundError(f"No weblog found in {self.staging_source_dir}") from exc raise FileNotFoundError(f"No weblog found in {self.staging_source_dir}") from exc
try: try:
...@@ -83,8 +82,7 @@ class ImageIngestionProductsFinder: ...@@ -83,8 +82,7 @@ class ImageIngestionProductsFinder:
filename=str(pipeline_artifacts_tar), filename=str(pipeline_artifacts_tar),
) )
) )
except Exception as exc: except ValueError as exc:
# TODO which exception will this be?
raise FileNotFoundError( raise FileNotFoundError(
f"No pipeline artifacts found in {self.staging_source_dir}" f"No pipeline artifacts found in {self.staging_source_dir}"
) from exc ) from exc
...@@ -103,12 +101,30 @@ class ImageIngestionProductsFinder: ...@@ -103,12 +101,30 @@ class ImageIngestionProductsFinder:
if "image" in file.name: if "image" in file.name:
if file.name.endswith(".png"): if file.name.endswith(".png"):
return AncillaryProduct(type=AncillaryProductType.THUMBNAIL_IMG, filename=filename) return AncillaryProduct(type=AncillaryProductType.THUMBNAIL_IMG, filename=filename)
elif file.name.endswith(".fits"): if file.name.endswith(".fits") and "rms" in file.name:
if "rms" in file.name: return AncillaryProduct(
return AncillaryProduct( type=AncillaryProductType.QUICKLOOK_RMS_IMAGE, filename=filename
type=AncillaryProductType.QUICKLOOK_RMS_IMAGE, filename=filename )
) return AncillaryProduct(type=AncillaryProductType.QUICKLOOK_IMAGE, filename=filename)
else:
return AncillaryProduct( @staticmethod
type=AncillaryProductType.QUICKLOOK_IMAGE, filename=filename def is_casa_product(file: Path) -> bool:
) """
Was this file created by CASA? If so, and it's not the weblog,
we'll include it in the pipeline artifacts tar.
:param file: some file found in the ingestion staging dir
:return: whether it's a CASA byproduct
"""
if file.name.startswith("casa_"):
return True
if "_band" in file.name and file.name.endswith(".fits"):
return True
return file.name in [
"pipeline_aquareport.xml",
"unknown.auxproducts.tgz",
"unknown.pipeline_manifest.xml",
]
...@@ -9,9 +9,37 @@ import pytest ...@@ -9,9 +9,37 @@ import pytest
from ingest_envoy.manifest_components import WEBLOG_FILENAME from ingest_envoy.manifest_components import WEBLOG_FILENAME
WANTED_FILENAMES = ["my_science_products.tar", WEBLOG_FILENAME] EVLA_CAL_WANTED_FILENAMES = ["my_science_products.tar", WEBLOG_FILENAME]
UNWANTED = ["ignore_me.fits", "just_a_lotta_nothing", "uninteresting_metadata.xml"] UNWANTED = ["ignore_me.fits", "just_a_lotta_nothing", "uninteresting_metadata.xml"]
IMG_MANIFEST_INPUT_FILENAMES = [
# additional metadata
"image_metadata_2021_05_21_T10_17_19.180.json",
# quicklook image
"VLASS2.1.ql.T08t09.J055438-113000.10.2048.v1.I.iter1.image.pbcor.tt0.subim.fits",
# quicklook RMS image
"VLASS2.1.ql.T08t09.J055438-113000.10.2048.v1.I.iter1.image.pbcor.tt0.rms.subim.fits",
# thumbnail
"VLASS2.1.ql.T08t09.J055438_113000.10.2048.v1.I.iter1.image.pbcor.tt0.subim.png",
# weblog
WEBLOG_FILENAME,
# ingestion artifacts tar -- to be created as side effect of ingestion manifest creation
# pipeline artifacts tar
"VLASS2.1.ql.T08t09.J055438-113000.10.2048.v1.tar",
]
CASA_BYPRODUCTS = [
"unknown.pipeline_manifest.xml",
"unknown.aux_products.tgz",
"casa_commands.log",
"casa_pipescript.py",
]
# this file gets created during construction of image ingestion manifest
IMG_ARTIFACTS_FILENAME = "ingestion_artifacts_2021_05_21_T10_17_19.275.tar"
# just to make things interesting
RANDOM_TAR = "totally_random_tar.tar"
@pytest.fixture(scope="function") @pytest.fixture(scope="function")
def ingest_path(tmpdir: Path) -> Path: def ingest_path(tmpdir: Path) -> Path:
...@@ -52,7 +80,7 @@ def populate_fake_evla_cal_ingest_path(staging_dir: Path) -> List[Path]: ...@@ -52,7 +80,7 @@ def populate_fake_evla_cal_ingest_path(staging_dir: Path) -> List[Path]:
""" """
files = [] files = []
filenames = [filename for filename in WANTED_FILENAMES] filenames = [filename for filename in EVLA_CAL_WANTED_FILENAMES]
for filename in UNWANTED: for filename in UNWANTED:
filenames.append(filename) filenames.append(filename)
...@@ -62,3 +90,25 @@ def populate_fake_evla_cal_ingest_path(staging_dir: Path) -> List[Path]: ...@@ -62,3 +90,25 @@ def populate_fake_evla_cal_ingest_path(staging_dir: Path) -> List[Path]:
files.append(path) files.append(path)
return files return files
def populate_fake_image_ingest_path(staging_dir: Path) -> List[Path]:
"""
Create a directory containing fake image products, plus other stuff
that we -don't- want to ingest.
:param staging_dir: our temporary dir
:return:
"""
for filename in IMG_MANIFEST_INPUT_FILENAMES:
file = staging_dir / filename
file.touch()
for filename in CASA_BYPRODUCTS:
file = staging_dir / filename
file.touch()
file = staging_dir / RANDOM_TAR
file.touch()
return [file for file in staging_dir.iterdir()]
...@@ -12,8 +12,8 @@ from ingest_envoy.ingestion_manifest import IngestionManifestBuilder ...@@ -12,8 +12,8 @@ from ingest_envoy.ingestion_manifest import IngestionManifestBuilder
from ingest_envoy.schema import AbstractTextFile from ingest_envoy.schema import AbstractTextFile
from ingest_envoy.utilities import Telescope, AncillaryProductType, ScienceProductType from ingest_envoy.utilities import Telescope, AncillaryProductType, ScienceProductType
# NOT unused. IJ is dumb. KEEP THIS. # ingest_path is NOT unused. IJ is dumb.
from .conftest import ingest_path from .conftest import ingest_path, populate_fake_image_ingest_path
from ingest_envoy.manifest_components import ( from ingest_envoy.manifest_components import (
WEBLOG_FILENAME, WEBLOG_FILENAME,
...@@ -26,30 +26,6 @@ from ingest_envoy.manifest_components import ( ...@@ -26,30 +26,6 @@ from ingest_envoy.manifest_components import (
InputScienceProduct, InputScienceProduct,
) )
IMG_MANIFEST_FILENAMES = [
# additional metadata
"image_metadata_2021_05_21_T10_17_19.180.json",
# quicklook image
"VLASS2.1.ql.T08t09.J055438-113000.10.2048.v1.I.iter1.image.pbcor.tt0.subim.fits",
# quicklook RMS image
"VLASS2.1.ql.T08t09.J055438-113000.10.2048.v1.I.iter1.image.pbcor.tt0.rms.subim.fits",
# thumbnail
"VLASS2.1.ql.T08t09.J055438_113000.10.2048.v1.I.iter1.image.pbcor.tt0.subim.png",
# weblog
WEBLOG_FILENAME,
# ingestion artifacts tar
"ingestion_artifacts_2021_05_21_T10_17_19.275.tar",
# pipeline artifacts tar
"VLASS2.1.ql.T08t09.J055438-113000.10.2048.v1.tar",
]
OTHER_FILENAMES = [
"unknown.pipeline_manifest.xml",
"unknown.aux_products.tgz",
"casa_commands.log",
"casa_pipescript.py",
"totally_random_tar.tar",
]
def test_parameters_json_well_formed(): def test_parameters_json_well_formed():
""" """
...@@ -143,7 +119,7 @@ def test_creates_expected_manifest(ingest_path: Path): ...@@ -143,7 +119,7 @@ def test_creates_expected_manifest(ingest_path: Path):
""" """
# fill the ingestion path with fake files # fill the ingestion path with fake files
populate_fake_ingest_path(ingest_path) populate_fake_image_ingest_path(ingest_path)
locator = "uid://evla/calibration/3dfa528b-9870-46c9-a200-131dbac701cc" locator = "uid://evla/calibration/3dfa528b-9870-46c9-a200-131dbac701cc"
addl_md = AbstractTextFile(filename="image_metadata_2021_05_21_T10_17_19.180.json", content="") addl_md = AbstractTextFile(filename="image_metadata_2021_05_21_T10_17_19.180.json", content="")
...@@ -325,22 +301,3 @@ def build_output_group() -> OutputGroup: ...@@ -325,22 +301,3 @@ def build_output_group() -> OutputGroup:
ap_list = other_aps ap_list = other_aps
return OutputGroup(science_products=[osp], ancillary_products=ap_list) return OutputGroup(science_products=[osp], ancillary_products=ap_list)
def populate_fake_ingest_path(staging_dir: Path) -> List[Path]:
"""
Create a directory containing fake image products, plus other stuff
that we -don't- want to ingest.
:param staging_dir: our temporary dir
:return:
"""
for filename in IMG_MANIFEST_FILENAMES:
# ingestion artifacts tar is produced during manifest creation
if not filename.startswith("ingestion_artifacts"):
file = staging_dir / filename
file.touch()
for filename in OTHER_FILENAMES:
file = staging_dir / filename
file.touch()
return [file for file in staging_dir.iterdir()]
...@@ -37,7 +37,7 @@ from ingest_envoy.utilities import ( ...@@ -37,7 +37,7 @@ from ingest_envoy.utilities import (
from .conftest import ( from .conftest import (
ingest_path, ingest_path,
populate_fake_evla_cal_ingest_path, populate_fake_evla_cal_ingest_path,
WANTED_FILENAMES, EVLA_CAL_WANTED_FILENAMES,
UNWANTED, UNWANTED,
find_example_manifest, find_example_manifest,
) )
...@@ -63,7 +63,7 @@ def test_filters_cal_input_files(ingest_path: Path): ...@@ -63,7 +63,7 @@ def test_filters_cal_input_files(ingest_path: Path):
populate_fake_evla_cal_ingest_path(ingest_path) populate_fake_evla_cal_ingest_path(ingest_path)
locator = "uid://evla/calibration/twinkle-twinkle-little-quasar" locator = "uid://evla/calibration/twinkle-twinkle-little-quasar"
manifest, _ = IngestionManifestBuilder( manifest, _ = IngestionManifestBuilder(
telescope=Telescope.EVLA, telescope=Telescope.EVLA.value,
staging_source_dir=ingest_path, staging_source_dir=ingest_path,
sp_type=ScienceProductType.EVLA_CAL.value, sp_type=ScienceProductType.EVLA_CAL.value,
locator=locator, locator=locator,
...@@ -82,7 +82,7 @@ def test_filters_cal_input_files(ingest_path: Path): ...@@ -82,7 +82,7 @@ def test_filters_cal_input_files(ingest_path: Path):
assert len(output_group.science_products) == 1 assert len(output_group.science_products) == 1
assert len(output_group.ancillary_products) == 1 assert len(output_group.ancillary_products) == 1
for product in output_group.ancillary_products: for product in output_group.ancillary_products:
if product.filename not in WANTED_FILENAMES: if product.filename not in EVLA_CAL_WANTED_FILENAMES:
assert product.filename.startswith( assert product.filename.startswith(
INGESTION_ARTIFACTS_NAME INGESTION_ARTIFACTS_NAME
) and product.filename.endswith(TARFILE_EXT) ) and product.filename.endswith(TARFILE_EXT)
...@@ -91,7 +91,7 @@ def test_filters_cal_input_files(ingest_path: Path): ...@@ -91,7 +91,7 @@ def test_filters_cal_input_files(ingest_path: Path):
sp_out = output_group.science_products[0] sp_out = output_group.science_products[0]
assert sp_out.type == ScienceProductType.EVLA_CAL assert sp_out.type == ScienceProductType.EVLA_CAL
assert sp_out.filename in WANTED_FILENAMES assert sp_out.filename in EVLA_CAL_WANTED_FILENAMES
assert sp_out.filename not in UNWANTED assert sp_out.filename not in UNWANTED
shutil.rmtree(ingest_path) shutil.rmtree(ingest_path)
...@@ -107,7 +107,7 @@ def test_writes_expected_output_files(ingest_path: Path): ...@@ -107,7 +107,7 @@ def test_writes_expected_output_files(ingest_path: Path):
""" """
populate_fake_evla_cal_ingest_path(ingest_path) populate_fake_evla_cal_ingest_path(ingest_path)
manifest_file, manifest = IngestionManifestBuilder( manifest_file, manifest = IngestionManifestBuilder(
telescope=Telescope.EVLA, telescope=Telescope.EVLA.value,
staging_source_dir=ingest_path, staging_source_dir=ingest_path,
locator="uid://evla/calibration/fee-fi-fo-fum-acdf23", locator="uid://evla/calibration/fee-fi-fo-fum-acdf23",
sp_type=ScienceProductType.EVLA_CAL.value, sp_type=ScienceProductType.EVLA_CAL.value,
...@@ -243,8 +243,8 @@ def test_evla_cal_manifest_matches_example(ingest_path: Path): ...@@ -243,8 +243,8 @@ def test_evla_cal_manifest_matches_example(ingest_path: Path):
builder = IngestionManifestBuilder( builder = IngestionManifestBuilder(
staging_source_dir=ingest_path, staging_source_dir=ingest_path,
telescope=Telescope.EVLA, telescope=Telescope.EVLA.value,
sp_type=ScienceProductType.EVLA_CAL, sp_type=ScienceProductType.EVLA_CAL.value,
locator="uid://evla/execblock/48ba4c9d-d7c7-4a8f-9803-1115cd52459b", locator="uid://evla/execblock/48ba4c9d-d7c7-4a8f-9803-1115cd52459b",
) )
_, manifest_file = builder.build() _, manifest_file = builder.build()
......
...@@ -6,8 +6,6 @@ from pathlib import Path ...@@ -6,8 +6,6 @@ from pathlib import Path
# pylint: disable=E0401, E0402, R1721, W0611, W0621 # pylint: disable=E0401, E0402, R1721, W0611, W0621
import pytest
from ingest_envoy.ingestion_manifest import ( from ingest_envoy.ingestion_manifest import (
IngestionManifest, IngestionManifest,
IngestionManifestBuilder, IngestionManifestBuilder,
...@@ -18,14 +16,19 @@ from ingest_envoy.manifest_components import ( ...@@ -18,14 +16,19 @@ from ingest_envoy.manifest_components import (
TARFILE_EXT, TARFILE_EXT,
) )
from ingest_envoy.utilities import ScienceProductType, Telescope from ingest_envoy.utilities import ScienceProductType, Telescope
from .conftest import ingest_path, populate_fake_evla_cal_ingest_path from .conftest import (
ingest_path,
populate_fake_evla_cal_ingest_path,
populate_fake_image_ingest_path,
IMG_MANIFEST_INPUT_FILENAMES,
CASA_BYPRODUCTS,
)
logger = logging.getLogger(IngestionManifest.__name__) logger = logging.getLogger(IngestionManifest.__name__)
logger.setLevel(logging.INFO) logger.setLevel(logging.INFO)
logger.addHandler(logging.StreamHandler(sys.stdout)) logger.addHandler(logging.StreamHandler(sys.stdout))
@pytest.mark.skip("TODO: broken temporarily, pending fix to output group creation")
def test_entry_point_for_evla_cal(ingest_path: Path): def test_entry_point_for_evla_cal(ingest_path: Path):
""" """
Confirm that the ingestion launcher entrypoint kicks off production of ingestion manifest Confirm that the ingestion launcher entrypoint kicks off production of ingestion manifest
...@@ -39,7 +42,7 @@ def test_entry_point_for_evla_cal(ingest_path: Path): ...@@ -39,7 +42,7 @@ def test_entry_point_for_evla_cal(ingest_path: Path):
assert sp_tar assert sp_tar
builder = IngestionManifestBuilder( builder = IngestionManifestBuilder(
telescope=Telescope.EVLA, telescope=Telescope.EVLA.value,
locator=locator, locator=locator,
sp_type=ScienceProductType.EVLA_CAL.value, sp_type=ScienceProductType.EVLA_CAL.value,
staging_source_dir=ingest_path, staging_source_dir=ingest_path,
...@@ -61,20 +64,36 @@ def test_entry_point_for_evla_cal(ingest_path: Path): ...@@ -61,20 +64,36 @@ def test_entry_point_for_evla_cal(ingest_path: Path):
assert len(artifact_tars) == 1 assert len(artifact_tars) == 1
@pytest.mark.skip("TODO: test_builds_image_manifest") def test_entry_point_for_image(ingest_path: Path):
def test_builds_image_manifest(ingest_path: Path):
""" """
TODO WS-600 Confirm that the ingestion launcher entrypoint kicks off production
of standard imaging manifest
:param ingest_path: staging directory
:return: :return:
""" """
raise NotImplementedError
locator = "uid://evla/calibration/mmm-NY-style-pizza-Giovanni-ABQ-12345"
populate_fake_image_ingest_path(ingest_path)
@pytest.mark.skip("TODO: test_entry_point_for_image") # we should be starting out with various image manifest input files
def test_entry_point_for_image(ingest_path: Path): # and CASA byproducts, a random file, and -not- the image ingestion
""" # manifest yet to be created
TODO expected_file_count_before = len(IMG_MANIFEST_INPUT_FILENAMES) + len(CASA_BYPRODUCTS) + 1
:return: ingestion_files_before = [file for file in ingest_path.iterdir()]
""" assert len(ingestion_files_before) == expected_file_count_before
raise NotImplementedError
IngestionManifestBuilder(
telescope=Telescope.EVLA.value,
locator=locator,
sp_type=ScienceProductType.IMAGE.value,
staging_source_dir=ingest_path,
).build()
# there should be one ingestion manifest....
manifest_file = find_manifest(ingest_path)
assert manifest_file
# ...and everything we started with, plus a new ingestion artifacts tar
ingestion_files_after = [file for file in ingest_path.iterdir()]
assert len(ingestion_files_after) == expected_file_count_before + 2
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment