Skip to content
Snippets Groups Projects
Commit 5c7a47c0 authored by Janet Goldstein, committed by Charlotte Hausman
Browse files

WS-543: Addressed issues found in calibration ingestion testing:

* hyphens in manifest text are all changed to underscores
* InputScienceProduct is simplified
* weblog type is now `pipeline_weblog`
* manifest filename is just `ingestion_manifest.json`

TODO: additional tests to ensure manifest JSON is in the form of the 16B-069 example manifest
parent 0eb41c17
No related branches found
No related tags found
1 merge request: !365 WS-543: Addressed issues found in calibration ingestion testing
Showing
with 88 additions and 114 deletions
......@@ -14,11 +14,9 @@ import pendulum
from pendulum import DateTime
from ingest_envoy.manifest_components import (
MANIFEST_NAME_BASE,
MANIFEST_NAME_EXT,
ARTIFACT_NAME,
ARTIFACT_EXT,
WEBLOG,
TARFILE_EXT,
WEBLOG_FILENAME,
JSON,
IngestionManifestKey,
ManifestComponentIF,
......@@ -29,6 +27,7 @@ from ingest_envoy.manifest_components import (
AncillaryProduct,
OutputGroup,
SCIENCE_PRODUCT_PATTERN,
MANIFEST_FILENAME,
)
from ingest_envoy.utilities import (
ScienceProductType,
......@@ -152,7 +151,7 @@ class IngestionManifestBuilder:
# N.B. this is sufficient for most types of ingestion,
# but ALMA CALs will have multiple EB SPs, identified only by locator,
# and VLBAs have no input group at all.
sp_in = InputScienceProduct(sp_type=self.sp_type, locator=self.locator)
sp_in = InputScienceProduct(locator=self.locator)
return InputGroup([sp_in])
......@@ -188,7 +187,7 @@ class IngestionManifestBuilder:
"""
current_time = pendulum.now()
timestamp = format_timestamp(current_time)
return f"{ARTIFACT_NAME}{timestamp}{ARTIFACT_EXT}"
return f"{ARTIFACT_NAME}{timestamp}{TARFILE_EXT}"
def write_ingestion_artifacts_tar(self) -> Path:
"""
......@@ -220,11 +219,11 @@ class IngestionManifestBuilder:
ancillary_products = []
# if there's a weblog in here, grab it
maybe_weblogs = [file for file in self.files_found if file.name.endswith(WEBLOG)]
maybe_weblogs = [file for file in self.files_found if file.name == WEBLOG_FILENAME]
if len(maybe_weblogs) > 0:
weblog = maybe_weblogs[0]
weblog_ap = AncillaryProduct(
type=AncillaryProductType.PIPELINE_WEBLOG, filename=weblog.name
type=AncillaryProductType.PIPELINE_WEBLOG_TYPE, filename=weblog.name
)
ancillary_products.append(weblog_ap)
......@@ -281,7 +280,7 @@ class IngestionManifest(ManifestIF):
:return:
"""
output_path = self.staging_source_dir / build_manifest_filename()
output_path = self.staging_source_dir / MANIFEST_FILENAME
to_write = json.dumps(self.to_json(), indent=4)
with open(output_path, "w") as out:
......@@ -336,17 +335,6 @@ def format_timestamp(datetime: DateTime) -> str:
return datetime.format("YYYY_MM_DDThh_mm_ss.SSS")
def build_manifest_filename() -> str:
    """
    Compose a timestamped manifest filename.

    The name is MANIFEST_NAME_BASE, followed by the current time as rendered
    by format_timestamp(), followed by MANIFEST_NAME_EXT.

    :return: the filename
    """
    # stamp with "now" so successive manifests get different names
    return f"{MANIFEST_NAME_BASE}{format_timestamp(pendulum.now())}{MANIFEST_NAME_EXT}"
def find_manifest(ingestion_path: Path) -> Path:
"""
Find the ingestion manifest at this ingestion path.
......@@ -354,8 +342,7 @@ def find_manifest(ingestion_path: Path) -> Path:
:param ingestion_path: home of ingestion files
:return:
"""
for file in ingestion_path.iterdir():
if file.name.startswith(MANIFEST_NAME_BASE) and file.name.endswith(MANIFEST_NAME_EXT):
return file
for json_file in ingestion_path.glob(MANIFEST_FILENAME):
return json_file
raise FileNotFoundError(f"No ingestion manifest found at {ingestion_path}")
......@@ -8,11 +8,10 @@ from typing import Union, List, Dict
from ingest_envoy.utilities import ScienceProductType, Telescope, AncillaryProductType
MANIFEST_NAME_BASE = "ingestion_manifest_"
MANIFEST_NAME_EXT = ".json"
MANIFEST_FILENAME = "ingestion_manifest.json"
ARTIFACT_NAME = "ingestion_artifacts_"
ARTIFACT_EXT = ".tar"
WEBLOG = "weblog.tgz"
TARFILE_EXT = ".tar"
WEBLOG_FILENAME = "weblog.tgz"
SCIENCE_PRODUCT_PATTERN = re.compile("[a-zA-Z0-9._\\-+]*\\.tar")
JSON = Union[int, float, str, List["JSON"], Dict[str, "JSON"]]
......@@ -57,17 +56,14 @@ class ManifestComponentIF(abc.ABC):
class InputScienceProduct(ManifestComponentIF):
"""Represents a science product in the "input-group" section of the ingestion manifest."""
"""Simplest type of science product: has only a locator"""
def __init__(self, locator: str, sp_type: ScienceProductType = None):
self.type = sp_type
def __init__(self, locator: str):
self.locator = locator
def __eq__(self, other):
if isinstance(other, InputScienceProduct):
return other.type == self.type and other.locator == self.locator
return False
return other.locator == self.locator
def to_json(self) -> JSON:
"""
......@@ -75,8 +71,6 @@ class InputScienceProduct(ManifestComponentIF):
:return: dicty-me
"""
if self.type:
return {"type": str(self.type), "locator": self.locator}
return {"locator": self.locator}
......
......@@ -34,8 +34,8 @@ class AncillaryProductType(Enum):
INGESTION_ARTIFACTS = "ingestion_artifacts"
PIPELINE_ARTIFACTS = "pipeline_artifacts"
PIPELINE_WEBLOG = "pipeline_weblog"
LOG = "log_file"
PIPELINE_WEBLOG_TYPE = "pipeline_weblog"
LOG_TYPE = "log_file"
### Images ###
......
......@@ -7,9 +7,9 @@ from typing import List
import pytest
from ingest_envoy.manifest_components import WEBLOG
from ingest_envoy.manifest_components import WEBLOG_FILENAME
WANTED_FILENAMES = ["my_science_products.tar", WEBLOG]
WANTED_FILENAMES = ["my_science_products.tar", WEBLOG_FILENAME]
UNWANTED = ["ignore_me.fits", "just_a_lotta_nothing", "uninteresting_metadata.xml"]
......
{
"parameters": {
"reingest": "false",
"ngas_ingest": "false",
"calibrate": "false",
"ingestion_path": "/lustre/aoc/cluster/pipeline/dsoc-dev/workspaces/staging/cal_test6",
"telescope": "EVLA"
},
"input_group": {
"science_products": [
{
"locator": "uid://evla/execblock/48ba4c9d-d7c7-4a8f-9803-1115cd52459b"
}
]
},
"output_group": {
"science_products": [
{
"type": "calibration",
"filename": "16B-069_sb32814386_1_001.57685.66193635417.testdate.caltables.tar"
}
],
"ancillary_products": [
{
"type": "pipeline_weblog",
"filename": "weblog.tgz"
}
]
}
}
{
"parameters": {
"reingest": "false",
"ngas-ingest": "false",
"ngas_ingest": "false",
"calibrate": "false",
"ingestion_path": "/lustre/.."
},
"input-group": {
"input_group": {
"science_products": [
{
"type": "calibration",
......@@ -13,7 +13,7 @@
}
]
},
"output-group": {
"output_group": {
"science_products": [
{
"type": "image",
......
{
"parameters": {
"reingest": "false",
"ngas-ingest": "false",
"ngas_ingest": "false",
"calibrate": "false",
"ingestion_path": "/lustre/...../"
},
......
......@@ -5,7 +5,6 @@
import json
import logging
import re
import shutil
import sys
from pathlib import Path
......@@ -16,12 +15,9 @@ import pytest
from ingest_envoy.ingestion_manifest import (
IngestionManifestBuilder,
build_manifest_filename,
find_manifest,
)
from ingest_envoy.manifest_components import (
MANIFEST_NAME_BASE,
MANIFEST_NAME_EXT,
IngestionManifestKey,
ParamsKey,
InputScienceProduct,
......@@ -30,8 +26,9 @@ from ingest_envoy.manifest_components import (
OutputScienceProduct,
AncillaryProduct,
OutputGroup,
ARTIFACT_EXT,
TARFILE_EXT,
ARTIFACT_NAME,
WEBLOG_FILENAME,
)
from ingest_envoy.utilities import (
ScienceProductType,
......@@ -69,16 +66,16 @@ def test_manifest_is_complete(ingest_path: Path):
staging_source_dir=ingest_path,
)
sp1 = InputScienceProduct(
sp_type=ScienceProductType.EVLA_CAL,
locator=FAKE_LOCATOR,
)
sp1 = InputScienceProduct(locator=FAKE_LOCATOR)
ig_in = InputGroup(science_products=[sp1])
osp_in = OutputScienceProduct(
type=ScienceProductType.EVLA_CAL, filename="my_science_products.tar"
)
ap_in = AncillaryProduct(type=AncillaryProductType.PIPELINE_WEBLOG, filename="weblog.tgz")
ap_in = AncillaryProduct(
type=AncillaryProductType.PIPELINE_WEBLOG_TYPE,
filename=WEBLOG_FILENAME,
)
manifest, _ = IngestionManifestBuilder(
staging_source_dir=ingest_path,
......@@ -95,36 +92,13 @@ def test_manifest_is_complete(ingest_path: Path):
af_tar_candidates = [
file
for file in ingest_path.iterdir()
if file.name.startswith(ARTIFACT_NAME) and file.name.endswith(ARTIFACT_EXT)
if file.name.startswith(ARTIFACT_NAME) and file.name.endswith(TARFILE_EXT)
]
assert len(af_tar_candidates) == 1
shutil.rmtree(ingest_path)
def test_builds_expected_manifest_filename():
    """
    We expect the manifest to be named like
    ingestion_manifest_2019_07_30T13_03_00.936.json
    (note: no underscore between the day and the "T").

    :return:
    """
    filename = build_manifest_filename()
    assert filename.startswith(MANIFEST_NAME_BASE)
    assert filename.endswith(MANIFEST_NAME_EXT)

    # "ingestion", "manifest", year, month, day+"T"+hours, minutes, seconds.millis+ext
    filename_parts = filename.split("_")
    assert len(filename_parts) == 7

    # get just the timestamp: strip the verified prefix and suffix by position,
    # so an accidental match in the middle of the name can't be removed
    timestamp = filename[len(MANIFEST_NAME_BASE) : -len(MANIFEST_NAME_EXT)]

    # we should have gotten year, month, day, hours, minutes, seconds to exactly
    # 3 decimal places, and nothing else before or after
    assert re.fullmatch(r"\d{4}_\d{2}_\d{2}T\d{2}_\d{2}_\d{2}\.\d{3}", timestamp)
def test_filters_cal_input_files(ingest_path: Path):
"""
We'll be getting calibration/image/eb, etc. science products from a directory under
......@@ -152,8 +126,6 @@ def test_filters_cal_input_files(ingest_path: Path):
input_group = manifest.input_group
assert len(input_group.science_products) == 1
sp_in = input_group.science_products[0]
assert sp_in.type == ScienceProductType.EVLA_CAL
output_group = manifest.output_group
assert len(output_group.science_products) == 1
......@@ -161,7 +133,7 @@ def test_filters_cal_input_files(ingest_path: Path):
for product in output_group.ancillary_products:
if product.filename not in WANTED_FILENAMES:
assert product.filename.startswith(ARTIFACT_NAME) and product.filename.endswith(
ARTIFACT_EXT
TARFILE_EXT
)
assert product.filename not in UNWANTED
......@@ -195,13 +167,9 @@ def test_writes_expected_output_files(ingest_path: Path):
# at a minimum, we expect the manifest, the ingestion artifact, and the science products tar
assert len(ingestion_files) >= 3
mf_json = [
file
for file in ingestion_files
if file.name.startswith(MANIFEST_NAME_BASE) and file.name.endswith(MANIFEST_NAME_EXT)
][0]
mf_json = find_manifest(ingest_path)
assert mf_json
tars = [file for file in ingestion_files if file.name.endswith(".tar")]
tars = [file for file in ingestion_files if file.name.endswith(TARFILE_EXT)]
assert len(tars) >= 2
shutil.rmtree(ingest_path)
......@@ -259,11 +227,10 @@ def test_input_sp_well_formed():
locator = "uid://evla/calibration/vanilla_heath_bar_crunch_1a23e"
# single science product
sp_dict = {
"type": ScienceProductType.EVLA_CAL.value,
"locator": locator,
}
sp_in = InputScienceProduct(sp_type=ScienceProductType.EVLA_CAL.value, locator=locator)
sp_in = InputScienceProduct(locator=locator)
assert sp_in.to_json() == sp_dict
......@@ -273,15 +240,10 @@ def test_input_group_well_formed():
:return:
"""
sp1 = InputScienceProduct(
sp_type=ScienceProductType.EXEC_BLOCK.value,
locator="uid://evla/execblock/coffee_heath_bar_crunch_7a23f",
)
sp1 = InputScienceProduct(locator="uid://evla/execblock/coffee_heath_bar_crunch_7a23f")
sp1_json = sp1.to_json()
sp2 = InputScienceProduct(
sp_type=ScienceProductType.EVLA_CAL.value, locator="uid://evla/execblock/mint_oreo_omg_omg"
)
sp2 = InputScienceProduct(locator="uid://evla/execblock/mint_oreo_omg_omg")
sp2_json = sp2.to_json()
expected = {
......@@ -318,8 +280,8 @@ def test_ancillary_product_well_formed():
:return:
"""
ap1 = AncillaryProduct(type=AncillaryProductType.LOG, filename="without_feathers.tar")
expected = {"type": AncillaryProductType.LOG.value, "filename": "without_feathers.tar"}
ap1 = AncillaryProduct(type=AncillaryProductType.LOG_TYPE, filename="without_feathers.tar")
expected = {"type": AncillaryProductType.LOG_TYPE.value, "filename": "without_feathers.tar"}
actual = ap1.to_json()
assert actual == expected
......@@ -361,19 +323,15 @@ def test_input_group_properly_formatted():
:return:
"""
sp1 = InputScienceProduct(
sp_type=ScienceProductType.EXEC_BLOCK.value,
locator="uid://evla/execblock/coffee_heath_bar_crunch_7a23f",
)
sp1 = InputScienceProduct(locator="uid://evla/execblock/coffee_heath_bar_crunch_7a23f")
ingroup = InputGroup(science_products=[sp1])
ig_dict = json.loads(ingroup.to_json())
ig_text = json.dumps(ig_dict, indent=4)
expected = """
"input-group": {
"input_group": {
"science_products": [
{
"type": "calibration",
"locator": "uid://evla/execblock/coffee_heath_bar_crunch_7a23f"
}
]
......@@ -441,6 +399,16 @@ def test_builds_cal_manifest_as_expected(ingest_path: Path):
shutil.rmtree(ingest_path)
def test_manifest_filename_is_correct():
    """
    Calibration ingestion manifest should always be named simply "ingestion_manifest.json"

    :return:
    """
    # local imports: neither name is among this module's top-level imports
    import tempfile

    from ingest_envoy.manifest_components import MANIFEST_FILENAME

    # the name is fixed: no timestamp, no prefix/suffix assembly
    assert MANIFEST_FILENAME == "ingestion_manifest.json"

    # a file with exactly that name in the ingestion directory is what find_manifest() returns
    with tempfile.TemporaryDirectory() as tmp_dir:
        staging_dir = Path(tmp_dir)
        expected = staging_dir / MANIFEST_FILENAME
        expected.touch()
        assert find_manifest(staging_dir) == expected
def populate_ingest_path_for_manifest_evla_cal_example(ingestion_path: Path):
"""
Create fake input files to match EVLA CAL manifest example
......@@ -448,7 +416,7 @@ def populate_ingest_path_for_manifest_evla_cal_example(ingestion_path: Path):
:param ingestion_path:
:return:
"""
weblog_file = ingestion_path / "qrs.weblog.tgz"
weblog_file = ingestion_path / "weblog.tgz"
weblog_file.touch()
cal_file = ingestion_path / "XYZ-abc+TMN.O00.tar"
cal_file.touch()
......@@ -11,12 +11,11 @@ import pytest
from ingest_envoy.ingestion_manifest import (
IngestionManifest,
IngestionManifestBuilder,
find_manifest,
)
from ingest_envoy.manifest_components import (
MANIFEST_NAME_BASE,
MANIFEST_NAME_EXT,
ARTIFACT_NAME,
ARTIFACT_EXT,
TARFILE_EXT,
)
from ingest_envoy.utilities import ScienceProductType, Telescope
from .conftest import ingest_path, populate_fake_evla_cal_ingest_path
......@@ -48,19 +47,15 @@ def test_entry_point_for_evla_cal(ingest_path: Path):
ingestion_files = [file for file in ingest_path.iterdir()]
# there should be one ingestion manifest....
mf_jsons = [
file
for file in ingestion_files
if file.name.startswith(MANIFEST_NAME_BASE) and file.name.endswith(MANIFEST_NAME_EXT)
]
assert len(mf_jsons) == 1
manifest_file = find_manifest(ingest_path)
assert manifest_file
# ...and an artifacts tar, and the science products tar we started with
assert sp_tar in ingestion_files
artifact_tars = [
file
for file in ingestion_files
if file.name.startswith(ARTIFACT_NAME) and file.name.endswith(ARTIFACT_EXT)
if file.name.startswith(ARTIFACT_NAME) and file.name.endswith(TARFILE_EXT)
]
assert len(artifact_tars) == 1
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment