Skip to content
Snippets Groups Projects
Commit 8464972c authored by Janet Goldstein, committed by Daniel Lyons
Browse files

WS-507: NOT READY FOR PRIME TIME; committing so others can take over.

parent 1739141e
No related branches found
No related tags found
1 merge request: !337 ingestion manifest creation for EVLA CAL ingestion only
This commit is part of merge request !337. Comments created here will be created in the context of that merge request.
......@@ -19,6 +19,7 @@ class Parameters:
collection_metadata: str,
reingest: bool = False,
ngas_ingest: bool = False,
calibrate: bool = False,
):
self.telescope = telescope
self.ingestion_path = ingestion_path
......@@ -26,6 +27,7 @@ class Parameters:
self.collection_metadata = collection_metadata
self.reingest = reingest
self.ngas_ingest = ngas_ingest
self.calibrate = calibrate
def __repr__(self):
    """Show every instance attribute as the ``repr`` of a dict, for debugging."""
    attributes = vars(self)
    return repr(attributes)
......
"""Build an ingestion manifest file"""
# TODO: for a guide to implementation, see ingestion package in archive-metaproject
import json
import logging
import shutil
......@@ -37,7 +40,7 @@ class IngestionManifestWriter:
self.manifest_filename, self.artifact_filename = self.build_filenames()
def build(self) -> Path:
def write(self) -> Tuple[Path, List[Path]]:
"""
Write the ingestion manifest indicated by the parameters.
......@@ -53,33 +56,27 @@ class IngestionManifestWriter:
with open(staging_manifest, "w") as out:
out.write(manifest_content)
# # Open up permissions so we can delete the manifest file later.
# rw_mode = 0o666
# staging_manifest.chmod(rw_mode)
# Get all the files we'll need....
ingestion_files = self.find_ingestion_files()
ingestion_files = self.find_ingestion_filenames()
# there should be at least one file, the manifest
if len(ingestion_files) < 1:
raise ValueError(f"no ingestion files nor manifest found at {ingestion_location}")
# ....and put them in both places, with rw permissions
files_written = []
for filename in ingestion_files:
staging_dir_copy = ingestion_location / filename
working_dir_copy = self.working_dir / filename
working_dir_copy = Path(self.working_dir) / filename
shutil.copy(str(staging_manifest), str(working_dir_copy))
# Rename the manifest to the shared name decided on for ingestion invocation.
# Path() cast "shouldn't" be necessary, but if we don't do it,
# file is a LocalPath and we can't create the symlink
generalized = Path(self.working_dir / "ingestion-manifest.json")
generalized.symlink_to(working_dir_copy, target_is_directory=False)
files_written.append(working_dir_copy)
# Now that all the loose files are copied, create the ingestion artifacts tar
self.write_ingestion_artifact_tar(ingestion_files)
# TODO START HERE: not yet implemented; will throw exception
files_written.append(
self.write_ingestion_artifact_tar(Path(ingestion_location), files_written)
)
# again: return a Path explicitly, for a LocalPath won't work
return Path(staging_manifest)
return Path(staging_manifest), files_written
@staticmethod
def format_timestamp(start_time: DateTime) -> str:
......@@ -91,7 +88,7 @@ class IngestionManifestWriter:
2021_07_01'T'13_49_17.237
:param start_time: current pendulum timestamp
:return:
:return: timestamp suitable for ingestion manifest filename
"""
time_str = str(start_time)
......@@ -122,7 +119,7 @@ class IngestionManifestWriter:
artifact_filename = f"{ARTIFACT_NAME}{timestamp}{ARTIFACT_EXT}"
return manifest_filename, artifact_filename
def find_ingestion_filenames(self) -> List[Path]:
    """
    Gather the names of the files required for ingestion.

    The additional metadata and the collection metadata, when supplied, are
    parsed as JSON and the ``filename`` entry of each is collected; the name
    of the manifest itself is always added last.

    NOTE(review): the entries are returned exactly as found in the metadata
    (presumably plain strings rather than Path objects) -- confirm against
    callers before relying on the annotated element type.

    :return: the unique filenames, in the order they were gathered
    :raises KeyError: if supplied metadata has no ``filename`` entry
    :raises ValueError: if supplied metadata is not valid JSON
    """
    # One fresh list: the previous `coll_files = aux_files = []` bound both
    # names to the same object, so merging them duplicated every entry.
    filenames = []

    params = self.parameters
    for metadata in (params.additional_metadata, params.collection_metadata):
        if metadata is not None:
            filenames.append(json.loads(metadata)["filename"])

    # be sure to add the manifest itself
    filenames.append(self.manifest_filename)

    # dict.fromkeys() drops duplicates but, unlike set(), keeps the order stable
    return list(dict.fromkeys(filenames))
def write_ingestion_artifact_tar(
    self, ingestion_location: Path, ingestion_files: List[Path]
) -> Path:
    """
    Take the list of files and build a tar for inclusion into the archive.
    This happens in the staging area for ingestion.

    The tar is named ``self.artifact_filename`` and is created directly in
    ``ingestion_location``.

    NOTE(review): each file is stored under its bare file name (flat layout,
    uncompressed tar) -- confirm this is the layout ingestion expects.

    :param ingestion_location: staging directory in which to create the tar
    :param ingestion_files: all the files needed for ingestion
    :return: a .tar archive of the ingestion artifacts
    :raises FileNotFoundError: if one of the listed files does not exist
    """
    # local import: only this method needs tarfile
    import tarfile

    artifact_path = Path(ingestion_location) / self.artifact_filename
    with tarfile.open(str(artifact_path), "w") as artifact_tar:
        for ingestion_file in ingestion_files:
            # Path() cast: callers may hand us strings or LocalPaths
            member = Path(ingestion_file)
            artifact_tar.add(str(member), arcname=member.name)

    return artifact_path
......@@ -15,6 +15,22 @@ class Telescope(Enum):
GBT = 5
NONE = 6
def __str__(self):
    """Render the telescope as its bare member name, e.g. ``GBT``."""
    member_name = self.name
    return member_name
@staticmethod
def from_str(ts_name: str) -> "Telescope":
    """
    Convert a telescope name string to a Telescope.

    :param ts_name: exact (case-sensitive) name of a Telescope member, e.g. "GBT"
    :return: the Telescope member with that name
    :raises ValueError: if no Telescope member has that name
    """
    try:
        # Enum classes support direct lookup by member name
        return Telescope[ts_name]
    except (KeyError, TypeError):
        # keep the ValueError callers already expect; the lookup error adds nothing
        raise ValueError(f"telescope '{ts_name}' not found") from None
class IngestionType(Enum):
"""Types of ingestion we'll have to do"""
......
......@@ -22,7 +22,6 @@ setup(
url="TBD",
license="GPL",
install_requires=requires,
tests_require=["pytest", "astropy", "pendulum"],
keywords=[],
packages=find_packages(),
classifiers=["Programming Language :: Python :: 3.8"],
......
......@@ -3,13 +3,13 @@
"reingest": "false",
"ngas-ingest": "false",
"calibrate": "false",
"ingestion_path": "/lustre/...../"
"ingestion_path": "/home/mchammer/evla/parallel-prod"
},
"input-group": {
"science_products": [
{
"type": "execution-block",
"locator": "......"
"locator": "uid://evla/calibration/long-freakin-uuid-22"
}
]
},
......@@ -17,7 +17,7 @@
"science_products": [
{
"type": "calibration",
"filename": "19A-321_2019......tar"
"filename": "19A-321_2019_more_stuff.tar"
}
],
"ancillary_products": [
......
......@@ -4,6 +4,7 @@ import logging
import re
import sys
from pathlib import Path
from typing import Dict
import pytest
......@@ -66,7 +67,8 @@ def test_generates_correct_filename(tmpdir):
parameters = Parameters(Telescope.EVLA, Path(ingest_path), None, None)
manifest = IngestionManifest(parameters)
manifest_file = IngestionManifestWriter(manifest, working_dir).build()
# TODO: test fails now due to unimplemented method called in write()
manifest_file, ingestion_files = IngestionManifestWriter(manifest, working_dir).write()
filename = manifest_file.name
assert filename.startswith(MANIFEST_NAME_BASE)
assert filename.endswith(MANIFEST_NAME_EXT)
......@@ -81,6 +83,89 @@ def test_generates_correct_filename(tmpdir):
assert re.match(r"\d{4}_\d{2}_\d{2}'T'\d{2}_\d{2}_\d{2}\.\d{0,3}", timestamp)
# @pytest.mark.skip("TODO: test_builds_evla_cal_manifest")
def test_builds_evla_cal_manifest(tmpdir):
    """
    Check that an EVLA calibration ingestion manifest is assembled from JSON
    input-group and output-group metadata.

    :return:
    """
    staging_dir = tmpdir / "ingestion"
    staging_dir.mkdir()
    scratch_dir = tmpdir / "working"
    scratch_dir.mkdir()

    # metadata is handed to Parameters as JSON text, not as dicts
    execution_block = {
        "type": "execution-block",
        "locator": "uid://evla/calibration/long-freakin-uuid-22",
    }
    input_group = json.dumps({"science_products": [execution_block]})

    calibration = {"type": "calibration", "filename": "19A-321_2019_more_stuff.tar"}
    weblog = {"type": "weblog", "filename": "weblog.tgz"}
    output_group = json.dumps(
        {
            "output-group": {
                "science_products": [calibration],
                "ancillary_products": [weblog],
            }
        }
    )

    manifest = IngestionManifest(
        Parameters(
            telescope=Telescope.EVLA,
            ingestion_path=str(staging_dir),
            additional_metadata=input_group,
            collection_metadata=output_group,
        )
    )

    assert len(manifest.input_group.science_products) == 1
    assert manifest.input_group.science_products[0] == json.loads(input_group)

    assert len(manifest.output_group.ancillary_products) == 1
    assert manifest.output_group.ancillary_products == [json.loads(output_group)]
def test_writes_evla_cal_manifest(tmpdir):
    """
    Do CAL ingestion manifest and archive get written as expected?

    Builds the writer's parameters from the example EVLA calibration manifest
    and checks that the manifest file reported by the writer exists.

    :return:
    """
    ingest_path = tmpdir / "ingestion"
    ingest_path.mkdir()
    working_dir = tmpdir / "working"
    working_dir.mkdir()

    example_manifest = find_example_manifest("evla_calibration")
    # parse straight from the file instead of concatenating its lines first
    with open(example_manifest, "r") as infile:
        mf_json = json.load(infile)

    og_in = mf_json["output-group"]
    sp_in = og_in["science_products"][0]
    ap_in = og_in["ancillary_products"][0]
    ip_in = mf_json["parameters"]["ingestion_path"]
    assert ip_in == "/home/mchammer/evla/parallel-prod"

    # the example's ingestion path is only sanity-checked above; the test
    # itself writes under tmpdir
    params = Parameters(
        telescope=Telescope.EVLA,
        ingestion_path=str(ingest_path),
        collection_metadata=json.dumps(sp_in),
        additional_metadata=json.dumps(ap_in),
    )
    manifest = IngestionManifest(params)

    # TODO: TEST FAILS because of a not-yet-implemented method in writer class
    manifest_file, _ = IngestionManifestWriter(manifest, working_dir).write()
    assert manifest_file.exists()
def test_builds_evla_sdm_manifest(tmpdir):
"""
Have we built a well-formed EVLA SDM ingestion manifest?
......@@ -105,7 +190,7 @@ def test_builds_evla_sdm_manifest(tmpdir):
"filename": "ingestion_artifacts_2021_06_03_T15_52_35.031.tar",
}
)
# no reingest, ngas ingest, additional metadata nor collection metadata
# no reingest, ngas ingest, calibration
parameters = Parameters(
telescope=Telescope.EVLA,
ingestion_path=str(ingest_path),
......@@ -124,12 +209,52 @@ def test_builds_evla_sdm_manifest(tmpdir):
@pytest.mark.skip("TODO: test_writes_evla_sdm_manifest")
def test_writes_evla_sdm_manifest():
    """
    Have we written an EVLA SDM ingestion manifest correctly?
    See if we can match the evla_eb_manifest.json example.

    :return:
    """
    example_file = find_example_manifest("evla_eb")
    # parse straight from the file instead of concatenating its lines first
    with open(example_file, "r") as infile:
        mf_json = json.load(infile)

    assert isinstance(mf_json, dict)
    assert len(mf_json) >= 2

    # NOTE(review): another example manifest in this change spells its keys
    # "ngas-ingest" / "output-group" and stores booleans as lowercase "false";
    # confirm the evla_eb example really uses the spellings read below, or
    # these lookups fail and the "True" comparisons can never be true.
    parameters_in = mf_json["parameters"]
    to_reingest = parameters_in["reingest"] == "True"
    assert not to_reingest
    to_ngas = parameters_in["ngas_ingest"] == "True"
    assert not to_ngas

    ts_in = Telescope.from_str(parameters_in["telescope"])
    assert ts_in == Telescope.EVLA

    og_in = mf_json["output_group"]
    sp_in = og_in["science_products"][0]
    ap_in = og_in["ancillary_products"][0]
    ip_in = parameters_in["ingestion_path"]
    assert ip_in == "/home/mchammer/evla/parallel-prod"

    params_out = Parameters(
        telescope=Telescope.EVLA,
        ingestion_path=ip_in,
        collection_metadata=json.dumps(sp_in),
        additional_metadata=json.dumps(ap_in),
    )

    manifest = IngestionManifest(params_out)
    assert manifest.parameters == params_out
    assert not manifest.parameters.reingest
    assert not manifest.parameters.ngas_ingest

    og_out = manifest.output_group
    assert og_out.science_products == sp_in
    assert manifest.output_group == og_in

    # TODO:
    #  manifest_file = IngestionManifestWriter(manifest, working_dir).write()
    raise NotImplementedError
......@@ -143,11 +268,22 @@ def test_builds_evla_bdf_manifest():
raise NotImplementedError
@pytest.mark.skip("TODO: test_builds_evla_cal_manifest")
def test_builds_evla_cal_manifest():
# ------------------------------#
# #
# U T I L I T I E S #
# #
# ------------------------------#
def find_example_manifest(manifest_name: str) -> Path:
    """
    Get this example manifest for comparison with one we've generated in a test.

    Searches recursively below the current working directory for
    ``<manifest_name>_manifest.json`` and returns the first match found.

    :param manifest_name: unique file identifier
    :return: full path to the manifest file
    :raises FileNotFoundError: if no such file exists below the working directory
    """
    filename = manifest_name + "_manifest.json"
    # rglob() is lazy, so next() stops the directory walk at the first hit
    match = next(Path.cwd().rglob(filename), None)
    if match is None:
        raise FileNotFoundError(filename)
    return match
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment