diff --git a/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/ingestion_manifest.py b/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/ingestion_manifest.py index a8480b4b3cbaffbc091071062bfa6c578dccde3d..e80d175ba7592693f7fbb36b286ef255181e55ac 100644 --- a/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/ingestion_manifest.py +++ b/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/ingestion_manifest.py @@ -19,6 +19,7 @@ class Parameters: collection_metadata: str, reingest: bool = False, ngas_ingest: bool = False, + calibrate: bool = False, ): self.telescope = telescope self.ingestion_path = ingestion_path @@ -26,6 +27,7 @@ class Parameters: self.collection_metadata = collection_metadata self.reingest = reingest self.ngas_ingest = ngas_ingest + self.calibrate = calibrate def __repr__(self): return repr(self.__dict__) diff --git a/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/ingestion_manifest_writer.py b/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/ingestion_manifest_writer.py index 90877ba1caa2e47e5d2e6f8d37838c9d13c97b2e..503457b29055ae033ecd7f1c2803baa373df2cbd 100644 --- a/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/ingestion_manifest_writer.py +++ b/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/ingestion_manifest_writer.py @@ -1,4 +1,7 @@ """Build an ingestion manifest file""" + +# TODO: for a guide to implementation, see ingestion package in archive-metaproject + import json import logging import shutil @@ -37,7 +40,7 @@ class IngestionManifestWriter: self.manifest_filename, self.artifact_filename = self.build_filenames() - def build(self) -> Path: + def write(self) -> Tuple[Path, List[Path]]: """ Write the ingestion manifest indicated by the parameters. @@ -53,33 +56,27 @@ class IngestionManifestWriter: with open(staging_manifest, "w") as out: out.write(manifest_content) - # # Open up permissions so we can delete the manifest file later. - # rw_mode = 0o666 - # staging_manifest.chmod(rw_mode) - # Get all the files we'll need.... - ingestion_files = self.find_ingestion_files() + ingestion_files = self.find_ingestion_filenames() # there should be at least one file, the manifest if len(ingestion_files) < 1: raise ValueError(f"no ingestion files nor manifest found at {ingestion_location}") # ....and put them in both places, with rw permissions + files_written = [] for filename in ingestion_files: - staging_dir_copy = ingestion_location / filename - working_dir_copy = self.working_dir / filename + working_dir_copy = Path(self.working_dir) / filename shutil.copy(str(staging_manifest), str(working_dir_copy)) - - # Rename the manifest to the shared name decided on for ingestion invocation. - # Path() cast "shouldn't" be necessary, but if we don't do it, - # file is a LocalPath and we can't create the symlink - generalized = Path(self.working_dir / "ingestion-manifest.json") - generalized.symlink_to(working_dir_copy, target_is_directory=False) + files_written.append(working_dir_copy) # Now that all the loose files are copied, create the ingestion artifacts tar - self.write_ingestion_artifact_tar(ingestion_files) + # TODO START HERE: not yet implemented; will throw exception + files_written.append( + self.write_ingestion_artifact_tar(Path(ingestion_location), files_written) + ) # again: return a Path explicitly, for a LocalPath won't work - return Path(staging_manifest) + return Path(staging_manifest), files_written @staticmethod def format_timestamp(start_time: DateTime) -> str: @@ -91,7 +88,7 @@ class IngestionManifestWriter: 2021_07_01'T'13_49_17.237 :param start_time: current pendulum timestamp - :return: + :return: timestamp suitable for ingestion manifest filename """ time_str = str(start_time) @@ -122,7 +119,7 @@ class IngestionManifestWriter: artifact_filename = f"{ARTIFACT_NAME}{timestamp}{ARTIFACT_EXT}" return manifest_filename, artifact_filename - def find_ingestion_files(self) -> List[Path]: + def find_ingestion_filenames(self) -> List[Path]: """ Gather the files required for ingestion @@ -132,25 +129,29 @@ class IngestionManifestWriter: coll_files = aux_files = [] if self.parameters.additional_metadata is not None: - aux_string = self.parameters.additional_metadata - aux_files = aux_string.split(",") + addl_md = json.loads(self.parameters.additional_metadata) + aux_files.append(addl_md["filename"]) if self.parameters.collection_metadata is not None: - coll_str = self.parameters.collection_metadata - coll_files = coll_str.split(",") + coll_md = json.loads(self.parameters.collection_metadata) + coll_files.append(coll_md["filename"]) aux_files += coll_files # be sure to add the manifest itself aux_files.append(self.manifest_filename) - return aux_files + return list(set(aux_files)) - def write_ingestion_artifact_tar(self, ingestion_files: List[Path]) -> Path: + def write_ingestion_artifact_tar( + self, ingestion_location: Path, ingestion_files: List[Path] + ) -> Path: """ + TODO: implement this next -- JLG 2021-07-02 a.m. Take the list of files and build a tar for inclusion into the archive. This happens in the staging area for ingestion. :param ingestion_files: all the files needed for ingestion :return: a .tar archive of the ingestion artifacts """ + raise NotImplementedError diff --git a/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/utilities.py b/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/utilities.py index a0e9bb3bdde4b9a58eb0661716fe2c9648c21514..398a3ba15f5d375c74901664e7298b5dfc354bd1 100644 --- a/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/utilities.py +++ b/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/utilities.py @@ -15,6 +15,22 @@ class Telescope(Enum): GBT = 5 NONE = 6 + def __str__(self): + return self.name + + @staticmethod + def from_str(ts_name: str): + """ + Convert a telescope name string to a Telescope. + + :param ts_name: + :return: + """ + for ts in Telescope: + if ts.name == ts_name: + return ts + raise ValueError(f"telescope '{ts_name}' not found") + class IngestionType(Enum): """Types of ingestion we'll have to do""" diff --git a/apps/cli/executables/pexable/ingest_envoy/setup.py b/apps/cli/executables/pexable/ingest_envoy/setup.py index 6e62620063d474a1ea0688121fde3681cd979660..f6516734bf0c71022b51cc3724e1a9058bb494ac 100644 --- a/apps/cli/executables/pexable/ingest_envoy/setup.py +++ b/apps/cli/executables/pexable/ingest_envoy/setup.py @@ -22,7 +22,6 @@ setup( url="TBD", license="GPL", install_requires=requires, - tests_require=["pytest", "astropy", "pendulum"], keywords=[], packages=find_packages(), classifiers=["Programming Language :: Python :: 3.8"], diff --git a/apps/cli/executables/pexable/ingest_envoy/test/examples/evla_calibration_manifest.json b/apps/cli/executables/pexable/ingest_envoy/test/examples/evla_calibration_manifest.json index 87a5dd84cd935b0d367a62fa33c8ebc70466460c..2e5c70812ac6bfe6b4cd06341987feb50f156715 100644 --- a/apps/cli/executables/pexable/ingest_envoy/test/examples/evla_calibration_manifest.json +++ b/apps/cli/executables/pexable/ingest_envoy/test/examples/evla_calibration_manifest.json @@ -3,13 +3,13 @@ "reingest": "false", "ngas-ingest": "false", "calibrate": "false", - "ingestion_path": "/lustre/...../" + "ingestion_path": "/home/mchammer/evla/parallel-prod" }, "input-group": { "science_products": [ { "type": "execution-block", - "locator": "......" + "locator": "uid://evla/calibration/long-freakin-uuid-22" } ] }, @@ -17,7 +17,7 @@ "science_products": [ { "type": "calibration", - "filename": "19A-321_2019......tar" + "filename": "19A-321_2019_more_stuff.tar" } ], "ancillary_products": [ diff --git a/apps/cli/executables/pexable/ingest_envoy/test/test_evla_ing_manifests.py b/apps/cli/executables/pexable/ingest_envoy/test/test_evla_ing_manifests.py index 5b083990a7ad9d03e7b60aaf46504a2f188485cb..4ac9896acf18ae041dbccf3237f717da16223c8b 100644 --- a/apps/cli/executables/pexable/ingest_envoy/test/test_evla_ing_manifests.py +++ b/apps/cli/executables/pexable/ingest_envoy/test/test_evla_ing_manifests.py @@ -4,6 +4,7 @@ import logging import re import sys from pathlib import Path +from typing import Dict import pytest @@ -66,7 +67,8 @@ def test_generates_correct_filename(tmpdir): parameters = Parameters(Telescope.EVLA, Path(ingest_path), None, None) manifest = IngestionManifest(parameters) - manifest_file = IngestionManifestWriter(manifest, working_dir).build() + # TODO: test fails now due to unimplemented method called in write() + manifest_file, ingestion_files = IngestionManifestWriter(manifest, working_dir).write() filename = manifest_file.name assert filename.startswith(MANIFEST_NAME_BASE) assert filename.endswith(MANIFEST_NAME_EXT) @@ -81,6 +83,89 @@ def test_generates_correct_filename(tmpdir): assert re.match(r"\d{4}_\d{2}_\d{2}'T'\d{2}_\d{2}_\d{2}\.\d{0,3}", timestamp) +# @pytest.mark.skip("TODO: test_builds_evla_cal_manifest") +def test_builds_evla_cal_manifest(tmpdir): + """ + Have we built a well-formed EVLA calibration ingestion manifest? + + :return: + """ + ingest_path = tmpdir / "ingestion" + ingest_path.mkdir() + working_dir = tmpdir / "working" + working_dir.mkdir() + + input_group = json.dumps( + { + "science_products": [ + { + "type": "execution-block", + "locator": "uid://evla/calibration/long-freakin-uuid-22", + } + ] + }, + ) + output_group = json.dumps( + { + "output-group": { + "science_products": [ + {"type": "calibration", "filename": "19A-321_2019_more_stuff.tar"} + ], + "ancillary_products": [{"type": "weblog", "filename": "weblog.tgz"}], + } + } + ) + + parameters = Parameters( + telescope=Telescope.EVLA, + ingestion_path=str(ingest_path), + additional_metadata=input_group, + collection_metadata=output_group, + ) + manifest = IngestionManifest(parameters) + + assert len(manifest.input_group.science_products) == 1 + assert manifest.input_group.science_products[0] == json.loads(input_group) + assert len(manifest.output_group.ancillary_products) == 1 + assert manifest.output_group.ancillary_products == [json.loads(output_group)] + + +def test_writes_evla_cal_manifest(tmpdir): + """ + Do CAL ingestion manifest and archive get written as expected? + + :return: + """ + ingest_path = tmpdir / "ingestion" + ingest_path.mkdir() + + working_dir = tmpdir / "working" + working_dir.mkdir() + + example_manifest = find_example_manifest("evla_calibration") + content = "" + with open(example_manifest, "r") as infile: + for line in infile.readlines(): + content += line + mf_json = json.loads(content) + og_in = mf_json["output-group"] + sp_in = og_in["science_products"][0] + ap_in = og_in["ancillary_products"][0] + ip_in = mf_json["parameters"]["ingestion_path"] + assert ip_in == "/home/mchammer/evla/parallel-prod" + + params = Parameters( + telescope=Telescope.EVLA, + ingestion_path=str(ingest_path), + collection_metadata=json.dumps(sp_in), + additional_metadata=json.dumps(ap_in), + ) + manifest = IngestionManifest(params) + # TODO: TEST FAILS because of a not-yet-implemented method in writer class + manifest_file, ingestion_files = IngestionManifestWriter(manifest, working_dir).write() + assert manifest_file.exists() + + def test_builds_evla_sdm_manifest(tmpdir): """ Have we built a well-formed EVLA SDM ingestion manifest? @@ -105,7 +190,7 @@ def test_builds_evla_sdm_manifest(tmpdir): "filename": "ingestion_artifacts_2021_06_03_T15_52_35.031.tar", } ) - # no reingest, ngas ingest, additional metadata nor collection metadata + # no reingest, ngas ingest, calibration parameters = Parameters( telescope=Telescope.EVLA, ingestion_path=str(ingest_path), @@ -124,12 +209,52 @@ def test_builds_evla_sdm_manifest(tmpdir): @pytest.mark.skip("TODO: test_writes_evla_sdm_manifest") def test_writes_evla_sdm_manifest(): """ - Have we written an EVLA BDF ingestion manifest correctly? + Have we written an EVLA SDM ingestion manifest correctly? + See if we can match the evla_eb_manifest.json example. :return: """ + example_file = find_example_manifest("evla_eb") + content = "" + with open(example_file, "r") as infile: + for line in infile.readlines(): + content += line + mf_json = json.loads(content) + assert isinstance(mf_json, Dict) + assert len(mf_json) >= 2 + # pprint(f">>> mf_json:\n{mf_json}; it's a {type(mf_json)}") + + parameters_in = mf_json["parameters"] + to_reingest = parameters_in["reingest"] == "True" + assert not to_reingest + to_ngas = parameters_in["ngas_ingest"] == "True" + assert not to_ngas + ts_in = Telescope.from_str(parameters_in["telescope"]) + assert ts_in == Telescope.EVLA + + og_in = mf_json["output_group"] + sp_in = og_in["science_products"][0] + ap_in = og_in["ancillary_products"][0] + ip_in = parameters_in["ingestion_path"] + assert ip_in == "/home/mchammer/evla/parallel-prod" + + params_out = Parameters( + telescope=Telescope.EVLA, + ingestion_path=ip_in, + collection_metadata=json.dumps(sp_in), + additional_metadata=json.dumps(ap_in), + ) + manifest = IngestionManifest(params_out) + assert manifest.parameters == params_out + assert not manifest.parameters.reingest + assert not manifest.parameters.ngas_ingest + + og_out = manifest.output_group + assert og_out.science_products == sp_in + assert manifest.output_group == og_in + # TODO: - # manifest_file = IngestionManifestWriter(manifest, working_dir).build() + # manifest_file = IngestionManifestWriter(manifest, working_dir).write() raise NotImplementedError @@ -143,11 +268,22 @@ def test_builds_evla_bdf_manifest(): raise NotImplementedError -@pytest.mark.skip("TODO: test_builds_evla_cal_manifest") -def test_builds_evla_cal_manifest(): +# ------------------------------# +# # +# U T I L I T I E S # +# # +# ------------------------------# + + +def find_example_manifest(manifest_name: str) -> Path: """ - Have we built a well-formed EVLA calibration ingestion manifest? + Get this example manifest for comparison with one we've generated in a test. - :return: + :param manifest_name: unique file identifier + :return: full path to the manifest file """ - raise NotImplementedError + filename = manifest_name + "_manifest.json" + for file in Path.cwd().rglob(filename): + return file + + raise FileNotFoundError(filename)