diff --git a/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/ingestion_manifest.py b/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/ingestion_manifest.py index aa25eb6f03f121609daff3a18b8ad12a3124cc04..a8480b4b3cbaffbc091071062bfa6c578dccde3d 100644 --- a/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/ingestion_manifest.py +++ b/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/ingestion_manifest.py @@ -1,7 +1,8 @@ """ The ingestion manifest """ -from pathlib import Path +import json # pylint: disable=E0402, R0903, R0913 +from typing import Dict from .utilities import Telescope @@ -12,7 +13,8 @@ class Parameters: def __init__( self, telescope: Telescope, - ingestion_path: Path, + # for JSON parsing, path must be a string + ingestion_path: str, additional_metadata: str, collection_metadata: str, reingest: bool = False, @@ -25,6 +27,9 @@ class Parameters: self.reingest = reingest self.ngas_ingest = ngas_ingest + def __repr__(self): + return repr(self.__dict__) + class InputGroup: """ @@ -38,6 +43,9 @@ class InputGroup: def __init__(self): self.science_products = [] + def __repr__(self) -> str: + return repr(self.__dict__) + class IngestionManifest: """Represents JSON layout of ingestion information, encompassing several potential scenarios. @@ -47,13 +55,35 @@ class IngestionManifest: def __init__(self, parameters: Parameters): self.parameters = parameters - # to be an InputGroup self.input_group = InputGroup() - # to be an OutputGroup + if self.parameters.additional_metadata: + self.input_group.science_products.append( + json.loads(self.parameters.additional_metadata) + ) + self.output_group = OutputGroup() + if self.parameters.collection_metadata: + self.output_group.ancillary_products.append( + json.loads(self.parameters.collection_metadata) + ) + self.ingestion_path = self.parameters.ingestion_path + # TODO: what is this, and how do we use it? self.associate_group = AssociateGroup() - self.ingestion_path = None - self.science_products = [] + + def content(self) -> Dict: + """ + Accessor for manifest content + + :return: manifest as dict + """ + return dict( + input_group=repr(self.input_group), + output_group=repr(self.output_group), + associate_group=repr(self.associate_group), + ingestion_path=repr(self.ingestion_path), + science_products=repr(self.input_group.science_products), + ancillary_products=repr(self.output_group.ancillary_products), + ) class OutputGroup: @@ -63,6 +93,9 @@ class OutputGroup: self.science_products = [] self.ancillary_products = [] + def __repr__(self): + return repr(self.__dict__) + class AssociateGroup: """ @@ -78,3 +111,6 @@ class AssociateGroup: def __init__(self): self.science_products = [] + + def __repr__(self): + return repr(self.__dict__) diff --git a/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/ingestion_manifest_builder.py b/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/ingestion_manifest_writer.py similarity index 52% rename from apps/cli/executables/pexable/ingest_envoy/ingest_envoy/ingestion_manifest_builder.py rename to apps/cli/executables/pexable/ingest_envoy/ingest_envoy/ingestion_manifest_writer.py index 2cd57d330d9b40b3f6f4aca75e79efea8f972c4b..90877ba1caa2e47e5d2e6f8d37838c9d13c97b2e 100644 --- a/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/ingestion_manifest_builder.py +++ b/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/ingestion_manifest_writer.py @@ -1,19 +1,16 @@ """Build an ingestion manifest file""" -import datetime import json import logging import shutil -import stat import sys from pathlib import Path from typing import Tuple, List # pylint: disable=E0401, E0402 - -from astropy.time import Time +import pendulum +from pendulum import DateTime from .ingestion_manifest import IngestionManifest -from .utilities import IngestionType logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) @@ -24,80 +21,94 @@ MANIFEST_NAME_EXT = ".json" ARTIFACT_NAME = "ingestion_artifacts_" ARTIFACT_EXT = ".tar" -# TODO: say what?? we're accepting a manifest in the constructor and ALSO building one? -class IngestionManifestBuilder: + +class IngestionManifestWriter: """Uses supplied parameters to build ingestion manifest files for the various types of ingestion""" def __init__( self, manifest: IngestionManifest, - ingest_type: IngestionType, - ingestion_location: Path, working_dir: Path, ): self.manifest = manifest - self.ingestion_type = ingest_type - self.ingestion_location = ingestion_location self.working_dir = working_dir self.parameters = manifest.parameters self.manifest_filename, self.artifact_filename = self.build_filenames() - def build(self) -> IngestionManifest: + def build(self) -> Path: """ - TODO: Write the ingestion manifest indicated by the parameters. + Write the ingestion manifest indicated by the parameters. :return: the ingestion manifest constructed from the parameters """ + # Pull out the manifest content and stringify it + manifest_content = json.dumps(self.manifest.content()) + # Write the manifest to the staging area, where other files may be. - manifest_content = json.dumps(self.manifest.__json__()) - staging_manifest = self.ingestion_location / self.manifest_filename + ingestion_location = self.parameters.ingestion_path + staging_manifest = Path(ingestion_location) / self.manifest_filename with open(staging_manifest, "w") as out: out.write(manifest_content) - # Open up permissions so we can delete the manifest file later. - staging_manifest.chmod(staging_manifest.stat().st_mode | stat.S_IREAD | stat.S_IWRITE) + # # Open up permissions so we can delete the manifest file later. + # rw_mode = 0o666 + # staging_manifest.chmod(rw_mode) # Get all the files we'll need.... ingestion_files = self.find_ingestion_files() + # there should be at least one file, the manifest + if len(ingestion_files) < 1: + raise ValueError(f"no ingestion files nor manifest found at {ingestion_location}") + # ....and put them in both places, with rw permissions for filename in ingestion_files: - staging_dir_copy = self.ingestion_location / filename - shutil.copy(str(staging_manifest), str(staging_dir_copy)) - staging_dir_copy.chmod(staging_manifest.stat().st_mode | stat.S_IREAD | stat.S_IWRITE) + staging_dir_copy = ingestion_location / filename working_dir_copy = self.working_dir / filename shutil.copy(str(staging_manifest), str(working_dir_copy)) - working_dir_copy.chmod(staging_manifest.stat().st_mode | stat.S_IREAD | stat.S_IWRITE) - # Rename the manifest to the shared name decided on for ingestion invocation - generalized = self.working_dir / "ingestion-manifest.json" - generalized.symlink_to(working_dir_copy, target_is_directory=False) - generalized.chmod(staging_manifest.stat().st_mode | stat.S_IREAD | stat.S_IWRITE) + # Rename the manifest to the shared name decided on for ingestion invocation. + # Path() cast "shouldn't" be necessary, but if we don't do it, + # file is a LocalPath and we can't create the symlink + generalized = Path(self.working_dir / "ingestion-manifest.json") + generalized.symlink_to(working_dir_copy, target_is_directory=False) # Now that all the loose files are copied, create the ingestion artifacts tar self.write_ingestion_artifact_tar(ingestion_files) - # TODO: we need the manifest itself, not the file - return staging_manifest + # again: return a Path explicitly, for a LocalPath won't work + return Path(staging_manifest) @staticmethod - def format_timestamp(start_time_mjd: float) -> str: + def format_timestamp(start_time: DateTime) -> str: """ - Take an mjd time and format it as "yyyy_MM_dd_'T'HH_mm_ss.SSS" - for the ingestion manifest filename - - :param start_time_mjd: observation start time directly from database; - will be modified Julian date - :return: a human-readable timestamp string + Format the current time as follows: + input format: + 2021-07-01T13:49:17.237119+00:00 + desired output format as yyyy_MM_dd_'T'HH_mm_ss.SSS: + 2021_07_01'T'13_49_17.237 + + :param start_time: current pendulum timestamp + :return: """ - # TODO Some Fine Day: is there a more elegant way to do this? - astro_time = Time(start_time_mjd, scale="tt", format="mjd") - astro_time.format = "fits" - str_time = str(astro_time).replace("-", "_", len(str(astro_time))) - str_time = str_time.replace(":", "_", len(str_time)).replace("T", "'T'") - return str_time + + time_str = str(start_time) + # change hyphens and colons to underscores + timestamp = time_str.replace("-", "_", len(time_str)) + timestamp = timestamp.replace(":", "_", len(timestamp)) + # chop off the last bit + timestamp = timestamp.split("+")[0] + # now timestamp ends with ss.###....; round to 3 places + ts_parts = timestamp.split("_") + seconds = float(ts_parts[len(ts_parts) - 1]) + rounded = round(seconds, 3) + timestamp = timestamp.replace(str(seconds), str(rounded)) + # finally, the T in single quotes + timestamp = timestamp.replace("T", "'T'") + + return timestamp def build_filenames(self) -> Tuple[str, str]: """ @@ -105,9 +116,10 @@ class IngestionManifestBuilder: :return: the filenames """ - timestamp = self.format_timestamp(datetime.datetime.now()) - manifest_filename = f"{MANIFEST_NAME_BASE} {timestamp}{MANIFEST_NAME_EXT}" - artifact_filename = f"{ARTIFACT_NAME} {timestamp}{ARTIFACT_EXT}" + current_time = pendulum.now() + timestamp = self.format_timestamp(current_time) + manifest_filename = f"{MANIFEST_NAME_BASE}{timestamp}{MANIFEST_NAME_EXT}" + artifact_filename = f"{ARTIFACT_NAME}{timestamp}{ARTIFACT_EXT}" return manifest_filename, artifact_filename def find_ingestion_files(self) -> List[Path]: @@ -117,7 +129,7 @@ class IngestionManifestBuilder: :return: ingestion inputs """ - aux_files = [] + coll_files = aux_files = [] if self.parameters.additional_metadata is not None: aux_string = self.parameters.additional_metadata diff --git a/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/utilities.py b/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/utilities.py index 09366f128bed2c6ad85023d50e2aadc25894793c..a0e9bb3bdde4b9a58eb0661716fe2c9648c21514 100644 --- a/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/utilities.py +++ b/apps/cli/executables/pexable/ingest_envoy/ingest_envoy/utilities.py @@ -2,6 +2,8 @@ from enum import Enum +# pylint: disable=R0903 + class Telescope(Enum): """Codifying the names of our telescopes, because Janet can't abide magic strings""" @@ -95,11 +97,11 @@ class AncillaryProduct: """Represents an ancillary product in an ingestion manifest""" def __init__( - self, type: AncillaryProductType, filename: str, science_associate: str, group_with: str + self, ap_type: AncillaryProductType, filename: str, science_associate: str, group_with: str ): - self.type = type + self.ap_type = ap_type self.filename = filename # make this an ancillary to a particular science product (assumes locator string) - self.science_associate = science_associate # TODO: enum? + self.science_associate = science_associate # TODO, possibly: enum? # make this an ancillary to the group of a science product (assumes locator string) self.group_with = group_with diff --git a/apps/cli/executables/pexable/ingest_envoy/setup.py b/apps/cli/executables/pexable/ingest_envoy/setup.py index 49d8b7daaebeff93c26bb7ace72a2c49879d3221..6e62620063d474a1ea0688121fde3681cd979660 100644 --- a/apps/cli/executables/pexable/ingest_envoy/setup.py +++ b/apps/cli/executables/pexable/ingest_envoy/setup.py @@ -10,7 +10,7 @@ from setuptools import find_packages, setup VERSION = open("ingest_envoy/_version.py").readlines()[-1].split()[-1].strip("\"'") README = Path("README.md").read_text() -requires = ["pycapo", "pex==2.1.41", "astropy"] +requires = ["pycapo", "pex==2.1.41", "astropy", "pendulum"] setup( name="ssa-" + Path().absolute().name, @@ -22,7 +22,7 @@ setup( url="TBD", license="GPL", install_requires=requires, - tests_require=["pytest", "astropy"], + tests_require=["pytest", "astropy", "pendulum"], keywords=[], packages=find_packages(), classifiers=["Programming Language :: Python :: 3.8"], diff --git a/apps/cli/executables/pexable/ingest_envoy/test/test_evla_ing_manifests.py b/apps/cli/executables/pexable/ingest_envoy/test/test_evla_ing_manifests.py index 5b3df3caaa924ce72f6463f8ccb9f1ebf6cab52f..5b083990a7ad9d03e7b60aaf46504a2f188485cb 100644 --- a/apps/cli/executables/pexable/ingest_envoy/test/test_evla_ing_manifests.py +++ b/apps/cli/executables/pexable/ingest_envoy/test/test_evla_ing_manifests.py @@ -1,15 +1,21 @@ """ Test for the various types of EVLA ingestion manifests """ +import json import logging +import re import sys +from pathlib import Path import pytest + from ingest_envoy.ingestion_manifest import IngestionManifest, Parameters -from ingest_envoy.ingestion_manifest_builder import IngestionManifestBuilder +from ingest_envoy.ingestion_manifest_writer import IngestionManifestWriter +from ingest_envoy.ingestion_manifest_writer import ( + MANIFEST_NAME_BASE, + MANIFEST_NAME_EXT, +) from ingest_envoy.utilities import Telescope -# TODO: fix test_builds_evla_sdm_manifest, then *REMOVE* E1120 from disable list -# pylint: disable=E0401, E1120, W1203 - +# pylint: disable=E0401, W1203 logger = logging.getLogger(__name__) @@ -17,25 +23,14 @@ logger.setLevel(logging.INFO) logger.addHandler(logging.StreamHandler(sys.stdout)) -def test_builds_timestamp_in_desired_format(): - """ - Does IngestionManifestBuilder format an MJD timestamp yanked out of the DB as expected? - - :return: - """ - str_time = IngestionManifestBuilder.format_timestamp(59215.3) - assert str_time == "2021_01_01'T'07_12_00.000" - - -def test_minimal_manifest_is_as_expected(): +def test_minimal_manifest_is_as_expected(tmpdir): """ Make sure a minimal, generic manifest has the properties we expect it to have. :return: """ - temp_dir = Path(tempfile.mkdtemp()) - ingest_path = temp_dir / "ingestion" + ingest_path = tmpdir / "ingestion" ingest_path.mkdir() # no reingest, ngas ingest, additional metadata nor collection metadata @@ -51,104 +46,91 @@ def test_minimal_manifest_is_as_expected(): manifest.output_group.ancillary_products.append(fake_ancillary_product) assert manifest.parameters == parameters - mf_json = manifest.__json__() - - logger.info(f">>> THE JSON:\n{mf_json}") - - output_group = mf_json["output_group"] - assert len(output_group.science_products) == 0 - assert output_group.ancillary_products == [fake_ancillary_product] + assert len(manifest.output_group.science_products) == 0 + assert manifest.output_group.ancillary_products == [fake_ancillary_product] -@pytest.mark.skip("TODO: test_generates_correct_filename") -def test_generates_correct_filename(): +def test_generates_correct_filename(tmpdir): """ - TODO - We expect the manifest to be named like "ingestion_manifest_2019_07_30_T13_03_00.936" + We expect the manifest to be named like "ingestion_manifest_2019_07_30_T13_03_00.936.json" + :return: """ - return NotImplementedError + ingest_path = tmpdir / "ingestion" + ingest_path.mkdir() + working_dir = tmpdir / "working" + working_dir.mkdir() -from ingest_envoy.ingestion_manifest import IngestionManifest, Parameters -from ingest_envoy.ingestion_manifest_builder import IngestionManifestBuilder -from ingest_envoy.utilities import Telescope + # no reingest, ngas ingest, additional metadata nor collection metadata + parameters = Parameters(Telescope.EVLA, Path(ingest_path), None, None) -logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) -logger.addHandler(logging.StreamHandler(sys.stdout)) + manifest = IngestionManifest(parameters) + manifest_file = IngestionManifestWriter(manifest, working_dir).build() + filename = manifest_file.name + assert filename.startswith(MANIFEST_NAME_BASE) + assert filename.endswith(MANIFEST_NAME_EXT) + filename_parts = filename.split("_") + assert len(filename_parts) == 7 -def test_builds_timestamp_in_desired_format(): - """ - Does IngestionManifestBuilder format an MJD timestamp yanked out of the DB as expected? + # get just the timestamp + timestamp = filename.replace(MANIFEST_NAME_BASE, "").replace(MANIFEST_NAME_EXT, "") - :return: - """ - str_time = IngestionManifestBuilder.format_timestamp(59215.3) - assert str_time == "2021_01_01'T'07_12_00.000" + # we should have gotten year, month, day, hours, minutes, seconds to 3 decimal places + assert re.match(r"\d{4}_\d{2}_\d{2}'T'\d{2}_\d{2}_\d{2}\.\d{0,3}", timestamp) -def test_minimal_manifest_is_as_expected(tmpdir): +def test_builds_evla_sdm_manifest(tmpdir): """ - Make sure a minimal, generic manifest has the properties we expect it to have. + Have we built a well-formed EVLA SDM ingestion manifest? :return: """ ingest_path = tmpdir / "ingestion" ingest_path.mkdir() - + working_dir = tmpdir / "working" + working_dir.mkdir() + + fake_science_product = json.dumps( + { + "type": "execution_block", + "filename": "X_osro_000.59368.65423814815", + } + ) + fake_ancillary_product = json.dumps( + { + "type": "ingestion_artifacts", + "filename": "ingestion_artifacts_2021_06_03_T15_52_35.031.tar", + } + ) # no reingest, ngas ingest, additional metadata nor collection metadata - parameters = Parameters(Telescope.EVLA, ingest_path, None, None) + parameters = Parameters( + telescope=Telescope.EVLA, + ingestion_path=str(ingest_path), + additional_metadata=fake_science_product, + collection_metadata=fake_ancillary_product, + ) manifest = IngestionManifest(parameters) - fake_ancillary_product = { - "type": "obslog", - "filename": "my_fabulous_obs.log", - "group_with": "uid://evla/execblock/ABCDEFGHIJKLMNOP", - } - manifest.output_group.ancillary_products.append(fake_ancillary_product) - - assert manifest.parameters == parameters - assert len(manifest.output_group.science_products) == 0 - assert manifest.output_group.ancillary_products == [fake_ancillary_product] + assert len(manifest.input_group.science_products) == 1 + assert manifest.input_group.science_products[0] == json.loads(fake_science_product) + assert len(manifest.output_group.ancillary_products) == 1 + assert manifest.output_group.ancillary_products == [json.loads(fake_ancillary_product)] -@pytest.mark.skip("TODO: test_generates_correct_filename") -def test_generates_correct_filename(): +@pytest.mark.skip("TODO: test_writes_evla_sdm_manifest") +def test_writes_evla_sdm_manifest(): """ - TODO - We expect the manifest to be named like "ingestion_manifest_2019_07_30_T13_03_00.936" - :return: - """ - return NotImplementedError - - -@pytest.mark.skip("TODO: test_builds_evla_sdm_manifest") -def test_builds_evla_sdm_manifest(tmpdir): - """ - Have we built a well-formed EVLA SDM ingestion manifest? + Have we written an EVLA BDF ingestion manifest correctly? :return: """ - ingest_path = tmpdir / "ingestion" - ingest_path.mkdir() - - # no reingest, ngas ingest, additional metadata nor collection metadata - parameters = Parameters(Telescope.EVLA, ingest_path, None, None) - manifest = IngestionManifestBuilder(parameters).build() - # TODO: will need ingestion files - assert manifest.input_group is None - assert manifest.output_group["science_products"] == { - "type": "execution_block", - "filename": "X_osro_000.59368.65423814815", - } - assert manifest.ancillary_products == { - "type": "ingestion_artifacts", - "filename": "ingestion_artifacts_2021_06_03_T15_52_35.031.tar", - } + # TODO: + # manifest_file = IngestionManifestWriter(manifest, working_dir).build() + raise NotImplementedError @pytest.mark.skip("TODO: test_builds_evla_bdf_manifest")