From 35da8c8a7276fcf562a5b656bd60f783afc67a96 Mon Sep 17 00:00:00 2001
From: Daniel K Lyons <dlyons@nrao.edu>
Date: Thu, 15 Jul 2021 10:00:23 -0600
Subject: [PATCH] Support fetching multiple locators or files

---
 .../productfetcher/locations.py               | 28 +++++++---
 .../productfetcher/product_fetcher.py         | 55 ++++++++++---------
 .../tests/test_product_fetcher.py             | 52 ++++++++++++------
 3 files changed, 84 insertions(+), 51 deletions(-)

diff --git a/apps/cli/executables/pexable/productfetcher/productfetcher/locations.py b/apps/cli/executables/pexable/productfetcher/productfetcher/locations.py
index 98e898e9f..f72241b80 100644
--- a/apps/cli/executables/pexable/productfetcher/productfetcher/locations.py
+++ b/apps/cli/executables/pexable/productfetcher/productfetcher/locations.py
@@ -165,9 +165,21 @@ class NgasFile(LocatedFile):
     def __str__(self):
         return f"NGAS {self.ngas_file_id} from {self.server} -> {self.subdirectory}/{self.relative_path}"
 
+    def __eq__(self, other):
+        return (
+            self.ngas_file_id == other.ngas_file_id
+            and self.subdirectory == other.subdirectory
+            and self.relative_path == other.relative_path
+            and self.checksum == other.checksum
+            and self.checksum_type == other.checksum_type
+            and self.version == other.version
+            and self.size == other.size
+            and self.server == other.server
+        )
+
 
 class OracleXml(LocatedFile):
-    """ Represents the metadata of an ALMA SDM stored as XML in the ALMA DB """
+    """Represents the metadata of an ALMA SDM stored as XML in the ALMA DB"""
 
     archive_uid: str
     table: str
@@ -195,7 +207,7 @@ class OracleXml(LocatedFile):
 
 
 class FileLocator(Locator):
-    """ Loads a locations report from a .json report file """
+    """Loads a locations report from a .json report file"""
 
     def __init__(self, file: Path):
         self.file = file
@@ -205,7 +217,7 @@ class FileLocator(Locator):
 
 
 class ServiceLocator(Locator):
-    """ Acquires a locations report from the archiveService """
+    """Acquires a locations report from the archiveService"""
 
     def __init__(self, science_product_locator: str):
         self.spl = science_product_locator
@@ -216,9 +228,7 @@ class ServiceLocator(Locator):
         # this is needed to prevent SSL errors when tests are run
         # inside a Docker container
         requests.packages.urllib3.util.ssl_.DEFAULT_CIPHERS += ":HIGH:!DH:!aNULL"
-        requests.Session().mount(
-            settings.locatorServiceUrlPrefix, adapter=requests.adapters.HTTPAdapter()
-        )
+        requests.Session().mount(settings.locatorServiceUrlPrefix, adapter=requests.adapters.HTTPAdapter())
 
         try:
             response = requests.get(settings.locatorServiceUrlPrefix, params={"locator": self.spl})
@@ -237,7 +247,7 @@ class ServiceLocator(Locator):
 
 
 class NgasServerSchema(Schema):
-    """ marshmallow schema to interpret "server" section of a "files" item """
+    """marshmallow schema to interpret "server" section of a "files" item"""
 
     server = fields.Str()
     location = fields.Str()
@@ -249,7 +259,7 @@ class NgasServerSchema(Schema):
 
 
 class NgasFileSchema(Schema):
-    """ One of the items in a location report's "files" list """
+    """One of the items in a location report's "files" list"""
 
     ngas_file_id = fields.Str()
     subdirectory = fields.Str(allow_none=True)
@@ -276,7 +286,7 @@ class NgasFileSchema(Schema):
 
 
 class LocationReportSchema(Schema):
-    """ Encapsulates an entire locations report """
+    """Encapsulates an entire locations report"""
 
     files = fields.List(fields.Nested(NgasFileSchema()))
     aggregate_size = fields.Integer()
diff --git a/apps/cli/executables/pexable/productfetcher/productfetcher/product_fetcher.py b/apps/cli/executables/pexable/productfetcher/productfetcher/product_fetcher.py
index 2e7071a49..6735daab9 100644
--- a/apps/cli/executables/pexable/productfetcher/productfetcher/product_fetcher.py
+++ b/apps/cli/executables/pexable/productfetcher/productfetcher/product_fetcher.py
@@ -1,12 +1,13 @@
 """ Command-line parsing conveniences """
 import argparse
+import itertools
 import sys
 from enum import Enum
 from pathlib import Path
 from typing import List
 
 # pylint: disable=E0401, E0402, R0913
-from .exceptions import *
+from .exceptions import FetchError
 from .fetcher_factory import ConfiguredFetcherFactory
 from .fetchers import ForceMode
 from .interfaces import FetchPlan, FetcherFactory, FileFetcher
@@ -23,7 +24,7 @@ class RetrievalMode(Enum):
 
 
 class CLIParam(Enum):
-    """ Codifies productfetcher's various command-line parameters """
+    """Codifies productfetcher's various command-line parameters"""
 
     SPL = "--product-locator"
     FILE = "--location-file"
@@ -41,18 +42,18 @@ providing the path to a product locator report."""
 
 
 class FetchContext:
-    """ Handles the various command-line options """
+    """Handles the various command-line options"""
 
     def __init__(
         self,
-        locator: Locator,
+        locators: List[Locator],
         streaming: bool,
         direct_copy: bool,
         dry_run=False,
         force=False,
         concurrency=1,
     ):
-        self.locator = locator
+        self.locators = locators
         self.streaming = streaming
         self.direct_copy = direct_copy
         self.dry_run = dry_run
@@ -66,7 +67,7 @@ class FetchContext:
         :return: a plan
         """
         # Get the location report
-        report = self.locator.locate()
+        reports = [locator.locate() for locator in self.locators]
 
         # Using the arguments we've parsed, generate the fetcher factory
         factory = ConfiguredFetcherFactory(
@@ -75,11 +76,12 @@ class FetchContext:
             direct_copy=self.direct_copy,
             dry_run=self.dry_run,
             force=self.force,
-            aggregate_size=report.aggregate_size,
+            aggregate_size=sum([report.aggregate_size for report in reports]),
         )
 
         # First we must prepare the fetchers for the location report
-        all_fetchers = report.fetchers(factory)
+        # this itertools.chain is a recipe from the itertools documentation for flattening a mapping
+        all_fetchers = list(itertools.chain.from_iterable(report.fetchers(factory) for report in reports))
 
         # now we have enough stuff to proceed with generating the plan
         return self.calculate_plan(factory, all_fetchers)
@@ -129,17 +131,22 @@ class FetchContext:
         :param args:
         :return:
         """
-        namespace = FetchContext.arg_parser().parse_args(args)
+        parser = FetchContext.arg_parser()
+        namespace = parser.parse_args(args)
 
-        # determine the locator
-        locator = (
-            FileLocator(Path(namespace.location_file))
-            if namespace.location_file
-            else ServiceLocator(namespace.product_locator)
-        )
+        # determine the locators
+        location_files: List[Locator] = [FileLocator(Path(file)) for file in namespace.location_files]
+        service_locators: List[Locator] = [ServiceLocator(spl) for spl in namespace.product_locators]
+
+        locators = location_files + service_locators
+
+        # we can no longer rely on the argument parser to enforce this constraint, so we have to do it
+        # ourselves here
+        if len(locators) == 0:
+            parser.error("You must supply at least one science product locator or location file")
 
         return FetchContext(
-            locator,
+            locators,
             namespace.streaming,
             namespace.direct_copy,
             namespace.dry_run,
@@ -181,19 +188,15 @@ class FetchContext:
             default=16,
         )
 
-        exclusive_group = parser.add_mutually_exclusive_group(required=True)
-
-        exclusive_group.add_argument(
-            CLIParam.SPL.value,
-            action="store",
-            dest="product_locator",
-            help="product locator to download",
+        parser.add_argument(
+            CLIParam.SPL.value, action="append", dest="product_locators", help="product locator to download", default=[]
         )
-        exclusive_group.add_argument(
+        parser.add_argument(
             CLIParam.FILE.value,
-            action="store",
-            dest="location_file",
+            action="append",
+            dest="location_files",
             help="product locator report (in JSON)",
+            default=[],
         )
 
         dry_or_force_options = parser.add_mutually_exclusive_group(required=False)
diff --git a/apps/cli/executables/pexable/productfetcher/tests/test_product_fetcher.py b/apps/cli/executables/pexable/productfetcher/tests/test_product_fetcher.py
index 24ab762e9..84bc4c560 100644
--- a/apps/cli/executables/pexable/productfetcher/tests/test_product_fetcher.py
+++ b/apps/cli/executables/pexable/productfetcher/tests/test_product_fetcher.py
@@ -39,9 +39,7 @@ class FakeFactory(FetcherFactory):
     def fetch_oracle_xml(self, file: LocatedFile) -> FileFetcher:
         return DryRunFakeFileFetcher(file)
 
-    def fetch_plan(
-        self, fetchers: List[Union[FileFetcher, FetchPlan]], concurrency: int = 1
-    ) -> FetchPlan:
+    def fetch_plan(self, fetchers: List[Union[FileFetcher, FetchPlan]], concurrency: int = 1) -> FetchPlan:
         return FakePlan(fetchers, concurrency)
 
 
@@ -66,6 +64,36 @@ def test_plan_generation(resource_path_root):
     assert sum(len(subplan.fetchers) for subplan in plan.fetchers) == len(report.files)
 
 
+def test_multiple_locator_fetching(capsys, resource_path_root):
+    """
+    Can we handle multiple locators?
+
+    :param capsys:
+    :return:
+    """
+    img = resource_path_root / "location_files" / "IMG.json"
+    cal = resource_path_root / "location_files" / "CALIBRATION.json"
+
+    # parse the command line with these two
+    fc = FetchContext.parse_commandline([CLIParam.FILE.value, str(img), CLIParam.FILE.value, str(cal)])
+    assert len(fc.locators) == 2
+
+    # let's make the plan and ensure we have all the stuff we expect from both
+    plan = fc.generate_plan()
+
+    # we'll need to open these files ourselves to figure out what fetchers we expect
+    for locator_file in [img, cal]:
+        for file in FileLocator(locator_file).locate().files:
+            seen = False
+
+            # there may be a more "test friendly" way of doing this, such as by asking the plan
+            # if it is fetching a certain file, but it seems like a lot of refactoring for this
+            # one test, so I'm going to leave it alone for now
+            for hostgroup_fetcher in plan.fetchers:
+                seen = seen or file in [fetcher.file for fetcher in hostgroup_fetcher.fetchers]
+            assert seen
+
+
 def test_argument_parsing(capsys):
     """
     Can we parse the command-line arguments passed in?
@@ -78,12 +106,6 @@ def test_argument_parsing(capsys):
         # must have an SPL or a file
         FetchContext.parse_commandline([])
 
-    with pytest.raises(SystemExit):
-        # cannot have an SPL and a file
-        FetchContext.parse_commandline(
-            [CLIParam.FILE.value, "foo", CLIParam.SPL.value, "uid://this/is/a/fakesy"]
-        )
-
     # check the dry run value
     fc = FetchContext.parse_commandline([CLIParam.DRY.value, CLIParam.FILE.value, "foo"])
     assert fc.dry_run
@@ -97,16 +119,14 @@ def test_argument_parsing(capsys):
     assert fc.force == ForceMode.NORMAL
 
     fc = FetchContext.parse_commandline([CLIParam.FILE.value, "foo"])
-    assert isinstance(fc.locator, FileLocator)
-    assert fc.locator.file == Path("foo")
+    assert isinstance(fc.locators[0], FileLocator)
+    assert fc.locators[0].file == Path("foo")
 
     fc = FetchContext.parse_commandline([CLIParam.SPL.value, "uid://this/is/a/fakesy"])
-    assert isinstance(fc.locator, ServiceLocator)
-    assert fc.locator.spl == "uid://this/is/a/fakesy"
+    assert isinstance(fc.locators[0], ServiceLocator)
+    assert fc.locators[0].spl == "uid://this/is/a/fakesy"
 
-    fc = FetchContext.parse_commandline(
-        [CLIParam.FILE.value, "foo", CLIParam.CONCURRENCY.value, "732"]
-    )
+    fc = FetchContext.parse_commandline([CLIParam.FILE.value, "foo", CLIParam.CONCURRENCY.value, "732"])
     assert fc.concurrency == 732
 
     capsys.readouterr()
-- 
GitLab