diff --git a/apps/cli/executables/datafetcher/datafetcher/datafetcher.py b/apps/cli/executables/datafetcher/datafetcher/datafetcher.py
index 97018fc4f216aa5f01bb853315f0a122e0609a1c..7cd47587b404eab95c28f7f4601dfa3262b664e9 100755
--- a/apps/cli/executables/datafetcher/datafetcher/datafetcher.py
+++ b/apps/cli/executables/datafetcher/datafetcher/datafetcher.py
@@ -3,16 +3,23 @@
 """ Module for the command line interface to data-fetcher. """
 import logging
-from argparse import Namespace
+import os
+import sys
+
 from pathlib import Path
 
 # pylint: disable=C0103, E0402, E0611, R0902, R0903, W0703, W1203
+from typing import List, Dict
 
-from datafetcher.errors import MissingSettingsException, NoProfileException, FileErrorException
+from datafetcher.errors import (
+    MissingSettingsException,
+    NoProfileException,
+    FileErrorException,
+)
 from datafetcher.project_fetcher import ParallelFetcher
 
 from .locations_report import LocationsReport
-from .utilities import get_arg_parser, get_capo_settings, path_is_accessible
+from .utilities import parse_args, get_capo_settings, path_is_accessible
 
 
 _APPLICATION_NAME = "datafetcher"
@@ -41,28 +48,33 @@ class DataFetcher:
     """
 
-    # TODO Some Fine Day: refactor to reduce cognitive complexity
-    def __init__(self, args: Namespace, df_capo_settings: dict):
+    def __init__(self, args_in: List[str], df_capo_settings: Dict):
         self.usage = self._build_usage_message()
-        if args is None or df_capo_settings is None:
+        if args_in is None or df_capo_settings is None:
             raise MissingSettingsException()
-        self.args = args
+        args = parse_args(args_in)
         self.settings = df_capo_settings
         try:
-            self.verbose = self.args.verbose
+            self.verbose = args.verbose
         except AttributeError:
             # we don't care; --verbose will be dropped later in WS-179
             pass
 
         # required arguments
-        self.profile = args.profile
-        if self.profile is None:
-            raise NoProfileException()
+        if hasattr(args, "profile"):
+            self.profile = args.profile
+        else:
+            if "CAPO_PROFILE" in os.environ.keys():
+                self.profile = os.environ["CAPO_PROFILE"]
+            else:
+                raise NoProfileException("Capo profile is required")
+
         self.output_dir = args.output_dir
         if self.output_dir is None:
             raise MissingSettingsException("output directory option is missing")
-        output_dir = Path(self.output_dir)
-        if not output_dir.is_dir() or not path_is_accessible(output_dir):
+
+        self.output_dir = Path(self.output_dir)
+        if not self.output_dir.is_dir() or not path_is_accessible(self.output_dir):
             raise FileErrorException(f"output location {self.output_dir} inaccessible or not found")
 
         if args.location_file is not None:
@@ -70,9 +82,11 @@ class DataFetcher:
                 raise MissingSettingsException(
                     "required: location file OR product locator -- not both"
                 )
-            self.location_file = args.location_file
+            self.location_file = Path(args.location_file)
+            self.product_locator = None
         elif args.product_locator is not None:
             self.product_locator = args.product_locator
+            self.location_file = None
         else:
             raise MissingSettingsException(
                 "you must specify either a location file or a product locator"
@@ -106,12 +120,16 @@ class DataFetcher:
         :return:
         """
 
-        fetcher = ParallelFetcher(self.args, self.settings, self.servers_report)
+        fetcher = ParallelFetcher(
+            self.output_dir, self.is_dry, self.force, self.settings, self.servers_report
+        )
         fetcher.run()
 
     def _get_locations(self):
         capo_settings = get_capo_settings(self.profile)
-        return LocationsReport(self.args, capo_settings)
+        if self.product_locator:
+            return LocationsReport(self.product_locator, capo_settings)
+        return LocationsReport(self.location_file, capo_settings)
 
 
 def main():
@@ -121,9 +139,16 @@ def main():
 
     logging.basicConfig(level=logging.DEBUG)
 
-    args = get_arg_parser().parse_args()
-
-    settings = get_capo_settings(args.profile)
+    args = sys.argv
+    profile = None
+    if "--profile" in args:
+        for i in range(0, len(args)):
+            if args[i] == "--profile":
+                profile = args[i + 1]
+                break
+    if not profile:
+        profile = os.environ["CAPO_PROFILE"]
+    settings = get_capo_settings(profile)
 
     DataFetcher(args, settings).run()
 
diff --git a/apps/cli/executables/datafetcher/datafetcher/file_retrievers.py b/apps/cli/executables/datafetcher/datafetcher/file_retrievers.py
index b61c162be9ebb638fe0b077826d9dd45554a42a9..ae6c50ade5bc046067dafdf88f2ef4bfaf8f6877 100644
--- a/apps/cli/executables/datafetcher/datafetcher/file_retrievers.py
+++ b/apps/cli/executables/datafetcher/datafetcher/file_retrievers.py
@@ -33,10 +33,10 @@ class NGASFileRetriever:
     and saving it to the requested location.
     """
 
-    def __init__(self, args: Namespace):
-        self.output_dir = args.output_dir
-        self.dry_run = args.dry_run
-        self.force_overwrite = args.force
+    def __init__(self, output_dir: Path, dry_run: bool, force: bool):
+        self.output_dir = output_dir
+        self.dry_run = dry_run
+        self.force_overwrite = force
         self.fetch_attempted = False
         self.num_tries = 0
 
diff --git a/apps/cli/executables/datafetcher/datafetcher/locations_report.py b/apps/cli/executables/datafetcher/datafetcher/locations_report.py
index e46655a49f5cfe9a87ce6fc34918ae94c972f63e..ace742f64ef92ab37687b572a8974a870c7e13e0 100644
--- a/apps/cli/executables/datafetcher/datafetcher/locations_report.py
+++ b/apps/cli/executables/datafetcher/datafetcher/locations_report.py
@@ -12,8 +12,8 @@ import copy
 import http
 import json
 import logging
-from argparse import Namespace
-from typing import Dict
+from pathlib import Path
+from typing import Dict, List
 
 import requests
 
@@ -25,26 +25,28 @@ from .errors import (
     NoLocatorException,
     MissingSettingsException,
 )
-from .utilities import Cluster, RetrievalMode, validate_file_spec
+from .utilities import Cluster, RetrievalMode, validate_file_spec, parse_args
 
 logger = logging.getLogger(__name__)
 
 
 class LocationsReport:
-    """ Builds a location report """
+    """
+    Builds a location report from a specified .json locations file, or grabs
+ """ + + def __init__(self, source, settings: Dict): + if isinstance(source, str): + self.product_locator = source + self.location_file = None + elif isinstance(source, Path): + self.product_locator = None + self.location_file = source - def __init__(self, args: Namespace, settings: Dict): - if args is None: - raise MissingSettingsException( - "arguments (locator and/or report file, destination) are required" - ) - self.product_locator = args.product_locator - self.location_file = args.location_file if not self.product_locator and not self.location_file: - raise NoLocatorException("either product locator or report file must be specified") - if self.product_locator and self.location_file: - raise NoLocatorException( - "either product locator -or- report file must be specified -- not both" + raise MissingSettingsException( + "either product locator or report file must be specified" ) if settings is None: @@ -54,12 +56,6 @@ class LocationsReport: if not self.settings["execution_site"]: raise MissingSettingsException("execution_site is required") - try: - self.verbose = args.verbose or False - except AttributeError: - # doesn't matter; verbose is going away soon - self.verbose = False - self._run() def _run(self): @@ -134,9 +130,9 @@ class LocationsReport: :return: location report (from file, in JSON) """ result = dict() - if self.location_file is not None: + if self.location_file: result = self._get_location_report_from_file() - if self.product_locator is not None: + elif self.product_locator is not None: result = self._get_location_report_from_service() return self._add_retrieve_method_field(result) diff --git a/apps/cli/executables/datafetcher/datafetcher/locations_report_refactor.py b/apps/cli/executables/datafetcher/datafetcher/locations_report_refactor.py index 2379321104e2d04dbcfd0b9630f3cdbcc55b0746..39e9ccbc8dd7a3b9fb33d665c3c7bf427be8ff46 100644 --- a/apps/cli/executables/datafetcher/datafetcher/locations_report_refactor.py +++ b/apps/cli/executables/datafetcher/datafetcher/locations_report_refactor.py @@ -15,6 +15,7 @@ import logging from json import JSONDecodeError from typing import Dict, List +import datafetcher.utilities import requests from pycapo import CapoConfig @@ -42,7 +43,7 @@ class LocationsReportRefactor: """ Builds a location report """ def __init__(self, args: List[str]): - namespace = get_arg_parser().parse_args(args) + namespace = datafetcher.utilities.parse_args(args) try: self.capo_config = CapoConfig(profile=namespace.profile) except Exception as exc: diff --git a/apps/cli/executables/datafetcher/datafetcher/project_fetcher.py b/apps/cli/executables/datafetcher/datafetcher/project_fetcher.py index 377ca6081d916774220b211b7a34a03812654ed8..497640f066971df349d4ea2fb46232365e83227f 100644 --- a/apps/cli/executables/datafetcher/datafetcher/project_fetcher.py +++ b/apps/cli/executables/datafetcher/datafetcher/project_fetcher.py @@ -21,21 +21,27 @@ logger = logging.getLogger(__name__) class BaseFetcher: """ This is a base class for fetchers. 
""" - def __init__(self, args: Namespace, df_capo_settings: dict, servers_report: dict): - self.args = args - self.output_dir = self.args.output_dir - self.force_overwrite = args.force - self.dry_run = args.dry_run + def __init__( + self, + output_dir: Path, + dry_run: bool, + force: bool, + df_capo_settings: dict, + servers_report: dict, + ): + self.output_dir = output_dir + self.force_overwrite = force + self.dry_run = dry_run self.servers_report = servers_report self.settings = df_capo_settings - self.ngas_retriever = NGASFileRetriever(self.args) + self.ngas_retriever = NGASFileRetriever(self.output_dir, self.dry_run, self.force_overwrite) self.retrieved = [] self.num_files_retrieved = 0 def retrieve_files(self, server, retrieve_method, file_specs): """ This is the part where we actually fetch the files. """ - retriever = NGASFileRetriever(self.args) + retriever = NGASFileRetriever(self.output_dir, self.dry_run, self.force_overwrite) num_files = len(file_specs) count = 0 @@ -58,8 +64,15 @@ class SerialFetcher(BaseFetcher): """ - def __init__(self, args: Namespace, df_capo_settings: Dict, servers_report: Dict): - super().__init__(args, df_capo_settings, servers_report) + def __init__( + self, + output_dir: Path, + dry_run: bool, + force: bool, + df_capo_settings: dict, + servers_report: dict, + ): + super().__init__(output_dir, dry_run, force, df_capo_settings, servers_report) def run(self): """ fetch 'em """ @@ -78,8 +91,15 @@ class SerialFetcher(BaseFetcher): class ParallelFetcher(BaseFetcher): """ Pull the files out in parallel; try to be clever about it. """ - def __init__(self, args: Namespace, df_capo_settings: dict, servers_report: dict): - super().__init__(args, df_capo_settings, servers_report) + def __init__( + self, + output_dir: Path, + dry_run: bool, + force: bool, + df_capo_settings: dict, + servers_report: dict, + ): + super().__init__(output_dir, dry_run, force, df_capo_settings, servers_report) self.num_files_expected = self._count_files_expected() self.bucketized_files = self._bucketize_files() @@ -140,7 +160,7 @@ class ParallelFetcher(BaseFetcher): def run(self): """ Fetch all the files for the product locator """ - if self.args.dry_run: + if self.dry_run: logger.debug("This is a dry run; files will not be fetched") with ThreadPoolExecutor() as executor: @@ -159,7 +179,7 @@ class ParallelFetcher(BaseFetcher): # (This error sometimes gets thrown after all files # actually -have- been retrieved. I blame the NGAS API. 
                 #  - JLG)
-                output_path = Path(self.args.output_dir)
+                output_path = Path(self.output_dir)
                 files = [
                     file
                     for file in output_path.rglob("*")
diff --git a/apps/cli/executables/datafetcher/datafetcher/utilities.py b/apps/cli/executables/datafetcher/datafetcher/utilities.py
index 8ca1f7d446f1b8576bf96521834d510656370965..3241e3b0a9281542db1af0ec70d45b2d718ca059 100644
--- a/apps/cli/executables/datafetcher/datafetcher/utilities.py
+++ b/apps/cli/executables/datafetcher/datafetcher/utilities.py
@@ -13,7 +13,8 @@ import logging
 import os
 import pathlib
 import time
-from typing import Callable
+from pathlib import Path
+from typing import Callable, List
 
 # pylint:disable=C0301, C0303, C0415, E0401, E0402, R0903, W0212, W1202, W0404, W0621, W1203
 
@@ -123,14 +124,6 @@
         default=cwd,
         help="output directory, default current dir",
     )
-    # optional_group.add_argument(
-    #     "--verbose",
-    #     action="store_true",
-    #     required=False,
-    #     dest="verbose",
-    #     default=False,
-    #     help="make a lot of noise",
-    # )
     optional_group.add_argument(
         "--force",
         action="store_true",
@@ -327,3 +320,63 @@ class RetrievalMode(Enum):
 
     STREAM = "stream"
     COPY = "copy"
+
+
+def parse_args(args: List[str]):
+    """
+    Wrapper around parser.parse_args() so that in the event of a bad or
+    missing argument an exception is thrown rather than a sys.exit() call.
+
+    :param args:
+    :return:
+    """
+    confirm_complete_args(args)
+    to_parse = [str(arg) for arg in args]
+    return get_arg_parser().parse_args(to_parse)
+
+
+def confirm_complete_args(args: List[str]):
+    """
+    Let's scrutinize the args -before- calling parse_args()
+    so we can differentiate among errors.
+
+    :param args:
+    :return:
+    """
+
+    # we must have a profile
+    if "--profile" not in args and "CAPO_PROFILE" not in os.environ.keys():
+        raise NoProfileException("Capo profile is required.")
+
+    # we must have an output dir....
+    if "--output-dir" not in args:
+        raise MissingSettingsException("output dir is required")
+
+    # ... and it must exist
+    args_iter = iter(args)
+    args_dict = dict(zip(args_iter, args_iter))
+
+    output_dir_arg = args_dict["--output-dir"]
+
+    output_dir = Path(output_dir_arg)
+    if not output_dir.exists():
+        # did they just forget to specify it?
+        if output_dir_arg.startswith("--"):
+            raise MissingSettingsException("--output-dir is required")
+        raise FileNotFoundError(f"output dir '{output_dir_arg}' not found")
+    if not path_is_accessible(output_dir):
+        raise FileNotFoundError(f"permission denied on {output_dir}")
+
+    # we must have EITHER a product locator OR a locations file....
+    if "--product-locator" not in args_dict.keys() and "--location-file" not in args_dict.keys():
+        raise MissingSettingsException("either product locator or locations file required")
+    if "--product-locator" in args_dict.keys() and "--location-file" in args_dict.keys():
+        raise MissingSettingsException("product locator OR locations file required -- not both")
+
+    if "--product-locator" in args:
+        product_locator = args_dict["--product-locator"]
+        assert product_locator
+    else:
+        locations_file = args_dict["--location-file"]
+        if not Path(locations_file).exists():
+            raise FileNotFoundError(f"{locations_file} not found")
diff --git a/apps/cli/executables/datafetcher/test/df_pytest_arg_utils.py b/apps/cli/executables/datafetcher/test/df_pytest_arg_utils.py
deleted file mode 100644
index 23d86909c80e5a48c986784df5404b43e8364351..0000000000000000000000000000000000000000
--- a/apps/cli/executables/datafetcher/test/df_pytest_arg_utils.py
+++ /dev/null
@@ -1,65 +0,0 @@
-""" Utilities for passing arguments to datafetcher in tests """
-import os
-from pathlib import Path
-from typing import List
-
-from datafetcher.errors import MissingSettingsException, NoProfileException
-from datafetcher.utilities import get_arg_parser, path_is_accessible
-
-
-def parse_args(args: List[str]):
-    """
-    Wrapper around parser.parse_args() so in the event of a foo
-    an exception is thrown rather than sys.exe
-
-    :param args:
-    :return:
-    """
-    confirm_complete_args(args)
-    get_arg_parser().parse_args(args)
-
-
-def confirm_complete_args(args: List[str]):
-    """
-    Let's scrutinize the args -before- calling parse_args()
-    so we can differentiate among errors.
-
-    :param args:
-    :return:
-    """
-
-    # we must have a profile
-    assert "--profile" in args or "CAPO_PROFILE" in os.environ
-    if "--profile" not in args and "CAPO_PROFILE" in os.environ:
-        raise NoProfileException("Capo profile is required.")
-
-    # we must have an output dir....
-    assert "--output-dir" in args
-
-    # ... and it must exist
-    args_iter = iter(args)
-    args_dict = dict(zip(args_iter, args_iter))
-
-    output_dir_arg = args_dict["--output-dir"]
-    output_dir = Path(output_dir_arg)
-    if not output_dir.exists():
-        # did they just forget to specify it?
-        if output_dir_arg.startswith("--"):
-            raise MissingSettingsException("--output-dir is required")
-        raise FileNotFoundError(f"output dir '{output_dir_arg}' not found")
-    if not path_is_accessible(output_dir):
-        raise FileNotFoundError(f"permission denied on {output_dir}")
-
-    # we must have EITHER a product locator OR a locations file....
- if "--product-locator" not in args_dict.keys() and "--location-file" not in args_dict.keys(): - raise MissingSettingsException("either product locator or locations file required") - if "--product-locator" in args_dict.keys() and "--location-file" in args_dict.keys(): - raise MissingSettingsException("product locator OR locations file required -- not both") - - if "--product-locator" in args: - product_locator = args_dict["--product-locator"] - assert product_locator - else: - locations_file = args_dict["--location-file"] - if not Path(locations_file).exists(): - raise FileNotFoundError(f"{locations_file} not found") diff --git a/apps/cli/executables/datafetcher/test/df_pytest_utils.py b/apps/cli/executables/datafetcher/test/df_pytest_utils.py index da83f5317cdde2b3ac8cce06cfed99c62cd8ac3b..fb496dcb5e8fb21faff8ea95bd2c2bfc24a33ab5 100644 --- a/apps/cli/executables/datafetcher/test/df_pytest_utils.py +++ b/apps/cli/executables/datafetcher/test/df_pytest_utils.py @@ -5,18 +5,17 @@ import json import os -import sys import tempfile from pathlib import Path from typing import List, Dict +# pylint: disable=C0115, C0116, C0200, C0411, E0401, E0402, R0902, R0903, R0914, R1721 +# pylint: disable=W0212, W0613, W0621, W0703, W1203 + import pytest from pycapo import CapoConfig -# pylint: disable=C0115, C0116, C0200, R0902, R0903, R0914, R1721, W0212, W0613, -# pylint: disable=W0621, W0703, W1203 - from .df_testdata_utils import ( get_locations_report, get_test_data_dir, @@ -27,10 +26,10 @@ from datafetcher.errors import MissingSettingsException, NoProfileException from datafetcher.locations_report import LocationsReport from datafetcher.utilities import ( REQUIRED_SETTINGS, - get_arg_parser, ExecutionSite, ProductLocatorLookup, RetrievalMode, + parse_args, ) TEST_PROFILE = "docker" @@ -137,30 +136,6 @@ def get_mini_locations_file(destination): return destination -def get_filenames_for_locator(product_locator: str, settings: dict): - """ - For a given product locators, return names of all the files - in its locations report's files report - - :param product_locator: - :param settings: - :return: - """ - - args = [ - "--product-locator", - product_locator, - "--profile", - TEST_PROFILE, - "--output-dir", - None, - ] - namespace = get_arg_parser().parse_args(args) - locations_report = LocationsReport(None, namespace, settings) - - return [file["relative_path"] for file in locations_report.files_report["files"]] - - def find_newest_fetch_log_file(target_dir: Path): """ Data-fetcher command line was executed, perhaps more than once; @@ -266,8 +241,7 @@ def settings(capo_settings): :param capo_settings: :return: """ - """ g - """ + db_settings = get_metadata_db_settings(TEST_PROFILE) test_data = _initialize_test_data(db_settings) yield Settings(capo_settings, db_settings, test_data) @@ -305,9 +279,10 @@ def launch_datafetcher(args: list, df_capo_settings: dict) -> int: if args is None or len(args) == 0: raise MissingSettingsException - namespace = evaluate_args_and_capo(args, df_capo_settings) - datafetcher = DataFetcher(namespace, df_capo_settings) + # namespace = evaluate_args_and_capo(args, df_capo_settings) + datafetcher = DataFetcher(args, df_capo_settings) datafetcher.run() + return datafetcher def evaluate_args_and_capo(args: List[str], capo_settings: Dict[str, str]): @@ -320,10 +295,9 @@ def evaluate_args_and_capo(args: List[str], capo_settings: Dict[str, str]): profile = capo_settings["profile"] if profile is None: raise NoProfileException - else: - args["profile"] = profile + args["profile"] = 
profile - return get_arg_parser().parse_args(args) + return parse_args(args) def get_profile_from_args(args: list) -> str: diff --git a/apps/cli/executables/datafetcher/test/mock_data_fetcher.py b/apps/cli/executables/datafetcher/test/mock_data_fetcher.py deleted file mode 100644 index 488f874f7ef033ee18e8827b6fd642c9fc7223e1..0000000000000000000000000000000000000000 --- a/apps/cli/executables/datafetcher/test/mock_data_fetcher.py +++ /dev/null @@ -1,50 +0,0 @@ -""" for testing the attempt to copy rather than stream files """ -import logging -from argparse import Namespace - -# pylint: disable=C0103, C0301, E0401, E0402, R0201, R0902, R0903, W0621 - -from datafetcher.locations_report import LocationsReport -from datafetcher.project_fetcher import ParallelFetcher -from datafetcher.utilities import get_capo_settings, ExecutionSite -from datafetcher.errors import MissingSettingsException - -from .df_pytest_utils import TEST_PROFILE - -logger = logging.getLogger(__name__) - - -class MockProdDataFetcher: - """ Creates and launches a datafetcher using the dsoc-prod profile """ - - def __init__(self, args: Namespace, settings: dict): - if args is None or settings is None: - raise MissingSettingsException() - self.args = args - self.settings = settings - - self.output_dir = args.output_dir - self.profile = args.profile - - self.locations_report = self._get_locations() - self.servers_report = self.locations_report.servers_report - - def _get_locations(self): - """ - Create a locations report with DSOC as exec site - to force copy rather than stream - :return: - """ - - capo_settings = get_capo_settings(TEST_PROFILE) - capo_settings["execution_site"] = ExecutionSite.DSOC.value - - return LocationsReport(self.args, capo_settings) - - def run(self): - """ - identical to DataFetcher.run() - :return: - """ - - return ParallelFetcher(self.args, self.settings, self.servers_report).run() diff --git a/apps/cli/executables/datafetcher/test/test_df_function.py b/apps/cli/executables/datafetcher/test/test_df_function.py index 5a7035486d3891702bddeaed6618831041d15429..d39413ba8039f1c18d5e29aa6f1d0470ef0d86ed 100644 --- a/apps/cli/executables/datafetcher/test/test_df_function.py +++ b/apps/cli/executables/datafetcher/test/test_df_function.py @@ -2,7 +2,10 @@ from pathlib import Path +# pylint: disable=C0115, C0116, C0200, C0415, R0801, R0902, R0903, R0914, R1721 +# pylint: disable=W0106, W0212, W0611, W0613, W0621, W0703, W1203 # pylint: disable=E0401, E0402, W0511 + import pytest from datafetcher.errors import ( @@ -11,17 +14,8 @@ from datafetcher.errors import ( FileErrorException, ) -# pylint: disable=C0115, C0116, C0200, C0415, R0801, R0902, R0903, R0914, R1721, -# pylint: disable=W0212, W0611, W0613, W0621, W0703, W1203 - from datafetcher.datafetcher import DataFetcher -from datafetcher.utilities import ( - get_arg_parser, - ProductLocatorLookup, - RetrievalMode, - Location, - Cluster, -) +from datafetcher.utilities import ProductLocatorLookup, parse_args from .test_df_return_status import try_to_launch_df @@ -49,16 +43,6 @@ _EB_EXTERNAL_NAME = "sysstartS.58955.83384832176" print(f">>> RUNNING ALL TESTS: {RUN_ALL}") -def test_settings_setup(settings): - """ Ensure that the test settings we're using make sense """ - assert capo_settings is not None - assert isinstance(settings.capo_settings, dict) - assert settings.db_settings is not None - assert isinstance(settings.db_settings, dict) - assert settings.test_data is not None - assert isinstance(settings.test_data, dict) - - @pytest.mark.skipif(not RUN_ALL, 
reason="debug") def test_nothing_retrieved_if_dry_locator(make_tempdir, settings): """ Simulates dry run with product locator """ @@ -156,7 +140,7 @@ def test_no_overwrite_without_force(make_tempdir, capo_settings): args = [ "--location-file", - str(location_file), + location_file, "--profile", TEST_PROFILE, "--output-dir", @@ -210,7 +194,7 @@ def test_copy_attempt_throws_fetch_error(make_tempdir, settings): "--profile", prod_profile, ] - namespace = get_arg_parser().parse_args(args) + namespace = parse_args(args) datafetcher = DataFetcher(namespace, settings.capo_settings) servers_report = datafetcher.servers_report @@ -264,156 +248,6 @@ def test_throws_file_err_for_existing_destination(settings): launch_datafetcher(args, settings.capo_settings) -@pytest.mark.skip("takes too long; re-enable with WS-179-1") -def test_gets_vlbas_from_report_file(make_tempdir, capo_settings): - - location_file = get_locations_file("VLBA_EB") - args = [ - "--profile", - TEST_PROFILE, - "--output-dir", - str(make_tempdir), - "--location-file", - str(location_file), - ] - datafetcher = DataFetcher(get_arg_parser().parse_args(args), capo_settings) - servers_report = datafetcher.servers_report - assert len(servers_report) == 1 - - datafetcher.run() - - dest_dir = Path(make_tempdir) - file_info_dict = dict() - for server in servers_report.items(): - assert server[0] == "nmngas03.aoc.nrao.edu:7777" - values = server[1] - assert values["location"] == Location.DSOC.value - assert values["cluster"] == Cluster.DSOC.value - assert values["retrieve_method"] == RetrievalMode.STREAM - file_values = values["files"] - assert len(file_values) == 16 - for filename in file_values: - write_fake_file(dest_dir, filename) - file_info_dict[filename["ngas_file_id"]] = filename - - datafetcher = DataFetcher(get_arg_parser().parse_args(args), settings.capo_settings) - datafetcher.run() - - for filename in file_info_dict: - path = Path(dest_dir, filename) - assert path.is_file() - contents = path.read_text().strip() - assert int(contents) == file_info_dict[filename]["size"] - - -@pytest.mark.skip("takes too long; re-enable with WS-179-1") -def test_gets_large_vla_ebs_from_report_file(make_tempdir, capo_settings): - location_file = get_locations_file("VLA_SMALL_EB") - args = [ - "--profile", - TEST_PROFILE, - "--output-dir", - str(make_tempdir), - "--location-file", - str(location_file), - ] - datafetcher = DataFetcher(get_arg_parser().parse_args(args), capo_settings) - servers_report = datafetcher.servers_report - assert len(servers_report) == 2 - datafetcher.run() - - server_file_count = { - "nmngas03.aoc.nrao.edu:7777": 0, - "nmngas04.aoc.nrao.edu:7777": 0, - } - dest_dir = Path(make_tempdir) - file_list = list() - for server in servers_report.items(): - server_url = server[0] - assert server_url in server_file_count.keys() - values = server[1] - assert values["location"] == Location.DSOC.value - assert values["cluster"] == Cluster.DSOC.value - assert values["retrieve_method"] == RetrievalMode.STREAM - file_values = values["files"] - server_file_count[server_url] += len(file_values) - - for filename in file_values: - write_fake_file(dest_dir, filename) - file_list.append(values) - - assert server_file_count["nmngas03.aoc.nrao.edu:7777"] == 3 - assert server_file_count["nmngas04.aoc.nrao.edu:7777"] == 41 - - datafetcher = DataFetcher(get_arg_parser().parse_args(args), capo_settings) - datafetcher.run() - - found_count = 0 - for file_info in file_list: - for file in file_info["files"]: - filename = file["ngas_file_id"] - path = 
Path(dest_dir, filename) - assert path.is_file() - contents = path.read_text().strip() - assert int(contents) == file["size"] - found_count += 1 - - assert found_count == len(file_list) - - -@pytest.mark.skip("takes too long; re-enable with WS-179-1") -def test_gets_images_from_report_file(make_tempdir, capo_settings): - location_file = get_locations_file("IMG") - args = [ - "--profile", - TEST_PROFILE, - "--output-dir", - str(make_tempdir), - "--location-file", - str(location_file), - ] - datafetcher = DataFetcher(get_arg_parser().parse_args(args), capo_settings) - servers_report = datafetcher.servers_report - assert len(servers_report) == 2 - - server_file_count = { - "nmngas01.aoc.nrao.edu:7777": 0, - "nmngas02.aoc.nrao.edu:7777": 0, - } - dest_dir = Path(make_tempdir) - file_list = list() - for server in servers_report.items(): - server_url = server[0] - assert server_url in server_file_count.keys() - values = server[1] - assert values["location"] == Location.DSOC.value - assert values["cluster"] == Cluster.DSOC.value - assert values["retrieve_method"] == RetrievalMode.STREAM - file_values = values["files"] - server_file_count[server_url] += len(file_values) - - for filename in file_values: - write_fake_file(dest_dir, filename) - file_list.append(values) - - for server_url, count in server_file_count.items(): - assert count == 1 - - datafetcher.run() - - found_count = 0 - for file_info in file_list: - for file in file_info["files"]: - filename = file["ngas_file_id"] - path = Path(dest_dir, filename) - assert path.is_file() - contents = path.read_text().strip() - assert int(contents) == file["size"] - found_count += 1 - - assert found_count == len(file_list) - - @pytest.mark.skipif(not RUN_ALL, reason="debug") def test_gets_calibration_from_report_file(make_tempdir): location_file = get_locations_file("CALIBRATION") @@ -421,11 +255,11 @@ def test_gets_calibration_from_report_file(make_tempdir): "--profile", TEST_PROFILE, "--output-dir", - str(make_tempdir), + make_tempdir, "--location-file", - str(location_file), + location_file, ] - datafetcher = DataFetcher(get_arg_parser().parse_args(args), capo_settings) + datafetcher = DataFetcher(args, capo_settings) servers_report = datafetcher.servers_report assert len(servers_report) == 1 @@ -454,12 +288,11 @@ def test_gets_calibration_from_locator(make_tempdir, settings): "--product-locator", product_locator, "--output-dir", - str(make_tempdir), + make_tempdir, "--profile", TEST_PROFILE, ] - namespace = get_arg_parser().parse_args(args) - fetch = DataFetcher(namespace, settings.capo_settings) + fetch = DataFetcher(args, settings.capo_settings) report_files = fetch.locations_report.files_report["files"] assert len(report_files) == 1 @@ -477,40 +310,6 @@ def test_gets_calibration_from_locator(make_tempdir, settings): assert int(contents) == file_spec["size"] -@pytest.mark.skipif(not RUN_ALL, reason="debug") -def test_gets_gbt_data_from_locator(make_tempdir, settings): - """ Can we cope with GBT data? 
""" - - external_name = "AGBT17B_044_553492" - product_locator = ProductLocatorLookup(settings.db_settings).look_up_locator_for_ext_name( - external_name - ) - args = [ - "--product-locator", - product_locator, - "--output-dir", - str(make_tempdir), - "--profile", - TEST_PROFILE, - ] - namespace = get_arg_parser().parse_args(args) - fetch = DataFetcher(namespace, settings.capo_settings) - report_files = fetch.locations_report.files_report["files"] - assert len(report_files) == 1 - - file_spec = report_files[0] - relative_path = file_spec["relative_path"] - assert relative_path == "AGBT17B_044_01.tar" - destination = Path(make_tempdir) / relative_path - destination.mkdir() - write_fake_file(destination, file_spec) - - fake_file = Path(destination, file_spec["ngas_file_id"]) - assert fake_file.is_file() - contents = fake_file.read_text().strip() - assert int(contents) == file_spec["size"] - - def write_fake_file(destination: Path, file_info: dict): filename = file_info["ngas_file_id"] path = Path(destination, filename) diff --git a/apps/cli/executables/datafetcher/test/test_df_return_status.py b/apps/cli/executables/datafetcher/test/test_df_return_status.py index 3009ab2d2620ee69146a848b3238a72239c1bef0..f2cf67a8055caa61a178a6f44270c358cefdd899 100644 --- a/apps/cli/executables/datafetcher/test/test_df_return_status.py +++ b/apps/cli/executables/datafetcher/test/test_df_return_status.py @@ -23,14 +23,12 @@ from datafetcher.errors import ( ) from datafetcher.datafetcher import DataFetcher from datafetcher.utilities import ( - get_arg_parser, ProductLocatorLookup, path_is_accessible, ) # N.B. IJ doesn't recognize imported fixtures as being in use. # don't let these imports (make_tempdir, settings) get disappeared. -from .df_pytest_arg_utils import parse_args from .df_pytest_utils import ( TEST_PROFILE, @@ -40,12 +38,8 @@ from .df_pytest_utils import ( settings, launch_datafetcher, RUN_ALL, - confirm_retrieve_mode_copy, - evaluate_args_and_capo, ) -from .mock_data_fetcher import MockProdDataFetcher - def test_launch_df(make_tempdir, settings): """ @@ -59,7 +53,7 @@ def test_launch_df(make_tempdir, settings): "--product-locator", settings.test_data["product_locator"], "--output-dir", - str(make_tempdir), + make_tempdir, "--profile", TEST_PROFILE, "--dry-run", @@ -109,7 +103,7 @@ def test_omitted_profile_throws_no_profile_exc(make_tempdir, settings): "--product-locator", settings.test_data["product_locator"], "--output-dir", - str(make_tempdir), + make_tempdir, ] try: @@ -154,7 +148,7 @@ def test_invalid_capo_profile_raises_no_profile_exc(make_tempdir, settings): "--product-locator", locator, "--output-dir", - str(make_tempdir), + make_tempdir, ] with pytest.raises(MissingSettingsException): try_to_launch_df(settings.capo_settings, args) @@ -212,9 +206,9 @@ def test_locator_plus_file_throws_missing_setting(make_tempdir, capo_settings): "--product-locator", "a_locator", "--location-file", - "location.json", + Path("location.json"), "--output-dir", - str(make_tempdir), + make_tempdir, "--profile", TEST_PROFILE, ] @@ -239,7 +233,7 @@ def test_locator_service_timeout_returns_expected_code(monkeypatch, settings, ma "--product-locator", settings.test_data["product_locator"], "--output-dir", - str(make_tempdir), + make_tempdir, "--profile", TEST_PROFILE, ] @@ -263,7 +257,7 @@ def test_too_many_service_redirects_returns_expected_code(monkeypatch, settings, "--product-locator", settings.test_data["product_locator"], "--output-dir", - str(make_tempdir), + make_tempdir, "--profile", TEST_PROFILE, ] @@ 
-287,7 +281,7 @@ def test_catastrophic_service_error_returns_expected_code(monkeypatch, settings, "--product-locator", settings.test_data["product_locator"], "--output-dir", - str(make_tempdir), + make_tempdir, "--profile", TEST_PROFILE, ] @@ -297,46 +291,13 @@ def test_catastrophic_service_error_returns_expected_code(monkeypatch, settings, launch_datafetcher(args, settings.capo_settings) -@pytest.mark.skipif(not RUN_ALL, reason="debug") -def test_copy_attempt_raises_ngas_fetch_error(make_tempdir, settings): - args = [ - "--product-locator", - settings.test_data["product_locator"], - "--output-dir", - str(make_tempdir), - "--profile", - TEST_PROFILE, - ] - - namespace = get_arg_parser().parse_args(args) - fetcher = MockProdDataFetcher(namespace, settings.capo_settings) - - servers_report = fetcher.servers_report - confirm_retrieve_mode_copy(servers_report) - - # let's try just one file so we're not sitting here all day - a_server = None - for server in servers_report: - entry = servers_report[server] - servers_report = {server: entry} - fetcher.servers_report = servers_report - assert fetcher.servers_report[server] is not None - a_server = server - break - files = fetcher.servers_report[a_server]["files"] - fetcher.servers_report[a_server]["files"] = [files[0]] - - with pytest.raises(NGASFetchError): - fetcher.run() - - @pytest.mark.skipif(not RUN_ALL, reason="debug") def test_product_locator_not_found_returns_expected_code(make_tempdir, settings): args = [ "--product-locator", "not_a_locator", "--output-dir", - str(make_tempdir), + make_tempdir, "--profile", TEST_PROFILE, ] @@ -348,9 +309,9 @@ def test_product_locator_not_found_returns_expected_code(make_tempdir, settings) def test_unable_to_open_location_file_throws_file_not_found(make_tempdir, capo_settings): args = [ "--location-file", - "location.json", + Path("location.json"), "--output-dir", - str(make_tempdir), + make_tempdir, "--profile", TEST_PROFILE, ] @@ -375,7 +336,7 @@ def test_error_fetching_file_from_ngas_returns_expected_code(monkeypatch, settin "--product-locator", settings.test_data["product_locator"], "--output-dir", - str(make_tempdir), + make_tempdir, "--profile", TEST_PROFILE, ] @@ -401,7 +362,7 @@ def test_unexpected_size_returns_expected_code(monkeypatch, settings, make_tempd "--product-locator", settings.test_data["product_locator"], "--output-dir", - str(make_tempdir), + make_tempdir, "--profile", TEST_PROFILE, ] @@ -411,7 +372,5 @@ def test_unexpected_size_returns_expected_code(monkeypatch, settings, make_tempd def try_to_launch_df(capo_settings, args: List[str]): - parse_args(args) - namespace = evaluate_args_and_capo(args, capo_settings) - fetcher = DataFetcher(namespace, capo_settings) + fetcher = DataFetcher(args, capo_settings) return fetcher.run()