From 71b97f8f62b11bb444ff2b24e1d314c9c06bc814 Mon Sep 17 00:00:00 2001
From: Janet Goldstein <jgoldste@nrao.edu>
Date: Wed, 7 Apr 2021 21:23:20 +0000
Subject: [PATCH] WS-179 subtask #3: refactor logging

---
 .../datafetcher/datafetcher/datafetcher.py    | 27 +++----
 .../datafetcher/file_retrievers.py            | 42 +++++------
 .../datafetcher/locations_report.py           | 17 +++--
 .../datafetcher/locations_report_refactor.py  | 17 ++---
 .../datafetcher/project_fetcher.py            | 49 ++++++-------
 .../datafetcher/datafetcher/utilities.py      | 73 ++-----------------
 .../datafetcher/test/fake_archive_service.py  |  2 +-
 .../datafetcher/test/mock_data_fetcher.py     | 12 +--
 .../datafetcher/test/test_df_function.py      | 50 +++++--------
 ...turn_codes.py => test_df_return_status.py} |  0
 10 files changed, 103 insertions(+), 186 deletions(-)
 rename apps/cli/executables/datafetcher/test/{test_df_return_codes.py => test_df_return_status.py} (100%)

diff --git a/apps/cli/executables/datafetcher/datafetcher/datafetcher.py b/apps/cli/executables/datafetcher/datafetcher/datafetcher.py
index db01c2c64..962e54724 100755
--- a/apps/cli/executables/datafetcher/datafetcher/datafetcher.py
+++ b/apps/cli/executables/datafetcher/datafetcher/datafetcher.py
@@ -2,21 +2,21 @@
 # -*- coding: utf-8 -*-

 """ Module for the command line interface to data-fetcher. """
-
+import logging
 from argparse import Namespace
 from pathlib import Path

+# pylint: disable=C0103, E0402, E0611, R0902, R0903, W0703, W1203
+
 from datafetcher.errors import MissingSettingsException, NoProfileException
 from datafetcher.project_fetcher import ParallelFetcher

 from .locations_report import LocationsReport
-from .utilities import get_arg_parser, get_capo_settings, FlexLogger, path_is_accessible
-
-# pylint: disable=C0103, W1203, R0902, R0903
+from .utilities import get_arg_parser, get_capo_settings, path_is_accessible

 _APPLICATION_NAME = "datafetcher"

-# pylint: disable=C0103, W0703
+logger = logging.getLogger(__name__)


 class DataFetcher:
@@ -50,7 +50,7 @@ class DataFetcher:
         self.settings = df_capo_settings
         try:
             self.verbose = self.args.verbose
-        except Exception:
+        except AttributeError:
             # we don't care; --verbose will be dropped later in WS-179
             pass

@@ -67,13 +67,6 @@ class DataFetcher:
                 f"output location {self.output_dir} inaccessible or not found"
             )

-        try:
-            self._LOG = FlexLogger(self.__class__.__name__, self.output_dir, self.verbose)
-        except AttributeError as a_err:
-            # verbose is going away, so let's not require it
-            if "no attribute 'verbose'" in str(a_err):
-                self._LOG = FlexLogger(self.__class__.__name__, self.output_dir)
-
         if args.location_file is not None:
             if args.product_locator is not None:
                 raise MissingSettingsException(
@@ -93,7 +86,7 @@ class DataFetcher:

         try:
             self.verbose = args.verbose or False
-        except AttributeError as a_err:
+        except AttributeError:
             # verbose is going away soon
             self.verbose = False

@@ -115,12 +108,12 @@ class DataFetcher:

         :return:
         """
-        fetcher = ParallelFetcher(self.args, self.settings, self._LOG, self.servers_report)
+        fetcher = ParallelFetcher(self.args, self.settings, self.servers_report)
         fetcher.run()

     def _get_locations(self):
         capo_settings = get_capo_settings(self.profile)
-        return LocationsReport(self._LOG, self.args, capo_settings)
+        return LocationsReport(self.args, capo_settings)


 def main():
@@ -128,6 +121,8 @@ def main():
     from the command line.

     """
+    logging.basicConfig(level=logging.DEBUG)
+
     args = get_arg_parser().parse_args()
     settings = get_capo_settings(args.profile)

diff --git a/apps/cli/executables/datafetcher/datafetcher/file_retrievers.py b/apps/cli/executables/datafetcher/datafetcher/file_retrievers.py
index fee92780a..a509e19ae 100644
--- a/apps/cli/executables/datafetcher/datafetcher/file_retrievers.py
+++ b/apps/cli/executables/datafetcher/datafetcher/file_retrievers.py
@@ -4,10 +4,13 @@
 Implementations of assorted file retrievers.
 """

 import http
+import logging
 import os
 from argparse import Namespace
 from pathlib import Path

+# pylint: disable=C0103, E0401, E0402, R0903, R0914, W1202, W1203
+
 import requests
 from bs4 import BeautifulSoup

@@ -17,21 +20,21 @@ from .errors import (
     FileErrorException,
     MissingSettingsException,
 )
-from .utilities import RetrievalMode, Retryer, MAX_TRIES, SLEEP_INTERVAL_SECONDS, FlexLogger
+from .utilities import RetrievalMode, Retryer, MAX_TRIES, SLEEP_INTERVAL_SECONDS

 _DIRECT_COPY_PLUGIN = "ngamsDirectCopyDppi"
 _STREAMING_CHUNK_SIZE = 8192

-# pylint: disable=C0103, R0903, R0914
+logger = logging.getLogger(__name__)
+
+
 class NGASFileRetriever:
     """Responsible for getting a file out of NGAS
     and saving it to the requested location.
     """

-    def __init__(self, args: Namespace, logger: FlexLogger):
+    def __init__(self, args: Namespace):
         self.output_dir = args.output_dir
-        self._LOG = logger
-        self.logfile = self._LOG.logfile
         self.dry_run = args.dry_run
         self.force_overwrite = args.force
         self.fetch_attempted = False
@@ -56,7 +59,7 @@ class NGASFileRetriever:
         func = (
             self._copying_fetch if retrieve_method == RetrievalMode.COPY else self._streaming_fetch
         )
-        retryer = Retryer(func, MAX_TRIES, SLEEP_INTERVAL_SECONDS, self._LOG)
+        retryer = Retryer(func, MAX_TRIES, SLEEP_INTERVAL_SECONDS)
         try:
             retryer.retry(download_url, destination, file_spec)
         finally:
@@ -113,7 +116,7 @@ class NGASFileRetriever:
         :param file_spec: the file specification of that file
         """
         if not self.dry_run:
-            self._LOG.debug(f"verifying fetch of {destination}")
+            logger.debug(f"verifying fetch of {destination}")
             if not destination.exists():
                 raise NGASServiceErrorException(f"file not delivered to {destination}")
             if file_spec["size"] != os.path.getsize(destination):
@@ -122,9 +125,9 @@ class NGASFileRetriever:
                     f"expected {file_spec['size']}, "
                     f"got {os.path.getsize(destination)}"
                 )
-            self._LOG.debug("\tlooks good; sizes match")
+            logger.debug("\tlooks good; sizes match")
         else:
-            self._LOG.debug("(This was a dry run; no files should have been fetched)")
+            logger.debug("(This was a dry run; no files should have been fetched)")

     def _copying_fetch(self, args: list):
         """Pull a file out of NGAS via the direct copy plugin.
@@ -138,7 +141,7 @@ class NGASFileRetriever:
             "processingPars": "outfile=" + str(destination),
             "file_version": file_spec["version"],
         }
-        self._LOG.debug(
+        logger.debug(
             "attempting copying download:\nurl: {}\ndestination: {}".format(
                 download_url, destination
             )
@@ -148,7 +151,7 @@ class NGASFileRetriever:
             response = session.get(download_url, params=params)

             if response.status_code != http.HTTPStatus.OK:
-                self._LOG.error("NGAS does not like this request:\n{}".format(response.url))
+                logger.error("NGAS does not like this request:\n{}".format(response.url))
                 soup = BeautifulSoup(response.text, features="lxml")
                 ngams_status = soup.NgamsStatus.Status
                 message = ngams_status.get("Message")
@@ -163,7 +166,7 @@ class NGASFileRetriever:
                 )

         else:
-            self._LOG.debug(
+            logger.debug(
                 f"if this were not a dry run, we would have been copying "
                 f'{file_spec["relative_path"]}'
             )
@@ -180,7 +183,7 @@ class NGASFileRetriever:
         download_url, destination, file_spec = args
         params = {"file_id": file_spec["ngas_file_id"], "file_version": file_spec["version"]}

-        self._LOG.debug(
+        logger.debug(
             "attempting streaming download:\nurl: {}\ndestination: {}".format(
                 download_url, destination
             )
@@ -191,15 +194,12 @@ class NGASFileRetriever:
             try:
                 response = session.get(download_url, params=params, stream=True)
                 with open(destination, "wb") as file_to_write:
-                    # a word to the wise: DO NOT try to assign chunk size
-                    # to a variable or to make it a constant. This will
-                    # result in an incomplete download.
                     for chunk in response.iter_content(chunk_size=_STREAMING_CHUNK_SIZE):
                         file_to_write.write(chunk)
                 expected_size = file_spec["size"]
                 actual_size = os.path.getsize(destination)
                 if actual_size == 0:
-                    raise FileErrorException(f"{Path(destination).name} " f"was not retrieved")
+                    raise FileErrorException(f"{Path(destination).name} was not retrieved")
                 if actual_size != expected_size:
                     raise SizeMismatchException(
                         f"expected {Path(destination).name} "
@@ -212,10 +212,10 @@ class NGASFileRetriever:
                     f"problem connecting with {download_url}"
                 ) from c_err
             except AttributeError as a_err:
-                self._LOG.warning(f"possible problem streaming: {a_err}")
+                logger.warning(f"possible problem streaming: {a_err}")

             if response.status_code != http.HTTPStatus.OK:
-                self._LOG.error("NGAS does not like this request:\n{}".format(response.url))
+                logger.error("NGAS does not like this request:\n{}".format(response.url))
                 soup = BeautifulSoup(response.text, "lxml-xml")
                 ngams_status = soup.NgamsStatus.Status
                 message = ngams_status.get("Message")
@@ -229,10 +229,10 @@ class NGASFileRetriever:
                     }
                 )

-            self._LOG.debug("retrieved {} from {}".format(destination, response.url))
+            logger.debug(f"retrieved {destination} from {response.url}")

         else:
-            self._LOG.debug(
+            logger.debug(
                 f"if this were not a dry run, we would have been streaming "
                 f'{file_spec["relative_path"]}'
             )
diff --git a/apps/cli/executables/datafetcher/datafetcher/locations_report.py b/apps/cli/executables/datafetcher/datafetcher/locations_report.py
index aa6c68562..f8e751ee6 100644
--- a/apps/cli/executables/datafetcher/datafetcher/locations_report.py
+++ b/apps/cli/executables/datafetcher/datafetcher/locations_report.py
@@ -6,11 +6,12 @@

 """

-# pylint: disable=C0103, C0301, E0401, E0402, R0902, R0903, R0914, W0703
+# pylint: disable=C0103, C0301, E0401, E0402, R0902, R0903, R0914, W0703, W1202, W1203

 import copy
 import http
 import json
+import logging
 from argparse import Namespace
 from typing import Dict

@@ -24,22 +25,22 @@ from .errors import (
     NoLocatorException,
     MissingSettingsException,
 )
-from .utilities import Cluster, RetrievalMode, validate_file_spec, FlexLogger
+from .utilities import Cluster, RetrievalMode, validate_file_spec
+
+logger = logging.getLogger(__name__)


 class LocationsReport:
     """ Builds a location report """

-    def __init__(self, logger: FlexLogger, args: Namespace, settings: Dict):
-        self._LOG = logger
-        self.logfile = logger.logfile
+    def __init__(self, args: Namespace, settings: Dict):
         try:
             self.verbose = args.verbose or False
         except AttributeError:
             # doesn't matter; verbose is going away soon
             self.verbose = False

-        logger.set_verbose(self.verbose)
+
         self._capture_and_validate_input(args, settings)
         self._run()

@@ -144,7 +145,7 @@ class LocationsReport:
             to pull in the location report.

         """
-        self._LOG.debug(f'fetching files from report "{self.location_file}"')
+        logger.debug(f'fetching files from report "{self.location_file}"')
         # try:
         try:
             with open(self.location_file) as to_read:
@@ -160,7 +161,7 @@ class LocationsReport:

         """
         url = self.settings["locator_service_url"]
-        self._LOG.debug("fetching report from {} for {}".format(url, self.product_locator))
+        logger.debug("fetching report from {} for {}".format(url, self.product_locator))

         # this is needed to prevent SSL errors when tests are run
         # inside a Docker container
diff --git a/apps/cli/executables/datafetcher/datafetcher/locations_report_refactor.py b/apps/cli/executables/datafetcher/datafetcher/locations_report_refactor.py
index fa3f7b7ec..237932110 100644
--- a/apps/cli/executables/datafetcher/datafetcher/locations_report_refactor.py
+++ b/apps/cli/executables/datafetcher/datafetcher/locations_report_refactor.py
@@ -30,8 +30,7 @@ from .utilities import Cluster, RetrievalMode, validate_file_spec, get_arg_parse

 # pylint: disable=C0103, R0902, R0903, R0914, W0703, W1203

-_LOG = logging.getLogger(__name__)
-logging.basicConfig(level=logging.DEBUG)
+logger = logging.getLogger(__name__)

 REQUIRED_SETTINGS = {
     "EDU.NRAO.ARCHIVE.DATAFETCHER.DATAFETCHERSETTINGS.LOCATORSERVICEURLPREFIX": "locator_service_url",
@@ -79,7 +78,7 @@ class LocationsReportRefactor:

             return self._add_retrieve_method_field(result)
         except JSONDecodeError as js_err:
-            _LOG.error(f"Unable to parse {self.location_file}")
+            logger.error(f"Unable to parse {self.location_file}")
             raise ReportFileParseException from js_err

     def _get_location_report_from_file(self) -> Dict[str, str]:
@@ -88,19 +87,19 @@

         :return:
         """
-        _LOG.debug(f'fetching files from report "{self.location_file}"')
+        logger.debug(f'fetching files from report "{self.location_file}"')
         try:
             with open(self.location_file) as to_read:
                 result = json.load(to_read)
                 return result
         except FileNotFoundError as err:
-            _LOG.error(f"{err}")
+            logger.error(f"{err}")
             raise
         except JSONDecodeError as js_err:
-            _LOG.error(f"Unable to parse {self.location_file}")
+            logger.error(f"Unable to parse {self.location_file}")
             raise ReportFileParseException from js_err
         except Exception as exc:
-            _LOG.error(f"Problem getting location report from f{self.location_file}: {exc}")
+            logger.error(f"Problem getting location report from {self.location_file}: {exc}")
             raise

     def _get_location_report_from_service(self):
@@ -112,7 +111,7 @@ class LocationsReportRefactor:
             "edu.nrao.archive.datafetcher.DataFetcherSettings.locatorServiceUrlPrefix"
         )

-        _LOG.debug(f"fetching report from {url} for {self.product_locator}")
+        logger.debug(f"fetching report from {url} for {self.product_locator}")

         # this is needed to prevent SSL errors when tests are run
         # inside a Docker container
@@ -129,7 +128,7 @@ class LocationsReportRefactor:
         except requests.exceptions.RequestException as ex:
             raise LocationServiceErrorException(ex) from ex
         except Exception as exc:
-            _LOG.error(f"{exc}")
+            logger.error(f"{exc}")

         if response.status_code == http.HTTPStatus.OK:
             return response.json()
diff --git a/apps/cli/executables/datafetcher/datafetcher/project_fetcher.py b/apps/cli/executables/datafetcher/datafetcher/project_fetcher.py
index c5efbc0f1..377ca6081 100644
--- a/apps/cli/executables/datafetcher/datafetcher/project_fetcher.py
+++ b/apps/cli/executables/datafetcher/datafetcher/project_fetcher.py
@@ -3,47 +3,46 @@
 """ Implementations of assorted product fetchers """

 import copy
+import logging
 from argparse import Namespace
 from concurrent.futures import ThreadPoolExecutor
 from pathlib import Path
 from typing import Dict

-from datafetcher.errors import NGASServiceErrorException, NGASFetchError
+# pylint: disable=C0103, E0402, R0201, R0902, R0903, W0703, W1202, W1203
+
+from datafetcher.errors import NGASFetchError

 from .file_retrievers import NGASFileRetriever
-from .utilities import FlexLogger

-# pylint: disable=C0103, R0201, R0902, R0903, W0703
+logger = logging.getLogger(__name__)


 class BaseFetcher:
     """ This is a base class for fetchers. """

-    def __init__(
-        self, args: Namespace, df_capo_settings: dict, logger: FlexLogger, servers_report: dict
-    ):
+    def __init__(self, args: Namespace, df_capo_settings: dict, servers_report: dict):
         self.args = args
         self.output_dir = self.args.output_dir
-        self._LOG = logger
         self.force_overwrite = args.force
         self.dry_run = args.dry_run
         self.servers_report = servers_report
         self.settings = df_capo_settings
-        self.ngas_retriever = NGASFileRetriever(self.args, self._LOG)
+        self.ngas_retriever = NGASFileRetriever(self.args)
         self.retrieved = []
         self.num_files_retrieved = 0

     def retrieve_files(self, server, retrieve_method, file_specs):
         """ This is the part where we actually fetch the files. """
-        retriever = NGASFileRetriever(self.args, self._LOG)
+        retriever = NGASFileRetriever(self.args)
         num_files = len(file_specs)
         count = 0
         for file_spec in file_specs:
             count += 1
-            self._LOG.debug(
+            logger.debug(
                 f">>> retrieving {file_spec['relative_path']} "
                 f"({file_spec['size']} bytes, "
                 f"no. {count} of {num_files})...."
@@ -59,17 +58,15 @@ class SerialFetcher(BaseFetcher):

     """

-    def __init__(
-        self, args: Namespace, df_capo_settings: Dict, logger: FlexLogger, servers_report: Dict
-    ):
-        super().__init__(args, df_capo_settings, logger, servers_report)
+    def __init__(self, args: Namespace, df_capo_settings: Dict, servers_report: Dict):
+        super().__init__(args, df_capo_settings, servers_report)

     def run(self):
         """ fetch 'em """

-        self._LOG.debug("writing to {}".format(self.output_dir))
-        self._LOG.debug("dry run: {}".format(self.dry_run))
-        self._LOG.debug(f"force overwrite: {self.force_overwrite}")
+        logger.debug("writing to {}".format(self.output_dir))
+        logger.debug("dry run: {}".format(self.dry_run))
+        logger.debug(f"force overwrite: {self.force_overwrite}")
         for server in self.servers_report:
             self.retrieve_files(
                 server,
@@ -81,10 +78,8 @@ class ParallelFetcher(BaseFetcher):
     """ Pull the files out in parallel; try to be clever about it.
""" - def __init__( - self, args: Namespace, df_capo_settings: dict, logger: FlexLogger, servers_report: dict - ): - super().__init__(args, df_capo_settings, logger, servers_report) + def __init__(self, args: Namespace, df_capo_settings: dict, servers_report: dict): + super().__init__(args, df_capo_settings, servers_report) self.num_files_expected = self._count_files_expected() self.bucketized_files = self._bucketize_files() @@ -131,7 +126,7 @@ class ParallelFetcher(BaseFetcher): def fetch_bucket(self, bucket): """ Grab the files in this bucket """ file_sizes = [file["size"] for file in bucket["files"]] - self._LOG.debug( + logger.debug( f"{bucket['retrieve_method']} " f"{len(bucket['files'])} files from " f"{bucket['server']}...." @@ -146,7 +141,7 @@ class ParallelFetcher(BaseFetcher): """ Fetch all the files for the product locator """ if self.args.dry_run: - self._LOG.debug("This is a dry run; files will not be fetched") + logger.debug("This is a dry run; files will not be fetched") with ThreadPoolExecutor() as executor: results = executor.map(self.fetch_bucket, self.bucketized_files) @@ -156,15 +151,13 @@ class ParallelFetcher(BaseFetcher): print(f"result has arrived: {result}") self.num_files_retrieved += result if self.num_files_retrieved != self.num_files_expected: - self._LOG.error( + logger.error( f"{self.num_files_expected} files expected, " f"but only {self.num_files_retrieved} retrieved" ) - except (FileExistsError, NGASFetchError, NGASServiceErrorException): - raise except AttributeError: # (This error sometimes gets thrown after all files - # actually -have- been retrieved. I blame the NGAS API.) + # actually -have- been retrieved. I blame the NGAS API. - JLG) output_path = Path(self.args.output_dir) files = [ @@ -175,7 +168,7 @@ class ParallelFetcher(BaseFetcher): and not str(file).endswith(".log") ] if len(files) < self.num_files_expected: - self._LOG.error( + logger.error( f"{self.num_files_expected} files expected, but only " f"{self.num_files_retrieved} found" ) diff --git a/apps/cli/executables/datafetcher/datafetcher/utilities.py b/apps/cli/executables/datafetcher/datafetcher/utilities.py index 49f022ecd..8ca1f7d44 100644 --- a/apps/cli/executables/datafetcher/datafetcher/utilities.py +++ b/apps/cli/executables/datafetcher/datafetcher/utilities.py @@ -15,7 +15,7 @@ import pathlib import time from typing import Callable -# pylint:disable=C0301, C0303, C0415, E0401, E0402, R0903, W0212, W0404, W0621, W1203 +# pylint:disable=C0301, C0303, C0415, E0401, E0402, R0903, W0212, W1202, W0404, W0621, W1203 import psycopg2 as pg from pycapo import CapoConfig @@ -59,6 +59,8 @@ REQUIRED_SETTINGS = { "EDU.NRAO.ARCHIVE.WORKFLOW.CONFIG.REQUESTHANDLERSETTINGS.DOWNLOADDIRECTORY": "download_dir", } +logger = logging.getLogger(__name__) + def path_is_accessible(path: pathlib.Path) -> bool: """ @@ -86,7 +88,9 @@ def get_arg_parser(): cwd = pathlib.Path().absolute() parser = argparse.ArgumentParser( - description=_PROLOGUE, epilog=_EPILOGUE, formatter_class=argparse.RawTextHelpFormatter + description=_PROLOGUE, + epilog=_EPILOGUE, + formatter_class=argparse.RawTextHelpFormatter, ) # Can't find a way of clearing the action groups # without hitting an internal attribute. 
@@ -233,75 +237,12 @@ class ProductLocatorLookup:

         return product_locator[0]


-class FlexLogger:
-    """
-    This class wraps a logger, adding the ability to specify
-    logging level as warning or debug based on the "verbose" flag
-    """
-
-    def __init__(self, class_name: str, output_dir: pathlib.Path, verbose=False):
-        """
-        Set up logging.
-        :param class_name: class to be logged
-        :param output_dir: where log is to be written
-        :param verbose: if true, additional output
-
-        """
-
-        if class_name is None:
-            raise MissingSettingsException("class name is required")
-
-        log_pathname = f"{output_dir}/{class_name}_{str(time.time())}.log"
-        try:
-
-            self.logfile = pathlib.Path(output_dir, log_pathname)
-            self.logger = logging.getLogger(str(self.logfile))
-            self.verbose = verbose
-            handler = logging.FileHandler(self.logfile)
-            formatter = logging.Formatter(LOG_FORMAT)
-            handler.setFormatter(formatter)
-            self.logger.addHandler(handler)
-        except (PermissionError, FileNotFoundError) as err:
-            self.logger.error(f"Problem creating logger for {class_name}: {err}")
-            raise
-
-        level = logging.DEBUG if self.verbose else logging.WARN
-        self.logger.setLevel(level)
-
-    def set_verbose(self, verbose: bool) -> None:
-        """
-        Specify verbose logging:
-        True == debug and above
-        False == warnings and above
-
-        :param verbose:
-        :return:
-
-        """
-
-        self.verbose = verbose
-
-    def debug(self, message: str):
-        """ log a debug message """
-
-        if self.verbose:
-            self.logger.debug(message)
-
-    def warning(self, message: str):
-        """ log a warning message """
-
-        self.logger.warning(message)
-
-    def error(self, message: str):
-        """ log an error """
-        self.logger.error(message)
-
-
 class Retryer:
     """
     Retry executing a function, or die trying

     """

-    def __init__(self, func: Callable, max_tries: int, sleep_interval: int, logger: FlexLogger):
+    def __init__(self, func: Callable, max_tries: int, sleep_interval: int):
         self.func = func
         self.num_tries = 0
         self.max_tries = max_tries
diff --git a/apps/cli/executables/datafetcher/test/fake_archive_service.py b/apps/cli/executables/datafetcher/test/fake_archive_service.py
index 2b9bceb92..e80de324e 100644
--- a/apps/cli/executables/datafetcher/test/fake_archive_service.py
+++ b/apps/cli/executables/datafetcher/test/fake_archive_service.py
@@ -7,8 +7,8 @@ from pathlib import Path
 from datafetcher.locations_report_refactor import LocationsReportRefactor, UnknownLocatorException

 # pylint: disable=C0301, R0903
+
 _LOG = logging.getLogger(__name__)
-logging.basicConfig(level=logging.DEBUG)

 PROFILE = "docker"
diff --git a/apps/cli/executables/datafetcher/test/mock_data_fetcher.py b/apps/cli/executables/datafetcher/test/mock_data_fetcher.py
index 012b797b8..488f874f7 100644
--- a/apps/cli/executables/datafetcher/test/mock_data_fetcher.py
+++ b/apps/cli/executables/datafetcher/test/mock_data_fetcher.py
@@ -1,16 +1,18 @@
 """ for testing the attempt to copy rather than stream files """
-
+import logging
 from argparse import Namespace

 # pylint: disable=C0103, C0301, E0401, E0402, R0201, R0902, R0903, W0621

 from datafetcher.locations_report import LocationsReport
 from datafetcher.project_fetcher import ParallelFetcher
-from datafetcher.utilities import get_capo_settings, ExecutionSite, FlexLogger
+from datafetcher.utilities import get_capo_settings, ExecutionSite
 from datafetcher.errors import MissingSettingsException

 from .df_pytest_utils import TEST_PROFILE

+logger = logging.getLogger(__name__)
+

 class MockProdDataFetcher:
     """ Creates and launches a datafetcher using the dsoc-prod profile """
@@ -24,8 +26,6 @@ class MockProdDataFetcher:
         self.output_dir = args.output_dir
         self.profile = args.profile

-        self._LOG = FlexLogger(self.__class__.__name__, self.output_dir, False)
-
         self.locations_report = self._get_locations()
         self.servers_report = self.locations_report.servers_report

@@ -39,7 +39,7 @@ class MockProdDataFetcher:
         capo_settings = get_capo_settings(TEST_PROFILE)
         capo_settings["execution_site"] = ExecutionSite.DSOC.value

-        return LocationsReport(self._LOG, self.args, capo_settings)
+        return LocationsReport(self.args, capo_settings)

     def run(self):
@@ -47,4 +47,4 @@ class MockProdDataFetcher:

         :return:
         """
-        return ParallelFetcher(self.args, self.settings, self._LOG, self.servers_report).run()
+        return ParallelFetcher(self.args, self.settings, self.servers_report).run()
diff --git a/apps/cli/executables/datafetcher/test/test_df_function.py b/apps/cli/executables/datafetcher/test/test_df_function.py
index ce2ccb8f0..1f337682f 100644
--- a/apps/cli/executables/datafetcher/test/test_df_function.py
+++ b/apps/cli/executables/datafetcher/test/test_df_function.py
@@ -24,7 +24,7 @@ from datafetcher.utilities import (
 )


-from .test_df_return_codes import try_to_launch_df
+from .test_df_return_status import try_to_launch_df

 # N.B. these are all in use; SonarLint just doesn't get it
 from .df_pytest_utils import (
@@ -142,6 +142,7 @@ def test_no_overwrite_without_force(make_tempdir, capo_settings):
     files = locations_dict["files"]
     num_files_expected = 37
     assert len(files) == num_files_expected
+
     dest_dir = Path(top_level, _EB_EXTERNAL_NAME)
     dest_dir.mkdir(parents=True, exist_ok=True)

@@ -151,6 +152,7 @@ def test_no_overwrite_without_force(make_tempdir, capo_settings):
         fake_file.unlink()
     with open(fake_file, "w") as to_write:
         to_write.write("dang! what a kick in the rubber parts!")
+    assert fake_file.stat().st_size == 38

     args = [
         "--location-file",
@@ -164,49 +166,35 @@ def test_no_overwrite_without_force(make_tempdir, capo_settings):
     with pytest.raises(FileExistsError):
         try_to_launch_df(capo_settings, args)

-    sizes = dict()
-    for file in dest_dir.rglob("*"):
-        sizes[str(file)] = file.stat().st_size
-    assert len(sizes) < num_files_expected
-    fake_size = fake_file.stat().st_size
-    assert fake_size == 38
-
-    with pytest.raises(FileExistsError):
-        launch_datafetcher(args, capo_settings)
-
-    retrieved = [file for file in top_level.rglob("*") if file.is_file()]
-    num_not_too_big_files_expected = 29
-    assert len(retrieved) - 2 == num_not_too_big_files_expected
+    retrieved = [
+        file for file in top_level.rglob("*") if file.is_file() and not str(file).endswith(".json")
+    ]
+    num_not_too_big_files_expected = 28
+    assert len(retrieved) == num_not_too_big_files_expected

     # get rid of all the files we downloaded, plus the log
-    deleted = [file.unlink() for file in retrieved if not str(file).endswith(".json")]
-    assert len(deleted) >= num_not_too_big_files_expected
-
-    # same download, but without verbose logging
-    args = [
-        "--location-file",
-        str(location_file),
-        "--profile",
-        TEST_PROFILE,
-        "--output-dir",
-        str(make_tempdir),
+    deleted = [
+        file.unlink() for file in retrieved if file.is_file() and not str(file).endswith(".json")
     ]
-    launch_datafetcher(args, capo_settings)
-
-    retrieved = [file for file in top_level.rglob("*")]
-    assert len(retrieved) == num_files_expected + 3
+    assert len(deleted) >= num_not_too_big_files_expected


 @pytest.mark.skipif(not RUN_ALL, reason="debug")
 def test_copy_attempt_throws_fetch_error(make_tempdir, settings):
-    """We set profile to dsoc-prod here so as to force the DF
+    """
+    We set profile to dsoc-prod here so as to force the DF
     to try to copy rather than stream
+
+    :param make_tempdir: temp directory fixture
+    :param settings: Capo settings for datafetcher
+
+    :return:
     """

     # N.B. can't do this import with the rest of the imports up top,
     # because test_df_return_codes not yet initialized
-    from .test_df_return_codes import we_are_in_docker
+    from .test_df_return_status import we_are_in_docker

     if we_are_in_docker():
         # this test doesn't work in a docker container:
diff --git a/apps/cli/executables/datafetcher/test/test_df_return_codes.py b/apps/cli/executables/datafetcher/test/test_df_return_status.py
similarity index 100%
rename from apps/cli/executables/datafetcher/test/test_df_return_codes.py
rename to apps/cli/executables/datafetcher/test/test_df_return_status.py
--
GitLab