diff --git a/apps/cli/executables/datafetcher/README.md b/apps/cli/executables/datafetcher/README.md
index b2cc894843867d2fa2ab8c1a76de9c55036411ac..3a46f7229fcbd412dd9c08992c40dc741641268e 100644
--- a/apps/cli/executables/datafetcher/README.md
+++ b/apps/cli/executables/datafetcher/README.md
@@ -15,17 +15,23 @@ This is intended to be a library wrapped in a command line interface.
 usage: datafetcher [-h] (--product-locator PRODUCT_LOCATOR | --location-file LOCATION_FILE)
                    [--dry-run] [--output-dir OUTPUT_DIR] [--verbose]
-                   [--profile PROFILE]
+                   [--profile PROFILE] [--force]
 
 Retrieve a product (a science product or an ancillary product) from the NRAO archive,
 either by specifying the product's locator or by providing the path to a product
 locator report.
 
+If any of the files to be retrieved is already present in the target location,
+it is not overwritten unless the --force argument is supplied; instead, an
+informative error message is displayed.
+
 Optional Arguments:
   --dry-run             dry run, do not fetch product
   --output-dir OUTPUT_DIR
                         output directory, default current directory
   --verbose             make a lot of noise
+  --force               if a file to be fetched already exists in the target
+                        location, overwrite it
   --profile PROFILE     CAPO profile to use
 
 Return Codes:
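The overwrite contract added to the README above amounts to a single check
before each file is written. A minimal sketch of that check (the helper name
is hypothetical, not part of this patch):

    from pathlib import Path

    def check_overwrite(target: Path, force: bool) -> None:
        # refuse to clobber an existing file unless --force was supplied
        if target.exists() and not force:
            raise FileExistsError(
                f'{target} exists; rerun with --force to overwrite')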
diff --git a/apps/cli/executables/datafetcher/src/datafetcher/commands.py b/apps/cli/executables/datafetcher/src/datafetcher/commands.py
deleted file mode 100755
index e8107449bcd41d079fee396db992f7c36f69f57f..0000000000000000000000000000000000000000
--- a/apps/cli/executables/datafetcher/src/datafetcher/commands.py
+++ /dev/null
@@ -1,116 +0,0 @@
-#!/usr/bin/env/python
-# -*- coding: utf-8 -*-
-
-""" Module for the command line interface to data-fetcher. """
-
-import logging
-import sys
-import traceback
-
-from .errors import NoLocatorException, \
-    NGASServiceErrorException, exception_to_error, terminal_exception
-from .locations_report import LocationsReport
-from .product_fetchers import ParallelProductFetcher
-from .utilities import get_arg_parser, get_capo_settings, FlexLogger
-
-
-class DataFetcher:
-    '''
-    TO EXECUTE ALL DF TESTS: from datafetcher/, just run pytest
-
-    example command line that should work with the correct local profile:
-    datafetcher --profile local --output-dir ~/Downloads/
-        --product-locator \
-        uid://evla/execblock/93e1c0cd-76de-4e65-a3f2-d5fe55f386d8 \
-        --verbose
-
-    local.properties must have:
-    - edu.nrao.archive.workflow.config
-        .StartupSettings.temporaryDataDirectory pointing to a locally
-        writable temp dir, e.g., /var/tmp
-    - edu.nrao.archive.workflow.config.DeliverySettings.hostname must point
-        to local computer
-    - execution_site must NOT be DSOC or NAASC
-
-    '''
-
-    def __init__(self, args, settings):
-        self.args = args
-        self.settings = settings
-
-        verbose = args and args.verbose
-        try:
-            self._LOG = FlexLogger(self.__class__.__name__, args.output_dir, verbose)
-            self.logfile = self._LOG.logfile
-            self.locations_report = self._get_locations()
-            self.servers_report = self.locations_report.servers_report
-        except (NoLocatorException, FileNotFoundError, PermissionError) as exc:
-            self._terminal_exception(exc)
-        except TypeError as exc:
-            self._LOG.error('TODO: handle TypeError')
-            self._terminal_exception(exc)
-        except Exception as exc:
-            self._LOG.error(
-                f'>>> throwing unexpected {type(exc)} during init: {exc}')
-            self._terminal_exception(exc)
-
-    def run(self):
-        """
-        launch the fetcher
-        :return:
-        """
-
-        try:
-            return ParallelProductFetcher(
-                self.args, self.settings, self._LOG,
-                self.servers_report).run()
-        except (NGASServiceErrorException, FileExistsError) as exc:
-            self._terminal_exception(exc)
-        except AttributeError as a_err:
-            self._LOG.error(f'>>> throwing AttributeError during run: {a_err}')
-            self._terminal_exception(a_err)
-        except Exception as exc:
-            self._LOG.error(
-                f'>>> throwing unexpected exception during run: {exc}')
-            self._terminal_exception(exc)
-
-    def _get_locations(self):
-        try:
-            return LocationsReport(self._LOG, self.args, self.settings)
-        except NoLocatorException as exc:
-            self._terminal_exception(exc)
-
-    def _terminal_exception(self, exception: Exception):
-        ''' report exception, then throw in the towel
-        '''
-        errorno = exception_to_error(exception)
-        try:
-            self._LOG.debug(traceback.format_exc())
-            exc_type = type(exception)
-            self._LOG.error('terminal_exception')
-            self._LOG.error(f'{exc_type}: {str(exception)}')
-        except Exception as exc:
-            logging.error(exc)
-        finally:
-            sys.exit(errorno.value)
-
-
-def main():
-    ''' this will be executed when fetcher is launched
-        from the command line
-    '''
-    try:
-        parser = get_arg_parser()
-        args = parser.parse_args()
-        settings = get_capo_settings(args.profile)
-        datafetcher = DataFetcher(args, settings)
-        datafetcher.run()
-    except (NGASServiceErrorException, FileExistsError) as ex:
-        terminal_exception(ex)
-    except Exception as ex:
-        logging.error(f'>>> some other kind of exception during main: {ex}')
-        terminal_exception(ex)
-
-
-if __name__ == '__main__':
-    main()
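Where the deleted commands.py translated exceptions into exit codes via
exception_to_error(), its replacement below exits through an enum of
documented return codes. A minimal sketch of the pattern, using two values
from return_codes.py later in this patch:

    import sys
    from enum import Enum

    class ReturnCode(Enum):
        SUCCESS = {'code': 0, 'text': 'success'}
        MISSING_PROFILE = {'code': 1, 'text': 'no CAPO profile provided'}

    def exit_with_error(return_code: ReturnCode) -> None:
        # mirrors DataFetcher._exit_with_error, minus the usage message
        sys.exit(return_code.value['code'])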
diff --git a/apps/cli/executables/datafetcher/src/datafetcher/datafetcher.py b/apps/cli/executables/datafetcher/src/datafetcher/datafetcher.py
new file mode 100755
index 0000000000000000000000000000000000000000..5793eb94ed5a697711f6bc1078425d2c9b478823
--- /dev/null
+++ b/apps/cli/executables/datafetcher/src/datafetcher/datafetcher.py
@@ -0,0 +1,179 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+''' Module for the command line interface to data-fetcher. '''
+
+import logging
+import sys
+from argparse import Namespace
+from pathlib import Path
+
+from datafetcher.project_fetcher import ParallelFetcher
+from datafetcher.return_codes import ReturnCode
+
+from .locations_report import LocationsReport
+from .utilities import get_arg_parser, get_capo_settings, FlexLogger, \
+    path_is_accessible
+
+# pylint: disable=W1203, R0902, R0903
+
+_APPLICATION_NAME = 'datafetcher'
+_VALID_PROFILES = ['naasc-dev', 'naasc-test', 'dsoc-test', 'dsoc-dev',
+                   'nmprod', 'local']
+
+class DataFetcher:
+    '''
+    TO EXECUTE ALL DF TESTS: from datafetcher/, just run pytest
+
+    example command line that should work with the correct local profile
+    and a current product locator (these change whenever the
+    database is rebuilt):
+    datafetcher --profile local --output-dir ~/Downloads/
+        --product-locator \
+        uid://evla/execblock/93e1c0cd-76de-4e65-a3f2-d5fe55f386d8 \
+        --verbose
+
+    local.properties must have:
+    - edu.nrao.archive.workflow.config
+        .StartupSettings.temporaryDataDirectory pointing to a locally
+        writable temp dir, e.g., /var/tmp
+    - edu.nrao.archive.workflow.config.DeliverySettings.hostname must point
+        to local computer
+    - execution_site must NOT be DSOC or NAASC
+
+    '''
+
+    def __init__(self, args: Namespace, df_capo_settings: dict):
+        self.usage = self._build_usage_message()
+        if args is None or df_capo_settings is None:
+            self._exit_with_error(ReturnCode.MISSING_SETTING)
+        self.args = args
+        self.settings = df_capo_settings
+        self.verbose = self.args.verbose
+
+        # required arguments
+        self.output_dir = args.output_dir
+        if self.output_dir is None:
+            self._exit_with_error(ReturnCode.MISSING_SETTING)
+
+        output_dir = Path(self.output_dir)
+        if not output_dir.is_dir()\
+                or not path_is_accessible(output_dir):
+            logging.error(f'output location {self.output_dir} inaccessible '
+                          f'or not found')
+            self._exit_with_error(ReturnCode.MISSING_SETTING)
+
+        self._LOG = FlexLogger(self.__class__.__name__,
+                               self.output_dir, self.verbose)
+        if args.location_file is not None:
+            if args.product_locator is not None:
+                self._LOG.error('required: location file OR product locator '
+                                '-- not both')
+                self._exit_with_error(ReturnCode.MISSING_SETTING)
+            self.location_file = args.location_file
+        elif args.product_locator is not None:
+            self.product_locator = args.product_locator
+        else:
+            self._LOG.error('you must specify either a location file or a '
+                            'product locator')
+            self._exit_with_error(ReturnCode.MISSING_SETTING)
+
+        self.profile = args.profile
+        if self.profile is None or self.profile not in _VALID_PROFILES:
+            self._exit_with_error(ReturnCode.MISSING_PROFILE)
+
+        # optional arguments
+        self.is_dry = args.dry_run
+        self.force = args.force
+        self.verbose = args.verbose or False
+
+        try:
+            self.locations_report = self._get_locations()
+            self.servers_report = self.locations_report.servers_report
+        except SystemExit as exc:
+            self._LOG.error(f'{exc}')
+            if args.location_file:
+                self._exit_with_error(ReturnCode.CANNOT_OPEN_LOCATION_FILE)
+            else:
+                self._exit_with_error(ReturnCode.PRODUCT_LOCATOR_NOT_FOUND)
+            raise
+
+        except Exception as exc:
+            self._LOG.error(
+                f'>>> throwing unexpected {type(exc)} during init: {exc}')
+            raise
+
+    def _exit_with_error(self, return_code: ReturnCode):
+        print(self.usage)
+        sys.exit(return_code.value['code'])
+
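+    # NOTE: the usage message assembled below is maintained by hand, so it
+    # must be kept in sync with get_arg_parser() and with the README; the
+    # return-code table at its end is generated from the ReturnCode enum.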
+
+    @staticmethod
+    def _build_usage_message() -> str:
+        usage_str = 'Usage:'
+        usage_str += """\tdatafetcher [-h]
+            (--product-locator PRODUCT_LOCATOR | --location-file LOCATION_FILE)
+            [--dry-run] [--output-dir OUTPUT_DIR] [--verbose]
+            [--profile PROFILE] [--force]\n"""
+        usage_str += "\n\t\tReturn codes:\n"
+        for return_code in ReturnCode:
+            usage_str += f"\n\t\t\t{return_code.value['code']}: " \
+                         f"{return_code.value['text']}"
+        return usage_str
+
+    def run(self):
+        """
+        launch the fetcher
+        :return:
+        """
+
+        try:
+            fetcher = ParallelFetcher(
+                self.args, self.settings, self._LOG,
+                self.servers_report)
+            return fetcher.run()
+        except SystemExit as exc:
+            self._LOG.error(f'{exc}')
+            raise
+        except Exception as exc:
+            self._LOG.error(
+                f'>>> throwing unexpected exception during run: {exc}')
+            raise
+
+    def _get_locations(self):
+        try:
+            capo_settings = get_capo_settings(self.profile)
+            return LocationsReport(self._LOG,
+                                   self.args,
+                                   capo_settings)
+        except Exception as exc:
+            self._LOG.error(f'{exc}')
+            if hasattr(self, 'location_file'):
+                self._exit_with_error(ReturnCode.CANNOT_OPEN_LOCATION_FILE)
+            elif hasattr(self, 'product_locator'):
+                self._exit_with_error(ReturnCode.PRODUCT_LOCATOR_NOT_FOUND)
+            else:
+                # should never happen; we've already checked
+                self._exit_with_error(ReturnCode.MISSING_SETTING)
+
+
+def main():
+    ''' this will be executed when fetcher is launched
+        from the command line
+    '''
+
+    parser = get_arg_parser()
+    try:
+        args = parser.parse_args()
+    except SystemExit as exc:
+        # argparse raises SystemExit on bad arguments; return its exit code
+        return exc.code
+    except Exception as exc:
+        logging.error(f'{exc}')
+        return ReturnCode.MISSING_SETTING.value['code']
+    settings = get_capo_settings(args.profile)
+    datafetcher = DataFetcher(args, settings)
+    return datafetcher.run()
+
+
+if __name__ == '__main__':
+    main()
diff --git a/apps/cli/executables/datafetcher/src/datafetcher/locations_report.py b/apps/cli/executables/datafetcher/src/datafetcher/locations_report.py
index 5788dc67ba404c4e26816d3397e9e3cc30d94cd8..39bfacc6e46f3f8d3c1a0c6e48494f2e0b4ccbdd 100644
--- a/apps/cli/executables/datafetcher/src/datafetcher/locations_report.py
+++ b/apps/cli/executables/datafetcher/src/datafetcher/locations_report.py
@@ -126,10 +126,6 @@ class LocationsReport:
         :return: location report (from file, in JSON)
         """
         result = dict()
-        if self.product_locator is None and self.location_file is None:
-            raise ValueError(
-                'product_locator or location_file must be provided; '
-                'neither were')
         if self.location_file is not None:
             result = self._get_location_report_from_file()
         if self.product_locator is not None:
@@ -143,9 +139,13 @@ class LocationsReport:
         self._LOG.debug(f'fetching files from report "{self.location_file}"')
 
         # try:
-        with open(self.location_file) as to_read:
-            result = json.load(to_read)
-            return result
+        try:
+            with open(self.location_file) as to_read:
+                result = json.load(to_read)
+                return result
+        except FileNotFoundError as err:
+            self._LOG.error(f'{err}')
+            raise
 
     def _get_location_report_from_service(self):
         """ Use 'requests' to fetch the location report from the locator service.
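The hunk below adds a catch-all except to the locator-service request in
_get_location_report_from_service. For orientation, the exception chain there
corresponds to the documented return codes roughly as follows; this is an
illustrative sketch (the request itself is simplified), not code from this
patch:

    import sys
    import requests

    from datafetcher.return_codes import ReturnCode

    def fetch_report_sketch(url: str) -> dict:
        # map locator-service failures onto datafetcher return codes
        try:
            response = requests.get(url)
        except requests.exceptions.Timeout:
            sys.exit(ReturnCode.LOCATOR_SERVICE_TIMEOUT.value['code'])
        except requests.exceptions.TooManyRedirects:
            sys.exit(ReturnCode.TOO_MANY_SERVICE_REDIRECTS.value['code'])
        except requests.exceptions.RequestException:
            sys.exit(ReturnCode.CATASTROPHIC_REQUEST_ERROR.value['code'])
        return response.json()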
@@ -171,6 +171,9 @@ class LocationsReport:
             raise LocationServiceRedirectsException()
         except requests.exceptions.RequestException as ex:
             raise LocationServiceErrorException(ex)
+        except Exception as exc:
+            self._LOG.error(f'{exc}')
+            raise
 
         if response.status_code == http.HTTPStatus.OK:
             return response.json()
diff --git a/apps/cli/executables/datafetcher/src/datafetcher/product_fetchers.py b/apps/cli/executables/datafetcher/src/datafetcher/project_fetcher.py
similarity index 62%
rename from apps/cli/executables/datafetcher/src/datafetcher/product_fetchers.py
rename to apps/cli/executables/datafetcher/src/datafetcher/project_fetcher.py
index f3f30841581f398ac849f95c60fd5094d50cdaed..f44cc2988bb0f31fe78ebbc7ec93af01c1a6420c 100644
--- a/apps/cli/executables/datafetcher/src/datafetcher/product_fetchers.py
+++ b/apps/cli/executables/datafetcher/src/datafetcher/project_fetcher.py
@@ -3,36 +3,38 @@
 """ Implementations of assorted product fetchers """
 
 import copy
-import os
+import sys
 from argparse import Namespace
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from pathlib import Path
 from typing import Dict
 
-from .errors import NGASServiceErrorException
+from datafetcher.errors import NGASServiceErrorException
+from datafetcher.return_codes import ReturnCode
+
 from .file_retrievers import NGASFileRetriever
 from .utilities import FlexLogger
 
 
-class BaseProductFetcher:
+class BaseFetcher:
     """ This is a base class for fetchers. """
 
-    def __init__(self, args: Namespace, settings: Dict, logger: FlexLogger,
-                 servers_report: Dict):
+    def __init__(self, args: Namespace, df_capo_settings: dict, logger: FlexLogger,
+                 servers_report: dict):
         self.args = args
         self.output_dir = self.args.output_dir
         self._LOG = logger
         self.force_overwrite = args.force
         self.dry_run = args.dry_run
         self.servers_report = servers_report
-        self.settings = settings
+        self.settings = df_capo_settings
         self.ngas_retriever = NGASFileRetriever(self.args, self._LOG)
         self.retrieved = []
         self.num_files_retrieved = 0
 
     def retrieve_files(self, server, retrieve_method, file_specs):
-        ''' this is the part where we actually fetch the files
-        '''
+        ''' this is the part where we actually fetch the files '''
+
         retriever = NGASFileRetriever(self.args, self._LOG)
         num_files = len(file_specs)
         count = 0
@@ -49,18 +51,18 @@
         return num_files
 
 
-class SerialProductFetcher(BaseProductFetcher):
-    """ Pull the files out, one right after another;
-        don't try to be clever about it.
-    """
-    # TODO some fine day: add datafetcher_tests for this IFF it will be used in
-    # production
+class SerialFetcher(BaseFetcher):
+    ''' Pull the files out, one right after another;
+        don't try to be clever about it.
+
+    '''
 
-    def __init__(self, args: Namespace, settings: Dict, logger: FlexLogger,
+    def __init__(self, args: Namespace, df_capo_settings: Dict, logger: FlexLogger,
                  servers_report: Dict):
-        super().__init__(args, settings, logger, servers_report)
+        super().__init__(args, df_capo_settings, logger, servers_report)
 
     def run(self):
+        ''' fetch 'em '''
         self._LOG.debug('writing to {}'.format(self.output_dir))
         self._LOG.debug('dry run: {}'.format(self.dry_run))
         self._LOG.debug(f'force overwrite: {self.force_overwrite}')
@@ -70,25 +72,24 @@
                                 self.servers_report[server]['files'])
 
 
-class ParallelProductFetcher(BaseProductFetcher):
-    """ Pull the files out in parallel; try to be clever about it.
-    """
-
+class ParallelFetcher(BaseFetcher):
+    ''' Pull the files out in parallel; try to be clever about it.
''' - def __init__(self, args: Namespace, settings: Dict, logger: FlexLogger, - servers_report: Dict): - super().__init__(args, settings, logger, servers_report) + def __init__(self, args: Namespace, df_capo_settings: dict, logger: FlexLogger, + servers_report: dict): + super().__init__(args, df_capo_settings, logger, servers_report) self.num_files_expected = self._count_files_expected() self.bucketized_files = self._bucketize_files() def _count_files_expected(self): + ''' determine how many files we expect to retrieve ''' count = 0 for server in self.servers_report: count += len(self.servers_report[server]['files']) return count def _bucketize_files(self): - """ Takes the servers report and splits it up into a list of buckets, + ''' Takes the servers report and splits it up into a list of buckets, each of which a single thread will handle. There will be X * Y buckets, where X is the number of servers and Y is the 'threads per host', and the files for a given server will be distributed among the buckets @@ -97,8 +98,9 @@ class ParallelProductFetcher(BaseProductFetcher): Basically what we are doing here is splitting up the work among the threads we'll be creating and creating a list of work for each thread to do. - """ + ''' + threads_per_host = int(self.settings['threads_per_host']) result = list() for server in self.servers_report: # Setup the 'buckets', one per server. @@ -106,12 +108,11 @@ class ParallelProductFetcher(BaseProductFetcher): 'retrieve_method': self.servers_report[server]['retrieve_method'], 'files': list()} - buckets = [copy.deepcopy(bucket) for x in - range(int(self.settings['threads_per_host']))] + buckets = [copy.deepcopy(bucket) for _ in range(threads_per_host)] # Spread the files for a given server around its buckets. i = 0 for file_spec in self.servers_report[server]['files']: - list_number = i % int(self.settings['threads_per_host']) + list_number = i % threads_per_host buckets[list_number]['files'].append(file_spec) i += 1 # Trim out every bucket with no files, add the rest to the result. @@ -132,37 +133,48 @@ class ParallelProductFetcher(BaseProductFetcher): def run(self): ''' Fetch all the files ''' + if self.args.dry_run: + self._LOG.debug('This is a dry run; files will not be fetched') + return 0 + with ThreadPoolExecutor() as executor: results = executor.map(self.fetch_bucket, self.bucketized_files) try: - for future in as_completed(results): + futures = as_completed(results) + for future in futures: self.num_files_retrieved += future.result() if self.num_files_retrieved != self.num_files_expected: self._LOG.error( f'{self.num_files_expected} files expected, ' f'but only {self.num_files_retrieved} retrieved') - raise NGASServiceErrorException - return self.retrieved - except (FileExistsError, NGASServiceErrorException) as n_exc: - raise n_exc + self._exit_with_error(ReturnCode.NGAS_FETCH_ERROR) + + # successful retrieval + return 0 + except (FileExistsError, NGASServiceErrorException) as exc: + self._LOG.error(f'{exc}') + self._exit_with_error(ReturnCode.NGAS_FETCH_ERROR) except AttributeError: - # This error may -- but doesn't always -- occur after all files - # actually -have- been retrieved. TODO some fine day: why? 
- for dirname, dirnames, _ in os.walk( - self.args.output_dir): - if dirnames: - # we can expect one subdir: the external_name associated - # with the product locator - to_walk = Path(dirname) / dirnames[0] - for dname, dnames, files in os.walk(to_walk): - if self.num_files_expected <= len(files): - self.num_files_retrieved += len(files) - break - if self.num_files_expected >= self.num_files_retrieved: - break - if self.num_files_retrieved < self.num_files_expected: - raise NGASServiceErrorException( + # (This error sometimes gets thrown after all files + # actually -have- been retrieved. I blame the NGAS API.) + + output_path = Path(self.args.output_dir) + files = [file for file in output_path.rglob('*') + if not file.is_dir() + and not str(file).endswith('.json') + and not str(file).endswith('.log') + ] + if len(files) < self.num_files_expected: + self._LOG.error( f'{self.num_files_expected} files expected, but only ' f'{self.num_files_retrieved} found') + self._exit_with_error(ReturnCode.CATASTROPHIC_REQUEST_ERROR) + + return 0 + + except Exception as exc: + self._LOG.error(f'{exc}') + self._exit_with_error(ReturnCode.NGAS_FETCH_ERROR) - return self.retrieved + def _exit_with_error(self, return_code: ReturnCode): + sys.exit(return_code.value['code']) diff --git a/apps/cli/executables/datafetcher/src/datafetcher/return_codes.py b/apps/cli/executables/datafetcher/src/datafetcher/return_codes.py new file mode 100644 index 0000000000000000000000000000000000000000..4e4629863a5f426b4e0b7e34fa7a9016de79659d --- /dev/null +++ b/apps/cli/executables/datafetcher/src/datafetcher/return_codes.py @@ -0,0 +1,17 @@ +''' data-fetcher return codes as specified in README and usage string ''' + +from enum import Enum + + +class ReturnCode(Enum): + SUCCESS = {'code': 0, 'text' : 'success'} + MISSING_PROFILE = {'code': 1, 'text' : 'no CAPO profile provided'} + MISSING_SETTING = {'code': 2, 'text' : 'missing required setting'} + LOCATOR_SERVICE_TIMEOUT = {'code': 3, 'text' : 'request to locator service timed out'} + TOO_MANY_SERVICE_REDIRECTS = {'code': 4, 'text' : 'too many redirects on locator service'} + CATASTROPHIC_REQUEST_ERROR = {'code': 5, 'text' : 'catastrophic error on request service'} + PRODUCT_LOCATOR_NOT_FOUND = {'code': 6, 'text' : 'product locator not found'} + CANNOT_OPEN_LOCATION_FILE = {'code': 7, 'text' : 'not able to open specified location file'} + NGAS_FETCH_ERROR = {'code': 8, 'text' : 'error fetching file from NGAS server'} + SIZE_MISMATCH = {'code': 9, 'text' : 'retrieved file not expected size'} + diff --git a/apps/cli/executables/datafetcher/src/datafetcher/utilities.py b/apps/cli/executables/datafetcher/src/datafetcher/utilities.py index 880f0cedab2cde8047ce26e5f31e6dee8495af01..b22c3dfe51a2031079586b7cb74945fe26350be0 100644 --- a/apps/cli/executables/datafetcher/src/datafetcher/utilities.py +++ b/apps/cli/executables/datafetcher/src/datafetcher/utilities.py @@ -114,10 +114,9 @@ def get_capo_settings(profile: str): """ result = dict() if profile is None: - raise NoProfileException('CAPO_PROFILE required, none provided') + raise NoProfileException('CAPO_PROFILE required; none provided') capo = CapoConfig(profile=profile) for setting in REQUIRED_SETTINGS: - value = None setting = setting.upper() try: value = capo[setting] @@ -141,9 +140,8 @@ def validate_file_spec(file_spec: dict, retrieve_method_expected: bool): server = file_spec['server'] for key in SERVER_SPEC_KEYS: - if not key in server.keys(): - # if this is before retrieval mode has been set, OK not to have it - if 
retrieve_method_expected:
+        # if this is before retrieval mode has been set, OK not to have it
+        if key not in server.keys() and retrieve_method_expected:
             raise MissingSettingsException(
                 f'{key} not found in server spec: {server}')
 
@@ -194,7 +192,7 @@
         '''
         if class_name is None:
             raise MissingSettingsException('class name is required')
-        log_pathname = f'{class_name}_{str(time())}.log'
+        log_pathname = f'{output_dir}/{class_name}_{str(time())}.log'
 
         try:
             self.logfile = pathlib.Path(output_dir, log_pathname)
diff --git a/apps/cli/executables/datafetcher/test/Dockerfile b/apps/cli/executables/datafetcher/test/Dockerfile
index cb33c4307141f61d77bb840b12c5f9bb409b4217..bf82118f2ea34119dd0a760e881e2b02755eabb7 100644
--- a/apps/cli/executables/datafetcher/test/Dockerfile
+++ b/apps/cli/executables/datafetcher/test/Dockerfile
@@ -9,7 +9,7 @@
 # where '-t' specifies a name and N' is the version.
 # (If ':N' is omitted, version is 'latest' by default.)
 # tag is not required for the build, but without it
-#the container name is an unhelpful hexadecimal value.
+# the container name is an unhelpful hexadecimal value.
 
 FROM continuumio/miniconda3:latest
 
@@ -38,5 +38,4 @@ ENV CAPO_PATH test/
 # finally, run the tests. be verbose. log stuff.
 # (for more detailed output, use "-vv" and/or "--log-level=DEBUG";
 # to quit after first failure, use "-x")
-# TODO: not finding imports
-ENTRYPOINT ["conda", "run", "-n", "data", "pytest", "-vv", "--log-level=DEBUG", "--showlocals", "test/datafetcher_test.py" ]
+ENTRYPOINT ["conda", "run", "-n", "data", "pytest", "-vv", "--log-level=DEBUG", "--showlocals", "test/"]
diff --git a/apps/cli/executables/datafetcher/test/testing_utils.py b/apps/cli/executables/datafetcher/test/df_pytest_utils.py
similarity index 79%
rename from apps/cli/executables/datafetcher/test/testing_utils.py
rename to apps/cli/executables/datafetcher/test/df_pytest_utils.py
index 0a2e708fa4927df2173dc834f6a33715fdd17b21..9669baed925b0cff6ecec7f9b8e32f458ec8623e 100644
--- a/apps/cli/executables/datafetcher/test/testing_utils.py
+++ b/apps/cli/executables/datafetcher/test/df_pytest_utils.py
@@ -5,13 +5,16 @@
 
 import json
 import os
+import tempfile
 from pathlib import Path
 
+import pytest
 from pycapo import CapoConfig
 
 from datafetcher.errors import MissingSettingsException, NoProfileException
 from datafetcher.locations_report import LocationsReport
-from datafetcher.utilities import REQUIRED_SETTINGS, get_arg_parser, ExecutionSite
+from datafetcher.utilities import REQUIRED_SETTINGS, get_arg_parser, \
+    ExecutionSite, ProductLocatorLookup
 
 TEST_PROFILE = 'local'
 
@@ -182,3 +185,51 @@
             raise MissingSettingsException(
                 f'missing required setting "{field}"')
     return result
+
+
+@pytest.fixture(autouse=True, scope='function')
+def make_tempdir():
+    ''' Creates a new temporary working directory for each test.
''' + umask = os.umask(0o000) + top_level = tempfile.mkdtemp(prefix='datafetcher_test_', dir='/var/tmp') + os.umask(umask) + yield top_level + + +@pytest.fixture(scope='session') +def capo_settings(): + ''' get Capo settings once for whole module ''' + + def retrieve_capo_settings() -> CapoConfig: + return get_test_capo_settings() + + to_return = retrieve_capo_settings() + yield to_return + + +@pytest.fixture(scope='session') +def settings(capo_settings): + ''' grabs all the settings we will need for the datafetcher: + Capo, database, test data + ''' + db_settings = get_metadata_db_settings(TEST_PROFILE) + test_data = _initialize_test_data(db_settings) + yield Settings(capo_settings, db_settings, test_data) + + +def _initialize_test_data(db_settings): + ''' Set up test data for use in several tests ''' + ext_name = '13B-014.sb28862036.eb29155786.56782.5720116088' + + product_locator = ProductLocatorLookup(db_settings) \ + .look_up_locator_for_ext_name(ext_name) + return {'external_name': ext_name, + 'product_locator': product_locator} + + +class Settings: + + def __init__(self, capo_settings, db_settings, test_data): + self.capo_settings = capo_settings + self.db_settings = db_settings + self.test_data = test_data diff --git a/apps/cli/executables/datafetcher/test/docker-build.sh b/apps/cli/executables/datafetcher/test/docker-build.sh index 484146963dd8b85a13b0639b808ee28cc216ab36..d9d2eaa9cd8b234ffd2c9b30664113c24aa2c3f8 100755 --- a/apps/cli/executables/datafetcher/test/docker-build.sh +++ b/apps/cli/executables/datafetcher/test/docker-build.sh @@ -10,10 +10,11 @@ # do not support conditional logic; hence this script. # Execute script from apps/executables/cli/datafetcher/ + FILENAME=local.properties CONTAINER_NAME=$1;shift CACHE_FLAG=$1;shift -USAGE='Usage: $0 <container_name> [NO-CACHE]' +USAGE='Usage: $0 <container_name> [--NO-CACHE]' if [[ -z "${CONTAINER_NAME}" ]] then echo "${USAGE}" diff --git a/apps/cli/executables/datafetcher/test/mock_data_fetcher.py b/apps/cli/executables/datafetcher/test/mock_data_fetcher.py new file mode 100644 index 0000000000000000000000000000000000000000..21ed02bab91a114aebfcf5069546e828876b4c43 --- /dev/null +++ b/apps/cli/executables/datafetcher/test/mock_data_fetcher.py @@ -0,0 +1,74 @@ +""" for testing the attempt to copy rather than stream files """ + +import sys +from argparse import Namespace + +from datafetcher.locations_report import LocationsReport +from datafetcher.project_fetcher import ParallelFetcher +from datafetcher.return_codes import ReturnCode +from datafetcher.utilities import get_capo_settings, ExecutionSite, FlexLogger + +from .df_pytest_utils import TEST_PROFILE + + +class MockProdDataFetcher: + ''' Creates and launches a datafetcher using the nmprod profile ''' + def __init__(self, args: Namespace, settings: dict): + if args is None or settings is None: + self._exit_with_error(ReturnCode.MISSING_SETTING) + self.args = args + self.settings = settings + self.verbose = self.args.verbose + + self.output_dir = args.output_dir + self.profile = args.profile + + self._LOG = FlexLogger(self.__class__.__name__, + self.output_dir, self.verbose) + + try: + self.locations_report = self._get_locations() + self.servers_report = self.locations_report.servers_report + except SystemExit: + if args.location_file: + self._exit_with_error(ReturnCode.CANNOT_OPEN_LOCATION_FILE) + else: + self._exit_with_error(ReturnCode.PRODUCT_LOCATOR_NOT_FOUND) + raise + + except Exception as exc: + self._LOG.error( + f'>>> throwing unexpected {type(exc)} during init: 
{exc}') + raise + + def _get_locations(self): + ''' create a locations report with DSOC as exec site + to force copy rather than stream + ''' + capo_settings = get_capo_settings(TEST_PROFILE) + capo_settings['execution_site'] = ExecutionSite.DSOC.value + + return LocationsReport(self._LOG, + self.args, + capo_settings) + + def run(self): + """ + identical to DataFetcher.run() + :return: + """ + + try: + return ParallelFetcher( + self.args, self.settings, self._LOG, + self.servers_report).run() + except SystemExit as exc: + self._LOG.error(f'{exc}') + raise + except Exception as exc: + self._LOG.error( + f'>>> throwing unexpected exception during run: {exc}') + raise + + def _exit_with_error(self, return_code: ReturnCode): + sys.exit(return_code.value['code']) diff --git a/apps/cli/executables/datafetcher/test/test_datafetcher.py b/apps/cli/executables/datafetcher/test/test_datafetcher.py deleted file mode 100644 index 73cb4e551b8072fc848aa0137b8efa018d3e3d07..0000000000000000000000000000000000000000 --- a/apps/cli/executables/datafetcher/test/test_datafetcher.py +++ /dev/null @@ -1,701 +0,0 @@ -""" datafetcher unit tests """ - -import os -import subprocess -import tempfile -from pathlib import Path -from typing import List -from unittest.mock import MagicMock - -import pytest - -from datafetcher.commands import DataFetcher -from datafetcher.errors import Errors -from datafetcher.locations_report import LocationsReport -from datafetcher.utilities import get_arg_parser, ExecutionSite, \ - RetrievalMode, FlexLogger, ProductLocatorLookup - -from .testing_utils import TEST_PROFILE, LOCATION_REPORTS, \ - get_locations_report, get_locations_file, \ - get_mini_locations_file, find_newest_fetch_log_file, get_test_data_dir, \ - get_metadata_db_settings, get_test_capo_settings - -_VLA_SMALL_KEY = 'VLA_SMALL_EB' -_FETCH_COMMAND = 'datafetcher' -_LOCATION_FILENAME = 'locations.json' -_EB_EXTERNAL_NAME = 'sysstartS.58955.83384832176' -_ASDM_XML = 'ASDM.xml' - -''' -TO EXECUTE THESE TESTS: from apps/cli/executables/datafetcher, - - pytest -vx --logging-level=INFO - -''' - -class TestDataFetcher: - """ IMPORTANT NOTE: we CANNOT retrieve by copy if we don't have access to a - location to which NGAS can write, e.g, lustre. Therefore, any test - that involves -actual- retrieval of files must be by streaming, to - ensure which we must use a Capo profile in which the execution site is - -not- DSOC or NAASC. 
- The reason is this algorithm used in LocationsReport: - - for f in files_report['files']: - if f['server']['cluster'] == Cluster.DSOC and \ - f['server']['location'] == self.settings['execution_site']: - f['server']['retrieve_method'] = RetrievalMode.COPY - else: - f['server']['retrieve_method'] = RetrievalMode.STREAM - - - Be sure to have on the test system a local profile (local.properties) - that meets these criteria: - - - edu.nrao.archive.workflow.config.StartupSettings.temporaryDataDirectory - pointing to a locally writable temp dir, e.g., /var/tmp - - edu.nrao.archive.workflow.config.DeliverySettings.hostname - must point to local computer - - execution_site must NOT be DSOC or NAASC - - - """ - - @pytest.fixture(autouse=True, scope='function') - def setup_settings_datadir(self) -> None: - self.settings = get_test_capo_settings() - self.db_settings = get_metadata_db_settings(TEST_PROFILE) - self.test_data = self._initialize_test_data() - self.DATA_DIR = get_test_data_dir() - if self.DATA_DIR is None: - pytest.fail(f'test data directory not found under {os.getcwd()}') - - @pytest.fixture(autouse=True, scope='function') - def make_tempdir(self) -> None: - umask = os.umask(0o000) - self.top_level = tempfile.mkdtemp() - os.umask(umask) - self._LOG = FlexLogger(__name__, self.top_level) - - def test_bad_command_line(self): - - # bad product locator - args = [_FETCH_COMMAND, - '--product-locator', 'not-even', - '--profile', TEST_PROFILE, '--output-dir', self.top_level] - fetcher = CommandLineFetchLauncher(args, self._LOG) - try: - fetcher.run() - except FileNotFoundError as err: - self._LOG.debug(f'>>> {err}') - raise err - - exception_found = False - terminal_exception_thrown = False - - bad_locator_logfile = find_newest_fetch_log_file(self.top_level) - assert bad_locator_logfile is not None - assert 0 != os.path.getsize(bad_locator_logfile) - with open(bad_locator_logfile) as log: - log_contents = log.readlines() - - for line in log_contents: - if 'NoLocatorException' in line: - exception_found = True - if 'terminal_exception' in line: - terminal_exception_thrown = True - if exception_found and terminal_exception_thrown: - break - assert exception_found - assert terminal_exception_thrown - bad_locator_logfile.unlink() - - # nonexistent locations file - args = [_FETCH_COMMAND, '--location-file', 'aint_got_one', '--output-dir', - self.top_level, '--profile', TEST_PROFILE] - - fetcher = CommandLineFetchLauncher(args, self._LOG) - fetcher.run() - logfile = find_newest_fetch_log_file(self.top_level) - with open(logfile, 'r') as log: - log_contents = log.readlines() - - exception_found = False - terminal_exception_thrown = False - for line in log_contents: - if 'FileNotFoundError' in line: - exception_found = True - if 'terminal_exception' in line: - terminal_exception_thrown = True - if exception_found and terminal_exception_thrown: - break - assert exception_found - - def test_nothing_retrieved_if_dry_on_cmd_line(self): - toplevel = Path(self.top_level) - location_file = get_mini_locations_file( - Path(toplevel, _LOCATION_FILENAME)) - args = [_FETCH_COMMAND, - '--location-file', str(location_file), - '--profile', TEST_PROFILE, '--output-dir', self.top_level, - '--dry', '--verbose'] - fetcher = CommandLineFetchLauncher(args, self._LOG) - output = fetcher.run() - logfile = find_newest_fetch_log_file(self.top_level) - assert [] == output - assert 0 != os.path.getsize(logfile) - Path.unlink(location_file) - - # make sure none of these files written - file_count = 0 - for _ in 
os.walk(location_file): - file_count += 1 - assert 0 == file_count - - def test_force_overwrite_from_cmd_line(self): - toplevel = Path(self.top_level) - location_file = get_mini_locations_file(toplevel / _LOCATION_FILENAME) - dest_dir = Path(toplevel, _EB_EXTERNAL_NAME) - dest_dir.mkdir(parents=True, exist_ok=True) - - # make a fake file to be overwritten - fake_file = dest_dir / _ASDM_XML - with open(fake_file, 'w') as to_write: - to_write.write('alas, my days are numbered') - args = [_FETCH_COMMAND, - '--location-file', str(location_file), - '--profile', TEST_PROFILE, - '--output-dir', self.top_level, - '--force'] - CommandLineFetchLauncher(args, self._LOG).run() - - sizes = dict() - for _, _, fnames in os.walk(dest_dir): - for fname in fnames: - path = dest_dir / fname - sizes[path] = os.path.getsize(path) - assert 37 == len(sizes) - fake_size = os.path.getsize(fake_file) - assert 9339 == fake_size - - def test_no_overwrite_from_cmd_line(self): - toplevel = Path(self.top_level) - location_file = get_mini_locations_file(toplevel / _LOCATION_FILENAME) - dest_dir = toplevel / _EB_EXTERNAL_NAME - dest_dir.mkdir(parents=True, exist_ok=True) - - # make a fake file that shouldn't be overwritten - fake_file = dest_dir / _ASDM_XML - with open(fake_file, 'w') as to_write: - to_write.write("I'm not going anywhere!") - args = [_FETCH_COMMAND, - '--location-file', str(location_file), - '--profile', TEST_PROFILE, '--output-dir', self.top_level] - fetcher = CommandLineFetchLauncher(args, self._LOG) - fetcher.run() - - term_except_found = False - file_exists_found = False - logfile = find_newest_fetch_log_file(self.top_level) - with open(logfile, 'r') as log: - log_contents = log.readlines() - for line in log_contents: - if 'terminal_exception' in line: - term_except_found = True - if 'FileExistsError' in line: - file_exists_found = True - if term_except_found and file_exists_found: - break - - assert term_except_found and file_exists_found - - def test_cmd_line_more_output_when_verbose(self): - report_file = get_mini_locations_file( - Path(self.top_level, 'locations_verbose.json')) - args = [_FETCH_COMMAND, - '--location-file', str(report_file), - '--profile', TEST_PROFILE, '--output-dir', self.top_level, - '--verbose'] - fetcher = CommandLineFetchLauncher(args, self._LOG) - retrieved = fetcher.run() - num_files_expected = 37 - assert num_files_expected == len(retrieved) - - verbose_logfile = find_newest_fetch_log_file(self.top_level) - assert 0 != os.path.getsize(verbose_logfile) - - [file.unlink() for file in retrieved] - verbose_logfile.unlink() - - # same thing, but without verbose - args = [_FETCH_COMMAND, - '--location-file', str(report_file), - '--profile', TEST_PROFILE, '--output-dir', self.top_level] - fetcher = CommandLineFetchLauncher(args, self._LOG) - retrieved = fetcher.run() - assert num_files_expected == len(retrieved) - logfile = find_newest_fetch_log_file(self.top_level) - assert 0 == os.path.getsize(logfile) - - def test_can_stream_from_mini_locations_file(self): - """ gin up a location report with just a few small files in it - and confirm that we can actually stream them - """ - location_file = get_mini_locations_file(Path(self.top_level, _LOCATION_FILENAME)) - - report_file = get_mini_locations_file(location_file) - args = ['--location-file', str(report_file), - '--output-dir', self.top_level, - '--profile', TEST_PROFILE] - namespace = get_arg_parser().parse_args(args) - fetch = DataFetcher(namespace, self.settings) - retrieved = fetch.run() - file_count = len(retrieved) - assert 
37 == file_count - - def test_verbose_writes_stuff_to_log(self): - path = Path(self.top_level, _LOCATION_FILENAME) - report_file = get_mini_locations_file(path) - args = ['--location-file', str(report_file), - '--output-dir', self.top_level, - '--profile', TEST_PROFILE, '--verbose'] - namespace = get_arg_parser().parse_args(args) - fetch = DataFetcher(namespace, self.settings) - fetch.run() - - logfile = fetch.logfile - assert logfile.is_file() - assert 0 != os.path.getsize(logfile) - - def test_empty_log_if_not_verbose(self): - path = Path(self.top_level, _LOCATION_FILENAME) - report_file = get_mini_locations_file(path) - args = ['--location-file', str(report_file), - '--output-dir', self.top_level, - '--profile', TEST_PROFILE] - namespace = get_arg_parser().parse_args(args) - fetch = DataFetcher(namespace, self.settings) - fetch.run() - - logfile = fetch.logfile - assert logfile.is_file() - assert 0 == os.path.getsize(logfile) - - def test_copy_attempt_throws_sys_exit_service_error(self): - product_locator = self.test_data['13B-014']['product_locator'] - - # use site from non-local profile to guarantee copy attempt - local_exec_site = self.settings['execution_site'] - self.settings['execution_site'] = ExecutionSite.DSOC - - args = ['--product-locator', product_locator, - '--output-dir', self.top_level, - '--profile', self.settings['execution_site'].value, - '--verbose'] - parser = get_arg_parser() - namespace = parser.parse_args(args) - fetch = DataFetcher(namespace, self.settings) - servers_report = fetch.servers_report - for server in servers_report: - entry = servers_report[server] - assert entry['retrieve_method'].value == RetrievalMode.COPY.value - - # let's try just one file so we're not sitting here all day - for server in servers_report: - entry = servers_report[server] - servers_report = {server: entry} - break - fetch.servers_report = servers_report - assert fetch.servers_report[server] is not None - files = fetch.servers_report[server]['files'] - fetch.servers_report[server]['files'] = [files[0]] - - try: - with pytest.raises(SystemExit) as s_ex: - fetch.run() - assert Errors.NGAS_SERVICE_ERROR.value == s_ex.value.code - finally: - self.settings['execution_site'] = local_exec_site - - def test_dies_with_bad_server_info(self): - report_file = get_locations_file('VLA_BAD_SERVER') - args = ['--location-file', str(report_file), - '--output-dir', self.top_level, - '--profile', TEST_PROFILE] - namespace = get_arg_parser().parse_args(args) - fetch = DataFetcher(namespace, self.settings) - with pytest.raises(SystemExit) as s_ex: - fetch.run() - exc_code = s_ex.value.code - expected = Errors.NGAS_SERVICE_ERROR.value - assert expected == exc_code - - def test_throws_sys_exit_file_exists_if_overwrite_not_forced(self): - toplevel = Path(self.top_level) - location_file = get_mini_locations_file( - Path(self.top_level, _LOCATION_FILENAME)) - assert Path.exists(location_file) - destination = Path(toplevel, _EB_EXTERNAL_NAME) - Path(destination).mkdir(parents=True, exist_ok=True) - assert destination.is_dir() - - # stick a fake SDM in there so it will fall over - fake_file = Path(destination, _ASDM_XML) - with open(fake_file, 'w') as to_write: - to_write.write('lalalalalala') - assert fake_file.exists() - assert os.path.getsize(fake_file) != 0 - - args = ['--location-file', str(location_file), - '--output-dir', self.top_level, - '--profile', TEST_PROFILE] - namespace = get_arg_parser().parse_args(args) - - # exception should be thrown because one of the files to be retrieved - # is in the 
destination dir and we're not forcing overwrite here - with pytest.raises(SystemExit) as exc: - DataFetcher(namespace, self.settings).run() - exc_code = exc.value.code - expected = Errors.FILE_EXISTS_ERROR.value - assert expected == exc_code - - def test_overwrites_when_forced(self): - external_name = LOCATION_REPORTS[_VLA_SMALL_KEY]['external_name'] - toplevel = Path(self.top_level) - destination = toplevel / external_name - destination.mkdir(parents=True, exist_ok=True) - assert destination.is_dir() - - # stick a fake SDM in there to see if overwrite really happens - to_overwrite = _ASDM_XML - fake_file = destination / to_overwrite - text = '"Bother!" said Pooh. "Lock phasers on that heffalump!"' - with open(fake_file, 'w') as to_write: - to_write.write(text) - assert fake_file.exists() - assert len(text) == os.path.getsize(fake_file) - report_metadata = LOCATION_REPORTS['VLA_SMALL_EB'] - external_name = report_metadata['external_name'] - destination = toplevel / external_name - Path(destination).mkdir(parents=True, exist_ok=True) - - json_path = destination / report_metadata['filename'] - report_file = get_mini_locations_file(json_path) - args = ['--location-file', str(report_file), - '--output-dir', self.top_level, - '--profile', TEST_PROFILE, '--force'] - namespace = get_arg_parser().parse_args(args) - report = LocationsReport(self._LOG, namespace, self.settings) - - # expecting 37 files - files = report.files_report['files'] - - sizes = [file['size'] for file in files] - total_size_expected = sum(sizes) - num_files_expected = 37 - assert num_files_expected == len(files) - - fetch = DataFetcher(namespace, self.settings) - retrieved = fetch.run() - assert num_files_expected == len(retrieved) - - # delete the .json so it doesn't mess up our total size computation - Path.unlink(report_file) - - total_size_actual = 0 - dest = Path(destination) - for dirpath, _, filenames in os.walk(dest): - for fname in filenames: - path = Path(dirpath, fname) - total_size_actual += os.path.getsize(path) - assert total_size_expected == total_size_actual - - def test_sys_exit_file_error_on_bad_destination(self): - file_spec = self.test_data['13B-014'] - args = ['--product-locator', file_spec['product_locator'], - '--output-dir', '/foo', - '--profile', TEST_PROFILE] - namespace = get_arg_parser().parse_args(args) - with pytest.raises(SystemExit) as s_ex: - DataFetcher(namespace, self.settings) - assert Errors.FILE_NOT_FOUND_ERROR.value == s_ex.value.code - - def test_sys_exit_no_locator_for_bad_product_locator(self): - args = ['--product-locator', '/foo', - '--output-dir', self.top_level, '--profile', TEST_PROFILE] - namespace = get_arg_parser().parse_args(args) - - with pytest.raises(SystemExit) as s_ex: - fetch = DataFetcher(namespace, self.settings) - fetch.run() - assert Errors.NO_LOCATOR.value == s_ex.value.code - - def test_gets_expected_test_data(self): - assert self.test_data['13B-014'] is not None - file_spec = self.test_data['13B-014'] - assert '13B-014.sb28862036.eb29155786.56782.5720116088' == file_spec['external_name'] - locator = file_spec['product_locator'] - assert locator.startswith('uid://evla/execblock/') - - def test_gets_vlbas_from_report_file(self): - report_file = get_locations_file('VLBA_EB') - args = ['--location-file', str(report_file), - '--output-dir', self.top_level, '--profile', TEST_PROFILE] - namespace = get_arg_parser().parse_args(args) - fetch = DataFetcher(namespace, self.settings) - report_files = fetch.locations_report.files_report['files'] - - assert 16 == 
len(report_files) - expected_files = [Path(self.top_level, item['relative_path']) - for item in report_files] - - # files we're getting take waaaaayyy too long to fetch in a test case, - # so we're mocking DataFetcher.run() - fetch.run = MagicMock(return_value=expected_files) - actual_files = fetch.run() - num_expected = len(expected_files) - assert num_expected == len(actual_files) - - match_count = 0 - for exp_file in expected_files: - for act_file in actual_files: - act_parent = act_file.name - if act_parent == exp_file.name: - match_count += 1 - break - assert num_expected == match_count - - def test_gets_large_vla_ebs_from_report_file(self): - report_file = get_locations_file('VLA_LARGE_EB') - args = ['--location-file', str(report_file), - '--output-dir', self.top_level, '--profile', TEST_PROFILE] - namespace = get_arg_parser().parse_args(args) - fetch = DataFetcher(namespace, self.settings) - report_files = fetch.locations_report.files_report['files'] - assert 46 == len(report_files) - toplevel = Path(self.top_level) - expected_files = [toplevel / item['relative_path'] - for item in report_files] - fetch.run = MagicMock(return_value=expected_files) - actual_files = fetch.run() - num_expected = len(expected_files) - assert num_expected == len(actual_files) - - def test_gets_images_from_report_file(self): - report_file = get_locations_file('IMG') - args = ['--location-file', str(report_file), - '--output-dir', self.top_level, '--profile', TEST_PROFILE] - namespace = get_arg_parser().parse_args(args) - fetch = DataFetcher(namespace, self.settings) - report_files = fetch.locations_report.files_report['files'] - assert 2 == len(report_files) - toplevel = Path(self.top_level) - expected_files = [toplevel / item['relative_path'] - for item in report_files] - # files are too big to fetch in a test; mock DataFetcher.run() - fetch.run = MagicMock(return_value=expected_files) - actual_files = fetch.run() - num_expected = len(expected_files) - assert num_expected == len(actual_files) - - def test_gets_calibration_from_report_file(self): - report_file = get_locations_file('CALIBRATION') - args = ['--location-file', str(report_file), - '--output-dir', self.top_level, '--profile', TEST_PROFILE] - namespace = get_arg_parser().parse_args(args) - fetch = DataFetcher(namespace, self.settings) - report_files = fetch.locations_report.files_report['files'] - assert 1 == len(report_files) - file_spec = report_files[0] - - # calibration will have external name = relative path = subdirectory - relative_path = file_spec['relative_path'] - assert relative_path == file_spec['subdirectory'] - - expected_files = [Path(self.top_level, relative_path)] - fetch.run = MagicMock(return_value=expected_files) - actual_files = fetch.run() - num_expected = len(expected_files) - assert num_expected == len(actual_files) - - def test_gets_calibration_from_locator(self): - external_name = LOCATION_REPORTS['CALIBRATION']['external_name'] - product_locator = ProductLocatorLookup( - self.db_settings).look_up_locator_for_ext_name(external_name) - args = ['--product-locator', product_locator, - '--output-dir', self.top_level, '--profile', TEST_PROFILE] - namespace = get_arg_parser().parse_args(args) - fetch = DataFetcher(namespace, self.settings) - report_files = fetch.locations_report.files_report['files'] - assert 1 == len(report_files) - - file_spec = report_files[0] - - # calibration will have external name = relative path = subdirectory - relative_path = file_spec['relative_path'] - assert external_name == relative_path - 
assert relative_path == file_spec['subdirectory'] - - expected_files = [Path(self.top_level) / relative_path] - fetch.run = MagicMock(return_value=expected_files) - actual_files = fetch.run() - num_expected = len(expected_files) - assert num_expected == len(actual_files) - - def test_retrieval_finds_size_mismatch(self): - report_spec = LOCATION_REPORTS[_VLA_SMALL_KEY] - external_name = report_spec['external_name'] - - data_dir = Path(self.DATA_DIR) - locations_file = data_dir / 'VLA_SMALL_EB_BUSTED.json' - args = ['--location-file', str(locations_file), - '--output-dir', self.top_level, '--profile', TEST_PROFILE] - namespace = get_arg_parser().parse_args(args) - fetch1 = DataFetcher(namespace, self.settings) - report_files = fetch1.locations_report.files_report['files'] - assert 44 == len(report_files) - - filename = 'Weather.xml' - for file in report_files: - if filename == file['relative_path']: - assert 165100 == file['size'] - break - - product_locator = ProductLocatorLookup(self.db_settings) \ - .look_up_locator_for_ext_name(external_name) - args = ['--product-locator', product_locator, - '--output-dir', self.top_level, '--profile', TEST_PROFILE] - namespace = get_arg_parser().parse_args(args) - fetch2 = DataFetcher(namespace, self.settings) - - locations_report = get_locations_report(_VLA_SMALL_KEY) - fetch2.run = MagicMock(return_value=locations_report['files']) - locator_files = fetch2.run() - assert len(report_files) == len(locator_files) - for file1 in report_files: - for file2 in locator_files: - if file2['relative_path'] == file1['relative_path']: - if filename != file1['relative_path']: - assert file2['size'] == file1['size'] - else: - assert file2['size'] != file1['size'] - break - - def test_throws_sys_exit_missing_setting_if_no_args(self): - args = [] - with pytest.raises(SystemExit) as s_ex: - get_arg_parser().parse_args(args) - assert Errors.MISSING_SETTING.value == s_ex.value.code - - def test_throws_sys_exit_no_locator_if_no_product_locator(self): - args = ['--product-locator', '', - '--output-dir', self.top_level, '--profile', TEST_PROFILE] - namespace = get_arg_parser().parse_args(args) - - with pytest.raises(SystemExit) as s_ex: - DataFetcher(namespace, self.settings) - assert Errors.NO_LOCATOR.value == s_ex.value.code - - # -------------------------------------------------------------------------- - # - # U T I L I T I E S - # - # -------------------------------------------------------------------------- - - @staticmethod - def _remove_large_files_from_location_report(locations_in: LocationsReport): - ''' strip files > 100000 bytes from location report, so we can try - an actual stream without it taking forever - - :returns: LocationsReport - ''' - - files = locations_in['files'] - locations_out = locations_in.copy() - locations_out['files'] = \ - [file for file in files if file['size'] <= 100000] - return locations_out - - def _initialize_test_data(self): - ext_name = '13B-014.sb28862036.eb29155786.56782.5720116088' - - product_locator = ProductLocatorLookup(self.db_settings) \ - .look_up_locator_for_ext_name(ext_name) - dict13b = {'external_name': ext_name, - 'product_locator': product_locator} - - to_return = {'13B-014': dict13b} - return to_return - - -class CommandLineFetchLauncher: - """ Launches DataFetcher from command line, with logging - """ - - def __init__(self, args: List, logger: FlexLogger): - args_to_parse = args if args[0] != _FETCH_COMMAND else args[1:] - self._LOG = logger - namespace = get_arg_parser().parse_args(args_to_parse) - self.args = args 
- self.output_dir = Path(namespace.output_dir) - - if not Path.is_dir(self.output_dir): - raise FileNotFoundError(f'{self.output_dir} not found') - elif not os.access(self.output_dir, os.R_OK): - raise PermissionError(f'{self.output_dir} not found') - self.verbose = namespace.verbose - - def run(self): - ''' launch fetch from command line - @:returns directory listing - ''' - - with subprocess.Popen(self.args, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - bufsize=1, - universal_newlines=True) as proc: - return self.analyze_output(proc) - - def analyze_output(self, proc): - if proc.stderr: - for err in proc.stderr: - self._LOG.error(err.strip()) - - output = proc.stdout - error_found = output.errors - if error_found: - if isinstance(error_found, list): - [self._LOG.error(line) for line in error_found] - else: - if error_found != 'strict': - self._LOG.error(error_found) - - lines = list() - for line in output: - lines.append(line.strip()) - - for i in range(0, len(lines) - 1): - line = lines[i] - self._LOG.debug(f'{line}') - if 'error' in line.lower(): - # log everything after the error - for j in range(i, len(lines) - 1): - self._LOG.error(lines[j]) - if 'debug' in line.lower() and self.verbose: - self._LOG.debug(line) - if 'warn' in line.lower(): - self._LOG.warning(line) - - files_retrieved = list() - for root, dirnames, filenames in os.walk(self.output_dir): - root_dir = Path(root) - if dirnames: - subdir = root_dir / dirnames[0] - else: - subdir = root_dir - for filename in filenames: - if not filename.endswith('.log') and not filename.endswith('.json'): - files_retrieved.append(subdir / filename) - self._LOG.debug(f'>>> {len(files_retrieved)} files retrieved') - return files_retrieved diff --git a/apps/cli/executables/datafetcher/test/test_df_function.py b/apps/cli/executables/datafetcher/test/test_df_function.py new file mode 100644 index 0000000000000000000000000000000000000000..45f6b6f15b0fba1199b57132c820662beec23e99 --- /dev/null +++ b/apps/cli/executables/datafetcher/test/test_df_function.py @@ -0,0 +1,553 @@ +""" Unit tests for data-fetcher. """ + +import sys +from pathlib import Path + +import pytest + +from datafetcher.datafetcher import DataFetcher, ReturnCode +from datafetcher.utilities import get_arg_parser, ProductLocatorLookup, \ + RetrievalMode, Location, Cluster + +# N.B. IJ is dumb -- NONE of these imports are unused! 
+from .df_pytest_utils import TEST_PROFILE, get_mini_locations_file, \
+    get_locations_file, LOCATION_REPORTS, make_tempdir, capo_settings, settings
+
+_LOCATION_FILENAME = 'locations.json'
+_ASDM_XML = 'ASDM.xml'
+_EB_EXTERNAL_NAME = 'sysstartS.58955.83384832176'
+
+MISSING_SETTING = ReturnCode.MISSING_SETTING.value['code']
+MISSING_PROFILE = ReturnCode.MISSING_PROFILE.value['code']
+
+# set this to False when debugging one or more tests
+# so as not to have to sit thru every test;
+# comment out the target test(s)' @pytest.skip
+RUN_ALL = True
+print(f'>>> RUNNING ALL TESTS: {RUN_ALL}')
+
+# pylint: disable=C0115, C0116, E0401, E1101, R0902, R0903, R0914, W1203, W0613, W0621
+
+
+def get_profile_from_args(args: list) -> str:
+    for i in range(0, len(args)):
+        if args[i] == '--profile' and i < len(args) - 1:
+            profile = args[i + 1]
+            return profile
+
+    return None
+
+
+def evaluate_args_and_capo(args: list, capo_settings: dict):
+
+    if args is None or len(args) == 0:
+        sys.exit(MISSING_SETTING)
+
+    profile = get_profile_from_args(args)
+    if profile is None:
+        profile = capo_settings['profile']
+        if profile is None:
+            sys.exit(MISSING_PROFILE)
+        else:
+            # no --profile on the command line; fall back to the capo setting
+            args += ['--profile', profile]
+
+    namespace = get_arg_parser().parse_args(args)
+    return namespace
+
+
+def launch_datafetcher(args: list, df_capo_settings: dict) -> int:
+    """ invoke the DF with these args as in df.main(),
+        launch it with df.run(),
+        and return the appropriate return/error code
+
+    """
+    if args is None or len(args) == 0:
+        return MISSING_SETTING
+
+    try:
+        namespace = evaluate_args_and_capo(args, df_capo_settings)
+        fetcher = DataFetcher(namespace, df_capo_settings)
+        return fetcher.run()
+    except SystemExit as exc:
+        # SystemExit carries the exit status in its .code attribute
+        return exc.code
+    except KeyError:
+        sys.exit(MISSING_PROFILE)
+    except Exception as exc:
+        pytest.fail(f'{exc}')
+
+
+def test_settings_setup(settings):
+    """ Ensure that the test settings we're using make sense """
+    assert settings.capo_settings is not None
+    assert isinstance(settings.capo_settings, dict)
+    assert settings.db_settings is not None
+    assert isinstance(settings.db_settings, dict)
+    assert settings.test_data is not None
+    assert isinstance(settings.test_data, dict)
+
+
+@pytest.mark.skipif(not RUN_ALL, reason='debug')
+def test_usage_statement_makes_sense():
+    """ Ensure that the datafetcher's "usage" statement is as we expect """
+
+    usage = DataFetcher._build_usage_message()
+    assert usage.startswith('Usage:')
+    lines = usage.split('\n')
+    assert len(lines) >= len(ReturnCode) + 1
+
+
+@pytest.mark.skipif(not RUN_ALL, reason='debug')
+def test_nothing_retrieved_if_dry_locator(make_tempdir, settings):
+    """ Simulates dry run with product locator """
+
+    args = ['--product-locator', settings.test_data['product_locator'],
+            '--output-dir', str(make_tempdir),
+            '--profile', TEST_PROFILE,
+            '--dry-run'
+            ]
+    return_code = launch_datafetcher(args, settings.capo_settings)
+    assert return_code == 0
+    tempdir_files = Path(make_tempdir).iterdir()
+    for file in tempdir_files:
+        if not str(file).endswith('.log') \
+                and not str(file).endswith('.json'):
+            pytest.fail('dry run with product locator -DID- fetch files')
+
+
+@pytest.mark.skipif(not RUN_ALL, reason='debug')
+def test_nothing_retrieved_if_dry_file(make_tempdir, settings):
+    """ dry run with locations file """
+
+    output_dir = Path(make_tempdir)
+    locations_file = get_mini_locations_file(Path(output_dir, _LOCATION_FILENAME))
+
+
+def test_settings_setup(settings):
+    """ Ensure that the test settings we're using make sense """
+    assert settings.capo_settings is not None
+    assert isinstance(settings.capo_settings, dict)
+    assert settings.db_settings is not None
+    assert isinstance(settings.db_settings, dict)
+    assert settings.test_data is not None
+    assert isinstance(settings.test_data, dict)
+
+
+@pytest.mark.skipif(not RUN_ALL, reason='debug')
+def test_usage_statement_makes_sense():
+    """ Ensure that the datafetcher's "usage" statement is as we expect """
+
+    usage = DataFetcher._build_usage_message()
+    assert usage.startswith('Usage:')
+    lines = usage.split('\n')
+    assert len(lines) >= len(ReturnCode) + 1
+
+
+@pytest.mark.skipif(not RUN_ALL, reason='debug')
+def test_nothing_retrieved_if_dry_locator(make_tempdir, settings):
+    """ Simulates a dry run with a product locator """
+
+    args = ['--product-locator', settings.test_data['product_locator'],
+            '--output-dir', str(make_tempdir),
+            '--profile', TEST_PROFILE,
+            '--dry-run'
+            ]
+    return_code = launch_datafetcher(args, settings.capo_settings)
+    assert return_code == 0
+    tempdir_files = Path(make_tempdir).iterdir()
+    for file in tempdir_files:
+        if not str(file).endswith('.log') \
+                and not str(file).endswith('.json'):
+            pytest.fail('dry run with product locator -DID- fetch files')
+
+
+@pytest.mark.skipif(not RUN_ALL, reason='debug')
+def test_nothing_retrieved_if_dry_file(make_tempdir, settings):
+    """ Simulates a dry run with a locations file """
+
+    output_dir = Path(make_tempdir)
+    locations_file = get_mini_locations_file(Path(output_dir, _LOCATION_FILENAME))
+    args = ['--location-file', str(locations_file),
+            '--output-dir', str(make_tempdir),
+            '--profile', TEST_PROFILE,
+            '--dry-run'
+            ]
+    return_code = launch_datafetcher(args, settings.capo_settings)
+    assert return_code == 0
+    tempdir_files = output_dir.iterdir()
+    for file in tempdir_files:
+        if not str(file).endswith('.log') \
+                and not str(file).endswith('.json'):
+            pytest.fail('dry run with locations file -DID- fetch files')
+
+
+@pytest.mark.skipif(not RUN_ALL, reason='debug')
+def test_force_flag_overwrites_existing_file(make_tempdir, settings):
+    top_level = Path(make_tempdir)
+    location_file = get_mini_locations_file(top_level / _LOCATION_FILENAME)
+    dest_dir = Path(top_level, _EB_EXTERNAL_NAME)
+    dest_dir.mkdir(parents=True, exist_ok=True)
+
+    # make a fake file to be overwritten
+    fake_file = dest_dir / _ASDM_XML
+    with open(fake_file, 'w') as to_write:
+        to_write.write('alas, my days are numbered')
+
+    args = ['--location-file', str(location_file),
+            '--profile', TEST_PROFILE,
+            '--output-dir', str(top_level),
+            '--force']
+    try:
+        launch_datafetcher(args, settings.capo_settings)
+    except SystemExit as ex:
+        pytest.fail(f'{ex}')
+    except Exception as exc:
+        pytest.fail(f'{exc}')
+
+    sizes = dict()
+
+    # go through the destination directory recursively
+    # and record every file's size
+    for file in dest_dir.rglob('*'):
+        sizes[str(file)] = file.stat().st_size
+    assert len(sizes) == 37
+    fake_size = fake_file.stat().st_size
+    assert fake_size == 9339
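+
+# (A note on the magic numbers above: the 37 retrieved files and the
+#  9339-byte ASDM.xml reflect the contents of the mini locations file
+#  fixture -- an inference from these tests, not something they state --
+#  so if that report changes, these assertions must change with it.)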
+
+
+@pytest.mark.skipif(not RUN_ALL, reason='debug')
+def test_no_overwrite_without_force(make_tempdir, settings):
+    top_level = Path(make_tempdir)
+    location_file = get_mini_locations_file(top_level / _LOCATION_FILENAME)
+    dest_dir = Path(top_level, _EB_EXTERNAL_NAME)
+    dest_dir.mkdir(parents=True, exist_ok=True)
+
+    # make a fake file that should -not- get overwritten
+    fake_file = dest_dir / _ASDM_XML
+    with open(fake_file, 'w') as to_write:
+        to_write.write('dang! what a kick in the rubber parts!')
+
+    args = ['--location-file', str(location_file),
+            '--profile', TEST_PROFILE,
+            '--output-dir', str(top_level)
+            ]
+
+    return_code = launch_datafetcher(args, settings.capo_settings)
+    assert return_code == ReturnCode.NGAS_FETCH_ERROR.value['code']
+
+    sizes = dict()
+    for file in dest_dir.rglob('*'):
+        sizes[str(file)] = file.stat().st_size
+    assert len(sizes) < 37
+    fake_size = fake_file.stat().st_size
+    assert fake_size == 38
+
+
+@pytest.mark.skipif(not RUN_ALL, reason='debug')
+def test_more_output_when_verbose(make_tempdir, settings):
+    top_level = Path(make_tempdir)
+    location_file = get_mini_locations_file(top_level / _LOCATION_FILENAME)
+
+    args = ['--location-file', str(location_file),
+            '--profile', TEST_PROFILE,
+            '--output-dir', str(make_tempdir),
+            '--verbose']
+
+    return_code = launch_datafetcher(args, settings.capo_settings)
+    assert return_code == ReturnCode.SUCCESS.value['code']
+
+    num_files_expected = 37
+    retrieved = [file for file in top_level.rglob('*')
+                 if file.is_file()]
+    assert num_files_expected == len(retrieved) - 2
+
+    verbose_logfile = None
+    for file in retrieved:
+        if str(file).endswith('.log'):
+            verbose_logfile = file
+            break
+
+    assert verbose_logfile is not None
+    verbose_log_size = verbose_logfile.stat().st_size
+    assert verbose_log_size > 0
+
+    # get rid of all the files we downloaded, plus the log
+    # (Path.unlink() returns None, so count first, then delete)
+    to_delete = [file for file in retrieved
+                 if not str(file).endswith('.json')]
+    for file in to_delete:
+        file.unlink()
+    assert len(to_delete) >= num_files_expected
+
+    # same download, but without verbose logging
+    args = ['--location-file', str(location_file),
+            '--profile', TEST_PROFILE,
+            '--output-dir', str(make_tempdir)]
+
+    return_code = launch_datafetcher(args, settings.capo_settings)
+    assert return_code == ReturnCode.SUCCESS.value['code']
+
+    retrieved = list(top_level.rglob('*'))
+    assert len(retrieved) == num_files_expected + 3
+
+    logfile = None
+    for file in retrieved:
+        if str(file).endswith('.log'):
+            logfile = file
+            break
+    assert logfile is not None
+
+    logsize = logfile.stat().st_size
+    # a successful download with non-verbose logging
+    # should result in a zero-size log file
+    assert logsize == 0
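+
+# (The non-verbose half of the test above leans on an implicit contract:
+#  a fully successful, non-verbose run writes nothing to its log file,
+#  so verbose -> log size > 0, and quiet success -> log size == 0.)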
+
+
+@pytest.mark.skipif(not RUN_ALL, reason='debug')
+def test_copy_attempt_throws_sys_exit_service_error(make_tempdir, settings):
+    """ We set the profile to nmprod here so as to force the DF
+        to try to copy rather than stream
+    """
+
+    # N.B. we can't do this import with the rest of the imports up top,
+    # because test_df_return_codes is not yet initialized
+    from .test_df_return_codes import we_are_in_docker
+    if we_are_in_docker():
+        # this test doesn't work in a Docker container:
+        # the locator service URL in the capo profile is None,
+        # even if we write a fake props file inside the test.
+        # Instead, we mock this in test_df_return_codes
+        return
+
+    prod_profile = 'nmprod'
+    prod_props_filename = prod_profile + '.properties'
+    props_file = Path(make_tempdir, prod_props_filename)
+
+    try:
+        args = ['--product-locator', settings.test_data['product_locator'],
+                '--output-dir', str(make_tempdir),
+                '--profile', prod_profile]
+        namespace = get_arg_parser().parse_args(args)
+        fetcher = DataFetcher(namespace, settings.capo_settings)
+
+        servers_report = fetcher.servers_report
+        for server in servers_report:
+            entry = servers_report[server]
+            assert entry['retrieve_method'].value == RetrievalMode.COPY.value
+
+        # let's try just one file so we're not sitting here all day
+        for server in servers_report:
+            entry = servers_report[server]
+            servers_report = {server: entry}
+            fetcher.servers_report = servers_report
+            assert fetcher.servers_report[server] is not None
+            files = fetcher.servers_report[server]['files']
+            fetcher.servers_report[server]['files'] = [files[0]]
+            break
+
+        with pytest.raises(SystemExit) as exc:
+            fetcher.run()
+        assert exc.value.code == \
+            ReturnCode.CATASTROPHIC_REQUEST_ERROR.value['code']
+    finally:
+        if props_file.exists():
+            props_file.unlink()
+        assert not props_file.exists()
+
+
+@pytest.mark.skipif(not RUN_ALL, reason='debug')
+def test_dies_with_bad_server_info(make_tempdir, settings):
+    location_file = get_locations_file('VLA_BAD_SERVER')
+    args = ['--profile', TEST_PROFILE, '--output-dir', str(make_tempdir),
+            '--location-file', str(location_file)]
+    # launch_datafetcher() converts a SystemExit into a plain return code,
+    # so assert on the code directly
+    return_code = launch_datafetcher(args, settings.capo_settings)
+    assert return_code == ReturnCode.NGAS_FETCH_ERROR.value['code']
+
+
+@pytest.mark.skipif(not RUN_ALL, reason='debug')
+def test_missing_setting_error_on_bad_destination(settings):
+    args = ['--profile', TEST_PROFILE,
+            '--product-locator', settings.test_data['product_locator'],
+            '--output-dir', 'floob']
+    return_code = launch_datafetcher(args, settings.capo_settings)
+    assert return_code == ReturnCode.MISSING_SETTING.value['code']
+
+
+def write_fake_file(destination: Path, file_info: dict):
+    filename = file_info['ngas_file_id']
+    path = Path(destination, filename)
+    with open(path, 'w') as file:
+        file.write(f'{str(file_info["size"])}\n')
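+
+# write_fake_file establishes the convention the retrieval tests below rely
+# on: a fake file's -contents- are the size that the locations report claims
+# for it, so a test can verify a stand-in file with, e.g. (sketch):
+#
+#     contents = Path(dest_dir, file_info['ngas_file_id']).read_text().strip()
+#     assert int(contents) == file_info['size']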
+
+
+class MockSuccessfulFetchReturn:
+
+    @staticmethod
+    def run():
+        return 0
+
+
+@pytest.fixture
+def mock_successful_fetch_run(monkeypatch):
+
+    def mock_run(*args, **kwargs):
+        return MockSuccessfulFetchReturn().run()
+
+    monkeypatch.setattr(DataFetcher, "run", mock_run)
+
+
+@pytest.mark.skipif(not RUN_ALL, reason='debug')
+def test_gets_vlbas_from_report_file(mock_successful_fetch_run,
+                                     make_tempdir,
+                                     settings):
+
+    location_file = get_locations_file('VLBA_EB')
+    args = ['--profile', TEST_PROFILE, '--output-dir', str(make_tempdir),
+            '--location-file', str(location_file)]
+    fetcher = DataFetcher(get_arg_parser().parse_args(args),
+                          settings.capo_settings)
+    servers_report = fetcher.servers_report
+    assert len(servers_report) == 1
+
+    return_code = fetcher.run()
+    assert return_code == 0
+
+    dest_dir = Path(make_tempdir)
+    file_info_dict = dict()
+    for server_url, values in servers_report.items():
+        assert server_url == 'nmngas03.aoc.nrao.edu:7777'
+        assert values['location'] == Location.DSOC.value
+        assert values['cluster'] == Cluster.DSOC.value
+        assert values['retrieve_method'] == RetrievalMode.STREAM
+        file_values = values['files']
+        assert len(file_values) == 16
+        for filename in file_values:
+            write_fake_file(dest_dir, filename)
+            file_info_dict[filename['ngas_file_id']] = filename
+
+    datafetcher = DataFetcher(get_arg_parser().parse_args(args),
+                              settings.capo_settings)
+    return_code = datafetcher.run()
+    assert return_code == 0
+
+    for filename in file_info_dict:
+        path = Path(dest_dir, filename)
+        assert path.is_file()
+        contents = path.read_text().strip()
+        assert int(contents) == file_info_dict[filename]['size']
+
+
+@pytest.mark.skipif(not RUN_ALL, reason='debug')
+def test_gets_large_vla_ebs_from_report_file(mock_successful_fetch_run,
+                                             make_tempdir,
+                                             settings):
+    location_file = get_locations_file('VLA_SMALL_EB')
+    args = ['--profile', TEST_PROFILE, '--output-dir', str(make_tempdir),
+            '--location-file', str(location_file)]
+    fetcher = DataFetcher(get_arg_parser().parse_args(args),
+                          settings.capo_settings)
+    servers_report = fetcher.servers_report
+    assert len(servers_report) == 2
+
+    return_code = fetcher.run()
+    assert return_code == 0
+
+    server_file_count = {'nmngas03.aoc.nrao.edu:7777': 0,
+                         'nmngas04.aoc.nrao.edu:7777': 0}
+    dest_dir = Path(make_tempdir)
+    file_list = list()
+    for server_url, values in servers_report.items():
+        assert server_url in server_file_count.keys()
+        assert values['location'] == Location.DSOC.value
+        assert values['cluster'] == Cluster.DSOC.value
+        assert values['retrieve_method'] == RetrievalMode.STREAM
+        file_values = values['files']
+        server_file_count[server_url] += len(file_values)
+
+        for filename in file_values:
+            write_fake_file(dest_dir, filename)
+        file_list.append(values)
+
+    assert server_file_count['nmngas03.aoc.nrao.edu:7777'] == 3
+    assert server_file_count['nmngas04.aoc.nrao.edu:7777'] == 41
+
+    datafetcher = DataFetcher(get_arg_parser().parse_args(args),
+                              settings.capo_settings)
+    return_code = datafetcher.run()
+    assert return_code == 0
+
+    found_count = 0
+    for file_info in file_list:
+        for file in file_info['files']:
+            filename = file['ngas_file_id']
+            path = Path(dest_dir, filename)
+            assert path.is_file()
+            contents = path.read_text().strip()
+            assert int(contents) == file['size']
+            found_count += 1
+
+    assert found_count == len(file_list)
+
+
+@pytest.mark.skipif(not RUN_ALL, reason='debug')
+def test_gets_images_from_report_file(mock_successful_fetch_run,
+                                      make_tempdir,
+                                      settings):
+    location_file = get_locations_file('IMG')
+    args = ['--profile', TEST_PROFILE, '--output-dir', str(make_tempdir),
+            '--location-file', str(location_file)]
+    fetcher = DataFetcher(get_arg_parser().parse_args(args),
+                          settings.capo_settings)
+    servers_report = fetcher.servers_report
+    assert len(servers_report) == 2
+
+    server_file_count = {'nmngas01.aoc.nrao.edu:7777': 0,
+                         'nmngas02.aoc.nrao.edu:7777': 0}
+    dest_dir = Path(make_tempdir)
+    file_list = list()
+    for server_url, values in servers_report.items():
+        assert server_url in server_file_count.keys()
+        assert values['location'] == Location.DSOC.value
+        assert values['cluster'] == Cluster.DSOC.value
+        assert values['retrieve_method'] == RetrievalMode.STREAM
+        file_values = values['files']
+        server_file_count[server_url] += len(file_values)
+
+        for filename in file_values:
+            write_fake_file(dest_dir, filename)
+        file_list.append(values)
+
+    for count in server_file_count.values():
+        assert count == 1
+
+    return_code = fetcher.run()
+    assert return_code == 0
+
+    found_count = 0
+    for file_info in file_list:
+        for file in file_info['files']:
+            filename = file['ngas_file_id']
+            path = Path(dest_dir, filename)
+            assert path.is_file()
+            contents = path.read_text().strip()
+            assert int(contents) == file['size']
+            found_count += 1
+
+    assert found_count == len(file_list)
+
+
+@pytest.mark.skipif(not RUN_ALL, reason='debug')
+def test_gets_calibration_from_report_file(mock_successful_fetch_run,
+                                           make_tempdir,
+                                           settings):
+    location_file = get_locations_file('CALIBRATION')
+    args = ['--profile', TEST_PROFILE, '--output-dir', str(make_tempdir),
+            '--location-file', str(location_file)]
+    fetcher = DataFetcher(get_arg_parser().parse_args(args),
+                          settings.capo_settings)
+    servers_report = fetcher.servers_report
+    assert len(servers_report) == 1
+
+    fake_file = None
+    file_info = None
+    # (there will be just one file, hence this single-pass iteration)
+    for _, metadata in servers_report.items():
+        destination = Path(make_tempdir)
+        file_info = metadata['files'][0]
+        fake_file = Path(destination, file_info['ngas_file_id'])
+        write_fake_file(destination, file_info)
+
+    assert fake_file.is_file()
+    contents = fake_file.read_text().strip()
+    assert int(contents) == file_info['size']
+
+
+@pytest.mark.skipif(not RUN_ALL, reason='debug')
+def test_gets_calibration_from_locator(mock_successful_fetch_run,
+                                       make_tempdir,
+                                       settings):
+    external_name = LOCATION_REPORTS['CALIBRATION']['external_name']
+    product_locator = ProductLocatorLookup(
+        settings.db_settings).look_up_locator_for_ext_name(external_name)
+    args = ['--product-locator', product_locator,
+            '--output-dir', str(make_tempdir), '--profile', TEST_PROFILE]
+    namespace = get_arg_parser().parse_args(args)
+    fetch = DataFetcher(namespace, settings.capo_settings)
+    report_files = fetch.locations_report.files_report['files']
+    assert len(report_files) == 1
+
+    file_spec = report_files[0]
+    relative_path = file_spec['relative_path']
+    assert external_name == relative_path
+    assert relative_path == file_spec['subdirectory']
+    destination = Path(make_tempdir) / relative_path
+    destination.mkdir()
+    write_fake_file(destination, file_spec)
+
+    fake_file = Path(destination, file_spec['ngas_file_id'])
+    assert fake_file.is_file()
+    contents = fake_file.read_text().strip()
+    assert int(contents) == file_spec['size']
diff --git a/apps/cli/executables/datafetcher/test/test_df_return_codes.py b/apps/cli/executables/datafetcher/test/test_df_return_codes.py
new file mode 100644
index 0000000000000000000000000000000000000000..bdbfc7e916a1820b9c965016d260edc956b976e0
--- /dev/null
+++ b/apps/cli/executables/datafetcher/test/test_df_return_codes.py
@@ -0,0 +1,321 @@
+''' Tests of datafetcher return codes:
+    check that the arguments passed return the expected codes
+
+'''
+
+import os
+from pathlib import Path
+
+import pytest
+from datafetcher.datafetcher import DataFetcher
+from datafetcher.return_codes import ReturnCode
+from datafetcher.utilities import get_arg_parser, RetrievalMode
+
+from .mock_data_fetcher import MockProdDataFetcher
+# N.B. IJ doesn't recognize imported fixtures as being in use,
+# so don't let these imports (make_tempdir, settings) get removed.
+from .test_df_function import launch_datafetcher, RUN_ALL, \
+    MISSING_PROFILE, MISSING_SETTING
+from .df_pytest_utils import TEST_PROFILE, get_test_capo_settings, make_tempdir, \
+    capo_settings, settings
+
+
+# pylint: disable=C0115, C0116, E0401, E1101, R0902, R0903, R0914, W0613, W0621, W1203
+
+def test_launch_df(make_tempdir, settings):
+    args = ['--product-locator', settings.test_data['product_locator'],
+            '--output-dir', str(make_tempdir),
+            '--profile', TEST_PROFILE,
+            '--dry-run'
+            ]
+    return_code = launch_datafetcher(args, settings.capo_settings)
+    assert return_code is not None
+
+
+def test_launch_df_no_args(settings):
+    try:
+        return_code = launch_datafetcher(None, settings.capo_settings)
+        assert return_code is not None
+    except Exception as exc:
+        pytest.fail(f'{exc}')
+
+
+@pytest.mark.skipif(not RUN_ALL, reason='debug')
+def test_no_args_prints_usage_and_croaks():
+    ''' If the datafetcher is called without any arguments,
+        it should print the "usage" statement, then exit
+
+    '''
+
+    with pytest.raises(TypeError):
+        datafetcher = DataFetcher()
+        assert datafetcher.usage == DataFetcher._build_usage_message()
+
+
+@pytest.mark.skipif(not RUN_ALL, reason='debug')
+def test_omitted_capo_args_returns_expected_code(make_tempdir, settings):
+    ''' Be sure the DF dies with an appropriate error message
+        if called without a Capo profile '''
+
+    args = ['--product-locator', settings.test_data['product_locator'],
+            '--output-dir', str(make_tempdir)]
+
+    with pytest.raises(SystemExit) as exc:
+        launch_datafetcher(args, settings.capo_settings)
+    assert exc.value.code == 1
+
+
+@pytest.mark.skipif(not RUN_ALL, reason='debug')
+def test_omitted_capo_value_returns_expected_code(make_tempdir, settings):
+    ''' Be sure the DF dies with an appropriate error message if the Capo
+        profile argument is supplied without a profile value
+
+    '''
+
+    args = ['--product-locator', "we're not going to get this far",
+            '--output-dir', str(make_tempdir),
+            '--profile']
+    with pytest.raises(SystemExit) as exc:
+        launch_datafetcher(args, settings.capo_settings)
+    assert exc.value.code == 1
+
+
+@pytest.mark.skipif(not RUN_ALL, reason='debug')
+def test_invalid_capo_profile_returns_expected_code(make_tempdir, settings):
+    ''' Be sure the DF dies with an appropriate error message
+        if given a nonexistent Capo profile '''
+
+    args = ['--profile', 'whatevs',
+            '--product-locator', "we'll never get this far",
+            '--output-dir', str(make_tempdir),
+            ]
+    return_code = launch_datafetcher(args, settings.capo_settings)
+    assert return_code == MISSING_PROFILE
+
+
+def we_are_in_docker():
+    ''' if we are in a Docker container, the SHELL env var will not be set '''
+    return 'SHELL' not in os.environ
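+
+# A hedged illustration of the heuristic above: interactive shells export
+# SHELL, while a bare container process typically has no login shell, e.g.
+#
+#     >>> import os
+#     >>> 'SHELL' in os.environ   # True in a terminal, usually False in Docker
+#
+# This is a convention, not a guarantee.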
+
+
+@pytest.mark.skipif(not RUN_ALL, reason='debug')
+def test_inaccessible_output_dir_returns_expected_code(settings, make_tempdir):
+    try:
+        umask = os.umask(0o000)
+        tmpdir = Path(make_tempdir)
+        tmpdir.chmod(0o666)
+
+        altered_capo = get_test_capo_settings()
+        altered_capo['download_dir'] = str(tmpdir)
+
+        args = ['--product-locator', settings.test_data['product_locator'],
+                '--profile', TEST_PROFILE,
+                '--output-dir', str(tmpdir)]
+
+        namespace = get_arg_parser().parse_args(args)
+
+        # N.B. DataFetcher.__init__ throws SystemExit under pytest at the
+        # command line, but in a Docker container the failure surfaces
+        # as a plain Exception
+
+        if we_are_in_docker():
+            try:
+                DataFetcher(namespace, settings.capo_settings)
+            except Exception as exc:
+                assert isinstance(exc, SystemExit)
+                # a caught exception instance keeps its status in .code
+                assert exc.code == MISSING_SETTING
+        else:
+            with pytest.raises(SystemExit) as exc:
+                DataFetcher(namespace, settings.capo_settings)
+            assert exc.value.code == MISSING_SETTING
+
+    except Exception as exc:
+        pytest.fail(f'{exc}')
+    finally:
+        try:
+            os.umask(umask)
+        except Exception as exc:
+            pytest.fail(f'{exc}')
+
+
+@pytest.mark.skipif(not RUN_ALL, reason='debug')
+def test_two_locator_args_returns_expected_code(make_tempdir, settings):
+    ''' We should reject invocation with both a product locator -and- a
+        location file. One or the other, people!
+
+    '''
+
+    # ('location.json' and '--output-dir' must stay separate list items;
+    # a missing comma here would silently concatenate them)
+    args = ['--product-locator', 'a_locator',
+            '--location-file', 'location.json',
+            '--output-dir', str(make_tempdir),
+            '--profile', TEST_PROFILE]
+    return_code = launch_datafetcher(args, settings.capo_settings)
+    assert return_code == MISSING_SETTING
+
+
+class MockServiceTimeoutReturn:
+    ''' Simulates a locator request service timeout '''
+
+    @staticmethod
+    def run():
+        return ReturnCode.LOCATOR_SERVICE_TIMEOUT.value['code']
+
+
+@pytest.mark.skipif(not RUN_ALL, reason='debug')
+def test_locator_service_timeout_returns_expected_code(monkeypatch,
+                                                       settings,
+                                                       make_tempdir):
+    def mock_run(*args, **kwargs):
+        return MockServiceTimeoutReturn.run()
+
+    args = ['--product-locator', settings.test_data['product_locator'],
+            '--output-dir', str(make_tempdir),
+            '--profile', TEST_PROFILE]
+    monkeypatch.setattr(DataFetcher, "run", mock_run)
+    return_code = launch_datafetcher(args, settings.capo_settings)
+    assert return_code == ReturnCode.LOCATOR_SERVICE_TIMEOUT.value['code']
+
+
+class MockTooManyServiceRedirectsReturn:
+
+    @staticmethod
+    def run():
+        return ReturnCode.TOO_MANY_SERVICE_REDIRECTS.value
+
+
+@pytest.mark.skipif(not RUN_ALL, reason='debug')
+def test_too_many_service_redirects_returns_expected_code(monkeypatch,
+                                                          settings,
+                                                          make_tempdir):
+    def mock_run(*args, **kwargs):
+        return MockTooManyServiceRedirectsReturn().run()
+
+    args = ['--product-locator', settings.test_data['product_locator'],
+            '--output-dir', str(make_tempdir),
+            '--profile', TEST_PROFILE]
+    monkeypatch.setattr(DataFetcher, "run", mock_run)
+    return_code = launch_datafetcher(args, settings.capo_settings)
+    assert return_code == ReturnCode.TOO_MANY_SERVICE_REDIRECTS.value
+
+
+class MockCatastrophicServiceErrorReturn:
+
+    @staticmethod
+    def run():
+        return ReturnCode.CATASTROPHIC_REQUEST_ERROR
+
+
+@pytest.mark.skipif(not RUN_ALL, reason='debug')
+def test_catastrophic_service_error_returns_expected_code(monkeypatch,
+                                                          settings,
+                                                          make_tempdir):
+    def mock_run(*args, **kwargs):
+        return MockCatastrophicServiceErrorReturn().run()
+
+    args = ['--product-locator', settings.test_data['product_locator'],
+            '--output-dir', str(make_tempdir),
+            '--profile', TEST_PROFILE]
+
+    monkeypatch.setattr(DataFetcher, "run", mock_run)
+    return_code = launch_datafetcher(args, settings.capo_settings)
+    assert return_code == ReturnCode.CATASTROPHIC_REQUEST_ERROR
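+
+# The three Mock*Return classes above share one pattern: monkeypatch
+# DataFetcher.run to report a canned ReturnCode without touching the
+# network, then assert that launch_datafetcher passes that code through
+# unchanged. The shape of the pattern, using names from above:
+#
+#     def mock_run(*args, **kwargs):
+#         return MockServiceTimeoutReturn.run()
+#
+#     monkeypatch.setattr(DataFetcher, "run", mock_run)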
+
+
+def test_copy_attempt_throws_sys_exit_service_error(monkeypatch,
+                                                    settings,
+                                                    make_tempdir):
+    args = ['--product-locator', settings.test_data['product_locator'],
+            '--output-dir', str(make_tempdir),
+            '--profile', TEST_PROFILE]
+
+    namespace = get_arg_parser().parse_args(args)
+    fetcher = MockProdDataFetcher(namespace, settings.capo_settings)
+
+    servers_report = fetcher.servers_report
+    for server in servers_report:
+        entry = servers_report[server]
+        assert entry['retrieve_method'].value == RetrievalMode.COPY.value
+
+    # let's try just one file so we're not sitting here all day;
+    # the break comes last so the single-file trim actually runs
+    for server in servers_report:
+        entry = servers_report[server]
+        servers_report = {server: entry}
+        fetcher.servers_report = servers_report
+        assert fetcher.servers_report[server] is not None
+        files = fetcher.servers_report[server]['files']
+        fetcher.servers_report[server]['files'] = [files[0]]
+        break
+
+    with pytest.raises(SystemExit) as exc:
+        fetcher.run()
+    assert exc.value.code == \
+        ReturnCode.CATASTROPHIC_REQUEST_ERROR.value['code']
+
+
+@pytest.mark.skipif(not RUN_ALL, reason='debug')
+def test_product_locator_not_found_returns_expected_code(make_tempdir,
+                                                         settings):
+    args = ['--product-locator', 'not_a_locator',
+            '--output-dir', str(make_tempdir),
+            '--profile', TEST_PROFILE]
+    return_code = launch_datafetcher(args, settings.capo_settings)
+    assert return_code == ReturnCode.PRODUCT_LOCATOR_NOT_FOUND.value['code']
+
+
+@pytest.mark.skipif(not RUN_ALL, reason='debug')
+def test_unable_to_open_location_file_returns_expected_code(make_tempdir,
+                                                            settings):
+    args = ['--location-file', 'location.json',
+            '--output-dir', str(make_tempdir),
+            '--profile', TEST_PROFILE]
+    return_code = launch_datafetcher(args, settings.capo_settings)
+    assert return_code == ReturnCode.CANNOT_OPEN_LOCATION_FILE.value['code']
+
+
+class MockNgasFetchError:
+    ''' Simulates a problem fetching file(s) from an NGAS server '''
+
+    @staticmethod
+    def run():
+        return ReturnCode.NGAS_FETCH_ERROR
+
+
+@pytest.mark.skipif(not RUN_ALL, reason='debug')
+def test_error_fetching_file_from_ngas_returns_expected_code(monkeypatch,
+                                                             settings,
+                                                             make_tempdir):
+    def mock_run(*args, **kwargs):
+        return MockNgasFetchError().run()
+
+    args = ['--product-locator', settings.test_data['product_locator'],
+            '--output-dir', str(make_tempdir),
+            '--profile', TEST_PROFILE]
+    monkeypatch.setattr(DataFetcher, "run", mock_run)
+    return_code = launch_datafetcher(args, settings.capo_settings)
+    assert return_code == ReturnCode.NGAS_FETCH_ERROR
+
+
+class MockSizeMismatchError:
+    ''' Simulates a retrieved file of unexpected size '''
+
+    @staticmethod
+    def run():
+        return ReturnCode.SIZE_MISMATCH
+
+
+@pytest.mark.skipif(not RUN_ALL, reason='debug')
+def test_unexpected_size_returns_expected_code(monkeypatch,
+                                               settings,
+                                               make_tempdir):
+    def mock_run(*args, **kwargs):
+        return MockSizeMismatchError().run()
+
+    args = ['--product-locator', settings.test_data['product_locator'],
+            '--output-dir', str(make_tempdir),
+            '--profile', TEST_PROFILE]
+    monkeypatch.setattr(DataFetcher, "run", mock_run)
+    return_code = launch_datafetcher(args, settings.capo_settings)
+    assert return_code == ReturnCode.SIZE_MISMATCH
diff --git a/apps/cli/executables/null/setup.py b/apps/cli/executables/null/setup.py
index 3399db9f1e7f7c075ea47f44ce6436478e139f3d..8380b22ab27e066b4bfc9a2d4af1bda145d457f2 100644
--- a/apps/cli/executables/null/setup.py
+++ b/apps/cli/executables/null/setup.py
@@ -19,11 +19,10 @@ setup(
     author_email='dms-ssa@nrao.edu',
     url='TBD',
     license="GPL",
-    # install_requires=requires,
     tests_require=tests_require,
     keywords=[],
     packages=['null'],
-    package_dir={'':'src'},
+    package_dir={'': 'src'},
     classifiers=[
         'Programming Language :: Python :: 3.8'
     ],
diff --git a/apps/cli/executables/null/src/null/null.py b/apps/cli/executables/null/src/null/null.py
index 
6f806ae60910206b36c8a97d3fb9eca700595529..1cdacad80b9c8e028ce8d0f4ddd6f31bb4dae0e6 100644 --- a/apps/cli/executables/null/src/null/null.py +++ b/apps/cli/executables/null/src/null/null.py @@ -16,47 +16,75 @@ logger = logging.getLogger("null") logger.setLevel(logging.INFO) handler = logging.StreamHandler(stream=sys.stdout) -class Null: - def __init__(self, args, verbose): - self.args = args - if verbose: - logger.setLevel(logging.DEBUG) - self.args_to_funcs = { - 'print-error': self.print_error, - 'greeting': self.print_greeting, - 'exit-fail': self.exit_with_failure, - 'exit-random': self.exit_randomly, - 'nap': self.take_nap, - 'dump': self.dump_core - } - def print_error(self): - logger.removeHandler(handler) - err_handler = logging.StreamHandler(stream=sys.stderr) - logger.addHandler(err_handler) - logger.error("ERROR: This is an error.") +def print_error(): + """ + Logs an error message to stderr. + """ + logger.removeHandler(handler) + err_handler = logging.StreamHandler(stream=sys.stderr) + logger.addHandler(err_handler) + logger.error("ERROR: This is an error.") + + +def print_greeting(): + """ + Prints a friendly greeting to stdout + """ + logger.info("Hello, world!") + logger.debug("And goodbye, world...") + + +def exit_with_failure(): + """ + Exits with status code -1 + """ + logger.error("Error purposefully induced. Exiting with status code -1...") + sys.exit(-1) + + +def exit_randomly(): + """ + Exits with a random status code between -50 and 50 + """ + status_code = random.randint(-50, 50) + logger.debug("Exiting with status code {}".format(status_code)) + sys.exit(status_code) - def print_greeting(self): - logger.info("Hello, world!") - logger.debug("And goodbye, world...") - def exit_with_failure(self): - logger.error("Error purposefully induced. 
Exiting with status code -1...") - sys.exit(-1) +def take_nap(): + """ + Sleeps for 5 seconds + """ + logger.debug("Going to sleep...") + time.sleep(5) + logger.debug("Waking up.") - def exit_randomly(self): - status_code = random.randint(-50, 50) - logger.debug("Exiting with status code {}".format(status_code)) - sys.exit(status_code) - def take_nap(self): - logger.debug("Going to sleep...") - time.sleep(5) - logger.debug("Waking up.") +def dump_core(): + """ + Makes a call to os.abort() which dumps the core + """ + logger.debug("Aborting and dumping core...", stack_info=True) + os.abort() + - def dump_core(self): - logger.debug("Aborting and dumping core...", stack_info=True) - os.abort() +class Null: + """ + Null executable that executes null functionality based on arguments given + """ + def __init__(self, args: argparse.Namespace, verbose: bool): + self.args = args + if verbose: + logger.setLevel(logging.DEBUG) + self.args_to_funcs = { + 'print-error': print_error, + 'greeting': print_greeting, + 'exit-fail': exit_with_failure, + 'exit-random': exit_randomly, + 'nap': take_nap, + 'dump': dump_core + } def execute(self): """ @@ -66,7 +94,8 @@ class Null: if val and arg in self.args_to_funcs: self.args_to_funcs[arg]() -def make_arg_parser(): + +def make_arg_parser() -> argparse.ArgumentParser: """ Creates an argparse arguments parser with appropriate options :return: Said argument parser @@ -99,6 +128,7 @@ def make_arg_parser(): help='abort program and dump core') return parser + def main(): arg_parser = make_arg_parser() args = arg_parser.parse_args() @@ -107,5 +137,6 @@ def main(): executable = Null(args, args.verbose) executable.execute() + if __name__ == '__main__': - main() \ No newline at end of file + main() diff --git a/apps/cli/executables/null/test/test_null.py b/apps/cli/executables/null/test/test_null.py index ae8acdf8fec8828de51b8569fcb0c7be99293d8a..83f6b5fc4564742e1c7a881ec33707bf58d6f088 100644 --- a/apps/cli/executables/null/test/test_null.py +++ b/apps/cli/executables/null/test/test_null.py @@ -3,33 +3,39 @@ import argparse from null.null import Null + @pytest.fixture() def null(): null = Null(argparse.Namespace(), True) return null + def test_print_error(null, caplog): null.print_error() assert 'ERROR: This is an error.' in caplog.text + def test_print_greeting(null, caplog): null.print_greeting() assert 'Hello, world!' in caplog.text assert 'And goodbye, world...' in caplog.text + def test_exit_with_failure(null, caplog): with pytest.raises(SystemExit) as e: null.exit_with_failure() assert 'Error purposefully induced. Exiting with status code -1...' in caplog.text assert e.value.code == -1 + def test_exit_randomly(null, caplog): with pytest.raises(SystemExit) as e: null.exit_randomly() assert 'Exiting with status code' in caplog.text assert -50 <= e.value.code <= 50 + def test_take_nap(null, caplog): null.take_nap() assert 'Going to sleep...' in caplog.text - assert 'Waking up.' in caplog.text \ No newline at end of file + assert 'Waking up.' 
in caplog.text diff --git a/apps/cli/utilities/s_code_project_updater/src/s_code_project_updater/commands.py b/apps/cli/utilities/s_code_project_updater/src/s_code_project_updater/commands.py index d7f5f3ed53bef0da2a3cfeab98a7449a34e5ad83..8b8897ab29623dc57023a17dbe09c380507629ca 100644 --- a/apps/cli/utilities/s_code_project_updater/src/s_code_project_updater/commands.py +++ b/apps/cli/utilities/s_code_project_updater/src/s_code_project_updater/commands.py @@ -1,558 +1 @@ -#!/usr/bin/env/python -# -*- coding: utf-8 -*- - -""" -A module for updating properties (title, abstract, PI and coI) of a provided project. -""" -import argparse as ap -import logging -import os -import sys -import warnings -from typing import List - -from sqlalchemy import exc as sa_exc, asc, desc - -from pymygdala import LogHandler, SendNRAOEvent -from s_code_project_updater import Telescope -from schema import Author, Project, ExecutionBlock, create_session -from schema.pstmodel import Person, UserAuthentication -from support.capo import get_my_capo_config -from support.logging import LOG_MESSAGE_FORMATTER, get_console_logger - -from ._version import ___version___ as version -from .project_fetcher import ArchiveProjectFetcher - -_APPLICATION_NAME = 's_code_project_updater' -_LOG = get_console_logger(_APPLICATION_NAME, logging.DEBUG) -_MISSING_PROFILE = """ERROR: unknown 'profile', provide the -P argument or set the CAPO_PROFILE - environment variable.""" -_DESCRIPTION = """A tool for updating the investigators, title and/or abstract of a project.""" -_EPILOG = """Return values: -0: everything worked, -1: error with capo configuration -2: Error with input parameters -3: Project not found -4: Investigator not found -5: Update failed""" - - -class ScodeProjectUpdater: - """ - A class to bundle the operations involved with updating a project in the archive. - """ - - def __init__(self, **kwargs): - - """ - Build an instance of the class with the cmd line parser, the capo_config, a context - manager for the archive and pst, and do some rudamentary tests of the input arguments - so we fail early. 
- :param kwargs: the command line arguments or namespace with the arguments to the parser - """ - self._make_parser() - try: - self.args = self.parser.parse_args(**kwargs) - - except Exception as exc: - _LOG.error(f'parser threw {exc}') - self.exit_with_error('Capo profile and project code are ' - 'required', 2) - - # at a minimum, Capo profile and project code are required - if not self.args.profile or not self.args.project: - if not self.args.profile and not self.args.project: - self.exit_with_error('Capo profile and project code are ' - 'required', 2) - if not self.args.profile: - self.exit_with_error('Capo profile not specified', 2) - if not self.args.project: - self.exit_with_error('project code not specified', 2) - - args_dict = self.args.__dict__ - - self.is_dry = args_dict['dry'] - - if not args_dict['investigators'] \ - and not args_dict['title'] and not args_dict['abstract']: - self.set_minimum_properties_from_args(args_dict) - # return - - self.project_code = args_dict['project'] - self.stored_project = None - _LOG.debug(f'{self.args}') - - try: - self.capo_config = get_my_capo_config(profile=self.args.profile) - except Exception as exc: - self.exit_with_error(f'Capo configuration error: {exc}', 1) - try: - self.archive_session = create_session('SDM') - except KeyError as k_ex: - self.exit_with_error( - f'An error occurred while creating a db context: {k_ex}', 1) - - if self.args.investigators and \ - [inv for inv in self.args.investigators if - self.args.investigators.count(inv) > 1]: - self.exit_with_error( - 'You appear to be trying to add an investigator more than once, ' - 'which could cause issues with the presentation of investigators ' - 'in the archive. There should be only one PI and any number of ' - 'unique CoIs on a project.', 2) - - def set_minimum_properties_from_args(self, args): - ''' basic info needed for a fetch ''' - self.project_code = args['project'] - self.profile = args['profile'] - self.is_dry = True - self.fetch_only = True - - def _make_parser(self): - - r""" Build a command line parser for this app. """ - result = ap.ArgumentParser(description=_DESCRIPTION.format(version), - formatter_class=ap.RawTextHelpFormatter, - epilog=_EPILOG) - result.add_argument('-C', '--project', action='store', - help='project_code to update') - result.add_argument('-P', '--profile', action='store', - help='profile name to use, e.g. test, production') - result.add_argument('-T', '--title', action='store', - help='a quoted string for the new title for the project') - result.add_argument('-A', '--abstract', action='store', - help='a quoted string for the new abstract for the project') - result.add_argument('-I', '--investigators', action='store', type=int, - nargs='+', - help='a PST ID, or list of PST IDs, of investigators for the project, ' - 'as an unquoted integer or space seperated integer list. The ' - 'first ID in the list will be added as the PI and all subsequenct ' - 'IDs will be added as CoIs.') - result.add_argument('-d', '--dry', action='store_true', - help='perform a dry run, going through the motions, but not committing ' - 'changes and not performing a re-index of the project. This may ' - 'be useful because it will print the current state of the project ' - 'and what the project would look like after the changes.') - self.parser = result - - def exit_with_error(self, msg, code): - """ - On discovering we have an unresolvable condition the prevents us from proceeding with the - update of this project, print an error and exit with the code provided. 
- :param msg: an error message to the user - :param code: the exit code to accompany the error message - :return: None - """ - self.error_message = msg - self.code = code - _LOG.error( - f'error message received: {self.error_message}; code = {self.code}') - _LOG.error(msg) - self.parser.print_help() - sys.exit(code) - - def get_stored_project(self): - """ - Return the project specified by the input arguments, if it exists. - :return: the first Project we found with the project code passed in - """ - with warnings.catch_warnings(): - # Suppress SQLAlchemy warnings - warnings.simplefilter("ignore", category=sa_exc.SAWarning) - - try: - project = self.archive_session.query(Project) \ - .filter(Project.project_code == self.args.project) \ - .first() - return project - except Exception as exc: - _LOG.error(f'{exc}') - raise - - def get_pst_users(self, investigators): - """ - Get the Person(s) associated with the investigators (a list of PST person ID(s)). - :return: the Person(s) mapped to the person ID(s) passed in - """ - pst_session = create_session('PST') - try: - users = pst_session.query(Person.person_id, - Person.firstName, - Person.lastName, - UserAuthentication.personName) \ - .join(UserAuthentication, - Person.personAuthentication_id \ - == UserAuthentication.userAuthentication_id) \ - .filter(Person.person_id.in_(investigators)).all() - return users - except Exception as exc: - _LOG.error(f'error getting PST person IDs: {exc}') - self.exit_with_error("Could not get user info from PST", 5) - finally: - pst_session.close() - - # users = self.pst_context.session.query(Person.person_id, Person.firstName, - # Person.lastName, UserAuthentication.personName)\ - # .join(UserAuthentication, - # Person.personAuthentication_id == UserAuthentication.userAuthentication_id)\ - # .filter(Person.person_id.in_(investigators)).all() - # return users - - def get_projects_current_investigators(self): - """ - Get a list of investigators associated with this project, ordered such that the PI(s) - is/are the first element(s) of the list - :return: a list of investigators associated with the project code passed in, ordered with - the PI(s) first - """ - investigators_list = self.archive_session.query(Author) \ - .filter(Author.project_code == self.args.project) \ - .order_by(desc(Author.is_pi), asc(Author.pst_person_id)) \ - .all() - return investigators_list - - def clear_projects_current_investigators(self): - """ - Clear the investigators currently attached to this project - :return: None - """ - investigators_list = self.archive_session.query(Author) \ - .filter(Author.project_code == self.args.project) \ - .all() - for inv in investigators_list: - self.archive_session.delete(inv) - - def set_new_project_investigators(self, new_investigators): - """ - Add a list of new project investigators - :param new_investigators: a list of investigators with PST personName, firstName, lastName - and person_id - :return: None - """ - - # if any of the new investigators already exists, - # use the old author_id rather than making a new author - is_pi = True - num_expected = len(new_investigators) - num_changed = 0 - # Loop through our cmd line investigator list and map them to investigator list passed in. - # On the first pass through, we set the Author as the PI. 
- for in_inv in self.args.investigators: - for pst_user in new_investigators: - if in_inv == int(pst_user.person_id): - # The Author table has the project as a foreign key, so we use the whole - # project here, rather than just a string, sqlalchemy wires the rest. If you - # just give the table a project_code as a string, you will get an error. - # noinspection PyTypeChecker - auth = Author(author_id=None, - project=self.stored_project, - username=pst_user.personName, - firstname=pst_user.firstName, - lastname=pst_user.lastName, - pst_person_id=str(pst_user.person_id), - is_pi=is_pi) - self.archive_session.add(auth) - num_changed += 1 - is_pi = False - break - - if num_changed < num_expected: - _LOG.error( - f'{num_changed} of {num_expected} investigators were NOT set') - raise UpdateException('incomplete investigator update') - - def print_project(self): - """ - Print the project's current investigators - :return: None - """ - output = self.get_project_info() - [_LOG.info(line) for line in output] - - def get_project_info(self): - - output = [] - if self.stored_project is None: - self.stored_project = self.get_stored_project() - output.append(f'Title: {self.stored_project.title}') - output.append(f'Abstract: {self.stored_project.abstract}') - investigator_list = self.get_projects_current_investigators() - - # we want the PI's pst_person_id followed by the CoIs' pst_person_ids in numeric order - pi_author = investigator_list[0] - if pi_author.pst_person_id is not None: - coi_pst_ids = [int(coi.pst_person_id) for coi in - investigator_list[1:]] - coi_pst_ids = sorted(coi_pst_ids) - author_pst_ids = [int(pi_author.pst_person_id)] - [author_pst_ids.append(id) for id in coi_pst_ids] - authors_to_print = [str(id) for id in author_pst_ids] - id_list = ' '.join(authors_to_print) - output.append(f'Authors: {id_list}') - - return output - - def is_fetch_only(self): - try: - return self.args.title is None \ - and self.args.abstract is None \ - and self.args.investigators is None - except AttributeError: - return False - - def update_project(self) -> Project: - ''' - The main function responsible for updating the project. - It makes sure the project exists, and, if the user is updating - the investigators, that they have valid PST mappings. If there - aren't errors with those two checks it clears the projects current - archive authors and replaces them with the investigators found - from the PST mapping to users. And, of course, if the title and - abstract are being updated, it adds those to the project. 
- - :return: Project - - ''' - - fetcher = ArchiveProjectFetcher(self.args.profile) - project = None - try: - project = fetcher.fetch_project(self.project_code) - except AttributeError: - self.exit_with_error(f'project code "{self.project_code}" not ' - f'found', 3) - if self.is_fetch_only(): - output = fetcher.build_project_info() - try: - [_LOG.info(line) for line in output] - return project - except TypeError: - _LOG.error( - 'Cannot display project info; is this an ALMA project?') - - with warnings.catch_warnings(): - # Suppress SQLAlchemy warnings - warnings.simplefilter("ignore", category=sa_exc.SAWarning) - - # Get the project or fail - self.stored_project = self.get_stored_project() - if self.stored_project is None: - self.exit_with_error( - 'No project found for the project_code provided', 3) - - if self.is_alma(): - self.exit_with_error(f'{self.stored_project.project_code} ' - f'is an ALMA project; update not permitted', - 2) - - if self.args.investigators: - proposed_investigators = self.get_pst_users( - self.args.investigators) - if len(proposed_investigators) == 0 or \ - len(self.args.investigators) \ - != len(proposed_investigators): - self.exit_with_error( - 'One or more of the investigators you entered was not ' - 'found in the PST.', 4) - self.clear_projects_current_investigators() - self.set_new_project_investigators(proposed_investigators) - - if self.args.title: - self.stored_project.title = self.args.title - if self.args.abstract: - self.stored_project.abstract = self.args.abstract - - if not self.is_dry: - self.archive_session.commit() - _LOG.info('Changes committed') - else: - self.archive_session.rollback() - _LOG.info( - 'Successful dry run; this would have updated the project') - - self.print_project() - return self.get_stored_project() - - def is_alma(self): - ''' is this an alma project? ''' - with warnings.catch_warnings(): - # Suppress SQLAlchemy warnings - warnings.simplefilter("ignore", category=sa_exc.SAWarning) - - exec_block = self.archive_session.query(ExecutionBlock) \ - .filter(ExecutionBlock.project_code == self.project_code) \ - .filter(ExecutionBlock.telescope == Telescope.ALMA.value) \ - .first() - return exec_block is not None - - def reindex_project(self): - """ - If we are not performing a dry run, and have made it this far without error, then we - re-index the project so the updates will show up in the profile-mapped archive. - :return: None - """ - if not self.args.dry and not self.is_fetch_only() \ - and '_TEST_PROJECT' not in self.project_code: - _LOG.info( - f'Re-indexing project {self.args.project} to make changes available....') - # Set up a LogHandler to record the fact we just made a change to this project. - # We're adding it here, instead of earlier, because nothing we log earlier should be - # presented to anyone but the command line user and would only add useless clutter to - # our system logging. We only really want the completed task to make a record in our - # system. 
- broadcast = LogHandler(profile=self.capo_config.profile, - application=_APPLICATION_NAME) - broadcast.setLevel(logging.DEBUG) - broadcast.setFormatter(LOG_MESSAGE_FORMATTER) - _LOG.addHandler(broadcast) - _LOG.info(f'Project {self.args.project} has been updated.') - - event = {'logData': {'project_code': self.args.project, - 'title_updated': self.args.title is not None, - 'abstract_updated': self.args.abstract is not None, - 'investigators_updated': self.args.investigators is not None, - 'ingestion_type': 'evla_sdm' - }, - 'message': 's-code project updated', - 'request': 're-index please'} - SendNRAOEvent(profile=self.capo_config.profile, - application=_APPLICATION_NAME) \ - .send(routing_key='ingestion-complete.metadata', event=event) - sys.exit(0) - - -class ArchiveProject: - """ - A class to encapsulate the attributes of a project as stored in the archive - for the purpose of updating and getting current information about the project. - """ - - def __init__(self, project_code: str, title: str, abstract: str, - author_pst_ids: List, ): - """ - Represents the state of a project. - :param project_code: - :param title: - :param abstract: - :param author_pst_ids: - """ - self.project_code = project_code - self.title = title - self.abstract = abstract - self.investigators = author_pst_ids - - options = [] - options.append('-C') - options.append('--project') - options.append('-P') - options.append('--profile') - options.append('-T') - options.append('--title') - options.append('-A') - options.append('--abstract') - options.append('-I') - options.append('--investigators') - self.options = options - self.profile = os.environ['CAPO_PROFILE'] - - def make_args(self, is_dry): - args = [] - if is_dry: - args.append('-d') - - args.append('-C') - args.append(self.project_code) - args.append('-P') - args.append(self.profile) - args.append('-T') - args.append(self.title) - args.append('-A') - args.append(self.abstract) - - if self.investigators: - args.append('-I') - for pst_id in self.investigators: - args.append(str(pst_id)) - - return args - - def is_arg(self, arg): - return arg in self.options - - def add_parameter(self, new_project, key, value): - if key in ('-C', '--project'): - new_project.project_code = value - elif key in ('-T', '--title'): - new_project.title = value - elif key in ('-A', '--abstract'): - new_project.abstract = value - elif key in ('-P', '--profile'): - self.profile = value - - def add_investigators(self, new_project, args, start_position): - ''' Add specified investigators to project ''' - value = args[start_position] - - while value not in self.options: - new_project.investigators.append(value) - - return new_project.investigators - - -class UpdateArgParser: - ''' Command-line argument parser for ScodeProjectUpdater ''' - - def __init__(self): - self.parser = self._make_parser() - - def _make_parser(self): - parser = ap.ArgumentParser(description=_DESCRIPTION.format(version), - formatter_class=ap.RawTextHelpFormatter, - epilog=_EPILOG) - parser.add_argument('-C', '--project', action='store', - help='project_code to update') - parser.add_argument('-P', '--profile', action='store', - help='profile name to use, e.g. 
test, production') - parser.add_argument('-T', '--title', action='store', - help='a quoted string for the new title for the project') - parser.add_argument('-A', '--abstract', action='store', - help='a quoted string for the new abstract for the project') - parser.add_argument('-I', '--investigators', action='store', type=int, - nargs='+', - help='a PST ID, or list of PST IDs, of investigators for the project, ' - 'as an unquoted integer or space seperated integer list. The ' - 'first ID in the list will be added as the PI and all subsequenct ' - 'IDs will be added as CoIs.') - parser.add_argument('-d', '--dry', action='store_true', - help='perform a dry run, going through the motions, but not committing ' - 'changes and not performing a re-index of the project. This may ' - 'be useful because it will print the current state of the project ' - 'and what the project would look like after the changes.') - return parser - - def parse_args(self, **kwargs): - ''' Try to parse command-line arguments, and fail informatively - if there are errors - ''' - return self.parser.parse_args(kwargs) - - -class UpdateException(Exception): - ''' throw this if there is trouble during the update ''' - - -def main(**kwargs): - """ - The script's main entry point. - :param kwargs: the command line arguments - :return: None - """ - updater = ScodeProjectUpdater(**kwargs) - updater.update_project() - # reindex only if there's been an update - if not updater.is_dry: - updater.reindex_project() - - -if __name__ == '__main__': - main() +''' Just a placeholder until scode_project_updater is merged in with archive ''' \ No newline at end of file diff --git a/apps/cli/utilities/s_code_project_updater/test/test_projects.py b/apps/cli/utilities/s_code_project_updater/test/test_projects.py index afb5d065a995a25176c735b4bbf40e62326cd7ba..81351a303065823d4252db78222c30ef2886ab8d 100644 --- a/apps/cli/utilities/s_code_project_updater/test/test_projects.py +++ b/apps/cli/utilities/s_code_project_updater/test/test_projects.py @@ -1,9 +1,10 @@ ''' fake projects to use in testing scode_project_updater ''' + import warnings from sqlalchemy import exc as sa_exc -from schema.model import Author, Project +from schema import Author, Project class ScodeTestProject(): diff --git a/apps/cli/utilities/s_code_project_updater/test/test_updater.py b/apps/cli/utilities/s_code_project_updater/test/test_updater.py index 83ea0a3b4c6bec2a040fdc4059679f3682e1d34c..274e486ce9dfc9dccb919223e93bcab1e676f336 100755 --- a/apps/cli/utilities/s_code_project_updater/test/test_updater.py +++ b/apps/cli/utilities/s_code_project_updater/test/test_updater.py @@ -1,389 +1 @@ -import logging -import os -import subprocess -import warnings - -from sqlalchemy import exc as sa_exc - -import pytest -from s_code_project_updater.commands import ScodeProjectUpdater -from schema import create_session, Project -from support.logging import get_console_logger - -from .test_projects import \ - ScodeTestProject, ScienceTestProject, AlmaTestProject, get_author_pst_ids - -_LOG = get_console_logger("scode_project_updater_tests", logging.DEBUG) -_UPDATE_COMMAND = 'update_sproj' -PROFILE = 'local' - -class TestUpdater: - ''' Exercises ScodeProjectUpdater ''' - @pytest.fixture(autouse=True, scope='function') - def install_test_data(self): - os.environ['CAPO_PROFILE'] = PROFILE - self.initialize_test_data() - yield - self.remove_test_data() - - def test_dry_run_does_not_update(self): - fake_project = ScodeTestProject().project - project_code = fake_project.project_code - try: - 
new_title = 'this is the new title'
-            assert fake_project.title != new_title
-            args = [
-                '-C', project_code,
-                '-P', PROFILE,
-                '-T', new_title,
-                '--dry'
-            ]
-            updated = ScodeProjectUpdater(args=args).update_project()
-            # nothing should have been updated
-            assert fake_project.title == updated.title
-            assert fake_project.abstract == updated.abstract
-            assert len(fake_project.authors) == len(updated.authors)
-        except SystemExit as exc:
-            pytest.fail(f'unexpected failure with return code {exc.code}')
-            raise
-        except Exception as exc:
-            pytest.fail(f'{project_code}: {exc}')
-            raise
-
-    def test_project_code_only_fetches(self):
-        fake_project = ScodeTestProject().project
-        project_code = fake_project.project_code
-        args = [
-            '-C', project_code,
-            '-P', PROFILE,
-        ]
-
-        updated = None
-        try:
-            updated = ScodeProjectUpdater(args=args).update_project()
-        except SystemExit as exc:
-            pytest.fail(f'unexpected failure with return code {exc.code}')
-            raise
-
-        assert updated != None
-
-        assert fake_project.title == updated.title
-        assert fake_project.abstract == updated.abstract
-        assert len(fake_project.authors) == len(updated.authors)
-        count = 0
-        for orig_author in fake_project.authors:
-            for author in updated.authors:
-                if author.username == orig_author.username:
-                    count += 1
-                    break
-        assert len(fake_project.authors) == count
-
-    def test_updates_abstract_only(self):
-        fake_project = ScodeTestProject().project
-        project_code = fake_project.project_code
-        new_abstract = "Well, here's another nice mess you've gotten us into, Ollie"
-        assert fake_project.abstract != new_abstract
-        args = [
-            '-C', project_code,
-            '-P', PROFILE,
-            '-A', new_abstract,
-        ]
-        try:
-            updated = ScodeProjectUpdater(args=args).update_project()
-            # only abstract should have been updated;
-            # all else should be same
-            assert fake_project.title == updated.title
-            assert new_abstract == updated.abstract
-            assert len(fake_project.authors) == len(updated.authors)
-        except SystemExit as exc:
-            pytest.fail(f'unexpected failure; return code = {exc.code}')
-            raise
-
-    def test_updates_abstract_and_title(self):
-        fake_project = ScodeTestProject().project
-        project_code = fake_project.project_code
-        new_abstract = "I think you ought to know I'm feeling very depressed"
-        new_title = 'A Survey of the Mattresses of Sqornshellous Zeta'
-        assert fake_project.abstract != new_abstract
-        assert fake_project.title != new_title
-        args = [
-            '-C', project_code,
-            '-P', PROFILE,
-            '-A', new_abstract,
-            '-T', new_title,
-        ]
-        try:
-            updated = ScodeProjectUpdater(args=args).update_project()
-            assert new_title == updated.title
-            assert new_abstract == updated.abstract
-            assert len(fake_project.authors) == len(updated.authors)
-        except SystemExit as exc:
-            pytest.fail(f'unexpected failure; exit code = {exc.code}')
-            raise
-
-    def test_adds_new_abstract_deletes_author(self):
-        fake_project = ScodeTestProject().project
-        project_code = fake_project.project_code
-        new_project = Project(project_code=project_code,
-                              title=fake_project.title,
-                              abstract=fake_project.abstract)
-        new_abstract = "First there is a mountain, then there is no " \
-                       "mountain, then there is"
-        assert new_abstract != fake_project.abstract
-        new_project.abstract = new_abstract
-        original_authors = fake_project.authors.copy()
-        assert 4 == len(original_authors)
-        last_author = original_authors[3]
-        new_authors = original_authors[:3]
-        assert len(original_authors) - 1 == len(new_authors)
-        new_project.authors = new_authors
-        args = [
-            '-C', project_code,
-            '-P', PROFILE,
-            '-A', new_abstract,
-            '-I',
-        ]
-        for author_id in get_author_pst_ids(new_project):
-            args.append(str(author_id))
-
-        updated = None
-        try:
-            updated = ScodeProjectUpdater(args=args).update_project()
-            assert updated is not None
-        except SystemExit as exc:
-            pytest.fail(f'unexpected failure; return code = {exc.code}')
-            raise
-
-        assert fake_project.abstract != updated.abstract
-        assert fake_project.title == updated.title
-        assert len(original_authors) - 1 == len(updated.authors)
-        authors_updated = last_author in updated.authors
-        assert not authors_updated
-        count = 0
-        for orig_author in original_authors[:3]:
-            for new_author in updated.authors:
-                if new_author.username == orig_author.username:
-                    count += 1
-                    break
-        assert len(new_authors) == count
-
-    def test_output_is_as_expected(self):
-        fake_project = ScodeTestProject().project
-        project_code = fake_project.project_code
-        args = [
-            '-C', project_code,
-            '-P', PROFILE,
-        ]
-        updater = ScodeProjectUpdater(args=args)
-        updater.update_project()
-        output = updater.get_project_info()
-        assert output is not None
-        assert ('Title: ' + fake_project.title) in output
-        assert ('Abstract: ' + fake_project.abstract) in output
-        pst_ids = [str(id) for id in get_author_pst_ids(fake_project)]
-        pst_id_str = ' '.join(pst_ids)
-        assert 'Authors: ' + pst_id_str in output
-
-    def test_copes_with_single_pi(self):
-        project = ScodeTestProject().project
-        args = ['-P', PROFILE, '-C', project.project_code, '-I', '4686']
-        try:
-            updated = ScodeProjectUpdater(args=args).update_project()
-            assert 1 == len(updated.authors)
-        except SystemExit as ex:
-            pytest.fail(f'update failed with exit code {ex.code}')
-            raise
-
-    def test_alma_project_is_rejected(self):
-        project_code = '2018.A.00062.S'
-        args = ['-P', PROFILE, '-C', project_code,
-                '-T', 'Physics at High Angular Resolution in Nearby Galaxies: '
-                      'The Local Galaxy Inventory Continued']
-
-        with pytest.raises(SystemExit) as exc:
-            ScodeProjectUpdater(args=args).update_project()
-        assert 2 == exc.code
-
-    def test_update_failure_returns_expected_code(self):
-        result = FailingUpdater().update_project()
-        assert isinstance(result, SystemExit)
-        assert 5 == result.code
-
-    """ The following test should be moved to another test case,
-    where we'll use a bash script, via subprocess.call(), to create an
-    appropriate env and execute pytest.
-    """
-    @pytest.mark.skip('pytest passes only in IJ; '
-                      'fails when run from command line'
-                      'due to import errors')
-    def test_command_line_returns_expected_codes(self):
-        ''' We simulate execution from the command line
-        and confirm that errors result in the appropriate
-        return codes.
-
-        '''
-
-        # minimum required arguments -- profile & project -- omitted
-        return_code = CommandLineUpdaterLauncher([]).run()
-        assert return_code == 2
-
-        project_code = ScodeTestProject().project.project_code
-
-        # profile not specified
-        args = ['-C', project_code,]
-        return_code = CommandLineUpdaterLauncher(args).run()
-        assert return_code == 2
-
-        # project code not specified
-        args = ['-P', PROFILE]
-        assert CommandLineUpdaterLauncher(args).run() == 2
-
-        # profile value missing
-        args = ['-P', '-C', project_code]
-        return_code = CommandLineUpdaterLauncher(args).run()
-        assert return_code == 2
-
-        # project code missing
-        args = ['-P', PROFILE, '-C']
-        assert CommandLineUpdaterLauncher(args).run() == 2
-
-        # bad project code
-        args = ['-P', PROFILE, '-C', 'bogus']
-        assert CommandLineUpdaterLauncher(args).run() == 3
-
-        # bad profile
-        args = ['-P', 'not_a_profile', '-C', project_code]
-        assert CommandLineUpdaterLauncher(args).run() == 1
-
-        # missing title as last argument
-        args = ['-P', PROFILE, '-C', project_code, '-T']
-        assert CommandLineUpdaterLauncher(args).run() == 2
-
-        # missing title as first argument
-        args = ['-T', '-P', PROFILE, '-C', project_code,]
-        assert CommandLineUpdaterLauncher(args).run() == 2
-
-        # nonexistent investigator
-        args = ['-P', PROFILE, '-C', project_code, '-I', '-22']
-        assert CommandLineUpdaterLauncher(args).run() == 4
-
-
-    ### UTILITIES ###
-
-    def initialize_test_data(self):
-        ''' Insert test data into archive database for use in tests '''
-        session = create_session('SDM')
-        num_commits = num_found = 0
-        try:
-            with warnings.catch_warnings():
-                # Suppress SQLAlchemy warnings
-                warnings.simplefilter("ignore", category=sa_exc.SAWarning)
-
-                fake_projects = [ScodeTestProject().project,
-                                 ScienceTestProject().project,
-                                 AlmaTestProject().project]
-                try:
-                    for fake_project in fake_projects:
-                        project_code = fake_project.project_code
-                        existing = session.query(Project) \
-                            .filter(Project.project_code ==
-                                    project_code) \
-                            .first()
-                        if existing is not None:
-                            num_found += 1
-                            session.delete(existing)
-                            session.commit()
-                        session.add(fake_project)
-                        session.commit()
-                        num_commits += 1
-
-                    if num_commits < num_found:
-                        pytest.fail(f'{num_found} fake projects were found '
-                                    f'and deleted, but {num_commits} were '
-                                    f'added and committed')
-                except Exception as exc:
-                    pytest.fail(f'{exc}')
-                    raise
-        finally:
-            session.close()
-
-    def remove_test_data(self):
-        ''' Get rid of the test data we inserted.
-        '''
-        session = create_session('SDM')
-        try:
-            with warnings.catch_warnings():
-                # Suppress SQLAlchemy warnings
-                warnings.simplefilter("ignore", category=sa_exc.SAWarning)
-
-                fake_projects = [ScodeTestProject().project,
-                                 ScienceTestProject().project,
-                                 AlmaTestProject().project]
-                for fake_project in fake_projects:
-                    project_code = fake_project.project_code
-                    existing = session.query(Project) \
-                        .filter(Project.project_code ==
-                                project_code) \
-                        .first()
-                    if existing is not None:
-                        session.delete(existing)
-                        session.commit()
-
-                # confirm removal
-                found = session.query(Project) \
-                    .filter(Project.project_code.like('%_TEST_PROJECT')) \
-                    .first()
-                if found is not None:
-                    pytest.fail('test projects were not removed')
-        except Exception as exc:
-            pytest.fail(f'{exc}')
-        finally:
-            session.close()
-
-class FailingUpdaterHelper:
-    ''' for use in testing update failure '''
-
-    @pytest.fixture()
-    def update_project(self):
-        return SystemExit(5)
-
-class FailingUpdater:
-    ''' for use in testing update failure '''
-    def __init__(self):
-        self.helper = FailingUpdaterHelper()
-
-    def update_project(self):
-        return SystemExit(5)
-
-class CommandLineUpdaterLauncher:
-    ''' Simulates execution of script from command line.
-    This works when tests are run from within iJ
-    but not when pytest is execuated at the command line.
-    '''
-
-    def __init__(self, args: list):
-        self.args = [_UPDATE_COMMAND]
-        for arg in args:
-            self.args.append(str(arg))
-        _LOG.info(f'{self.args}')
-
-    def run(self):
-        ''' launch updater in a subprocess
-        @:returns directory listing
-        '''
-
-        args = self.args
-        try:
-            proc = subprocess.run(args,
-                                  stdout=subprocess.PIPE,
-                                  stderr=subprocess.STDOUT,
-                                  timeout=60,
-                                  check=False,
-                                  bufsize=1,
-                                  universal_newlines=True)
-            self.stdout = proc.stdout
-            if proc.returncode:
-                print(f'{self.stdout}')
-            return proc.returncode
-        except Exception as exc:
-            _LOG.error(f'{exc}')
-            return exc.returncode
+''' Just a placeholder until scode_project_updater is merged in with archive '''
diff --git a/build/recipes/build_pkgs/build_pkgs.py b/build/recipes/build_pkgs/build_pkgs.py
index 304d2bad22677d40b0e410e899aafd56ed66dd12..7b3324ad8d465a489f0302974870a1b2fd0d8015 100644
--- a/build/recipes/build_pkgs/build_pkgs.py
+++ b/build/recipes/build_pkgs/build_pkgs.py
@@ -1,9 +1,13 @@
-import subprocess
 import logging
+import subprocess
+from typing import List, Dict, Any, Callable, Optional
+
+from zc.buildout.buildout import Buildout, Options
 
 logger = logging.getLogger("buildout/build_pkgs")
 
-def get_dirs():
+
+def get_dirs() -> List[str]:
     """
     Finds all subdirectories containing setup.py files.
     :return: List of directories as strings.
@@ -21,7 +25,8 @@ def get_dirs():
     logger.debug("Done getting directories.")
     return dirs
 
-def get_names(dirs):
+
+def get_names(dirs: List[str]) -> List[str]:
     """
     Generate list of subproject names based on the rule that the name of the
     subproject directory will be the name of the subproject.
@@ -41,8 +46,9 @@ def get_names(dirs):
     logger.debug("Done generating.")
     return names
 
+
 class Recipe:
-    def __init__(self, buildout, name, options):
+    def __init__(self, buildout: Optional[Buildout], name: str, options: Options):
        """
        Initializes fields needed for recipe.
        :param buildout: (Boilerplate) Dictionary of options from buildout section
@@ -54,7 +60,7 @@ class Recipe:
         self.options = options
         self.pkg_list = get_names(get_dirs())
 
-    def install(self):
+    def install(self) -> Any:
         """
         Install method that runs when recipe has components it needs to install.
         :return: Paths to files, as strings, created by the recipe.
@@ -79,4 +85,4 @@ class Recipe:
 
         return self.options.created()
 
-    update = install
\ No newline at end of file
+    update = install
diff --git a/build/recipes/build_pkgs/setup.py b/build/recipes/build_pkgs/setup.py
index db69c236a8a32ec6f0a16d2be9ca896b83df5875..04e432da6c878e5a4f3243cdf83b10081671854a 100644
--- a/build/recipes/build_pkgs/setup.py
+++ b/build/recipes/build_pkgs/setup.py
@@ -3,6 +3,6 @@ from setuptools import setup
 setup(
     name='build_pkgs',
     version='0.1',
-    py_modules = ['build_pkgs'],
-    entry_points = {"zc.buildout": ["default=build_pkgs:Recipe"]},
-)
\ No newline at end of file
+    py_modules=['build_pkgs'],
+    entry_points={"zc.buildout": ["default=build_pkgs:Recipe"]},
+)
diff --git a/build/recipes/build_pkgs/test/conftest.py b/build/recipes/build_pkgs/test/conftest.py
index e98638c7ae9666bdb27979cbb8e20c1f899271b8..46126efdfc2d3f2a05a8cf0e83d51c1acc8bdc3f 100644
--- a/build/recipes/build_pkgs/test/conftest.py
+++ b/build/recipes/build_pkgs/test/conftest.py
@@ -1,8 +1,10 @@
 import pytest
 import zc.buildout.testing
 
+from .. import build_pkgs
+
 @pytest.fixture(scope='module')
-def recipe():
+def recipe() -> build_pkgs.Recipe:
     """
     pytest fixture that initializes zc.buildout objects for use in testing.
     Initializes Buildout, Options, and Recipe objects.
@@ -13,4 +15,4 @@ def recipe():
     buildout = zc.buildout.testing.Buildout()
     options = buildout.Options(buildout, 'build_pkgs', {'recipe': 'build_pkgs', 'name': 'null'})
     recipe = build_pkgs.Recipe(buildout=buildout, name=None, options=options)
-    return recipe
\ No newline at end of file
+    return recipe
diff --git a/build/recipes/build_pkgs/test/test_build_pkgs.py b/build/recipes/build_pkgs/test/test_build_pkgs.py
index c0dfae66cbebc4a4540506aff006532ed8a15497..188c5e670ca796fad5dc6d275b60b6b9670b71c2 100644
--- a/build/recipes/build_pkgs/test/test_build_pkgs.py
+++ b/build/recipes/build_pkgs/test/test_build_pkgs.py
@@ -1,6 +1,9 @@
 import os
+from typing import List
+
 from .. import build_pkgs
 
+
 class TestBuildPkgs:
     def test_get_names(self):
         """
@@ -17,14 +20,13 @@ class TestBuildPkgs:
         """
         assert './apps/cli/executables/null' in build_pkgs.get_dirs()
 
-    def test_output(self, recipe):
+    def test_output(self, recipe: build_pkgs.Recipe):
         """
         Test that the package specified in the recipe has been built
         correctly.
         """
         created = recipe.install()
         for path in created:
-            print(path)
             if len(path) > 0:
                 assert path is not None, "conda build failed to build package"
                 assert os.path.exists(path)
diff --git a/build/recipes/setup_to_meta/setup.py b/build/recipes/setup_to_meta/setup.py
index 24ade227647ff5b327d2f7a892a04a6a98ea004e..8a2ebea1329a7390c9016fa06838e78cb2843ecf 100644
--- a/build/recipes/setup_to_meta/setup.py
+++ b/build/recipes/setup_to_meta/setup.py
@@ -3,6 +3,6 @@ from setuptools import setup
 setup(
     name='setup_to_meta',
     version='0.1',
-    py_modules = ['setup_to_meta'],
-    entry_points = {"zc.buildout": ["default=setup_to_meta:Recipe"]},
-)
\ No newline at end of file
+    py_modules=['setup_to_meta'],
+    entry_points={"zc.buildout": ["default=setup_to_meta:Recipe"]},
+)
diff --git a/build/recipes/setup_to_meta/setup_to_meta.py b/build/recipes/setup_to_meta/setup_to_meta.py
index f7e32134d73d9f2eaef14a99f01a8e1d6e840099..783afd3766ca01ad586f53953463bde491396c94 100644
--- a/build/recipes/setup_to_meta/setup_to_meta.py
+++ b/build/recipes/setup_to_meta/setup_to_meta.py
@@ -1,15 +1,20 @@
-
-import subprocess
-import logging
-import json
 import os
+import json
+import logging
+import subprocess
+from typing import List, Any, Dict, Callable, Optional
+
+from zc.buildout.buildout import Buildout, Options
 
 PYTHON_VERSION = '3.8'
 
 logger = logging.getLogger("buildout/setup_to_meta")
 
-def write_metafile(metadata, filepath):
+
+def write_metafile(metadata: str, filepath: str):
     """ Writes given metadata to file with given path.
+    :param metadata: String containing conda recipe metadata to be written
+    :param filepath: String containing the path to conda recipe file (meta.yaml)
     """
     logger.debug(f"Writing meta.yaml file at {filepath}...")
     try:
@@ -26,12 +31,11 @@ class MetadataGenerator:
     """
     Uses given info extracted from setup.py file to fill out metadata template.
     """
-
-    def __init__(self, setup, path):
+    def __init__(self, setup: Dict[str, str], path: str):
         self.setup = setup
         self.path = path
 
-    def fmt_ep(self):
+    def fmt_ep(self) -> str:
         """
         Format entry points section of metadata.
         :return: Formatted string if entry points exists; else empty string.
@@ -44,7 +48,7 @@ class MetadataGenerator:
             ep_string += ' '
         return ep_string
 
-    def fmt_reqs(self):
+    def fmt_reqs(self) -> str:
         """
         Format requirements section of metadata.
         :return: Formatted string if requirements exists; else empty string.
@@ -67,7 +71,7 @@ class MetadataGenerator:
                        '\n'
         return reqs_string
 
-    def fmt_test(self):
+    def fmt_test(self) -> str:
         """
         Format test section of metadata.
         NOTE: May need further tweaking to be smarter based on individual project
@@ -93,7 +97,7 @@ class MetadataGenerator:
         )
         return test_string
 
-    def generate(self):
+    def generate(self) -> str:
         logger.debug(f"Generating meta.yaml file from {self.path}...")
         # Filter numpy etc. out of the requirements
         try:
@@ -126,7 +130,7 @@ class MetadataGenerator:
     )
 
 
-def parse_setup(d):
+def parse_setup(d: str) -> Dict[str, str]:
     """
     Function for running parse_setup.py on each directory with a setup.py file.
     NOTE: Contains a hack for getting parse_setup.py to run in each directory.
@@ -143,7 +147,8 @@ def parse_setup(d):
     logger.debug("Done parsing.")
     return json.loads(proc.stdout)
 
-def get_outputs(names):
+
+def get_outputs(names: List[str]) -> List[str]:
     """
     Generate list of metadata files that will be created.
     :param dirs: List of dirs of all subprojects with a setup.py file.
@@ -155,7 +160,7 @@ def get_outputs(names):
     return outputs
 
 
-def get_dirs():
+def get_dirs() -> List[str]:
     """
     Finds all subdirectories containing setup.py files.
     :return: List of directories as strings.
@@ -173,7 +178,8 @@ def get_dirs():
     logger.debug("Done finding directories.")
     return dirs
 
-def get_names(dirs):
+
+def get_names(dirs: List[str]) -> List[str]:
     """
     Generate list of subproject names based on the rule that the name of the
     subproject directory will be the name of the subproject.
@@ -193,7 +199,8 @@ def get_names(dirs):
     logger.debug("Done getting list of names.")
     return names
 
-def del_substrings(s, substrings):
+
+def del_substrings(s: str, substrings: List[str]):
     """
     Function for deleting multiple substrings from a string.
     :param s: String to be modified.
@@ -205,15 +212,17 @@ def del_substrings(s, substrings):
     return s
 
+
 root = os.getcwd()
 
+
 class Recipe:
     """
     Buildout Recipe class.
     For more detailed information, see the link.
     http://www.buildout.org/en/latest/topics/writing-recipes.html
     """
 
-    def __init__(self, buildout, name, options):
+    def __init__(self, buildout: Optional[Buildout], name: Optional[str], options: Options):
         """
         Initializes fields needed for recipe.
         :param buildout: (Boilerplate) Dictionary of options from buildout section
@@ -226,7 +235,7 @@ class Recipe:
         self.outputs = get_outputs(self.names)
         self.options = options
 
-    def install(self):
+    def install(self) -> Any:
         """
         Install method that runs when recipe has components it needs to install.
         :return: Paths to files, as strings, created by the recipe.
@@ -236,10 +245,10 @@ class Recipe:
             setup_data = parse_setup(d)
             metadata = MetadataGenerator(setup_data, d).generate()
             write_metafile(metadata, self.outputs[i])
-            # Pass created file into options.created()
+            # Buildout-specific operation: pass created file into options.created()
             self.options.created(self.outputs[i])
         return self.options.created()
 
     # No special procedure for updating vs. installing
-    update = install
\ No newline at end of file
+    update = install
diff --git a/build/recipes/setup_to_meta/test/conftest.py b/build/recipes/setup_to_meta/test/conftest.py
index 2f6dfb21d3d7ca162dc8b378bf4be2ce707f77ab..877092d84ed217e63b96ae18f0b49205f9d70abd 100644
--- a/build/recipes/setup_to_meta/test/conftest.py
+++ b/build/recipes/setup_to_meta/test/conftest.py
@@ -1,8 +1,11 @@
 import pytest
 import zc.buildout.testing
 
+from .. import setup_to_meta
+
+
 @pytest.fixture(scope='module')
-def recipe():
+def recipe() -> setup_to_meta.Recipe:
     """
     pytest fixture that initializes zc.buildout objects for use in testing.
     Initializes Buildout, Options, and Recipe objects.
@@ -11,6 +14,14 @@ def recipe():
     """
     from .. import setup_to_meta
     buildout = zc.buildout.testing.Buildout()
-    options = buildout.Options(buildout, 'gen_metadata', {'recipe': 'setup_to_meta'})
-    recipe = setup_to_meta.Recipe(buildout=buildout, name=None, options=options)
-    return recipe
\ No newline at end of file
+    options = buildout.Options(
+        buildout,
+        'gen_metadata',
+        {'recipe': 'setup_to_meta'}
+    )
+    recipe = setup_to_meta.Recipe(
+        buildout=buildout,
+        name=None,
+        options=options
+    )
+    return recipe
diff --git a/build/recipes/setup_to_meta/test/test_setup_to_meta.py b/build/recipes/setup_to_meta/test/test_setup_to_meta.py
index 7b0162ae61965a5d88941af339f95761072d795e..8cb514d110f903e577b61fdaa503fe139a9bad03 100644
--- a/build/recipes/setup_to_meta/test/test_setup_to_meta.py
+++ b/build/recipes/setup_to_meta/test/test_setup_to_meta.py
@@ -1,5 +1,8 @@
+from typing import Dict, List
+
 from .. import setup_to_meta
 
+
 class TestSetupToMeta:
     def test_del_substrings(self):
         """
@@ -38,7 +41,7 @@ class TestSetupToMeta:
         for key in keys:
             assert key in setup_data
 
-    def test_output(self, recipe):
+    def test_output(self, recipe: setup_to_meta.Recipe):
         """
         Test that metadata was successfully created and
         contains data.
diff --git a/build/recipes/test_recipes/test/test_test_recipes.py b/build/recipes/test_recipes/test/test_test_recipes.py
index 57034eefdff357e8e0b09ddb02c556b57b0f2fff..912bc274ba93235ccdc85c82e1949993855cafd5 100644
--- a/build/recipes/test_recipes/test/test_test_recipes.py
+++ b/build/recipes/test_recipes/test/test_test_recipes.py
@@ -1,5 +1,6 @@
 from .. import test_recipes
 
+
 class TestRecipes:
     def test_get_recipes(self):
         """
diff --git a/build/recipes/test_recipes/test_recipes.py b/build/recipes/test_recipes/test_recipes.py
index 479cc05141ca024ae4db3a58f36ebf6f570100d2..fd09a2c49ee94434e2a4e6823c0be5029e3af9da 100644
--- a/build/recipes/test_recipes/test_recipes.py
+++ b/build/recipes/test_recipes/test_recipes.py
@@ -1,9 +1,13 @@
 import subprocess
 import logging
+from typing import Dict, Optional
+
+from zc.buildout.buildout import Buildout, Options
 
 logger = logging.getLogger("buildout/test_recipes")
 
-def get_recipes():
+
+def get_recipes() -> Dict[str, str]:
     """
     Get all currently installed buildout recipes (including this one!)
     :return: Dictionary with format {recipe_name: recipe_path_from_root,}
@@ -26,6 +30,7 @@ def get_recipes():
     return recipes
 
+
 class Recipe:
     """
     Buildout Recipe class.
     For more detailed information, see the link.
     http://www.buildout.org/en/latest/topics/writing-recipes.html
     """
 
-    def run_test(self, recipe):
+    def run_test(self, recipe: str):
         """
         Run test for given recipe.
         :param recipe: Name of recipe to be run.
@@ -42,7 +47,7 @@ class Recipe:
         subprocess.run(['pytest', '-vv', '--log-level=DEBUG', '--showlocals',
                         self.recipes[recipe]])
 
-    def __init__(self, buildout, name, options):
+    def __init__(self, buildout: Optional[Buildout], name: str, options: Options):
         """
         Initializes fields needed for recipe.
         :param buildout: (Boilerplate) Dictionary of options from buildout section
@@ -68,4 +73,4 @@ class Recipe:
                 self.run_test(recipe)
         else:
             if self.choice in self.recipes:
-                self.run_test(self.choice)
\ No newline at end of file
+                self.run_test(self.choice)
diff --git a/build/tools/parse_setup.py b/build/tools/parse_setup.py
index 8acd807b95980c86bcedb4f1a2b220ea5300da79..6d1c8ab8fd78bd0096d40322fc3ca926dacf5aea 100644
--- a/build/tools/parse_setup.py
+++ b/build/tools/parse_setup.py
@@ -3,6 +3,8 @@
 import setuptools
 import json
 
 data = {}
+
+
 def my_setup(*args, **kwargs):
     """ A replacement for setuptools.setup().
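The hunk above shows the heart of parse_setup.py: a stand-in for setuptools.setup() that records its keyword arguments in the module-level data dict instead of building anything. A sketch of how such a shim is typically wired up follows; the exact field list and the way main() executes setup.py are assumptions here, not shown in this hunk.

import json
import sys
import setuptools

data = {}

def my_setup(*args, **kwargs):
    # Record the kwargs setup.py passes instead of performing a build.
    # (Assumed field list; the real script's `fields` is defined elsewhere.)
    for field in ('name', 'version', 'install_requires', 'entry_points'):
        data[field] = kwargs.get(field)

# Shadow setuptools.setup() before running setup.py, so executing the
# script only populates `data` without doing any real packaging work.
setuptools.setup = my_setup
exec(open('setup.py').read())
json.dump(data, sys.stdout)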
@@ -20,6 +22,7 @@ def my_setup(*args, **kwargs):
     for field in fields:
         data[field] = kwargs.get(field)
 
+
 def main():
 
     # Author of these shenanigans: Daniel Lyons (but you already knew that)
@@ -36,5 +39,6 @@ def main():
     # Instead of exiting, we now have populated our global variable, without doing any parsing
     json.dump(data, sys.stdout)
 
+
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
diff --git a/build/tools/transfer_to_builder.py b/build/tools/transfer_to_builder.py
index fa43851a3f4ccf5312ba40249f082155b2e1e1b1..4f9b67094afb4ecaafb325c1b8b7635dffa36dca 100644
--- a/build/tools/transfer_to_builder.py
+++ b/build/tools/transfer_to_builder.py
@@ -1,10 +1,12 @@
-import subprocess
-import paramiko
-import logging
+import os
+import sys
 import fnmatch
 import getpass
-import sys
-import os
+import logging
+from typing import List
+
+import paramiko
+import subprocess
 
 from scp import SCPClient
 
@@ -12,7 +14,8 @@
 logger = logging.getLogger("buildtools/transfer_to_builder")
 logger.setLevel(logging.INFO)
 hander = logging.StreamHandler(stream=sys.stdout)
-def get_build_pkg_names():
+
+def get_build_pkg_names() -> List[str]:
     """
     Search through pkgs directory for built .tar.bz2 packages
     :return: List of package archive file names
@@ -28,7 +31,8 @@ def get_build_pkg_names():
     return pkg_names
 
 
-def create_ssh_client(server):
+
+def create_ssh_client(server: str) -> paramiko.SSHClient:
     """
     Use paramiko to load SSH keys if they exist and set up an SSH connection to a server.
     :param server: The server to connect to
@@ -52,7 +56,8 @@ def create_ssh_client(server):
     return client
 
 
-def transfer_packages(pkg_names):
+
+def transfer_packages(pkg_names: List[str]):
     """
     Use shell commands to transfer build archives to builder and update its conda package index.
     :param pkg_names: Names of the .tar.bz2 files for the built packages.
@@ -74,7 +79,8 @@ def transfer_packages(pkg_names):
                         cmd_chmod])
     else:
         logger.error("No packages found in build/pkgs/noarch. "
-                       "Did conda build successfully build the package(s)?")
+                     "Did conda build successfully build the package(s)?")
+
 
 if __name__ == "__main__":
-    transfer_packages(get_build_pkg_names())
\ No newline at end of file
+    transfer_packages(get_build_pkg_names())
diff --git a/environment.yml b/environment.yml
index 6bb290b866b1d2ee388b556fe97bac1a35d5a3d0..0aa836450b834a93056ab8a564cf8aefd3adab92 100644
--- a/environment.yml
+++ b/environment.yml
@@ -20,6 +20,7 @@ dependencies:
   - pandas=1.0
   - pendulum=2.1
   - pid=2.2
+  - pika=1.1.0
  - psycopg2=2.8
  - pycapo=0.3.0
  - pyopenssl=19.1.0
diff --git a/support/conda/pyat/meta.yaml b/support/conda/pyat/meta.yaml
index 1f6b6342cd8bab63c107baa59538c3d0ee08ae48..9e1e141253adc34cc8d70318b53dcfe023f841f9 100644
--- a/support/conda/pyat/meta.yaml
+++ b/support/conda/pyat/meta.yaml
@@ -21,7 +21,7 @@ requirements:
     - numpy~=1.16
     - pendulum        # date/time formatting
     - pid
-    - pika=1.1.0      # AMQP messaging
+    - pika>=1.1.0     # AMQP messaging
     - psycopg2
     - py
     - pycapo          # NRAO settings
@@ -44,7 +44,7 @@ requirements:
     - numpy~=1.16
     - pendulum        # date/time formatting
     - pid
-    - pika~=1.1.0     # AMQP messaging
+    - pika>=1.1.0     # AMQP messaging
     - psycopg2
     - py
     - pycapo          # NRAO settings
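The pika pin is loosened to >=1.1.0 in both requirements sections, presumably so the resolver can pick any release with the post-1.0 API surface. For reference, a minimal sketch of that API; the broker URL and queue name below are placeholders, not values taken from this repository:

import pika

# Placeholder broker URL; any reachable AMQP endpoint works.
params = pika.URLParameters('amqp://guest:guest@localhost:5672/%2F')
with pika.BlockingConnection(params) as connection:
    channel = connection.channel()
    channel.queue_declare(queue='example-queue', durable=True)
    channel.basic_publish(exchange='',
                          routing_key='example-queue',
                          body=b'hello from pika >= 1.1')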