diff --git a/apps/cli/executables/datafetcher/setup.cfg b/apps/cli/executables/datafetcher/setup.cfg index cf557ebb4e7e1142b55ab3be7e7824f9b4e17636..a392d237b96105d67126888c64caed04dd90bc20 100644 --- a/apps/cli/executables/datafetcher/setup.cfg +++ b/apps/cli/executables/datafetcher/setup.cfg @@ -4,9 +4,6 @@ description-file = README.txt [aliases] test=pytest -[tool:pytest] -addopts = -s - [build_sphinx] source-dir = docs/source build-dir = docs/build diff --git a/apps/cli/executables/datafetcher/setup.py b/apps/cli/executables/datafetcher/setup.py index b2ed7352377e17daeb95b12b58548f0529746582..fd16588d5f295c997903b517f77ad4427e75cfdc 100644 --- a/apps/cli/executables/datafetcher/setup.py +++ b/apps/cli/executables/datafetcher/setup.py @@ -2,11 +2,37 @@ # -*- coding: utf-8 -*- from pathlib import Path -from setuptools import setup, find_packages + +from setuptools import setup VERSION = open('_version.py').readlines()[-1].split()[-1].strip("\"'") README = Path('README.md').read_text() +requires = [ + 'pika==1.1.0', + 'pycapo==0.2.1post1', + 'bs4==0.0.1', + 'beautifulsoup4==4.9.1', + 'lxml==4.3.2', + 'psycopg2==2.8.5', + 'pyinstaller==3.2', + 'pyopenssl==19.1.0', + 'tox==3.1.3', + 'tox-pyenv==1.1.0' +] + +tests_require = [ + 'bs4==0.0.1', + 'beautifulsoup4==4.9.1', + 'psycopg2==2.8.5', + 'pyopenssl==19.1.0', + 'pyinstaller==3.2', + 'pytest==5.4', + 'pytest-runner==4.2', + 'tox==3.1.3', + 'tox-pyenv==1.1.0', + +] setup( name=Path().absolute().name, version=VERSION, diff --git a/apps/cli/executables/datafetcher/src/datafetcher/file_retrievers.py b/apps/cli/executables/datafetcher/src/datafetcher/file_retrievers.py index e3cceea77cd6a4655dba881a5ed1f66f6b239def..06ba963b7a447f8e54fbc1d58f285defdc1ac6b1 100644 --- a/apps/cli/executables/datafetcher/src/datafetcher/file_retrievers.py +++ b/apps/cli/executables/datafetcher/src/datafetcher/file_retrievers.py @@ -45,14 +45,11 @@ class NGASFileRetriever: """ download_url = 'http://' + server + '/RETRIEVE' destination = 
self._get_destination(file_spec) - - if os.path.exists( - destination) and not self.force_overwrite and not self.dry_run: + if destination.exists() and not self.force_overwrite and not self.dry_run: raise FileExistsError(f'{destination} exists; aborting') self._make_basedir(destination) - func = self._copying_fetch if retrieve_method == RetrievalMode.COPY \ else self._streaming_fetch retryer = Retryer(func, MAX_TRIES, SLEEP_INTERVAL_SECONDS, self._LOG) @@ -74,10 +71,10 @@ class NGASFileRetriever: """ try: + output_dir = Path(self.output_dir) if file_spec['subdirectory'] is None: - return os.path.join(self.output_dir, file_spec['relative_path']) - return os.path.join(self.output_dir, file_spec['subdirectory'], - file_spec['relative_path']) + return output_dir / file_spec['relative_path'] + return output_dir / file_spec['subdirectory'] / file_spec['relative_path'] except KeyError as k_err: raise MissingSettingsException(k_err) @@ -92,11 +89,10 @@ class NGASFileRetriever: :return: """ if not self.dry_run: - basedir = os.path.dirname(destination) - if os.path.isdir(basedir): - if not os.access(basedir, os.W_OK): - raise FileErrorException( - f'output directory {basedir} is not writable') + basedir = Path(destination).parent + if basedir.is_dir() and not os.access(basedir, os.W_OK): + raise FileErrorException( + f'output directory {basedir} is not writable') try: umask = os.umask(0o000) Path(basedir).mkdir(parents=True, exist_ok=True) @@ -114,7 +110,7 @@ class NGASFileRetriever: """ if not self.dry_run: self._LOG.debug(f'verifying fetch of {destination}') - if not os.path.exists(destination): + if not destination.exists(): raise NGASServiceErrorException( f'file not delivered to {destination}') if file_spec['size'] != os.path.getsize(destination): @@ -135,7 +131,7 @@ class NGASFileRetriever: params = {'file_id': file_spec['ngas_file_id'], 'processing': _DIRECT_COPY_PLUGIN, - 'processingPars': 'outfile=' + destination, + 'processingPars': 'outfile=' + str(destination), 
'file_version': file_spec['version']} self._LOG.debug('attempting copying download:\nurl: {}\ndestination: {}' .format(download_url, destination)) @@ -147,7 +143,7 @@ class NGASFileRetriever: self._LOG.error( 'NGAS does not like this request:\n{}' .format(response.url)) - soup = BeautifulSoup(response.text, 'lxml-xml') + soup = BeautifulSoup(response.text, features="lxml") ngams_status = soup.NgamsStatus.Status message = ngams_status.get("Message") @@ -195,11 +191,11 @@ class NGASFileRetriever: actual_size = os.path.getsize(destination) if actual_size == 0: raise FileErrorException( - f'{os.path.basename(destination)} ' + f'{Path(destination).name} ' f'was not retrieved') if actual_size != expected_size: raise SizeMismatchException( - f'expected {os.path.basename(destination)} ' + f'expected {Path(destination).name} ' f'to be {expected_size} bytes; ' f'was {actual_size} bytes' ) @@ -210,7 +206,7 @@ class NGASFileRetriever: except AttributeError as a_err: self._LOG.warning(f'possible problem streaming: {a_err}') - if response.status_code != http.HTTPStatus.OK: + if response.status_code != http.HTTPStatus.OK: self._LOG.error('NGAS does not like this request:\n{}' .format(response.url)) soup = BeautifulSoup(response.text, 'lxml-xml') diff --git a/apps/cli/executables/datafetcher/src/datafetcher/locations_report.py b/apps/cli/executables/datafetcher/src/datafetcher/locations_report.py index d6ac6075d3e680842531bda0f8c0844b81eae5eb..5788dc67ba404c4e26816d3397e9e3cc30d94cd8 100644 --- a/apps/cli/executables/datafetcher/src/datafetcher/locations_report.py +++ b/apps/cli/executables/datafetcher/src/datafetcher/locations_report.py @@ -146,22 +146,24 @@ class LocationsReport: with open(self.location_file) as to_read: result = json.load(to_read) return result - # except (JSONDecodeError, FileNotFoundError): - # raise - # except Exception as ex: - # self._LOG.error(f'>>> unexpected exception thrown: {ex}') - # raise def _get_location_report_from_service(self): """ Use 
'requests' to fetch the location report from the locator service. :return: location report (from locator service, in JSON) """ + + url = self.settings['locator_service_url'] self._LOG.debug('fetching report from {} for {}'.format( - self.settings['locator_service_url'], self.product_locator)) + url, self.product_locator)) + + # this is needed to prevent SSL errors when tests are run + # inside a Docker container + requests.packages.urllib3.util.ssl_.DEFAULT_CIPHERS += ':HIGH:!DH:!aNULL' + requests.Session().mount(url, adapter=requests.adapters.HTTPAdapter()) try: - response = requests.get(self.settings['locator_service_url'], + response = requests.get(url, params={'locator': self.product_locator}) except requests.exceptions.Timeout: raise LocationServiceTimeoutException() diff --git a/apps/cli/executables/datafetcher/src/datafetcher/product_fetchers.py b/apps/cli/executables/datafetcher/src/datafetcher/product_fetchers.py index bccb7c392ef9c1bac8d36ef93daeca0cc8089dd1..f3f30841581f398ac849f95c60fd5094d50cdaed 100644 --- a/apps/cli/executables/datafetcher/src/datafetcher/product_fetchers.py +++ b/apps/cli/executables/datafetcher/src/datafetcher/product_fetchers.py @@ -6,6 +6,7 @@ import copy import os from argparse import Namespace from concurrent.futures import ThreadPoolExecutor, as_completed +from pathlib import Path from typing import Dict from .errors import NGASServiceErrorException @@ -152,13 +153,12 @@ class ParallelProductFetcher(BaseProductFetcher): if dirnames: # we can expect one subdir: the external_name associated # with the product locator - subdir = dirnames[0] - to_walk = os.path.join(dirname, subdir) + to_walk = Path(dirname) / dirnames[0] for dname, dnames, files in os.walk(to_walk): - if self.num_files_expected == len(files): + if self.num_files_expected <= len(files): self.num_files_retrieved += len(files) break - if self.num_files_expected == self.num_files_retrieved: + if self.num_files_expected >= self.num_files_retrieved: break if 
self.num_files_retrieved < self.num_files_expected: raise NGASServiceErrorException( diff --git a/apps/cli/executables/datafetcher/src/datafetcher/utilities.py b/apps/cli/executables/datafetcher/src/datafetcher/utilities.py index 90f418266ce8a2de431b91c81a36153c2d40f74b..880f0cedab2cde8047ce26e5f31e6dee8495af01 100644 --- a/apps/cli/executables/datafetcher/src/datafetcher/utilities.py +++ b/apps/cli/executables/datafetcher/src/datafetcher/utilities.py @@ -9,8 +9,8 @@ import argparse import logging import os import pathlib -from time import time from enum import Enum +from time import time from typing import Callable import psycopg2 as pg @@ -21,7 +21,7 @@ from .errors import get_error_descriptions, NoProfileException, \ LOG_FORMAT = "%(module)s.%(funcName)s, %(lineno)d: %(message)s" -MAX_TRIES = 5 +MAX_TRIES = 10 SLEEP_INTERVAL_SECONDS = 1 FILE_SPEC_KEYS = ['ngas_file_id', 'subdirectory', 'relative_path', 'checksum', 'checksum_type', 'version', 'size', 'server'] @@ -42,14 +42,16 @@ REQUIRED_SETTINGS = { 'EDU.NRAO.ARCHIVE.DATAFETCHER.DATAFETCHERSETTINGS.EXECUTIONSITE': 'execution_site', 'EDU.NRAO.ARCHIVE.DATAFETCHER.DATAFETCHERSETTINGS.DEFAULTTHREADSPERHOST': - 'threads_per_host' + 'threads_per_host', + 'EDU.NRAO.ARCHIVE.WORKFLOW.CONFIG.REQUESTHANDLERSETTINGS.DOWNLOADDIRECTORY' : + 'download_dir' } -def path_is_accessible(path): +def path_is_accessible(path: pathlib.Path): ''' Is this path readable, executable, and writable? 
''' can_access = os.access(path, os.F_OK) - can_access = can_access and os.path.isdir(path) + can_access = can_access and path.is_dir() can_access = can_access and os.access(path, os.R_OK) can_access = can_access and os.access(path, os.W_OK) can_access = can_access and os.access(path, os.X_OK) @@ -125,25 +127,6 @@ def get_capo_settings(profile: str): result[REQUIRED_SETTINGS[setting]] = value return result -def get_metadata_db_settings(profile): - """ Get Capo settings needed to connect to archive DB - :param profile: - :return: - """ - result = dict() - if profile is None: - raise NoProfileException('CAPO_PROFILE required, none provided') - config = CapoConfig(profile=profile) - fields = ['jdbcDriver', 'jdbcUrl', 'jdbcUsername', 'jdbcPassword'] - qualified_fields = ['metadataDatabase.' + field for field in fields] - for field in qualified_fields: - try: - value = config[field] - result[field] = value - except KeyError: - raise MissingSettingsException( - f'missing required setting "{field}"') - return result def validate_file_spec(file_spec: dict, retrieve_method_expected: bool): ''' @@ -213,8 +196,9 @@ class FlexLogger(): raise MissingSettingsException('class name is required') log_pathname = f'{class_name}_{str(time())}.log' try: - self.logfile = os.path.join(output_dir, log_pathname) - self.logger = logging.getLogger(self.logfile) + + self.logfile = pathlib.Path(output_dir, log_pathname) + self.logger = logging.getLogger(str(self.logfile)) self.verbose = verbose handler = logging.FileHandler(self.logfile) formatter = logging.Formatter(LOG_FORMAT) diff --git a/apps/cli/executables/datafetcher/test-requirements.txt b/apps/cli/executables/datafetcher/test-requirements.txt deleted file mode 100644 index 3919fc85835bc1fd6053b85ec056c8ad7c9bb50d..0000000000000000000000000000000000000000 --- a/apps/cli/executables/datafetcher/test-requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -tox==3.1.3 -tox-pyenv==1.1.0 -pyinstaller==3.2 -pytest==3.7.0 -pytest-runner==4.2 diff 
--git a/apps/cli/executables/datafetcher/test/Dockerfile b/apps/cli/executables/datafetcher/test/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..5ddc445a1d7abf529847978aa99845c8a6a1f9bb --- /dev/null +++ b/apps/cli/executables/datafetcher/test/Dockerfile @@ -0,0 +1,42 @@ +# datafetcher Dockerfile +# +# TO BUILD the docker image: -don't- "docker build" directly! +# use docker_build.sh: +# from apps/cli/executables/datafetcher, +# +# ./docker_build.sh datafetcher_test[:N] +# +# where '-t' specifies a name and N' is the version. +# (If ':N' is omitted, version is 'latest' by default.) +# tag is not required for the build, but without it +#the container name is an unhelpful hexadecimal value. + +FROM continuumio/miniconda3:latest + +COPY environment.yml . + +ENV PATH $HOME/miniconda3/bin/conda:$PATH + +# docker_build.sh should have copied environment.yml from data/; +# it will be used in the command below +RUN conda env update + +# get what we'll need for the build +COPY . . + +# get application files and tests +COPY src/ . +COPY test/ . + +# install the application +RUN ["conda", "run", "-n", "data", "python", "setup.py", "develop"] + +# we'll need a Capo profile +ENV CAPO_PROFILE local +ENV CAPO_PATH test/ +# ENV PATH $PATH:.. + +# finally, run the tests. be verbose. log stuff. 
+# (for more detailed output, use "-vv" and/or "--log-level=DEBUG"; +# to quit after first failure, use "-x") +ENTRYPOINT ["conda", "run", "-n", "data", "pytest", "-vv", "--log-level=DEBUG", "--showlocals", "test/datafetcher_test.py" ] diff --git a/apps/cli/executables/datafetcher/test/datafetcher_test.py b/apps/cli/executables/datafetcher/test/datafetcher_test.py index a79e65eab9670eac30e71e56c78f9718e2dcda91..71e03eed1a80bc9040f1f9b5cf4603300b82bd8b 100644 --- a/apps/cli/executables/datafetcher/test/datafetcher_test.py +++ b/apps/cli/executables/datafetcher/test/datafetcher_test.py @@ -1,4 +1,4 @@ -""" Unit tests for fetch as a whole """ +""" datafetcher unit tests """ import os import subprocess @@ -9,18 +9,16 @@ from typing import List from unittest.mock import MagicMock import pytest - from datafetcher.commands import DataFetcher from datafetcher.errors import Errors from datafetcher.locations_report import LocationsReport -from datafetcher.utilities import get_capo_settings, get_arg_parser, \ - ProductLocatorLookup, get_metadata_db_settings, ExecutionSite, \ - RetrievalMode, FlexLogger +from datafetcher.utilities import get_arg_parser, ExecutionSite, \ + RetrievalMode, FlexLogger, ProductLocatorLookup from .testing_utils import TEST_PROFILE, LOCATION_REPORTS, \ - get_locations_report,get_locations_file, \ - get_mini_locations_file, write_locations_file, \ - find_newest_fetch_log_file, get_test_data_dir + get_locations_report, get_locations_file, \ + get_mini_locations_file, find_newest_fetch_log_file, get_test_data_dir, \ + get_metadata_db_settings, get_test_capo_settings _VLA_SMALL_KEY = 'VLA_SMALL_EB' _FETCH_COMMAND = 'datafetcher' @@ -28,6 +26,13 @@ _LOCATION_FILENAME = 'locations.json' _EB_EXTERNAL_NAME = 'sysstartS.58955.83384832176' _ASDM_XML = 'ASDM.xml' +''' +TO EXECUTE THESE TESTS: from apps/cli/executables/datafetcher, + + pytest -vx --logging-level=INFO + +''' + class DataFetcherTestCase(unittest.TestCase): """ IMPORTANT NOTE: we CANNOT retrieve by 
copy if we don't have access to a location to which NGAS can write, e.g, lustre. Therefore, any test @@ -59,7 +64,7 @@ class DataFetcherTestCase(unittest.TestCase): @classmethod def setUpClass(cls) -> None: cls.profile = TEST_PROFILE - cls.settings = get_capo_settings(cls.profile) + cls.settings = get_test_capo_settings() cls.db_settings = get_metadata_db_settings(cls.profile) cls.test_data = cls._initialize_test_data(cls) cls.DATA_DIR = get_test_data_dir() @@ -80,14 +85,21 @@ class DataFetcherTestCase(unittest.TestCase): '--product-locator', 'not-even', '--profile', TEST_PROFILE, '--output-dir', self.top_level] fetcher = CommandLineFetchLauncher(args, self._LOG) - fetcher.run() + try: + fetcher.run() + except FileNotFoundError as err: + self._LOG.debug(f'>>> {err}') + raise err + exception_found = False terminal_exception_thrown = False - logfile = find_newest_fetch_log_file(self.top_level) - self.assertNotEqual(0, os.path.getsize(logfile), + bad_locator_logfile = find_newest_fetch_log_file(self.top_level) + self.assertIsNotNone(bad_locator_logfile, + f'we should have gotten a log file in {self.top_level}') + self.assertNotEqual(0, os.path.getsize(bad_locator_logfile), f'expecting a non-empty log file in {self.top_level}') - with open(logfile, 'r') as log: + with open(bad_locator_logfile) as log: log_contents = log.readlines() for line in log_contents: @@ -99,7 +111,7 @@ class DataFetcherTestCase(unittest.TestCase): break self.assertTrue(exception_found, 'expecting NoLocatorException') self.assertTrue(terminal_exception_thrown, 'terminal_exception should be thrown') - term_exc_count = 1 + bad_locator_logfile.unlink() # nonexistent locations file args = [_FETCH_COMMAND, '--location-file', 'aint_got_one', '--output-dir', @@ -121,14 +133,13 @@ class DataFetcherTestCase(unittest.TestCase): if exception_found and terminal_exception_thrown: break self.assertTrue(exception_found, 'expecting FileNotFoundError') - self.assertEqual(1, term_exc_count, 'terminal_exception 
should be ' - 'thrown') def test_nothing_retrieved_if_dry_on_cmd_line(self): - location_file = get_mini_locations_file(os.path.join(self.top_level, - _LOCATION_FILENAME)) + toplevel = Path(self.top_level) + location_file = get_mini_locations_file( + Path(toplevel, _LOCATION_FILENAME)) args = [_FETCH_COMMAND, - '--location-file', location_file, + '--location-file', str(location_file), '--profile', TEST_PROFILE, '--output-dir', self.top_level, '--dry', '--verbose'] fetcher = CommandLineFetchLauncher(args, self._LOG) @@ -137,54 +148,54 @@ class DataFetcherTestCase(unittest.TestCase): self.assertEqual([], output, 'expecting no files for dry run') self.assertNotEqual(0, os.path.getsize(logfile), 'log file should be non-empty because verbose') + Path.unlink(location_file) # make sure none of these files written - os.remove(location_file) file_count = 0 for _ in os.walk(location_file): file_count += 1 self.assertEqual(0, file_count, 'no files should have been retrieved') def test_force_overwrite_from_cmd_line(self): - location_file = get_mini_locations_file(os.path.join(self.top_level, - _LOCATION_FILENAME)) - dest_dir = os.path.join(self.top_level, _EB_EXTERNAL_NAME) - Path(dest_dir).mkdir(parents=True, exist_ok=True) + toplevel = Path(self.top_level) + location_file = get_mini_locations_file(toplevel / _LOCATION_FILENAME) + dest_dir = Path(toplevel, _EB_EXTERNAL_NAME) + dest_dir.mkdir(parents=True, exist_ok=True) # make a fake file to be overwritten - fake_file = os.path.join(dest_dir, _ASDM_XML) + fake_file = dest_dir / _ASDM_XML with open(fake_file, 'w') as to_write: to_write.write('alas, my days are numbered') args = [_FETCH_COMMAND, - '--location-file', location_file, + '--location-file', str(location_file), '--profile', TEST_PROFILE, '--output-dir', self.top_level, '--force'] CommandLineFetchLauncher(args, self._LOG).run() sizes = dict() - for dirname, dirnames, fnames in os.walk(dest_dir): + for _, _, fnames in os.walk(dest_dir): for fname in fnames: - path = 
os.path.join(dest_dir, fname) + path = dest_dir / fname sizes[path] = os.path.getsize(path) self.assertEqual(37, len(sizes), 'expecting 37 files to be fetched') fake_size = os.path.getsize(fake_file) self.assertEqual(9339, fake_size, f'expecting ' - f'{os.path.basename(fake_file)} to ' + f'{fake_file} to ' f'be 9339 bytes') def test_no_overwrite_from_cmd_line(self): - location_file = get_mini_locations_file(os.path.join(self.top_level, - _LOCATION_FILENAME)) - dest_dir = os.path.join(self.top_level, _EB_EXTERNAL_NAME) - Path(dest_dir).mkdir(parents=True, exist_ok=True) + toplevel = Path(self.top_level) + location_file = get_mini_locations_file(toplevel / _LOCATION_FILENAME) + dest_dir = toplevel / _EB_EXTERNAL_NAME + dest_dir.mkdir(parents=True, exist_ok=True) # make a fake file that shouldn't be overwritten - fake_file = os.path.join(dest_dir, _ASDM_XML) + fake_file = dest_dir / _ASDM_XML with open(fake_file, 'w') as to_write: to_write.write("I'm not going anywhere!") args = [_FETCH_COMMAND, - '--location-file', location_file, + '--location-file', str(location_file), '--profile', TEST_PROFILE, '--output-dir', self.top_level] fetcher = CommandLineFetchLauncher(args, self._LOG) fetcher.run() @@ -206,10 +217,10 @@ class DataFetcherTestCase(unittest.TestCase): 'expecting terminal_exception for FileExistsError') def test_cmd_line_more_output_when_verbose(self): + report_file = get_mini_locations_file( + Path(self.top_level, 'locations_verbose.json')) args = [_FETCH_COMMAND, - '--location-file', - get_mini_locations_file( - os.path.join(self.top_level, 'locations_verbose.json')), + '--location-file', str(report_file), '--profile', TEST_PROFILE, '--output-dir', self.top_level, '--verbose'] fetcher = CommandLineFetchLauncher(args, self._LOG) @@ -218,34 +229,33 @@ class DataFetcherTestCase(unittest.TestCase): self.assertEqual(num_files_expected, len(retrieved), f'expecting {num_files_expected} files') - logfile = find_newest_fetch_log_file(self.top_level) - 
self.assertNotEqual(0, os.path.getsize(logfile), + verbose_logfile = find_newest_fetch_log_file(self.top_level) + self.assertNotEqual(0, os.path.getsize(verbose_logfile), 'log should contain debug messages') - [os.remove(file) for file in retrieved] - logname = str(logfile).replace('data_fetcher', 'fetch_verbose') - os.rename(logfile, logname) + [file.unlink() for file in retrieved] + verbose_logfile.unlink() # same thing, but without verbose args = [_FETCH_COMMAND, - '--location-file', - get_mini_locations_file( - os.path.join(self.top_level, _LOCATION_FILENAME)), + '--location-file', str(report_file), '--profile', TEST_PROFILE, '--output-dir', self.top_level] fetcher = CommandLineFetchLauncher(args, self._LOG) retrieved = fetcher.run() self.assertEqual(num_files_expected, len(retrieved), f'expecting {num_files_expected} files') logfile = find_newest_fetch_log_file(self.top_level) - self.assertEqual(0, os.path.getsize(logfile), 'log should be empty') + self.assertEqual(0, os.path.getsize(logfile), + f'{logfile} should be empty') def test_can_stream_from_mini_locations_file(self): """ gin up a location report with just a few small files in it and confirm that we can actually stream them """ - path = os.path.join(self.top_level, _LOCATION_FILENAME) - report_file = get_mini_locations_file(path) - args = ['--location-file', report_file, + location_file = get_mini_locations_file(Path(self.top_level, _LOCATION_FILENAME)) + + report_file = get_mini_locations_file(location_file) + args = ['--location-file', str(report_file), '--output-dir', self.top_level, '--profile', self.profile] namespace = get_arg_parser().parse_args(args) @@ -255,24 +265,25 @@ class DataFetcherTestCase(unittest.TestCase): self.assertEqual(37, file_count) def test_verbose_writes_stuff_to_log(self): - path = os.path.join(self.top_level, _LOCATION_FILENAME) + path = Path(self.top_level, _LOCATION_FILENAME) report_file = get_mini_locations_file(path) - args = ['--location-file', report_file, 
'--output-dir', self.top_level, + args = ['--location-file', str(report_file), + '--output-dir', self.top_level, '--profile', self.profile, '--verbose'] namespace = get_arg_parser().parse_args(args) fetch = DataFetcher(namespace, self.settings) fetch.run() logfile = fetch.logfile - self.assertTrue(os.path.isfile(logfile), + self.assertTrue(logfile.is_file(), f'expecting log file {logfile}') self.assertNotEqual(0, os.path.getsize(logfile), 'there should be entries in the log file') def test_empty_log_if_not_verbose(self): - path = os.path.join(self.top_level, _LOCATION_FILENAME) + path = Path(self.top_level, _LOCATION_FILENAME) report_file = get_mini_locations_file(path) - args = ['--location-file', report_file, + args = ['--location-file', str(report_file), '--output-dir', self.top_level, '--profile', self.profile] namespace = get_arg_parser().parse_args(args) @@ -280,7 +291,7 @@ class DataFetcherTestCase(unittest.TestCase): fetch.run() logfile = fetch.logfile - self.assertTrue(os.path.isfile(logfile), + self.assertTrue(logfile.is_file(), f'expecting log file {logfile}') self.assertEqual(0, os.path.getsize(logfile), 'log file should be empty') @@ -324,7 +335,7 @@ class DataFetcherTestCase(unittest.TestCase): def test_dies_with_bad_server_info(self): report_file = get_locations_file('VLA_BAD_SERVER') - args = ['--location-file', report_file, + args = ['--location-file', str(report_file), '--output-dir', self.top_level, '--profile', self.profile] namespace = get_arg_parser().parse_args(args) @@ -336,82 +347,96 @@ class DataFetcherTestCase(unittest.TestCase): self.assertEqual(expected, exc_code) def test_throws_sys_exit_file_exists_if_overwrite_not_forced(self): - location_file = get_mini_locations_file(os.path.join(self.top_level, - _LOCATION_FILENAME)) - destination = os.path.join( - self.top_level, _EB_EXTERNAL_NAME) - + toplevel = Path(self.top_level) + location_file = get_mini_locations_file( + Path(self.top_level, _LOCATION_FILENAME)) + 
self.assertTrue(Path.exists(location_file), + f'{location_file}') + destination = Path(toplevel, _EB_EXTERNAL_NAME) Path(destination).mkdir(parents=True, exist_ok=True) - self.assertTrue(os.path.isdir(destination)) + self.assertTrue(destination.is_dir(), f'{destination}') # stick a fake SDM in there so it will fall over - fake_file = os.path.join(destination, _ASDM_XML) + fake_file = Path(destination, _ASDM_XML) with open(fake_file, 'w') as to_write: to_write.write('lalalalalala') - self.assertTrue(os.path.exists(fake_file)) + self.assertTrue(fake_file.exists(), f'expecting fake file: {fake_file}') self.assertFalse(os.path.getsize(fake_file) == 0) - parser = get_arg_parser() - - args = ['--location-file', location_file, + args = ['--location-file', str(location_file), '--output-dir', self.top_level, '--profile', TEST_PROFILE] - namespace = parser.parse_args(args) + namespace = get_arg_parser().parse_args(args) # exception should be thrown because one of the files to be retrieved # is in the destination dir and we're not forcing overwrite here with pytest.raises(SystemExit) as exc: - fetch = DataFetcher(namespace, self.settings) - fetch.run() + DataFetcher(namespace, self.settings).run() exc_code = exc.value.code expected = Errors.FILE_EXISTS_ERROR.value self.assertEqual(expected, exc_code) def test_overwrites_when_forced(self): - location_report = get_locations_report(_VLA_SMALL_KEY) - location_report = self._remove_large_files_from_location_report( - location_report) - report_file = write_locations_file( - os.path.join(self.top_level, _LOCATION_FILENAME), location_report) - external_name = LOCATION_REPORTS['VLA_SMALL_EB']['external_name'] - destination = os.path.join(self.top_level, external_name) - Path(destination).mkdir(parents=True, exist_ok=True) - self.assertTrue(os.path.isdir(destination)) + external_name = LOCATION_REPORTS[_VLA_SMALL_KEY]['external_name'] + toplevel = Path(self.top_level) + destination = toplevel / external_name + 
destination.mkdir(parents=True, exist_ok=True) + self.assertTrue(destination.is_dir(), f'{destination}') # stick a fake SDM in there to see if overwrite really happens to_overwrite = _ASDM_XML - fake_file = os.path.join(destination, to_overwrite) + fake_file = destination / to_overwrite text = '"Bother!" said Pooh. "Lock phasers on that heffalump!"' with open(fake_file, 'w') as to_write: to_write.write(text) - self.assertTrue(os.path.exists(fake_file), f'{to_overwrite} should ' - f'have been created') + self.assertTrue(fake_file.exists(), + f'{to_overwrite} should have been created') self.assertEqual(len(text), os.path.getsize(fake_file), f'before overwrite, {to_overwrite} should be' f' {len(text)} bytes') + report_metadata = LOCATION_REPORTS['VLA_SMALL_EB'] + external_name = report_metadata['external_name'] + destination = toplevel / external_name + Path(destination).mkdir(parents=True, exist_ok=True) - args = ['--location-file', report_file, - '--output-dir', self.top_level, '--force', '--profile', TEST_PROFILE] + json_path = destination / report_metadata['filename'] + report_file = get_mini_locations_file(json_path) + args = ['--location-file', str(report_file), + '--output-dir', self.top_level, + '--profile', TEST_PROFILE, '--force'] namespace = get_arg_parser().parse_args(args) + report = LocationsReport(self._LOG, namespace, self.settings) + + # expecting 37 files + files = report.files_report['files'] + + sizes = [file['size'] for file in files] + total_size_expected = sum(sizes) + num_files_expected = 37 + self.assertEqual(num_files_expected, len(files), + f"expecting {report_metadata['file_count']} files in report") + fetch = DataFetcher(namespace, self.settings) retrieved = fetch.run() + self.assertEqual(num_files_expected, len(retrieved), + f'expected {num_files_expected} files but got {len(retrieved)}') - real_size = -1 - location_report = get_locations_report(_VLA_SMALL_KEY) - for file in location_report['files']: - if to_overwrite == 
file['relative_path']: - real_size = file['size'] - break - for file in retrieved: - if to_overwrite == os.path.basename(file): - self.assertEqual(real_size, os.path.getsize(file), - f'{to_overwrite} should be {real_size} bytes') + # delete the .json so it doesn't mess up our total size computation + Path.unlink(report_file) + + total_size_actual = 0 + dest = Path(destination) + for dirpath, _, filenames in os.walk(dest): + for fname in filenames: + path = Path(dirpath, fname) + total_size_actual += os.path.getsize(path) + self.assertEqual(total_size_expected, total_size_actual, + f'expected total size={total_size_expected}; got {total_size_actual}') def test_sys_exit_file_error_on_bad_destination(self): file_spec = self.test_data['13B-014'] - bad_path = os.path.abspath('/foo') args = ['--product-locator', file_spec['product_locator'], - '--output-dir', bad_path, + '--output-dir', '/foo', '--profile', self.profile] namespace = get_arg_parser().parse_args(args) with pytest.raises(SystemExit) as s_ex: @@ -440,27 +465,30 @@ class DataFetcherTestCase(unittest.TestCase): def test_gets_vlbas_from_report_file(self): report_file = get_locations_file('VLBA_EB') - args = ['--location-file', report_file, + args = ['--location-file', str(report_file), '--output-dir', self.top_level, '--profile', self.profile] namespace = get_arg_parser().parse_args(args) fetch = DataFetcher(namespace, self.settings) report_files = fetch.locations_report.files_report['files'] + self.assertEqual(16, len(report_files), - f'{os.path.basename(report_file)} should have 16 ' - f'files') - expected_files = [os.path.join(self.top_level, item['relative_path']) + f'expecting 16 report files in {self.top_level}') + expected_files = [Path(self.top_level, item['relative_path']) for item in report_files] + + # files we're getting take waaaaayyy too long to fetch in a test case, + # so we're mocking DataFetcher.run() fetch.run = MagicMock(return_value=expected_files) actual_files = fetch.run() num_expected = 
len(expected_files) - self.assertEqual(num_expected, len(actual_files), f'expecting ' - f'{num_expected} ' - f'VLBA files') + self.assertEqual(num_expected, len(actual_files), + f'expecting {num_expected} VLBA files in {self.top_level}') match_count = 0 for exp_file in expected_files: for act_file in actual_files: - if os.path.basename(act_file) == os.path.basename(exp_file): + act_parent = act_file.name + if act_parent == exp_file.name: match_count += 1 break self.assertEqual(num_expected, match_count, @@ -469,15 +497,14 @@ class DataFetcherTestCase(unittest.TestCase): def test_gets_large_vla_ebs_from_report_file(self): report_file = get_locations_file('VLA_LARGE_EB') - args = ['--location-file', report_file, + args = ['--location-file', str(report_file), '--output-dir', self.top_level, '--profile', self.profile] namespace = get_arg_parser().parse_args(args) fetch = DataFetcher(namespace, self.settings) report_files = fetch.locations_report.files_report['files'] - self.assertEqual(46, len(report_files), - f'{os.path.basename(report_file)} should have 46 ' - f'files') - expected_files = [os.path.join(self.top_level, item['relative_path']) + self.assertEqual(46, len(report_files), 'expecting 46 files') + toplevel = Path(self.top_level) + expected_files = [toplevel / item['relative_path'] for item in report_files] fetch.run = MagicMock(return_value=expected_files) actual_files = fetch.run() @@ -488,15 +515,17 @@ class DataFetcherTestCase(unittest.TestCase): def test_gets_images_from_report_file(self): report_file = get_locations_file('IMG') - args = ['--location-file', report_file, + args = ['--location-file', str(report_file), '--output-dir', self.top_level, '--profile', self.profile] namespace = get_arg_parser().parse_args(args) fetch = DataFetcher(namespace, self.settings) report_files = fetch.locations_report.files_report['files'] self.assertEqual(2, len(report_files), - f'{os.path.basename(report_file)} should have 2 files') - expected_files = 
[os.path.join(self.top_level, item['relative_path']) + f'expecting 2 report files in {self.top_level}') + toplevel = Path(self.top_level) + expected_files = [toplevel / item['relative_path'] for item in report_files] + # files are too big to fetch in a test; mock DataFetcher.run() fetch.run = MagicMock(return_value=expected_files) actual_files = fetch.run() num_expected = len(expected_files) @@ -506,13 +535,13 @@ class DataFetcherTestCase(unittest.TestCase): def test_gets_calibration_from_report_file(self): report_file = get_locations_file('CALIBRATION') - args = ['--location-file', report_file, + args = ['--location-file', str(report_file), '--output-dir', self.top_level, '--profile', self.profile] namespace = get_arg_parser().parse_args(args) fetch = DataFetcher(namespace, self.settings) report_files = fetch.locations_report.files_report['files'] self.assertEqual(1, len(report_files), - f'{os.path.basename(report_file)} should have 1 file') + f'expecting 1 report file in {self.top_level}') file_spec = report_files[0] # calibration will have external name = relative path = subdirectory @@ -520,7 +549,7 @@ class DataFetcherTestCase(unittest.TestCase): self.assertEqual(relative_path, file_spec['subdirectory'], 'expecting relative_path same as subdirectory') - expected_files = [os.path.join(self.top_level, relative_path)] + expected_files = [Path(self.top_level, relative_path)] fetch.run = MagicMock(return_value=expected_files) actual_files = fetch.run() num_expected = len(expected_files) @@ -538,7 +567,7 @@ class DataFetcherTestCase(unittest.TestCase): fetch = DataFetcher(namespace, self.settings) report_files = fetch.locations_report.files_report['files'] self.assertEqual(1, len(report_files), - f'{external_name} should be 1 file') + f'{external_name} should be 1 file in {self.top_level}') file_spec = report_files[0] @@ -549,27 +578,26 @@ class DataFetcherTestCase(unittest.TestCase): self.assertEqual(relative_path, file_spec['subdirectory'], 'expecting 
relative_path same as subdirectory') - expected_files = [os.path.join(self.top_level, relative_path)] + expected_files = [Path(self.top_level) / relative_path] fetch.run = MagicMock(return_value=expected_files) actual_files = fetch.run() num_expected = len(expected_files) self.assertEqual(num_expected, len(actual_files), f'expecting ' f'{num_expected} ' f'calibration') - # @unittest.skip('test_retrieval_finds_size_mismatch') def test_retrieval_finds_size_mismatch(self): report_spec = LOCATION_REPORTS[_VLA_SMALL_KEY] external_name = report_spec['external_name'] - locations_file = os.path.join(self.DATA_DIR, 'VLA_SMALL_EB_BUSTED.json') - args = ['--location-file', locations_file, + data_dir = Path(self.DATA_DIR) + locations_file = data_dir / 'VLA_SMALL_EB_BUSTED.json' + args = ['--location-file', str(locations_file), '--output-dir', self.top_level, '--profile', self.profile] namespace = get_arg_parser().parse_args(args) fetch1 = DataFetcher(namespace, self.settings) report_files = fetch1.locations_report.files_report['files'] self.assertEqual(44, len(report_files), - f'{os.path.basename(locations_file)} should have 44 ' - f'files') + f'{locations_file.name} should have 44 files') filename = 'Weather.xml' for file in report_files: @@ -659,59 +687,65 @@ class CommandLineFetchLauncher: self._LOG = logger namespace = get_arg_parser().parse_args(args_to_parse) self.args = args - self.output_dir = namespace.output_dir + self.output_dir = Path(namespace.output_dir) + + if not Path.is_dir(self.output_dir): + raise FileNotFoundError(f'{self.output_dir} not found') + elif not os.access(self.output_dir, os.R_OK): + raise PermissionError(f'{self.output_dir} not found') self.verbose = namespace.verbose def run(self): ''' launch fetch from command line @:returns directory listing ''' + with subprocess.Popen(self.args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, bufsize=1, universal_newlines=True) as proc: - if proc.stderr: - for err in proc.stderr: - 
self._LOG.error(err.strip()) - - output = proc.stdout - error_found = output.errors - if error_found: - if isinstance(error_found, list): - [self._LOG.error(line) for line in error_found] - else: - if error_found != 'strict': - self._LOG.error(error_found) - - lines = list() - for line in output: - lines.append(line.strip()) - - for i in range(0, len(lines) - 1): - line = lines[i] - self._LOG.debug(f'{line}') - if 'error' in line.lower(): - # log everything after the error - for j in range(i, len(lines) - 1): - self._LOG.error(lines[j]) - if 'debug' in line.lower() and self.verbose: - self._LOG.debug(line) - if 'warn' in line.lower(): - self._LOG.warning(line) + return self.analyze_output(proc) + + def analyze_output(self, proc): + if proc.stderr: + for err in proc.stderr: + self._LOG.error(err.strip()) + + output = proc.stdout + error_found = output.errors + if error_found: + if isinstance(error_found, list): + [self._LOG.error(line) for line in error_found] + else: + if error_found != 'strict': + self._LOG.error(error_found) + + lines = list() + for line in output: + lines.append(line.strip()) + + for i in range(0, len(lines) - 1): + line = lines[i] + self._LOG.debug(f'{line}') + if 'error' in line.lower(): + # log everything after the error + for j in range(i, len(lines) - 1): + self._LOG.error(lines[j]) + if 'debug' in line.lower() and self.verbose: + self._LOG.debug(line) + if 'warn' in line.lower(): + self._LOG.warning(line) files_retrieved = list() for root, dirnames, filenames in os.walk(self.output_dir): + root_dir = Path(root) if dirnames: - subdir = os.path.join(root, dirnames[0]) + subdir = root_dir / dirnames[0] else: - subdir = root + subdir = root_dir for filename in filenames: if not filename.endswith('.log') and not filename.endswith('.json'): - files_retrieved.append(os.path.join(subdir, filename)) - + files_retrieved.append(subdir / filename) + self._LOG.debug(f'>>> {len(files_retrieved)} files retrieved') return files_retrieved - - -if __name__ 
== '__main__':
-    unittest.main()
diff --git a/apps/cli/executables/datafetcher/test/docker-build.sh b/apps/cli/executables/datafetcher/test/docker-build.sh
new file mode 100755
index 0000000000000000000000000000000000000000..1888c1fb8e1a5861126f32fdd3a74303de0c267a
--- /dev/null
+++ b/apps/cli/executables/datafetcher/test/docker-build.sh
@@ -0,0 +1,77 @@
+#!/bin/bash
+
+# Building a Docker image in which to execute tests
+# will require a copy of the local Capo properties
+# file, which can be found at /home/casa/capo
+# on boxes that can see /home, but which on boxes
+# that can't is likely to be at ~/home/.capo for
+# any given user. Find local.properties and
+# copy it to our test directory. Dockerfiles
+# do not support conditional logic; hence this script.
+
+# Execute script from apps/executables/cli/datafetcher/
+FILENAME=local.properties
+CONTAINER_NAME=$1;shift
+CACHE_FLAG=$1;shift
+USAGE='Usage: $0 <container_name> [NO-CACHE]'
+if [[ -z "${CONTAINER_NAME}" ]]
+then
+    echo "${USAGE}"
+    exit 1
+fi
+
+if [ -n "${CACHE_FLAG}" ]
+then
+    shopt -s nocasematch
+    if [[ "${CACHE_FLAG}" =~ ^NO[-_]CACHE$ ]]
+    then
+        USE_CACHE=0
+    else
+        echo 'invalid cache flag: '"${CACHE_FLAG}"
+        exit 1
+    fi
+else
+    USE_CACHE=1
+fi
+
+# conda will need the environment.yml
+export ENV_YML=environment.yml
+export YML_DIR=../../../../
+cp $YML_DIR${ENV_YML} ${ENV_YML}
+
+# The preferred version of Capo .properties files is always
+# the one at /home/casa/capo, -if- this is visible
+# (i.e., NRAO internal system). If not (i.e., developer laptop),
+# get the one in the user's .capo directory
+if [ -e /home/casa/capo/${FILENAME} ]
+then
+    SOURCE=/home/casa/capo/${FILENAME}
+elif [ -e ~/.capo/${FILENAME} ]
+then
+    SOURCE=~/.capo/${FILENAME}
+else
+    echo "${FILENAME} not found!"
+ exit 1 +fi + +NEW_FILE=./test/${FILENAME} +cp ${SOURCE} ${NEW_FILE} + +# remove extended attributes, which would cause Capo to balk +/usr/bin/xattr -c ${NEW_FILE} + +if [ "${USE_CACHE}" == 1 ] +then + echo '>>>> Using cache, if possible' + docker build . -f test/Dockerfile -t ${CONTAINER_NAME} +else + echo '>>>> no cache' + docker build . -f test/Dockerfile --no-cache -t ${CONTAINER_NAME} +fi + +# now get rid of the properties file; containing sensitive info, it must NOT be saved or committed +rm -f ${NEW_FILE} +# get rid of the .yml, too +rm -f ${ENV_YML} + +# to run the image: docker run ${CONTAINER_NAME}[:latest] diff --git a/apps/cli/executables/datafetcher/test/locations_report_test.py b/apps/cli/executables/datafetcher/test/locations_report_test.py deleted file mode 100644 index 1692a25e0a768d18e9db168a28e17aa4528863e1..0000000000000000000000000000000000000000 --- a/apps/cli/executables/datafetcher/test/locations_report_test.py +++ /dev/null @@ -1,300 +0,0 @@ -""" Unit tests for locations report """ -import os -import tempfile -import unittest -from json import JSONDecodeError - -import pytest - -from datafetcher.errors import Errors, MissingSettingsException, \ - NoLocatorException -from datafetcher.locations_report import LocationsReport -from .testing_utils import TEST_PROFILE, LOCATION_REPORTS, get_test_data_dir -from datafetcher.utilities import get_capo_settings, \ - get_metadata_db_settings, \ - ProductLocatorLookup, get_arg_parser, RetrievalMode, FlexLogger - - -class LocationsReportTestCase(unittest.TestCase): - ''' locations report test case''' - - @classmethod - def setUpClass(cls) -> None: - cls.profile = 'nmtest' - cls.settings = get_capo_settings(cls.profile) - cls.db_settings = get_metadata_db_settings(cls.profile) - cls._13b_locator = ProductLocatorLookup(cls.db_settings) \ - .look_up_locator_for_ext_name( - '13B-014.sb28862036.eb29155786.56782.5720116088') - cls.DATA_DIR = get_test_data_dir() - if cls.DATA_DIR is None: - pytest.fail(f'test 
data directory not found under {os.getcwd()}') - - @classmethod - def setUp(cls) -> None: - umask = os.umask(0o000) - cls.top_level = tempfile.mkdtemp() - cls._LOG = FlexLogger(cls.__name__, cls.top_level) - os.umask(umask) - - def test_init_failure(self): - - # missing log, args, settings - with pytest.raises(AttributeError): - LocationsReport(None, None, None) - - # missing log, args - with pytest.raises(AttributeError): - LocationsReport(None, None, self.settings) - - # empty destination, profile shouldn't matter - args = ['--product-locator', self._13b_locator, - '--output-dir', None, '--profile', None] - namespace = get_arg_parser().parse_args(args) - LocationsReport(self._LOG, namespace, self.settings) - - def test_throws_missing_setting_with_missing_locator(self): - args = ['--product-locator', None, - '--output-dir', None, '--profile', None] - with pytest.raises(SystemExit) as s_ex: - get_arg_parser().parse_args(args) - self.assertEqual(Errors.MISSING_SETTING.value, s_ex.value.code, - 'should throw MISSING_SETTING error') - - def test_throws_no_locator_with_bad_locator(self): - args = ['--product-locator', 'Fred', - '--output-dir', None, '--profile', None] - namespace = get_arg_parser().parse_args(args) - with pytest.raises(Exception) as exc: - LocationsReport(self._LOG, namespace, self.settings) - type_name = exc.typename - exc_name = NoLocatorException.__name__ - self.assertEqual(exc_name, type_name, - 'expecting a NoLocatorException') - - def test_throws_file_error_if_cant_find_report_file(self): - args = ['--location-file', 'Mildred', - '--output-dir', None, '--profile', None] - namespace = get_arg_parser().parse_args(args) - with pytest.raises(FileNotFoundError): - LocationsReport(self._LOG, namespace, self.settings) - - def test_gets_expected_eb_from_locator(self): - args = ['--product-locator', self._13b_locator, - '--output-dir', None, '--profile', None] - namespace = get_arg_parser().parse_args(args) - report = LocationsReport(self._LOG, namespace, 
self.settings) - files = report.files_report['files'] - self.assertEqual(91, len(files), 'expecting 91 files in report') - - def test_gets_empty_log_if_not_verbose(self): - args = ['--product-locator', self._13b_locator, - '--output-dir', None, '--profile', None] - namespace = get_arg_parser().parse_args(args) - report = LocationsReport(self._LOG, namespace, self.settings) - logfile = report.logfile - self.assertTrue(os.path.exists(logfile), - f'expecting log file "{logfile}"') - self.assertEqual(0, os.path.getsize(logfile), - 'expecting an empty log file because not verbose') - - def test_gets_non_empty_log_if_verbose(self): - args = ['--product-locator', self._13b_locator, - '--output-dir', None, '--profile', None] - namespace = get_arg_parser().parse_args(args) - verbose_logger = FlexLogger(self.__class__.__name__, - self.top_level, True) - report = LocationsReport(verbose_logger, namespace, self.settings) - logfile = report.logfile - self.assertTrue(os.path.exists(logfile), - f'expecting log file "{logfile}"') - self.assertNotEqual(0, os.path.getsize(logfile), - 'expecting at least one log entry because verbose') - - def test_gets_expected_servers_info_from_locator(self): - args = ['--product-locator', self._13b_locator, - '--output-dir', None, '--profile', None] - namespace = get_arg_parser().parse_args(args) - report = LocationsReport(self._LOG, namespace, self.settings) - server_info = report.servers_report - self.assertEqual(3, len(server_info), 'files should be on 3 NGAS hosts') - for server in ('1', '3', '4'): - server_url = 'nmngas0' + server + '.aoc.nrao.edu:7777' - self.assertTrue(server_url in server_info.keys()) - num_files = len(server_info[server_url]['files']) - if server == '1': - self.assertEqual(1, num_files, - f'{server_url} should have 1 file') - elif server == '3': - self.assertEqual(30, num_files, - f'{server_url} should have 30 files') - elif server == '4': - self.assertEqual(60, num_files, - f'{server_url} should have 60 files') - - def 
test_gets_expected_images_from_file(self): - report_metadata = LOCATION_REPORTS['IMG'] - report_file = os.path.join(self.DATA_DIR, report_metadata['filename']) - - args = ['--location-file', report_file, - '--output-dir', None, '--profile', None] - namespace = get_arg_parser().parse_args(args) - report = LocationsReport(self._LOG, namespace, self.settings) - files = report.files_report['files'] - self.assertEqual(report_metadata['file_count'], len(files), - f"expecting {report_metadata['file_count']} files in report") - - server_info = report.servers_report - self.assertEqual(report_metadata['server_count'], len(server_info), - f"expecting files to be on " - f"{report_metadata['server_count']} NGAS hosts") - for item in server_info.items(): - server_url = item[0] - file = item[1]['files'][0] - - if 'nmngas01' in server_url: - self.assertEqual( - file['checksum'], '-1675665022', - f"{server_url} file checksum") - self.assertEqual( - file['ngas_file_id'], - 'uid____evla_image_56a10be7-f1c2-4788-8651-6ecc5bfbc2f1.fits', - f"{server_url} file ngas_file_id") - elif 'nmngas02' in server_url: - self.assertEqual( - file['checksum'], '1271435719', - f"{server_url} file checksum") - self.assertEqual( - file['ngas_file_id'], - 'uid____evla_image_b10137d8-d2ef-4286-a5c9-a3b8cd74f276.fits', - f"{server_url} file ngas_file_id") - else: - self.fail(f"didn't expect to find {server_url}") - - def test_gets_vla_large_from_file_correctly(self): - report_metadata = LOCATION_REPORTS['VLA_LARGE_EB'] - report_file = os.path.join(self.DATA_DIR, report_metadata['filename']) - args = ['--location-file', report_file, - '--output-dir', None, '--profile', None] - namespace = get_arg_parser().parse_args(args) - report = LocationsReport(self._LOG, namespace, self.settings) - files = report.files_report['files'] - self.assertEqual(report_metadata['file_count'], len(files), - f"expecting {report_metadata['file_count']} files in report") - server_info = report.servers_report - 
self.assertEqual(report_metadata['server_count'], len(server_info), - f"expecting files to be on " - f"{report_metadata['server_count']} NGAS hosts") - - for item in server_info.items(): - files = item[1]['files'] - server_url = item[0] - if 'nmngas01' in server_url: - self.assertEqual(6, len(files), - f'expecting 6 files on {server_url}') - elif 'nmngas02' in server_url: - self.assertEqual(40, len(files), f'expecting 40 files on ' - f'{server_url}') - else: - self.fail( - f"not expecting {server_url} in {report_metadata['filename']}") - - def test_gets_vla_small_from_file_correctly(self): - report_metadata = LOCATION_REPORTS['VLA_SMALL_EB'] - report_file = os.path.join(self.DATA_DIR, report_metadata['filename']) - args = ['--location-file', report_file, - '--output-dir', None, '--profile', None] - namespace = get_arg_parser().parse_args(args) - report = LocationsReport(self._LOG, namespace, self.settings) - files = report.files_report['files'] - self.assertEqual(report_metadata['file_count'], len(files), - f"expecting {report_metadata['file_count']} files in report") - server_info = report.servers_report - self.assertEqual(report_metadata['server_count'], len(server_info), - f"expecting files to be on " - f"{report_metadata['server_count']} NGAS hosts") - - for item in server_info.items(): - files = item[1]['files'] - server_url = item[0] - if 'nmngas03' in server_url: - self.assertEqual(3, len(files), f'expecting 3 files on ' - f'{server_url}') - elif 'nmngas04' in server_url: - self.assertEqual(41, len(files), f'expecting 41 files on ' - f'{server_url}') - else: - self.fail(f"not expecting {server_url} in {report_metadata['filename']}") - - def test_gets_expected_vlbas_from_file(self): - report_metadata = LOCATION_REPORTS['VLBA_EB'] - report_file = os.path.join(self.DATA_DIR, report_metadata['filename']) - - args = ['--location-file', report_file, - '--output-dir', None, '--profile', None] - namespace = get_arg_parser().parse_args(args) - report = 
LocationsReport(self._LOG, namespace, self.settings) - files = report.files_report['files'] - self.assertEqual(report_metadata['file_count'], len(files), - f"expecting {report_metadata['file_count']} files in report") - - server_info = report.servers_report - self.assertEqual(report_metadata['server_count'], len(server_info), - f"expecting files to be on " - f"{report_metadata['server_count']} NGAS host") - for item in server_info.items(): - file = item[1]['files'][0] - ngas_id = file['ngas_file_id'] - self.assertEqual(ngas_id, file['relative_path'], - 'ngas_file_id = relative_path for VLBA files') - self.assertTrue(str(ngas_id).endswith('.uvfits'), - 'these should all be VLBA_VSN0011..UVFITS files') - self.assertTrue(str(ngas_id) - .startswith('VLBA_VSN0011'), - 'these should all be VLBA_VSN0011..UVFITS files') - - def test_throws_json_error_if_nothing_in_report_file(self): - report_file = os.path.join(self.DATA_DIR, 'EMPTY.json') - args = ['--location-file', report_file, - '--output-dir', None, '--profile', None] - namespace = get_arg_parser().parse_args(args) - with pytest.raises(JSONDecodeError): - LocationsReport(self._LOG, namespace, self.settings) - - def test_throws_json_error_if_report_file_is_not_json(self): - report_file = os.path.join(self.DATA_DIR, 'NOT_JSON.json') - args = ['--location-file', report_file, - '--output-dir', None, '--profile', None] - namespace = get_arg_parser().parse_args(args) - with pytest.raises(JSONDecodeError): - LocationsReport(self._LOG, namespace, self.settings) - - def test_local_profile_is_streaming_else_copy(self): - old_exec_site = self.settings['execution_site'] - self.settings['execution_site'] = 'somewhere else' - try: - args = ['--product-locator', self._13b_locator, - '--output-dir', None, '--profile', self.profile] - namespace = get_arg_parser().parse_args(args) - report = LocationsReport(self._LOG, namespace, self.settings) - server_info = report.servers_report - for item in server_info.items(): - 
self.assertEqual(RetrievalMode.STREAM.value, - item[1]['retrieve_method'].value, - 'files should be streamed') - finally: - self.settings['execution_site'] = old_exec_site - - args = ['--product-locator', self._13b_locator, - '--output-dir', None, '--profile', self.profile] - namespace = get_arg_parser().parse_args(args) - report = LocationsReport(self._LOG, namespace, self.settings) - server_info = report.servers_report - for item in server_info.items(): - self.assertEqual(RetrievalMode.COPY.value, - item[1]['retrieve_method'].value, - 'files should be direct-copied') - - -if __name__ == '__main__': - unittest.main() diff --git a/apps/cli/executables/datafetcher/test/logging_test.py b/apps/cli/executables/datafetcher/test/logging_test.py deleted file mode 100644 index 178054401eb982c8b65103d0ca736c1c334a3129..0000000000000000000000000000000000000000 --- a/apps/cli/executables/datafetcher/test/logging_test.py +++ /dev/null @@ -1,75 +0,0 @@ -''' Tests for FlexLogger ''' -import os -import tempfile -import unittest -from pathlib import Path - -import pytest -from datafetcher.utilities import FlexLogger - -class FlexLoggerTestCase(unittest.TestCase): - ''' FlexLogger regression tests ''' - - @classmethod - def setUp(cls) -> None: - umask = os.umask(0o000) - cls.top_level = tempfile.mkdtemp() - os.umask(umask) - - def test_errors_are_written_to_file(self): - logger = FlexLogger(self.__class__.__name__, self.top_level) - logger.error('Help! Help! 
The sky is falling!') - logfile = logger.logfile - self.assertTrue(os.path.exists(logfile), - f'there should be a log file: {logfile}') - self.assertNotEqual(0, os.path.getsize(logfile), - 'there should be an error in the log') - - def test_debugs_are_written_to_file_only_if_verbose(self): - logger = FlexLogger(self.__class__.__name__, self.top_level) - logger.debug('I am the walrus') - logfile = logger.logfile - self.assertTrue(os.path.exists(logfile), - f'there should be a log file: {logfile}') - self.assertEqual(0, os.path.getsize(logfile), - 'log should be empty') - os.rename(logfile, os.path.join(self.top_level, - 'non_verbose_debug.log')) - - logger = FlexLogger(self.__class__.__name__, self.top_level, True) - logger.debug('Is it time for lunch yet?') - logfile = logger.logfile - self.assertNotEqual(0, os.path.getsize(logfile), - 'there should be a message in the log now') - - def test_warnings_written_to_file_even_not_verbose(self): - logger = FlexLogger(self.__class__.__name__, self.top_level) - logger.warning('For the last time....') - logfile = logger.logfile - self.assertTrue(os.path.exists(logfile), - f'there should be a log file: {logfile}') - self.assertNotEqual(0, os.path.getsize(logfile), - 'there should be a warning in the log') - - def test_init_attempt_throws_fnf_if_dir_not_found(self): - with pytest.raises(FileNotFoundError): - FlexLogger(self.__class__.__name__, Path('/foo')) - - def test_init_attempt_throws_type_err_if_dir_not_found(self): - with pytest.raises(TypeError): - FlexLogger(self.__class__.__name__, None) - - def test_init_attempt_fails_if_dir_inaccessible(self): - test_dir = tempfile.mkdtemp() - # make directory non-writable - os.chmod(test_dir, 0o444) - - with pytest.raises(PermissionError): - FlexLogger(self.__class__.__name__, test_dir) - - # make directory writeable again so it'll get deleted - os.chmod(self.top_level, 0o555) - - -if __name__ == '__main__': - unittest.main() diff --git 
a/apps/cli/executables/datafetcher/test/retriever_test.py b/apps/cli/executables/datafetcher/test/retriever_test.py deleted file mode 100644 index 10963a78ff2d5a059c7c3b7241648de887422410..0000000000000000000000000000000000000000 --- a/apps/cli/executables/datafetcher/test/retriever_test.py +++ /dev/null @@ -1,432 +0,0 @@ -""" File retriever unit tests """ - -import http -import json -import os -import tempfile -import unittest -from pathlib import Path -from typing import List - -import pytest - -from datafetcher.errors import FileErrorException, MissingSettingsException, \ - SizeMismatchException, NGASServiceErrorException -from datafetcher.file_retrievers import NGASFileRetriever -from datafetcher.utilities import get_capo_settings, get_metadata_db_settings, \ - get_arg_parser, RetrievalMode, path_is_accessible, FlexLogger, MAX_TRIES, \ - ProductLocatorLookup, Cluster -from .testing_utils import TEST_PROFILE - -_A_FEW_TRIES = 3 -_NOTHING_EXPECTED_MSG = 'nothing should have been retrieved' - -class RetrieverTestCase(unittest.TestCase): - """ - Tests for product retrieval - """ - - @classmethod - def setUpClass(cls) -> None: - ''' do this before running tests ''' - - # local profile is required to force streaming - cls.profile = TEST_PROFILE - cls.settings = get_capo_settings(cls.profile) - cls.db_settings = get_metadata_db_settings(cls.profile) - cls.test_data = cls._initialize_13b_014_file_spec(cls) - - @classmethod - def setUp(cls) -> None: - ''' do this before running each test ''' - umask = os.umask(0o000) - cls.top_level = tempfile.mkdtemp() - cls._LOG = FlexLogger(cls.__class__.__name__, cls.top_level) - os.umask(umask) - - def test_retriever_accepts_valid_partial_args(self): - file_spec = self.test_data['files'][1] - - parser = get_arg_parser() - args = ['--product-locator', file_spec['product_locator'], - '--output-dir', self.top_level, - '--profile', self.profile] - namespace = parser.parse_args(args) - - server = file_spec['server']['server'] - - 
retriever = NGASFileRetriever(namespace, self._LOG) - retrieved = retriever.retrieve(server, RetrievalMode.STREAM, file_spec) - self.assertTrue(os.path.exists(retrieved), 'retrieved file must exist') - self.assertTrue(os.path.isfile(retrieved), - 'retrieved file must be a regular file') - self.assertEqual(file_spec['size'], os.path.getsize(retrieved), - f"expecting {os.path.basename(retrieved)} to be " - f"{file_spec['size']} bytes") - - def test_throws_file_exists_error_if_overwrite_not_forced(self): - ''' if the --force flag is supplied, any file that exists at the - destination should NOT be retrieved; throw error instead - ''' - file_spec = self.test_data['files'][0] - destination = os.path.join(self.top_level, file_spec['external_name']) - Path(destination).mkdir(parents=True, exist_ok=True) - self.assertTrue(os.path.isdir(destination)) - - # stick a fake SDM in there so it will fall over - fake_file = os.path.join(destination, file_spec['relative_path']) - with open(fake_file, 'w') as to_write: - to_write.write('as if!') - self.assertTrue(os.path.exists(fake_file)) - self.assertFalse(os.path.getsize(fake_file) == 0) - - args = ['--product-locator', file_spec['product_locator'], - '--output-dir', destination, - '--profile', self.profile] - namespace = get_arg_parser().parse_args(args) - server = file_spec['server']['server'] - retriever = NGASFileRetriever(namespace, self._LOG) - - # exception should be thrown because one of the files to be retrieved - # is in the destination dir - with pytest.raises(FileExistsError): - retriever.retrieve(server, RetrievalMode.STREAM, file_spec) - - def test_nothing_retrieved_in_dry_run(self): - file_spec = self.test_data['files'][0] - destination = os.path.join(self.top_level, file_spec['external_name']) - Path(destination).mkdir(parents=True, exist_ok=True) - self.assertTrue(os.path.isdir(destination)) - - args = ['--product-locator', file_spec['product_locator'], - '--output-dir', self.top_level, '--dry', '--profile', 
self.profile] - namespace = get_arg_parser().parse_args(args) - - server = file_spec['server']['server'] - retriever = NGASFileRetriever(namespace, self._LOG) - to_be_retrieved = os.path.join(destination, file_spec['relative_path']) - retriever.retrieve(server, RetrievalMode.STREAM, file_spec) - self.assertFalse(os.path.exists(to_be_retrieved), _NOTHING_EXPECTED_MSG) - self.assertTrue(retriever.fetch_attempted, - 'streaming_fetch() should have been entered') - - def test_verbose_log_has_debug_messages(self): - file_spec = self.test_data['files'][0] - destination = os.path.join(self.top_level, file_spec['external_name']) - Path(destination).mkdir(parents=True, exist_ok=True) - self.assertTrue(os.path.isdir(destination)) - - args = ['--product-locator', file_spec['product_locator'], - '--output-dir', destination, - '--profile', self.profile, '--verbose'] - namespace = get_arg_parser().parse_args(args) - - server = file_spec['server']['server'] - logger = FlexLogger(self.__class__.__name__, self.top_level, True) - retriever = NGASFileRetriever(namespace, logger) - to_be_retrieved = os.path.join(destination, file_spec['relative_path']) - retriever.retrieve( - server, RetrievalMode.STREAM, file_spec) - - files_retrieved = list() - for root, dirnames, filenames in os.walk(destination): - if dirnames: - subdir = os.path.join(root, dirnames[0]) - else: - subdir = root - to_add = [file for file in filenames - if not str(file).endswith('.log')] - for filename in to_add: - files_retrieved.append(os.path.join(subdir, filename)) - - self.assertEqual(1, len(files_retrieved), - 'one file should have been retrieved') - self.assertEqual(7566, os.path.getsize(to_be_retrieved), - f'expecting {to_be_retrieved} to be 7566 bytes') - - self.assertTrue(os.path.isfile(retriever.logfile), - f'expecting log file {os.path.basename(retriever.logfile)}') - self.assertNotEqual(0, os.path.getsize(retriever.logfile), - 'log file should not be empty') - - def test_non_verbose_log_empty(self): - 
file_spec = self.test_data['files'][0] - destination = os.path.join(self.top_level, file_spec['external_name']) - Path(destination).mkdir(parents=True, exist_ok=True) - self.assertTrue(os.path.isdir(destination)) - - args = ['--product-locator', file_spec['product_locator'], - '--output-dir', destination, - '--profile', self.profile] - namespace = get_arg_parser().parse_args(args) - - server = file_spec['server']['server'] - retriever = NGASFileRetriever(namespace, self._LOG) - to_be_retrieved = os.path.join(destination, file_spec['relative_path']) - retriever.retrieve( - server, RetrievalMode.STREAM, file_spec) - - files_retrieved = list() - for root, dirnames, filenames in os.walk(destination): - if dirnames: - subdir = os.path.join(root, dirnames[0]) - else: - subdir = root - to_add = [file for file in filenames if not str( - file).endswith('.log')] - for filename in to_add: - files_retrieved.append(os.path.join(subdir, filename)) - self.assertEqual(1, len(files_retrieved), - 'one file should have been retrieved') - self.assertEqual(7566, os.path.getsize(to_be_retrieved), - f'expecting {to_be_retrieved} to be 7566 bytes') - - logfile = self._LOG.logfile - self.assertTrue(os.path.isfile(logfile), - f'expecting log file {os.path.basename(logfile)}') - self.assertEqual(0, os.path.getsize(logfile), - 'log file should be empty') - - def test_stream_inaccessible_destination_throws_file_error(self): - file_spec = self.test_data['files'][0] - - # make directory read-only - os.chmod(self.top_level, 0o444) - self.assertFalse(path_is_accessible(self.top_level), - 'output directory should not be accessible') - - args = ['--product-locator', file_spec['product_locator'], - '--output-dir', self.top_level, '--profile', self.profile] - namespace = get_arg_parser().parse_args(args) - - with pytest.raises(FileErrorException): - NGASFileRetriever(namespace, self._LOG).retrieve( - file_spec['server']['server'], RetrievalMode.STREAM, file_spec) - - # make directory writeable again 
so it'll get deleted - os.chmod(self.top_level, 0o555) - - def test_stream_bad_destination_throws_service_error(self): - top_level = '/foo' - file_spec = self.test_data['files'][0] - args = ['--product-locator', file_spec['product_locator'], - '--output-dir', top_level, '--profile', self.profile] - namespace = get_arg_parser().parse_args(args) - - server = file_spec['server']['server'] - retrieve_method = RetrievalMode.COPY - retriever = NGASFileRetriever(namespace, self._LOG) - - destination = os.path.join(top_level, file_spec['external_name']) - to_be_retrieved = os.path.join(destination, file_spec['relative_path']) - - with pytest.raises(FileErrorException) as s_ex: - retriever.retrieve(server, retrieve_method, file_spec) - self.assertFalse(os.path.exists(to_be_retrieved), _NOTHING_EXPECTED_MSG) - details = s_ex.value - self.assertTrue('failure trying to create output directory /foo' in - details.args) - - def test_stream_no_data_throws_missing_setting(self): - file_spec = self.test_data['files'][0] - - args = ['--product-locator', file_spec['product_locator'], - '--output-dir', self.top_level, '--profile', self.profile] - namespace = get_arg_parser().parse_args(args) - - server = file_spec['server']['server'] - retrieve_method = RetrievalMode.STREAM - retriever = NGASFileRetriever(namespace, self._LOG) - - destination = os.path.join(self.top_level, file_spec['external_name']) - to_be_retrieved = os.path.join(destination, file_spec['relative_path']) - with pytest.raises(MissingSettingsException): - retriever.retrieve(server, retrieve_method, {}) - self.assertFalse(os.path.exists(to_be_retrieved), _NOTHING_EXPECTED_MSG) - - def test_wrong_size_throws_size_mismatch(self): - file_spec = self.test_data['files'][0] - # give it the wrong size to cause a SizeMismatchException - file_spec['size'] = 42 - - args = ['--product-locator', file_spec['product_locator'], - '--output-dir', self.top_level, '--profile', self.profile] - namespace = get_arg_parser().parse_args(args) 
- - server = file_spec['server']['server'] - retrieve_method = RetrievalMode.STREAM - retriever = NGASFileRetriever(namespace, self._LOG) - - destination = os.path.join(self.top_level, file_spec['external_name']) - to_be_retrieved = os.path.join(destination, file_spec['relative_path']) - with pytest.raises(SizeMismatchException): - retriever.retrieve(server, retrieve_method, file_spec) - self.assertFalse(os.path.exists(to_be_retrieved)) - - def test_stream_fetch_failure_throws_missing_setting(self): - file_spec = self.test_data['files'][0] - - args = ['--product-locator', file_spec['product_locator'], - '--output-dir', self.top_level, '--profile', self.profile] - namespace = get_arg_parser().parse_args(args) - - server = file_spec['server']['server'] - retrieve_method = RetrievalMode.STREAM - retriever = NGASFileRetriever(namespace, self._LOG) - - destination = os.path.join(self.top_level, file_spec['external_name']) - to_be_retrieved = os.path.join(destination, - 'not_the_droids_youre_looking_for') - with pytest.raises(MissingSettingsException): - retriever.retrieve(server, retrieve_method, {}) - self.assertFalse(os.path.exists(to_be_retrieved), _NOTHING_EXPECTED_MSG) - - def test_stream_cannot_connect_throws_service_error(self): - file_spec = self.test_data['files'][0] - - args = ['--product-locator', file_spec['product_locator'], - '--output-dir', self.top_level, '--profile', self.profile] - namespace = get_arg_parser().parse_args(args) - - server = 'foo' - retrieve_method = RetrievalMode.STREAM - retriever = NGASFileRetriever(namespace, self._LOG) - - destination = os.path.join(self.top_level, file_spec['external_name']) - to_be_retrieved = os.path.join(destination, file_spec['relative_path']) - with pytest.raises(NGASServiceErrorException): - retriever.retrieve(server, retrieve_method, file_spec) - self.assertFalse(os.path.exists(to_be_retrieved), _NOTHING_EXPECTED_MSG) - - def test_local_copy_attempt_raises_service_error(self): - ''' we can expect a copy 
ALWAYS to fail, - because NGAS can't write to a local destination - ''' - file_spec = self.test_data['files'][0] - - args = ['--product-locator', file_spec['product_locator'], - '--output-dir', self.top_level, '--profile', self.profile] - namespace = get_arg_parser().parse_args(args) - - server = file_spec['server']['server'] - retrieve_method = RetrievalMode.COPY - retriever = NGASFileRetriever(namespace, self._LOG) - - destination = os.path.join(self.top_level, file_spec['external_name']) - to_be_retrieved = os.path.join(destination, file_spec['relative_path']) - with pytest.raises(NGASServiceErrorException) as s_ex: - retriever.retrieve(server, retrieve_method, file_spec) - self.assertFalse(os.path.exists(to_be_retrieved), _NOTHING_EXPECTED_MSG) - details = s_ex.value.args[0] - self.assertEqual(http.HTTPStatus.BAD_REQUEST, details['status_code']) - - def test_no_retries_on_success(self): - self.assertTrue(path_is_accessible(self.top_level)) - file_spec = self.test_data['files'][1] - - args = ['--product-locator', file_spec['product_locator'], - '--output-dir', self.top_level, '--profile', self.profile] - namespace = get_arg_parser().parse_args(args) - - server = file_spec['server']['server'] - retriever = NGASFileRetriever(namespace, self._LOG) - destination = os.path.join(self.top_level, file_spec['relative_path']) - retriever.retrieve(server, RetrievalMode.STREAM, file_spec) - self.assertTrue(os.path.exists(destination)) - self.assertEqual(1, retriever.num_tries) - - def test_max_retries_on_failure(self): - file_spec = self.test_data['files'][0].copy() - - # give it an invalid version - file_spec['version'] = 126 - args = ['--product-locator', file_spec['product_locator'], - '--output-dir', self.top_level, '--profile', self.profile] - - namespace = get_arg_parser().parse_args(args) - - server = file_spec['server']['server'] - retriever = NGASFileRetriever(namespace, self._LOG) - - with pytest.raises(Exception): - retriever.retrieve(server, 
RetrievalMode.STREAM, file_spec) - self.assertEqual(MAX_TRIES, retriever.num_tries) - - - # -------------------------------------------------------------------------- - # - # U T I L I T I E S - # - # -------------------------------------------------------------------------- - - @staticmethod - def do_something_wrong(args: List): - raise NGASServiceErrorException(args) - - @staticmethod - def do_something_a_few_times(args: List): - return int(args[0]) - - def _initialize_13b_014_file_spec(self): - ext_name = '13B-014.sb29151475.eb29223944.56810.442529050924' - product_locator = ProductLocatorLookup(self.db_settings)\ - .look_up_locator_for_ext_name(ext_name) - server = {'server': 'nmngas03.aoc.nrao.edu:7777', - 'location': 'somewhere_else', - 'cluster': Cluster.DSOC} - - files = [ - { - 'ngas_file_id': 'uid___evla_sdm_X1401705435287.sdm', - 'external_name': ext_name, - 'subdirectory' : None, - 'product_locator': product_locator, - 'relative_path': 'ASDM.xml', - 'checksum': '-2040810571', - 'version': 1, - 'size': 7566, - 'server': server, - }, - { - 'ngas_file_id': 'uid___evla_sdm_X1401705435288.sdm', - 'external_name': ext_name, - 'subdirectory' : None, - 'product_locator': product_locator, - 'relative_path': 'Antenna.xml', - 'checksum': '1014682026', - 'version': 1, - 'size': 10505, - 'server': server, - } - - ] - return {'files': files} - - def _get_test_files(self): - ''' for the retriever interface: return each location report's - information -minus- server - ''' - files = [] - for location_report in self.test_data[0].files: - file = location_report.deepcopy() - del file['server'] - files.append(file) - return files - - def _get_test_filespec(self, target_filename): - ''' grab location report data for just the specified file ''' - test_data_dir = os.path.join(os.curdir, 'data') - self.assertTrue(os.path.isdir(test_data_dir)) - - report_file = os.path.join(test_data_dir, 'VLA_SMALL_EB.json') - self.assertTrue(os.path.isfile(report_file)) - with 
open(report_file, 'r') as content: - locations_report = json.loads(content.read()) - for file_spec in locations_report['files']: - if target_filename == file_spec['relative_path']: - return file_spec - - return None - -if __name__ == '__main__': - unittest.main() diff --git a/apps/cli/executables/datafetcher/test/testing_utils.py b/apps/cli/executables/datafetcher/test/testing_utils.py index 620d86e7549c45eda5fa593a1798b8d58a695a47..c4a03556e6cb1602dfb308d5272640a75ca4a49b 100644 --- a/apps/cli/executables/datafetcher/test/testing_utils.py +++ b/apps/cli/executables/datafetcher/test/testing_utils.py @@ -1,18 +1,28 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + """ Various conveniences for use and re-use in test cases """ import json import os from pathlib import Path -from datafetcher.locations_report import LocationsReport -from datafetcher.utilities import get_arg_parser +from pycapo import CapoConfig + +from src.datafetcher.errors import \ + MissingSettingsException, NoProfileException +from src.datafetcher.locations_report import \ + LocationsReport +from src.datafetcher.utilities import \ + REQUIRED_SETTINGS, get_arg_parser, \ + ExecutionSite TEST_PROFILE = 'local' LOCATION_REPORTS = { 'VLA_SMALL_EB': { 'filename' : 'VLA_SMALL_EB.json', - 'external_name' : 'TSKY_20min_B2319_18ms_001.58955.86469591435', + 'external_name' : 'sysstartS.58955.83384832176', 'file_count' : 44, 'server_count' : 2 }, @@ -31,7 +41,7 @@ LOCATION_REPORTS = { 'IMG': { 'filename' : 'IMG.json', 'external_name' : - 'VLASS1.1.ql.T01t01.J000232-383000.10.2048.v1.I.iter1.image.pbcor.tt0.subim.fits', + 'VLASS1.1.ql.T01t01.J000232-383000.10.2048.v1', 'file_count' : 2, 'server_count' : 2 }, @@ -51,19 +61,19 @@ LOCATION_REPORTS = { } def get_test_data_dir(): - here = os.path.abspath(os.curdir) - for root, dirnames, _ in os.walk(here): + """ where's our test data? 
""" + for root, dirnames, _ in os.walk(Path.cwd()): if str(root).endswith('test'): for dirname in dirnames: if dirname == 'data': - return os.path.join(root, dirname) + return Path(root, dirname) return None def get_locations_file(key: str): ''' return the location report file specified by key ''' report_spec = LOCATION_REPORTS[key] filename = report_spec['filename'] - return os.path.join(get_test_data_dir(), filename) + return Path(get_test_data_dir(), filename) def get_locations_report(key: str): ''' return the location report specified by key ''' @@ -121,11 +131,60 @@ def find_newest_fetch_log_file(target_dir: Path): logfiles = list() for root, _, filenames in os.walk(target_dir): for filename in filenames: - if filename.startswith('DataFetcher_') \ + if filename.startswith('DataFetcher') \ and filename.endswith('.log'): - logfiles.append(os.path.join(root, filename)) + logfiles.append(Path(root, filename)) if logfiles: return max(logfiles, key=os.path.getctime) return None + +def get_test_capo_settings(): + """ get the capo settings we'll need for the tests """ + capo = CapoConfig(profile=TEST_PROFILE) + result = dict() + for setting in REQUIRED_SETTINGS: + setting = setting.upper() + try: + result[REQUIRED_SETTINGS[setting]] = capo[setting] + except KeyError: + raise MissingSettingsException('missing required setting "{}"' + .format(setting)) + + if result is None or len(result) == 0: + raise MissingSettingsException('Required Capo settings were not found') + + for setting in result: + print(f'{setting} = {result[setting]}') + # be sure execution site is not DSOC nor NAASC + exec_site = result['execution_site'] + if ExecutionSite.DSOC.value in exec_site or ExecutionSite.NAASC.value in \ + exec_site: + result['execution_site'] = 'local_test' + + # be sure download location is accessible + dl_loc = result['download_dir'] + if not Path('/lustre').is_dir() and '/lustre' in dl_loc: + result['download_dir'] = '/var/tmp/' + + return result + +def 
get_metadata_db_settings(profile): + """ Get Capo settings needed to connect to archive DB + :param profile: + :return: + """ + result = dict() + if profile is None: + raise NoProfileException('CAPO_PROFILE required; none provided') + capo = CapoConfig(profile=TEST_PROFILE) + fields = ['jdbcDriver', 'jdbcUrl', 'jdbcUsername', 'jdbcPassword'] + qualified_fields = ['metadataDatabase.' + field for field in fields] + for field in qualified_fields: + try: + result[field] = capo.get(field) + except KeyError: + raise MissingSettingsException( + f'missing required setting "{field}"') + return result diff --git a/apps/cli/executables/datafetcher/tox.ini b/apps/cli/executables/datafetcher/tox.ini index 154c2bde13cc5fb7865c2a7b0c2b22976dc376c8..df64e93966468dc698889e3ec0ad79b39e1677fd 100644 --- a/apps/cli/executables/datafetcher/tox.ini +++ b/apps/cli/executables/datafetcher/tox.ini @@ -18,4 +18,9 @@ basepython = py27: {env:TOXPYTHON:python2.7} py35: {env:TOXPYTHON:python3.5} py36: {env:TOXPYTHON:python3.6} py37: {env:TOXPYTHON:python3.7} + py38: {env:TOXPYTHON:python3.8} + +[pytest] +console_output_style = progress +log_cli = True diff --git a/environment.yml b/environment.yml index edb577e135240d0b8fcbfe9c81d7566b8906035b..7b9e8d6d4e094044c7068bb91a7c4c02424a8e2c 100644 --- a/environment.yml +++ b/environment.yml @@ -18,6 +18,7 @@ dependencies: - pid=2.2 - psycopg2=2.8 - pycapo=0.2.1.post1 + - pyopenssl=19.1.0 - pyramid=1.10 - pyramid_debugtoolbar=4.5 - pysftp=0.2.9