# df_pytest_utils.py

#!/usr/bin/python
# -*- coding: utf-8 -*-

""" Various conveniences for use and re-use in test cases """

import json
import logging
import os
import shutil
import sys
import tempfile
from pathlib import Path
from typing import Iterator

# Put the current working directory and its parent at the front of the module
# search path; presumably so the project-local imports further down resolve
# when the tests are launched from inside the test directory -- TODO confirm.
sys.path.insert(0, str(Path('.').absolute()))
sys.path.insert(0, str(Path('..').absolute()))


# TODO: Some Fine Day: this duplicates same function in package tester.
#  CAVEAT PROGRAMMOR: attempts to centralize it have resulted in tears.
def get_project_root() -> Path:
    """
    Get the root of this project.

    Starting with this file's own path and then walking up through its
    ancestors, return the first path whose final component ends with
    'data' or 'code'.

    :return: the first matching path
    :raises FileNotFoundError: if no path up to and including the top of the
        tree has such a name
    """
    my_path = Path(__file__)
    for candidate in (my_path, *my_path.parents):
        if candidate.name.endswith(('data', 'code')):
            return candidate

    # Without this guard the walk would spin forever at the top of the tree,
    # because the parent of the filesystem root is the root itself.
    raise FileNotFoundError(
        f"no ancestor of {my_path} has a name ending in 'data' or 'code'")

import pytest

from pycapo import CapoConfig

# pylint: disable=C0115, C0116, C0200, R0902, R0903, R0914, R1721, W0212, W0613, W0621, W0703, W1203
sys.path.insert(0, str(get_project_root()))
from shared.workspaces.test.utilities import get_locations_report, \
    get_test_data_dir

from datafetcher.datafetcher import DataFetcher
from datafetcher.return_codes import ReturnCode
from datafetcher.errors import MissingSettingsException, NoProfileException
from datafetcher.locations_report import LocationsReport
from datafetcher.utilities import REQUIRED_SETTINGS, get_arg_parser, \
    ExecutionSite, ProductLocatorLookup, RetrievalMode

# Capo profile the helpers and fixtures in this file pass to CapoConfig and
# to the data fetcher's '--profile' option.
TEST_PROFILE = 'local'
# The 'code' entries of the ReturnCode values for a missing setting and a
# missing profile; launch_datafetcher() and evaluate_args_and_capo() use them
# as return / exit codes.
MISSING_SETTING = ReturnCode.MISSING_SETTING.value['code']
MISSING_PROFILE = ReturnCode.MISSING_PROFILE.value['code']
# NOTE(review): not referenced in the visible part of this file; presumably a
# switch that test modules consult to skip slower cases -- confirm.
RUN_ALL = True

# Locations reports known to the tests, keyed by report name.  Every entry
# holds the report's JSON file name (always '<key>.json'), its external name,
# and its 'file_count' and 'server_count' figures.
LOCATION_REPORTS = {
    report_key: {
        'filename': f'{report_key}.json',
        'external_name': external_name,
        'file_count': file_count,
        'server_count': server_count,
    }
    for report_key, external_name, file_count, server_count in (
        ('VLA_SMALL_EB', 'sysstartS.58955.83384832176', 44, 2),
        ('VLA_LARGE_EB',
         '17B-197.sb34812522.eb35115211.58168.58572621528', 46, 2),
        ('VLA_BAD_SERVER',
         'TSKY_20min_B2319_18ms_001.58955.86469591435', 1, 1),
        ('IMG', 'VLASS1.1.ql.T01t01.J000232-383000.10.2048.v1', 2, 2),
        ('VLBA_EB', '', 16, 1),
        ('CALIBRATION', '18B-265_2019_12_10_T00_00_59.203.tar', 1, 1),
    )
}


def get_locations_file(key: str):
    """
    Locate the locations report file registered under a given name.

    :param key: name of an entry in LOCATION_REPORTS
    :return: path of that entry's file inside the test data directory

    """

    # look the entry up first, so an unknown key fails before anything else
    report_filename = LOCATION_REPORTS[key]['filename']
    data_dir = get_test_data_dir()
    return Path(data_dir) / report_filename


def write_locations_file(destination: Path, locations_report: LocationsReport):
    """
    Dump the 'files' section of a locations report to a JSON file.

    :param destination: where locations file is to be written
    :param locations_report: report whose 'files' entry is written out

    :return: ``destination``, as passed in

    """

    with open(destination, mode='w') as out_file:
        # only the 'files' entry is kept, wrapped in a one-key object
        json.dump({'files': locations_report['files']}, out_file, indent=4)

    return destination


def get_mini_exec_block():
    """
    Build a cut-down copy of the 'VLA_SMALL_EB' locations report.

    :return: copy of the report whose 'files' list keeps only the entries
        with a 'size' of at most 100000

    """

    full_report = get_locations_report('VLA_SMALL_EB')
    trimmed_report = full_report.copy()

    small_files = []
    for entry in full_report['files']:
        if entry['size'] <= 100000:
            small_files.append(entry)

    # replace the list on the copy only; the source report keeps its own
    trimmed_report['files'] = small_files
    return trimmed_report


def get_mini_locations_file(destination):
    """
    Write the report produced by get_mini_exec_block() to a JSON file.

    :param destination: where the downsized locations file is to be written
    :return: ``destination``, as passed in

    """

    mini_report = get_mini_exec_block()
    with open(destination, mode='w') as out_file:
        # only the 'files' entry is kept, wrapped in a one-key object
        json.dump({'files': mini_report['files']}, out_file, indent=4)

    return destination


def get_filenames_for_locator(product_locator: str, settings: dict):
    """
    For a given product locator, list the relative paths of all the files
    named in its locations report.

    :param product_locator: locator passed as '--product-locator'
    :param settings: settings handed to LocationsReport
    :return: list of the 'relative_path' value of each file entry
    """

    cli_args = [
        '--product-locator', product_locator,
        '--profile', TEST_PROFILE,
        '--output-dir', None,
    ]
    namespace = get_arg_parser().parse_args(cli_args)
    report = LocationsReport(None, namespace, settings)

    relative_paths = []
    for entry in report.files_report['files']:
        relative_paths.append(entry['relative_path'])
    return relative_paths


def find_newest_fetch_log_file(target_dir: Path):
    """
    Find the most recent data-fetcher log below a directory.

    The whole tree under ``target_dir`` is searched for files whose name
    starts with 'DataFetcher' and ends with '.log'; the candidates are ranked
    by ``os.path.getctime``.

    :param target_dir: location of log file(s)
    :return: path of the newest matching log, or None if there is none
    """

    candidates = [
        Path(folder, name)
        for folder, _, names in os.walk(target_dir)
        for name in names
        if name.startswith('DataFetcher') and name.endswith('.log')
    ]

    # default=None covers the "no log found" case without a separate branch
    return max(candidates, key=os.path.getctime, default=None)


def get_test_capo_settings():
    """ Collect the Capo settings the tests need, adjusted for a test run.

    Every name in REQUIRED_SETTINGS is upper-cased, read from the
    TEST_PROFILE Capo configuration and stored under the key that
    REQUIRED_SETTINGS maps it to.

    :return: dict of settings
    :raises MissingSettingsException: if a lookup raises KeyError, or if no
        settings were collected at all
    """
    capo = CapoConfig(profile=TEST_PROFILE)
    result = {}
    for required in REQUIRED_SETTINGS:
        capo_key = required.upper()
        try:
            value = capo[capo_key]
            result[REQUIRED_SETTINGS[capo_key]] = value
        except KeyError as k_err:
            raise MissingSettingsException('missing required setting "{}"'
                                           .format(capo_key)) from k_err

    if not result:
        raise MissingSettingsException('Required Capo settings were not found')

    # be sure execution site is not DSOC nor NAASC
    exec_site = result['execution_site']
    at_dsoc_or_naasc = (ExecutionSite.DSOC.value in exec_site
                        or ExecutionSite.NAASC.value in exec_site)
    if at_dsoc_or_naasc:
        result['execution_site'] = 'local_test'

    # be sure download location is accessible
    dl_loc = result['download_dir']
    if not Path('/lustre').is_dir():
        if '/lustre' in dl_loc:
            result['download_dir'] = '/var/tmp/'

    return result


def get_metadata_db_settings(profile):
    """ Get Capo settings needed to connect to archive DB

    :param profile: name of the Capo profile to read the settings from
    :return: dict mapping each 'metadataDatabase.*' setting name
        (jdbcDriver, jdbcUrl, jdbcUsername, jdbcPassword) to its value
    :raises NoProfileException: if ``profile`` is None
    :raises MissingSettingsException: if looking a setting up raises KeyError
    """
    if profile is None:
        raise NoProfileException('CAPO_PROFILE required; none provided')

    # Read from the profile the caller asked for; this used to be hard-wired
    # to TEST_PROFILE, which silently ignored the argument.
    capo = CapoConfig(profile=profile)

    result = {}
    for field in ('jdbcDriver', 'jdbcUrl', 'jdbcUsername', 'jdbcPassword'):
        qualified = 'metadataDatabase.' + field
        try:
            result[qualified] = capo.get(qualified)
        except KeyError as k_err:
            raise MissingSettingsException(
                f'missing required setting "{qualified}"') from k_err
    return result


@pytest.fixture(autouse=True, scope='function')
def make_tempdir() -> Iterator[str]:
    """
    Creates a new temporary working directory under /var/tmp for each test
    and removes it again when the test is over.

    :return: yields the directory's path as the string that
        ``tempfile.mkdtemp`` returns
    """
    umask = os.umask(0o000)
    try:
        top_level = tempfile.mkdtemp(prefix='datafetcher_test_', dir='/var/tmp')
    finally:
        # put the process umask back even if the directory can't be created
        os.umask(umask)

    try:
        yield top_level
    finally:
        # Best-effort teardown so test runs don't pile up directories in
        # /var/tmp; a tree the test already removed is not an error.
        shutil.rmtree(top_level, ignore_errors=True)


@pytest.fixture(scope='session')
def capo_settings():
    """
    Gets the test Capo settings once per test session.

    :return: yields whatever get_test_capo_settings() returns
    """

    yield get_test_capo_settings()


@pytest.fixture(scope='session')
def settings(capo_settings):
    """
    Bundles everything the datafetcher tests need into one Settings object:
        Capo settings, metadata database settings, test data

    :param capo_settings: the session's Capo settings fixture
    :return: yields the populated Settings instance
    """
    database_settings = get_metadata_db_settings(TEST_PROFILE)
    # the test data lookup needs the database settings, so it comes second
    shared_test_data = _initialize_test_data(database_settings)
    bundle = Settings(capo_settings, database_settings, shared_test_data)
    yield bundle


def _initialize_test_data(db_settings):
    """
    Look up the product locator for one fixed external name, for use in
    several tests.

    :param db_settings: settings passed to ProductLocatorLookup
    :return: dict with the 'external_name' and its 'product_locator'
    """

    ext_name = '13B-014.sb28862036.eb29155786.56782.5720116088'
    lookup = ProductLocatorLookup(db_settings)

    return {
        'external_name': ext_name,
        'product_locator': lookup.look_up_locator_for_ext_name(ext_name),
    }


class Settings:
    """ Plain holder for the three groups of settings the tests share """

    def __init__(self, capo_settings, db_settings, test_data):
        # Capo settings for the data fetcher
        self.capo_settings: dict = capo_settings
        # settings for connecting to the metadata database
        self.db_settings: dict = db_settings
        # data prepared once and reused by several tests
        self.test_data: dict = test_data


def launch_datafetcher(args: list, df_capo_settings: dict) -> int:
    """ invoke the DF with these args as in df.main(),
        launch it with df.run(),
        and return the appropriate return/error code

    :param args: command-line arguments for the data fetcher
    :param df_capo_settings: Capo settings handed to the DataFetcher
    :return: MISSING_SETTING if ``args`` is empty; otherwise the result of
        DataFetcher.run(), the exit code of a SystemExit raised along the
        way, or MISSING_PROFILE after a KeyError / NoProfileException.
        Any other exception fails the running test via pytest.fail().
    """
    if not args:
        return MISSING_SETTING

    try:
        namespace = evaluate_args_and_capo(args, df_capo_settings)
        fetcher = DataFetcher(namespace, df_capo_settings)
        return fetcher.run()
    except SystemExit as exc:
        # SystemExit carries its status in .code; it has no .value attribute,
        # so there is nothing else to probe for here.
        return exc.code
    except (KeyError, NoProfileException) as exc:
        logging.error(f'{exc}')
        return MISSING_PROFILE
    except Exception as exc:
        pytest.fail(f'{exc}')


def evaluate_args_and_capo(args: list, capo_settings: dict):
    """
    Parse data-fetcher command-line arguments, falling back to the profile
    in the Capo settings when the arguments do not name one.

    :param args: command-line arguments; the caller's list is not modified
    :param capo_settings: settings consulted for a 'profile' entry
    :return: namespace produced by the data fetcher's argument parser
    :raises SystemExit: with MISSING_SETTING if ``args`` is empty, or with
        MISSING_PROFILE if neither ``args`` nor ``capo_settings`` supplies
        a profile
    """

    if not args:
        sys.exit(MISSING_SETTING)

    # get_profile_from_args() reports "no profile" as an empty string, so
    # test for truthiness rather than for None.
    if not get_profile_from_args(args):
        profile = (capo_settings or {}).get('profile')
        if not profile:
            sys.exit(MISSING_PROFILE)
        # args is a list of command-line tokens: add the option as a
        # flag/value pair, on a copy so the caller's list stays untouched.
        args = [*args, '--profile', profile]

    return get_arg_parser().parse_args(args)


def get_profile_from_args(args: list) -> str:
    """
    Pull the value of the first usable '--profile' option out of an
    argument list.

    :param args: command-line arguments
    :return: the argument that follows '--profile', or '' when the flag is
        absent or is the very last argument
    """
    # pairing each argument with its successor skips a trailing flag for free
    for flag, value in zip(args, args[1:]):
        if flag == '--profile':
            return value

    return ''


def confirm_retrieve_mode_copy(servers_report: dict) -> None:
    """
    Assert that every entry of a servers report has a 'retrieve_method'
    whose value equals that of RetrievalMode.COPY.

    :param servers_report: mapping of server to its report entry
    """
    for entry in servers_report.values():
        assert entry['retrieve_method'].value == RetrievalMode.COPY.value