""" Unit tests for fetch as a whole """

import os
import subprocess
import tempfile
import unittest
from pathlib import Path
from typing import List
from unittest.mock import MagicMock

import pytest
from datafetcher.commands import DataFetcher
from datafetcher.errors import Errors
from datafetcher.locations_report import LocationsReport
from datafetcher.utilities import get_arg_parser, ExecutionSite, \
    RetrievalMode, FlexLogger, ProductLocatorLookup

from .testing_utils import TEST_PROFILE, LOCATION_REPORTS, \
    get_locations_report, get_locations_file, \
    get_mini_locations_file, find_newest_fetch_log_file, get_test_data_dir, \
    get_metadata_db_settings, get_test_capo_settings

_VLA_SMALL_KEY = 'VLA_SMALL_EB'
_FETCH_COMMAND = 'datafetcher'
_LOCATION_FILENAME = 'locations.json'
_EB_EXTERNAL_NAME = 'sysstartS.58955.83384832176'
_ASDM_XML = 'ASDM.xml'

'''
N.B. some of these tests fail when run from within IntelliJ,
but they all pass when run with pytest at the command line.
TO EXECUTE THIS TEST: from apps/cli/executables/datafetcher, run

    pytest -v test/datafetcher_test.py

'''


class DataFetcherTestCase(unittest.TestCase):
    """ IMPORTANT NOTE: we CANNOT retrieve by copy if we don't have access to a
        location to which NGAS can write, e.g, lustre. Therefore, any test
        that involves -actual- retrieval of files must be by streaming, to
        ensure which we must use a Capo profile in which the execution site is
        -not- DSOC or NAASC.
        The reason is this algorithm used in LocationsReport:

         for f in files_report['files']:
             if f['server']['cluster'] == Cluster.DSOC and \
                     f['server']['location'] == self.settings['execution_site']:
                 f['server']['retrieve_method'] = RetrievalMode.COPY
             else:
                 f['server']['retrieve_method'] = RetrievalMode.STREAM


        Be sure to have on the test system a local profile (local.properties)
        that meets these criteria:

      - edu.nrao.archive.workflow.config.StartupSettings.temporaryDataDirectory
          pointing to a locally writable temp dir, e.g., /var/tmp
      - edu.nrao.archive.workflow.config.DeliverySettings.hostname
        must point to local computer
      - execution_site must NOT be DSOC or NAASC


    """

    @classmethod
    def setUpClass(cls) -> None:
        cls.profile = TEST_PROFILE
        cls.settings = get_test_capo_settings()
        cls.db_settings = get_metadata_db_settings(cls.profile)
        cls.test_data = cls._initialize_test_data()
        cls.DATA_DIR = get_test_data_dir()
        if cls.DATA_DIR is None:
            pytest.fail(f'test data directory not found under {os.getcwd()}')

    def setUp(self) -> None:
        umask = os.umask(0o000)
        self.top_level = tempfile.mkdtemp()
        os.umask(umask)
        self._LOG = FlexLogger(self.__class__.__name__, self.top_level)

    def test_bad_command_line(self):

        # bad product locator
        args = [_FETCH_COMMAND,
                '--product-locator', 'not-even',
                '--profile', TEST_PROFILE, '--output-dir', self.top_level]
        fetcher = CommandLineFetchLauncher(args, self._LOG)
        fetcher.run()
        exception_found = False
        terminal_exception_thrown = False

        bad_locator_logfile = find_newest_fetch_log_file(self.top_level)
        self.assertIsNotNone(bad_locator_logfile,
                             f'we should have gotten a log file in {self.top_level}')
        self.assertNotEqual(0, os.path.getsize(bad_locator_logfile),
                            f'expecting a non-empty log file in {self.top_level}')
        with open(bad_locator_logfile, 'r') as log:
            log_contents = log.readlines()

        for line in log_contents:
            if 'NoLocatorException' in line:
                exception_found = True
            if 'terminal_exception' in line:
                terminal_exception_thrown = True
            if exception_found and terminal_exception_thrown:
                break
        self.assertTrue(exception_found, 'expecting NoLocatorException')
        self.assertTrue(terminal_exception_thrown, 'terminal_exception should be thrown')
        Path.unlink(bad_locator_logfile)

        # nonexistent locations file
        args = [_FETCH_COMMAND, '--location-file', 'aint_got_one', '--output-dir',
                self.top_level, '--profile', TEST_PROFILE]

        fetcher = CommandLineFetchLauncher(args, self._LOG)
        fetcher.run()
        logfile = find_newest_fetch_log_file(self.top_level)
        with open(logfile, 'r') as log:
            log_contents = log.readlines()

        exception_found = False
        terminal_exception_thrown = False
        for line in log_contents:
            if 'FileNotFoundError' in line:
                exception_found = True
            if 'terminal_exception' in line:
                terminal_exception_thrown = True
            if exception_found and terminal_exception_thrown:
                break
        self.assertTrue(exception_found, 'expecting FileNotFoundError')

    def test_nothing_retrieved_if_dry_on_cmd_line(self):
        toplevel = Path(self.top_level)
        location_file = get_mini_locations_file(
            Path(toplevel, _LOCATION_FILENAME))
        args = [_FETCH_COMMAND,
                '--location-file', str(location_file),
                '--profile', TEST_PROFILE, '--output-dir', self.top_level,
                '--dry', '--verbose']
        fetcher = CommandLineFetchLauncher(args, self._LOG)
        output = fetcher.run()
        logfile = find_newest_fetch_log_file(self.top_level)
        self.assertEqual([], output, 'expecting no files for dry run')
        self.assertNotEqual(0, os.path.getsize(logfile),
                            'log file should be non-empty because verbose')
        Path.unlink(location_file)

        # make sure none of the files in the report was actually written
        file_count = 0
        for _, _, fnames in os.walk(Path(toplevel, _EB_EXTERNAL_NAME)):
            file_count += len(fnames)
        self.assertEqual(0, file_count, 'no files should have been retrieved')

    def test_force_overwrite_from_cmd_line(self):
        toplevel = Path(self.top_level)
        location_file = get_mini_locations_file(toplevel / _LOCATION_FILENAME)
        dest_dir = Path(toplevel, _EB_EXTERNAL_NAME)
        dest_dir.mkdir(parents=True, exist_ok=True)

        # make a fake file to be overwritten
        fake_file = dest_dir / _ASDM_XML
        with open(fake_file, 'w') as to_write:
            to_write.write('alas, my days are numbered')
        args = [_FETCH_COMMAND,
                '--location-file', str(location_file),
                '--profile', TEST_PROFILE,
                '--output-dir', self.top_level,
                '--force']
        CommandLineFetchLauncher(args, self._LOG).run()

        sizes = dict()
        for dirpath, _, fnames in os.walk(dest_dir):
            for fname in fnames:
                path = Path(dirpath, fname)
                sizes[path] = os.path.getsize(path)
        self.assertEqual(37, len(sizes), 'expecting 37 files to be fetched')
        fake_size = os.path.getsize(fake_file)
        self.assertEqual(9339, fake_size, f'expecting '
                                          f'{fake_file} to '
                                          f'be 9339 bytes')

    def test_no_overwrite_from_cmd_line(self):
        toplevel = Path(self.top_level)
        location_file = get_mini_locations_file(toplevel / _LOCATION_FILENAME)
        dest_dir = toplevel / _EB_EXTERNAL_NAME
        dest_dir.mkdir(parents=True, exist_ok=True)

        # make a fake file that shouldn't be overwritten
        fake_file = dest_dir / _ASDM_XML
        with open(fake_file, 'w') as to_write:
            to_write.write("I'm not going anywhere!")
        args = [_FETCH_COMMAND,
                '--location-file', str(location_file),
                '--profile', TEST_PROFILE, '--output-dir', self.top_level]
        fetcher = CommandLineFetchLauncher(args, self._LOG)
        fetcher.run()

        term_except_found = False
        file_exists_found = False
        logfile = find_newest_fetch_log_file(self.top_level)
        with open(logfile, 'r') as log:
            log_contents = log.readlines()
        for line in log_contents:
            if 'terminal_exception' in line:
                term_except_found = True
            if 'FileExistsError' in line:
                file_exists_found = True
            if term_except_found and file_exists_found:
                break

        self.assertTrue(term_except_found and file_exists_found,
                        'expecting terminal_exception for FileExistsError')

    def test_cmd_line_more_output_when_verbose(self):
        report_file = get_mini_locations_file(
            Path(self.top_level, 'locations_verbose.json'))
        args = [_FETCH_COMMAND,
                '--location-file', str(report_file),
                '--profile', TEST_PROFILE, '--output-dir', self.top_level,
                '--verbose']
        fetcher = CommandLineFetchLauncher(args, self._LOG)
        retrieved = fetcher.run()
        num_files_expected = 37
        self.assertEqual(num_files_expected, len(retrieved),
                         f'expecting {num_files_expected} files')

        verbose_logfile = find_newest_fetch_log_file(self.top_level)
        self.assertNotEqual(0, os.path.getsize(verbose_logfile),
                            'log should contain debug messages')

        for file in retrieved:
            Path.unlink(file)
        Path.unlink(verbose_logfile)

        # same thing, but without verbose
        args = [_FETCH_COMMAND,
                '--location-file', str(report_file),
                '--profile', TEST_PROFILE, '--output-dir', self.top_level]
        fetcher = CommandLineFetchLauncher(args, self._LOG)
        retrieved = fetcher.run()
        self.assertEqual(num_files_expected, len(retrieved),
                         f'expecting {num_files_expected} files')
        logfile = find_newest_fetch_log_file(self.top_level)
        self.assertEqual(0, os.path.getsize(logfile),
                         f'{logfile} should be empty')

    def test_can_stream_from_mini_locations_file(self):
        """ gin up a location report with just a few small files in it
            and confirm that we can actually stream them
        """
        report_file = get_mini_locations_file(
            Path(self.top_level, _LOCATION_FILENAME))
        args = ['--location-file', str(report_file),
                '--output-dir', self.top_level,
                '--profile', self.profile]
        namespace = get_arg_parser().parse_args(args)
        fetch = DataFetcher(namespace, self.settings)
        retrieved = fetch.run()
        file_count = len(retrieved)
        self.assertEqual(37, file_count)

    def test_verbose_writes_stuff_to_log(self):
        path = Path(self.top_level, _LOCATION_FILENAME)
        report_file = get_mini_locations_file(path)
        args = ['--location-file', str(report_file),
                '--output-dir', self.top_level,
                '--profile', self.profile, '--verbose']
        namespace = get_arg_parser().parse_args(args)
        fetch = DataFetcher(namespace, self.settings)
        fetch.run()

        logfile = fetch.logfile
        self.assertTrue(Path.is_file(logfile),
                        f'expecting log file {logfile}')
        self.assertNotEqual(0, os.path.getsize(logfile),
                            'there should be entries in the log file')

    def test_empty_log_if_not_verbose(self):
        path = Path(self.top_level, _LOCATION_FILENAME)
        report_file = get_mini_locations_file(path)
        args = ['--location-file', str(report_file),
                '--output-dir', self.top_level,
                '--profile', self.profile]
        namespace = get_arg_parser().parse_args(args)
        fetch = DataFetcher(namespace, self.settings)
        fetch.run()

        logfile = fetch.logfile
        self.assertTrue(Path.is_file(logfile),
                        f'expecting log file {logfile}')
        self.assertEqual(0, os.path.getsize(logfile),
                         'log file should be empty')

    def test_copy_attempt_throws_sys_exit_service_error(self):
        product_locator = self.test_data['13B-014']['product_locator']

        # use site from non-local profile to guarantee copy attempt
        local_exec_site = self.settings['execution_site']
        self.settings['execution_site'] = ExecutionSite.DSOC

        args = ['--product-locator', product_locator,
                '--output-dir', self.top_level,
                '--profile', self.settings['execution_site'].value,
                '--verbose']
        parser = get_arg_parser()
        namespace = parser.parse_args(args)
        fetch = DataFetcher(namespace, self.settings)
        servers_report = fetch.servers_report
        for server in servers_report:
            entry = servers_report[server]
            self.assertEqual(RetrievalMode.COPY.value,
                             entry['retrieve_method'].value,
                             'expecting copy retrieval at the execution site')

        # let's try just one file so we're not sitting here all day
        server = next(iter(servers_report))
        fetch.servers_report = {server: servers_report[server]}
        self.assertIsNotNone(fetch.servers_report[server])
        files = fetch.servers_report[server]['files']
        fetch.servers_report[server]['files'] = [files[0]]

        try:
            with pytest.raises(SystemExit) as s_ex:
                fetch.run()
            self.assertEqual(Errors.NGAS_SERVICE_ERROR.value, s_ex.value.code)
        finally:
            self.settings['execution_site'] = local_exec_site

    def test_dies_with_bad_server_info(self):
        report_file = get_locations_file('VLA_BAD_SERVER')
        args = ['--location-file', str(report_file),
                '--output-dir', self.top_level,
                '--profile', self.profile]
        namespace = get_arg_parser().parse_args(args)
        fetch = DataFetcher(namespace, self.settings)
        with pytest.raises(SystemExit) as s_ex:
            fetch.run()
        exc_code = s_ex.value.code
        expected = Errors.NGAS_SERVICE_ERROR.value
        self.assertEqual(expected, exc_code)

    def test_throws_sys_exit_file_exists_if_overwrite_not_forced(self):
        toplevel = Path(self.top_level)
        location_file = get_mini_locations_file(
            Path(self.top_level, _LOCATION_FILENAME))
        self.assertTrue(Path.exists(location_file),
                        f'expecting locations file at {location_file}')
        destination = Path(toplevel, _EB_EXTERNAL_NAME)
        Path(destination).mkdir(parents=True, exist_ok=True)
        self.assertTrue(Path.is_dir(destination))

        # stick a fake SDM in there so it will fall over
        fake_file = Path(destination, _ASDM_XML)
        with open(fake_file, 'w') as to_write:
            to_write.write('lalalalalala')
        self.assertTrue(Path.exists(fake_file))
        self.assertFalse(os.path.getsize(fake_file) == 0)

        args = ['--location-file', str(location_file),
                '--output-dir', self.top_level,
                '--profile', TEST_PROFILE]
        namespace = get_arg_parser().parse_args(args)

        # exception should be thrown because one of the files to be retrieved
        # is in the destination dir and we're not forcing overwrite here
        with pytest.raises(SystemExit) as exc:
            DataFetcher(namespace, self.settings).run()
        exc_code = exc.value.code
        expected = Errors.FILE_EXISTS_ERROR.value
        self.assertEqual(expected, exc_code)

    def test_overwrites_when_forced(self):
        report_metadata = LOCATION_REPORTS[_VLA_SMALL_KEY]
        external_name = report_metadata['external_name']
        toplevel = Path(self.top_level)
        destination = toplevel / external_name
        destination.mkdir(parents=True, exist_ok=True)
        self.assertTrue(Path.is_dir(destination))

        # stick a fake SDM in there to see if overwrite really happens
        to_overwrite = _ASDM_XML
        fake_file = destination / to_overwrite
        text = '"Bother!" said Pooh. "Lock phasers on that heffalump!"'
        with open(fake_file, 'w') as to_write:
            to_write.write(text)
        self.assertTrue(Path.exists(fake_file),
                        f'{to_overwrite} should have been created')
        self.assertEqual(len(text), os.path.getsize(fake_file),
                         f'before overwrite, {to_overwrite} should be'
                         f' {len(text)} bytes')

        json_path = destination / report_metadata['filename']
        report_file = get_mini_locations_file(json_path)
        args = ['--location-file', str(report_file),
                '--output-dir', self.top_level,
                '--profile', TEST_PROFILE, '--force']
        namespace = get_arg_parser().parse_args(args)
        report = LocationsReport(self._LOG, namespace, self.settings)

        # expecting 37 files
        files = report.files_report['files']

        sizes = [file['size'] for file in files]
        total_size_expected = sum(sizes)
        num_files_expected = 37
        self.assertEqual(num_files_expected, len(files),
                         f'expecting {num_files_expected} files in report')

        fetch = DataFetcher(namespace, self.settings)
        retrieved = fetch.run()
        self.assertEqual(num_files_expected, len(retrieved),
                         f'expected {num_files_expected} files but got {len(retrieved)}')

        # delete the .json so it doesn't mess up our total-size computation
        Path.unlink(report_file)

        total_size_actual = 0
        for dirpath, _, filenames in os.walk(destination):
            for fname in filenames:
                total_size_actual += os.path.getsize(Path(dirpath, fname))
        self.assertEqual(total_size_expected, total_size_actual,
                         f'expected total size={total_size_expected}; got {total_size_actual}')

    def test_sys_exit_file_error_on_bad_destination(self):
        file_spec = self.test_data['13B-014']
        args = ['--product-locator', file_spec['product_locator'],
                '--output-dir', '/foo',
                '--profile', self.profile]
        namespace = get_arg_parser().parse_args(args)
        with pytest.raises(SystemExit) as s_ex:
            DataFetcher(namespace, self.settings)
        self.assertEqual(Errors.FILE_NOT_FOUND_ERROR.value, s_ex.value.code,
                         'should throw FILE_NOT_FOUND_ERROR')

    def test_sys_exit_no_locator_for_bad_product_locator(self):
        args = ['--product-locator', '/foo',
                '--output-dir', self.top_level, '--profile', self.profile]
        namespace = get_arg_parser().parse_args(args)

        with pytest.raises(SystemExit) as s_ex:
            fetch = DataFetcher(namespace, self.settings)
            fetch.run()
        self.assertEqual(Errors.NO_LOCATOR.value, s_ex.value.code,
                         'should throw NO_LOCATOR')

    def test_gets_expected_test_data(self):
        self.assertIsNotNone(self.test_data['13B-014'])
        file_spec = self.test_data['13B-014']
        self.assertEqual('13B-014.sb28862036.eb29155786.56782.5720116088',
                         file_spec['external_name'])
        locator = file_spec['product_locator']
        self.assertTrue(locator.startswith('uid://evla/execblock/'))

    def test_gets_vlbas_from_report_file(self):
        report_file = get_locations_file('VLBA_EB')
        args = ['--location-file', str(report_file),
                '--output-dir', self.top_level, '--profile', self.profile]
        namespace = get_arg_parser().parse_args(args)
        fetch = DataFetcher(namespace, self.settings)
        report_files = fetch.locations_report.files_report['files']

        self.assertEqual(16, len(report_files),
                         'expecting 16 files in the locations report')
        expected_files = [Path(self.top_level, item['relative_path'])
                          for item in report_files]

        # files we're getting take waaaaayyy too long to fetch in a test case,
        # so we're mocking DataFetcher.run()
        fetch.run = MagicMock(return_value=expected_files)
        actual_files = fetch.run()
        num_expected = len(expected_files)
        self.assertEqual(num_expected, len(actual_files),
                         f'expecting {num_expected} VLBA files in {self.top_level}')

        match_count = 0
        for exp_file in expected_files:
            for act_file in actual_files:
                if act_file.name == exp_file.name:
                    match_count += 1
                    break
        self.assertEqual(num_expected, match_count,
                         f'{num_expected - match_count} file(s) are '
                         f'unaccounted for')

    def test_gets_large_vla_ebs_from_report_file(self):
        report_file = get_locations_file('VLA_LARGE_EB')
        args = ['--location-file', str(report_file),
                '--output-dir', self.top_level, '--profile', self.profile]
        namespace = get_arg_parser().parse_args(args)
        fetch = DataFetcher(namespace, self.settings)
        report_files = fetch.locations_report.files_report['files']
        self.assertEqual(46, len(report_files), 'expecting 46 files')
        toplevel = Path(self.top_level)
        expected_files = [toplevel / item['relative_path']
                          for item in report_files]
        fetch.run = MagicMock(return_value=expected_files)
        actual_files = fetch.run()
        num_expected = len(expected_files)
        self.assertEqual(num_expected, len(actual_files),
                         f'expecting {num_expected} VLA files')

    def test_gets_images_from_report_file(self):
        report_file = get_locations_file('IMG')
        args = ['--location-file', str(report_file),
                '--output-dir', self.top_level, '--profile', self.profile]
        namespace = get_arg_parser().parse_args(args)
        fetch = DataFetcher(namespace, self.settings)
        report_files = fetch.locations_report.files_report['files']
        self.assertEqual(2, len(report_files),
                         f'expecting 2 report files in {self.top_level}')
        toplevel = Path(self.top_level)
        expected_files = [toplevel / item['relative_path']
                          for item in report_files]
        # files are too big to fetch in a test; mock DataFetcher.run()
        fetch.run = MagicMock(return_value=expected_files)
        actual_files = fetch.run()
        num_expected = len(expected_files)
        self.assertEqual(num_expected, len(actual_files),
                         f'expecting {num_expected} image files')

    def test_gets_calibration_from_report_file(self):
        report_file = get_locations_file('CALIBRATION')
        args = ['--location-file', str(report_file),
                '--output-dir', self.top_level, '--profile', self.profile]
        namespace = get_arg_parser().parse_args(args)
        fetch = DataFetcher(namespace, self.settings)
        report_files = fetch.locations_report.files_report['files']
        self.assertEqual(1, len(report_files),
                         f'expecting 1 report file in {self.top_level}')
        file_spec = report_files[0]

        # calibration will have external name = relative path = subdirectory
        relative_path = file_spec['relative_path']
        self.assertEqual(relative_path, file_spec['subdirectory'],
                         'expecting relative_path same as subdirectory')

        expected_files = [Path(self.top_level, relative_path)]
        fetch.run = MagicMock(return_value=expected_files)
        actual_files = fetch.run()
        num_expected = len(expected_files)
        self.assertEqual(num_expected, len(actual_files),
                         f'expecting {num_expected} calibration file(s)')

    def test_gets_calibration_from_locator(self):
        external_name = LOCATION_REPORTS['CALIBRATION']['external_name']
        product_locator = ProductLocatorLookup(
            self.db_settings).look_up_locator_for_ext_name(external_name)
        args = ['--product-locator', product_locator,
                '--output-dir', self.top_level, '--profile', self.profile]
        namespace = get_arg_parser().parse_args(args)
        fetch = DataFetcher(namespace, self.settings)
        report_files = fetch.locations_report.files_report['files']
        self.assertEqual(1, len(report_files),
                         f'{external_name} should be 1 file in {self.top_level}')

        file_spec = report_files[0]

        # calibration will have external name = relative path = subdirectory
        relative_path = file_spec['relative_path']
        self.assertEqual(external_name, relative_path,
                         'expecting external_name same as relative path')
        self.assertEqual(relative_path, file_spec['subdirectory'],
                         'expecting relative_path same as subdirectory')

        expected_files = [Path(self.top_level) / relative_path]
        fetch.run = MagicMock(return_value=expected_files)
        actual_files = fetch.run()
        num_expected = len(expected_files)
        self.assertEqual(num_expected, len(actual_files),
                         f'expecting {num_expected} calibration file(s)')

    def test_retrieval_finds_size_mismatch(self):
        report_spec = LOCATION_REPORTS[_VLA_SMALL_KEY]
        external_name = report_spec['external_name']

        data_dir = Path(self.DATA_DIR)
        locations_file = data_dir / 'VLA_SMALL_EB_BUSTED.json'
        args = ['--location-file', str(locations_file),
                '--output-dir', self.top_level, '--profile', self.profile]
        namespace = get_arg_parser().parse_args(args)
        fetch1 = DataFetcher(namespace, self.settings)
        report_files = fetch1.locations_report.files_report['files']
        self.assertEqual(44, len(report_files),
                         f'{locations_file.name} should have 44 files')

        filename = 'Weather.xml'
        for file in report_files:
            if filename == file['relative_path']:
                self.assertEqual(165100, file['size'])
                break

        product_locator = ProductLocatorLookup(self.db_settings) \
            .look_up_locator_for_ext_name(external_name)
        args = ['--product-locator', product_locator,
                '--output-dir', self.top_level, '--profile', self.profile]
        namespace = get_arg_parser().parse_args(args)
        fetch2 = DataFetcher(namespace, self.settings)

        locations_report = get_locations_report(_VLA_SMALL_KEY)
        fetch2.run = MagicMock(return_value=locations_report['files'])
        locator_files = fetch2.run()
        self.assertEqual(len(report_files), len(locator_files),
                         'should get same no. files from locator as from '
                         'report file')
        for file1 in report_files:
            for file2 in locator_files:
                if file2['relative_path'] == file1['relative_path']:
                    if filename != file1['relative_path']:
                        self.assertEqual(file2['size'], file1['size'],
                                         'sizes should match')
                    else:
                        self.assertNotEqual(file2['size'], file1['size'],
                                            f'sizes for {filename} should differ')
                    break

    def test_throws_sys_exit_missing_setting_if_no_args(self):
        args = []
        with pytest.raises(SystemExit) as s_ex:
            get_arg_parser().parse_args(args)
        self.assertEqual(Errors.MISSING_SETTING.value, s_ex.value.code,
                         'should throw MISSING_SETTING error')

    def test_throws_sys_exit_no_locator_if_no_product_locator(self):
        args = ['--product-locator', '',
                '--output-dir', self.top_level, '--profile', self.profile]
        namespace = get_arg_parser().parse_args(args)

        with pytest.raises(SystemExit) as s_ex:
            DataFetcher(namespace, self.settings)
        self.assertEqual(Errors.NO_LOCATOR.value, s_ex.value.code,
                         'should throw NO_LOCATOR error')

    # --------------------------------------------------------------------------
    #
    #        U T I L I T I E S
    #
    # --------------------------------------------------------------------------

    @staticmethod
    def _remove_large_files_from_location_report(locations_in: LocationsReport):
        ''' strip files > 100000 bytes from location report, so we can try
            an actual stream without it taking forever

            :returns: LocationsReport
        '''

        files = locations_in['files']
        locations_out = locations_in.copy()
        locations_out['files'] = \
            [file for file in files if file['size'] <= 100000]
        return locations_out

    @classmethod
    def _initialize_test_data(cls):
        ext_name = '13B-014.sb28862036.eb29155786.56782.5720116088'

        product_locator = ProductLocatorLookup(cls.db_settings) \
            .look_up_locator_for_ext_name(ext_name)
        dict13b = {'external_name': ext_name,
                   'product_locator': product_locator}

        to_return = {'13B-014': dict13b}
        return to_return


class CommandLineFetchLauncher:
    """ Launches DataFetcher from command line, with logging
    """

    def __init__(self, args: List, logger: FlexLogger):
        args_to_parse = args if args[0] != _FETCH_COMMAND else args[1:]
        self._LOG = logger
        namespace = get_arg_parser().parse_args(args_to_parse)
        self.args = args
        self.output_dir = namespace.output_dir
        self.verbose = namespace.verbose

    def run(self):
        ''' launch fetch from the command line

            :returns: list of files retrieved, excluding logs and .json
            location reports
        '''
        with subprocess.Popen(self.args,
                              stdout=subprocess.PIPE,
                              stderr=subprocess.STDOUT,
                              bufsize=1,
                              universal_newlines=True) as proc:
            # stderr is merged into stdout above, so only stdout needs reading;
            # collect the output lines so we can log everything after an error
            lines = [line.strip() for line in proc.stdout]

            for i, line in enumerate(lines):
                self._LOG.debug(f'{line}')
                if 'error' in line.lower():
                    # log everything from the error onward
                    for error_line in lines[i:]:
                        self._LOG.error(error_line)
                if 'debug' in line.lower() and self.verbose:
                    self._LOG.debug(line)
                if 'warn' in line.lower():
                    self._LOG.warning(line)

        files_retrieved = []
        for root, _, filenames in os.walk(self.output_dir):
            for filename in filenames:
                if not filename.endswith('.log') and not filename.endswith('.json'):
                    # os.walk's root is the directory containing these files
                    files_retrieved.append(Path(root, filename))

        return files_retrieved


if __name__ == '__main__':
    unittest.main()