From 73bd43c5bad04b4cc9ea6bf2d04282847e547139 Mon Sep 17 00:00:00 2001 From: Daniel K Lyons <dlyons@nrao.edu> Date: Wed, 7 Apr 2021 10:34:13 -0600 Subject: [PATCH] Split destination classes out into separate files and finish cleaning up the tests --- .../executables/delivery/delivery/context.py | 12 - .../delivery/delivery/deliverer.py | 319 +----------------- .../delivery/destinations/__init__.py | 0 .../delivery/destinations/checksum.py | 53 +++ .../delivery/destinations/fetchfile.py | 34 ++ .../delivery/destinations/interfaces.py | 172 ++++++++++ .../delivery/delivery/destinations/local.py | 92 +++++ .../delivery/destinations/sharedweb.py | 37 ++ .../delivery/delivery/destinations/tar.py | 49 +++ .../cli/executables/delivery/test/test_cli.py | 49 ++- 10 files changed, 478 insertions(+), 339 deletions(-) create mode 100644 apps/cli/executables/delivery/delivery/destinations/__init__.py create mode 100644 apps/cli/executables/delivery/delivery/destinations/checksum.py create mode 100644 apps/cli/executables/delivery/delivery/destinations/fetchfile.py create mode 100644 apps/cli/executables/delivery/delivery/destinations/interfaces.py create mode 100644 apps/cli/executables/delivery/delivery/destinations/local.py create mode 100644 apps/cli/executables/delivery/delivery/destinations/sharedweb.py create mode 100644 apps/cli/executables/delivery/delivery/destinations/tar.py diff --git a/apps/cli/executables/delivery/delivery/context.py b/apps/cli/executables/delivery/delivery/context.py index 45d9d7ee0..b039f8214 100644 --- a/apps/cli/executables/delivery/delivery/context.py +++ b/apps/cli/executables/delivery/delivery/context.py @@ -7,8 +7,6 @@ import argparse import pathlib import secrets -from pycapo import CapoConfig - from .deliverer import DeliveryContextIF, Destination, DestinationBuilder from .finder import HeuristicProductFinder, ProductFinder @@ -48,16 +46,6 @@ class DeliveryContext(DeliveryContextIF): def __repr__(self): return f"<DeliveryContext {self.__dict__}>" - def create_tempfile(self, prefix: str, suffix: str) -> pathlib.Path: - """ - Create a temporary file, using the given prefix and suffix. - - :param prefix: prefix for the tempfile name - :param suffix: suffix for the tempfile name - :return: the path to the temporary file - """ - raise NotImplementedError - def token(self) -> str: """ If a delivery only requires one token, just use this property diff --git a/apps/cli/executables/delivery/delivery/deliverer.py b/apps/cli/executables/delivery/delivery/deliverer.py index d95c79f8a..36e6ca276 100644 --- a/apps/cli/executables/delivery/delivery/deliverer.py +++ b/apps/cli/executables/delivery/delivery/deliverer.py @@ -3,223 +3,12 @@ # D E S T I N A T I O N S Y S T E M # # ------------------------------------------------------------------------- -import abc -import hashlib -import os -import pathlib -import shutil -import tarfile -import tempfile -from abc import ABC, abstractmethod -from types import TracebackType -from typing import BinaryIO, Optional, Type, Iterator, AnyStr, Iterable - -from pycapo import CapoConfig - - -class DeliveryContextIF(ABC): - @abstractmethod - def create_tempfile(self, prefix: str, suffix: str) -> pathlib.Path: - pass - - @abstractmethod - def token(self) -> str: - pass - - -class DestinationTempFile(ABC): - """ - A DestinationFile is a file that you can create in the destination. Initially a temporary file, - it will be added to the destination when you are finished with it. This is a way to create files - you can write to during delivery, which are still routed through the destination mechanism at the end. - """ - - @abstractmethod - def close(self): - """ - Close the file (and add it to the destination at your desired path) - """ - pass - - @abstractmethod - def file(self) -> BinaryIO: - """ - Access the raw file for writing - :return: a file for writing - """ - pass - - @abstractmethod - def filename(self) -> str: - """ - Access the temporary path of this file during construction - :return: the path to the temporary file - """ - pass - - -class Destination(abc.ABC): - """ - Destinations are locations that files can be copied into. They might not - always be on a local disk; FTP or Globus could also be destinations. - - The destination API is very simply, consisting just of adding files. - """ - - def __init__(self, context: DeliveryContextIF): - self.context = context - - @abc.abstractmethod - def add_file(self, file: pathlib.Path, relative_path: str): - """ - Add a file to the destination at the given relative path. - :param file: the file (whose contents we are delivering) - :param relative_path: the relative path to that file (in the delivery root) - """ - pass - - @abc.abstractmethod - def create_file(self, relative_path: str) -> DestinationTempFile: - """ - Create a file in the destination. When the file is closed, it will be added - to the destination via the add_file method at the specified relative path. - :param relative_path: the relative path where the file should eventually be placed - """ - pass - - def add_directory(self, directory: pathlib.Path, relative_path: str): - """ - Add a directory and its contents recursively to the destination at the given path. - - Do not override this method! add_file must be called for every file that gets delivered. - :param directory: the directory (whose files we will deliver) - :param relative_path: the relative path to this directory (in the delivery root) - """ - for entry in directory.iterdir(): - if entry.is_file(): - self.add_file(entry, relative_path + "/" + entry.name) - else: - self.add_directory(entry, relative_path + "/" + entry.name) - - def close(self): - """ - Close the destination, signalling to this and possibly destinations in the stack - that we are finished adding new files to the destination. - """ - pass - - @abc.abstractmethod - def result_url(self) -> str: - """ - Returns a URL to the results. - :return: URL pointing to the results - """ - pass - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - # ensure that if we are used as a context manager ('with' statement) - # that the destinations do get properly closed - self.close() - - -class DestinationDecorator(Destination): - def __init__(self, underlying: Destination): - self.underlying = underlying - super().__init__(underlying.context) - - def add_file(self, file: pathlib.Path, relative_path: str): - self.underlying.add_file(file, relative_path) - - def create_file(self, relative_path: str) -> DestinationTempFile: - # so this is a bit of a hack, but there's only one thing that makes temporary files - # if we don't have the following lines of code, adding the tempfile at the end of processing - # goes directly to the LocalDestination rather than passing through all the layers of the layer cake - temporary_file = self.underlying.create_file(relative_path) - temporary_file.destination = self - return temporary_file - - def close(self): - self.underlying.close() - - def result_url(self) -> str: - return self.underlying.result_url() - - -class ChecksumDecorator(DestinationDecorator): - """ - Generate a SHA1SUMS file in the content root with an entry for every file that got delivered. - """ - - def __init__(self, underlying: Destination): - super().__init__(underlying) - self.sumsfile = underlying.create_file("SHA1SUMS") - - def add_file(self, file: pathlib.Path, relative_path: str): - # add the file to the archive the normal way - super().add_file(file, relative_path) - - # generate the hash and keep it - self.sumsfile.file().writelines([self.hash_file_line(file, relative_path).encode("utf8")]) - - def hash_file_line(self, file: pathlib.Path, relative_path: str): - """ - Generate a line for the checksum file. - - :param file: file to hash - :param relative_path: relative path to show in checksum file - :return: a line like "<hash> <relative_path>" - """ - return f"{self.hash_file(file)} {relative_path}\n" - - @staticmethod - def hash_file(file: pathlib.Path): - # code courtesy of Stack Overflow: https://stackoverflow.com/a/59056796/812818 - with open(file, "rb") as f: - file_hash = hashlib.sha1() - while chunk := f.read(8192): - file_hash.update(chunk) - return file_hash.hexdigest() # to get a printable str instead of bytes - - def close(self): - self.sumsfile.close() - super().close() - - -class TarArchiver(DestinationDecorator): - """ - This decorator creates a local tar archive. Calls to add_file - are intercepted and instead the file contents are added to the - tar archive. When close() is called, we finalize the tar file - and place it in the delivery area. - """ - - def __init__(self, context: DeliveryContextIF, underlying: Destination): - super().__init__(underlying) - self.context = context - self.archive, self.archive_tempfile = None, None - - def add_file(self, file: pathlib.Path, relative_path: str): - # ensure we have created the tar archive - self.ensure_archive_created(relative_path) - - # add the file at the offset - self.archive.add(file, relative_path) - - def ensure_archive_created(self, relative_path: str): - # if we don't have the archive property yet, we must create the archive now - if not self.archive: - self.archive_tempfile = self.underlying.create_file( - relative_path.split("/")[0] + ".tar" - ) - self.archive = tarfile.open(mode="w:", fileobj=self.archive_tempfile.file()) - - def close(self): - self.archive.close() - self.archive_tempfile.close() - self.underlying.close() +from .destinations.interfaces import * +from .destinations.tar import TarArchiver +from .destinations.local import LocalDestination +from .destinations.checksum import ChecksumDecorator +from .destinations.sharedweb import SharedWebDestination +from .destinations.fetchfile import FetchFileGenerator # class SubdirectoryDecorator(DestinationDecorator): @@ -231,98 +20,6 @@ class TarArchiver(DestinationDecorator): # self.underlying.add_file(self.subdirectory + "/" + relative_path, file) -class LocalDestination(Destination): - """ - LocalDestination is for delivering to a local directory on the filesystem. - """ - - def __init__(self, context: DeliveryContextIF, path: pathlib.Path): - super().__init__(context) - self.path = path - - def add_file(self, file: pathlib.Path, relative_path: str): - """ - Copy contents of file to new file with path relative_path; creates directories if they don't exist - - :param file: Source file whose contents are getting copied - :param relative_path: Relative path to new file in destination - """ - # ensure that the directory exist - if not (self.path / relative_path).parent.exists(): - (self.path / relative_path).parent.mkdir(parents=True) - - # now copy the file - # if we don't care about file metadata we could use copy() instead - shutil.copy2(file, self.path / relative_path) - - def create_file(self, relative_path: str) -> DestinationTempFile: - # make a temporary file in the local destination - rawfile = tempfile.NamedTemporaryFile( - prefix=relative_path.split("/")[-1], dir=self.path, delete=False - ) - return LocalDestinationTempFile(self, relative_path, rawfile) - - def result_url(self) -> str: - """ - Return a file:/// URL for this location - """ - return pathlib.Path(self.path.absolute()).as_uri() - - -class LocalDestinationTempFile(DestinationTempFile): - def __init__( - self, - destination: LocalDestination, - relative_path: str, - tempfile: tempfile.NamedTemporaryFile, - ): - self.destination = destination - self.relative_path = relative_path - self.tempfile = tempfile - - def close(self): - self.tempfile.close() - self.destination.add_file(pathlib.Path(self.tempfile.name), self.relative_path) - os.unlink(self.tempfile.name) - - def file(self) -> BinaryIO: - return self.tempfile - - def filename(self) -> str: - return self.tempfile.name - - -class WgetFileGenerator(DestinationDecorator): - def __init__(self, underlying: Destination): - super().__init__(underlying) - self.fetch_script = underlying.create_file("fetch-all.sh") - self.fetch_script.file().writelines([b"#!/bin/sh\n", b"\n"]) - - def add_file(self, file: pathlib.Path, relative_path: str): - super().add_file(file, relative_path) - self.fetch_script.file().writelines( - [f"wget {self.underlying.result_url()}/{relative_path}\n".encode("utf8")] - ) - - def close(self): - self.fetch_script.close() - super().close() - - -class SharedWebDestination(DestinationDecorator): - def __init__(self, context: DeliveryContextIF): - settings = CapoConfig().settings("edu.nrao.archive.workflow.config.DeliverySettings") - ld = LocalDestination( - context, - pathlib.Path(settings.nraoDownloadDirectory) / "anonymous" / context.token(), - ) - super().__init__(ld) - self.download_url = f"{settings.nraoDownloadUrl}/anonymous/{context.token}" - - def result_url(self) -> str: - return self.download_url - - class DestinationBuilder: """ To facilitate building a stack of destination and its decorators. @@ -346,7 +43,7 @@ class DestinationBuilder: return self def curlfile(self): - self._destination = WgetFileGenerator(self._destination) + self._destination = FetchFileGenerator(self._destination) return self def tar(self): @@ -356,6 +53,4 @@ class DestinationBuilder: def build(self): """Create the destination""" - # We *always* want checksums, so that goes on last here - # return ChecksumDecorator(self.context, self._destination) return self._destination diff --git a/apps/cli/executables/delivery/delivery/destinations/__init__.py b/apps/cli/executables/delivery/delivery/destinations/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/apps/cli/executables/delivery/delivery/destinations/checksum.py b/apps/cli/executables/delivery/delivery/destinations/checksum.py new file mode 100644 index 000000000..f680ae67a --- /dev/null +++ b/apps/cli/executables/delivery/delivery/destinations/checksum.py @@ -0,0 +1,53 @@ +import hashlib +import pathlib + +from .interfaces import DestinationDecorator, Destination + + +class ChecksumDecorator(DestinationDecorator): + """ + Generate a SHA1SUMS file in the content root with an entry for every file that got delivered. + """ + + def __init__(self, underlying: Destination): + super().__init__(underlying) + self.sumsfile = underlying.create_file("SHA1SUMS") + + def add_file(self, file: pathlib.Path, relative_path: str): + # add the file to the archive the normal way + super().add_file(file, relative_path) + + # generate the hash and keep it + self.sumsfile.file().writelines([self.hash_file_line(file, relative_path).encode("utf8")]) + + def hash_file_line(self, file: pathlib.Path, relative_path: str): + """ + Generate a line for the checksum file. + + :param file: file to hash + :param relative_path: relative path to show in checksum file + :return: a line like "<hash> <relative_path>" + """ + return f"{self.hash_file(file)} {relative_path}\n" + + @staticmethod + def hash_file(file: pathlib.Path): + # You would expect performance to be worse than calling a C program here, but in my own testing I found + # that the ensuing block is somewhat *faster* than calling "shasum" directly. I was able to hash a 1 GB + # random file in 1.2 seconds with the following code, versus 1.8 seconds with shasum. This is across about 10 + # trials of each, with a warm cache in each case. So I would not refactor this code to shell out without + # doing more performance tests + # + # code courtesy of Stack Overflow: https://stackoverflow.com/a/59056796/812818 + with open(file, "rb") as f: + file_hash = hashlib.sha1() + while chunk := f.read(8192): + file_hash.update(chunk) + return file_hash.hexdigest() # to get a printable str instead of bytes + + def close(self): + # first close the hash file + self.sumsfile.close() + + # now proceed + super().close() diff --git a/apps/cli/executables/delivery/delivery/destinations/fetchfile.py b/apps/cli/executables/delivery/delivery/destinations/fetchfile.py new file mode 100644 index 000000000..089103fa1 --- /dev/null +++ b/apps/cli/executables/delivery/delivery/destinations/fetchfile.py @@ -0,0 +1,34 @@ +import pathlib + +from .interfaces import DestinationDecorator, Destination + + +class FetchFileGenerator(DestinationDecorator): + """ + Generates a shell script which can be used to fetch all the files related to a request. + The script uses "wget". + """ + + def __init__(self, underlying: Destination): + super().__init__(underlying) + + # generate the file + self.fetch_script = underlying.create_file("fetch-all.sh") + + # write some lines to it + self.fetch_script.file().writelines([b"#!/bin/sh\n", b"\n"]) + + def add_file(self, file: pathlib.Path, relative_path: str): + super().add_file(file, relative_path) + + # add a line to the fetch script for this file + self.fetch_script.file().writelines( + [f"wget {self.underlying.result_url()}/{relative_path}\n".encode("utf8")] + ) + + def close(self): + # first close the script + self.fetch_script.close() + + # proceed + super().close() diff --git a/apps/cli/executables/delivery/delivery/destinations/interfaces.py b/apps/cli/executables/delivery/delivery/destinations/interfaces.py new file mode 100644 index 000000000..b8805b505 --- /dev/null +++ b/apps/cli/executables/delivery/delivery/destinations/interfaces.py @@ -0,0 +1,172 @@ +import pathlib +from abc import ABC, abstractmethod +from typing import BinaryIO + + +class DeliveryContextIF(ABC): + """ + The DeliveryContext is something that is available to destinations during + processing for shared utility functions. + """ + + @abstractmethod + def token(self) -> str: + """ + Returns a unique token for this delivery. Guaranteed to be the same across multiple calls. + :return: a string token + """ + pass + + +class DestinationTempFile(ABC): + """ + A DestinationFile is a file that you can create in the destination. Initially a temporary file, + it will be added to the destination when you are finished with it. This is a way to create files + you can write to during delivery, which are still routed through the destination mechanism at the end. + """ + + @abstractmethod + def close(self): + """ + Close the file (and add it to the destination at your desired path) + """ + pass + + @abstractmethod + def file(self) -> BinaryIO: + """ + Access the raw file for writing + :return: a file for writing + """ + pass + + @abstractmethod + def filename(self) -> str: + """ + Access the temporary path of this file during construction + :return: the path to the temporary file + """ + pass + + +class Destination(ABC): + """ + Destinations are locations that files can be copied into. They might not + always be on a local disk; FTP or Globus could also be destinations. + + The destination API is very simply, consisting just of adding files. + """ + + def __init__(self, context: DeliveryContextIF): + self.context = context + + @abstractmethod + def add_file(self, file: pathlib.Path, relative_path: str): + """ + Add a file to the destination at the given relative path. + :param file: the file (whose contents we are delivering) + :param relative_path: the relative path to that file (in the delivery root) + """ + pass + + @abstractmethod + def create_file(self, relative_path: str) -> DestinationTempFile: + """ + Create a file in the destination. When the file is closed, it will be added + to the destination via the add_file method at the specified relative path. + :param relative_path: the relative path where the file should eventually be placed + """ + pass + + def add_directory(self, directory: pathlib.Path, relative_path: str): + """ + Add a directory and its contents recursively to the destination at the given path. + + Do not override this method! add_file must be called for every file that gets delivered. + :param directory: the directory (whose files we will deliver) + :param relative_path: the relative path to this directory (in the delivery root) + """ + for entry in directory.iterdir(): + if entry.is_file(): + self.add_file(entry, relative_path + "/" + entry.name) + else: + self.add_directory(entry, relative_path + "/" + entry.name) + + def close(self): + """ + Close the destination, signalling to this and possibly destinations in the stack + that we are finished adding new files to the destination. + """ + pass + + @abstractmethod + def result_url(self) -> str: + """ + Returns a URL to the results. + :return: URL pointing to the results + """ + pass + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + # ensure that if we are used as a context manager ('with' statement) + # that the destinations do get properly closed + self.close() + + +class DestinationDecorator(Destination): + """ + This is a useful base class for destinations that augment the functionality of other destinations. + In general, if you add functionality to a destination, you should do it through the decorator facility + and then make corresponding changes to the builder to support it. + """ + + def __init__(self, underlying: Destination): + """ + Create this destination wrapping an underlying destination. + :param underlying: the underlying destination that does real-er work + """ + self.underlying = underlying + super().__init__(underlying.context) + + def add_file(self, file: pathlib.Path, relative_path: str): + """ + Add a file to the destination. In most cases, your decorator should intercept this call, + do something useful with the file and then propagate it to the underlying destination. + :param file: file to add to the destination + :param relative_path: path to the file in the result + """ + self.underlying.add_file(file, relative_path) + + def create_file(self, relative_path: str) -> DestinationTempFile: + """ + Create a temporary file with the underlying destination. In most cases your decorator should leave this call + alone; if you override, be sure to read the comments in the method and copy the hack. + + :param relative_path: path to the eventual home of this temporary file + :return: a DestinationTempFile that can be written to + """ + # This is a bit of a hack, but there's only one thing that makes temporary files properly now + # and it's necessary for the rest of the machinery to work. Better ideas for solving this are welcome. + # if we don't have the following lines of code, adding the tempfile at the end of processing + # goes directly to the LocalDestination rather than passing through all the layers of the layer cake + temporary_file = self.underlying.create_file(relative_path) + temporary_file.destination = self + return temporary_file + + def close(self): + """ + Close the underlying destination. In most cases, your decorator will want to intercept this call to do any + finalization work that might be needed, such as closing DestinationTempFiles. It's essential that you remember + to propagate the close call to the underlying destination, or the rest of delivery will fail. + """ + self.underlying.close() + + def result_url(self) -> str: + """ + In most cases you should leave this alone unless you want to modify the URL that gets + returned to the console at the end of delivery. + """ + return self.underlying.result_url() diff --git a/apps/cli/executables/delivery/delivery/destinations/local.py b/apps/cli/executables/delivery/delivery/destinations/local.py new file mode 100644 index 000000000..7302e2acc --- /dev/null +++ b/apps/cli/executables/delivery/delivery/destinations/local.py @@ -0,0 +1,92 @@ +import os +import shutil +import tempfile +from typing import BinaryIO + +from .interfaces import Destination, DeliveryContextIF, DestinationTempFile +import pathlib + + +class LocalDestination(Destination): + """ + LocalDestination is for delivering to a local directory on the filesystem. + """ + + def __init__(self, context: DeliveryContextIF, path: pathlib.Path): + super().__init__(context) + self.path = path + + def add_file(self, file: pathlib.Path, relative_path: str): + """ + Copy contents of file to new file with path relative_path; creates directories if they don't exist + + :param file: Source file whose contents are getting copied + :param relative_path: Relative path to new file in destination + """ + # ensure that the directory exists. we must check, because we get an exception + # if we try to create one that already exists + if not (self.path / relative_path).parent.exists(): + (self.path / relative_path).parent.mkdir(parents=True) + + # now copy the file + # if we don't care about file metadata we could use copy() instead + shutil.copy2(file, self.path / relative_path) + + def create_file(self, relative_path: str) -> DestinationTempFile: + """ + Creates a temporary file in the destination which will eventually be added at the supplied relative path. + + :param relative_path: the path to the eventual file + :return: a DestinationTempFile you can write to + """ + # make a temporary file in the local destination + + # the temporary file will have a prefix of just the filename + prefix_name = relative_path.split("/")[-1] + + # make the named temporary file + rawfile = tempfile.NamedTemporaryFile(prefix=prefix_name, dir=self.path, delete=False) + + # hand off to the LocalDestinationTempFile, which handles the rest + return LocalDestinationTempFile(self, relative_path, rawfile) + + def result_url(self) -> str: + """ + Return a file:/// URL for this location + """ + return pathlib.Path(self.path.absolute()).as_uri() + + +class LocalDestinationTempFile(DestinationTempFile): + """ + Implements the DestinationTempFile functionality for local destinations. Presumably if we wind up with other + kinds of destination (globus or streaming or something like that) we will need to greatly reconsider how to + implement this. + """ + + def __init__( + self, + destination: LocalDestination, + relative_path: str, + tempfile: tempfile.NamedTemporaryFile, + ): + self.destination = destination + self.relative_path = relative_path + self.tempfile = tempfile + + def close(self): + # The key idea here is that after we close the temp file but before we delete it, we add it to the destination + self.tempfile.close() + + # now that the file is finalized, we can add it to the destination using the path to the named temporary file + # and the relative path the user originally requested + self.destination.add_file(pathlib.Path(self.tempfile.name), self.relative_path) + + # now that the file has been delivered, we can remove it safely + os.unlink(self.tempfile.name) + + def file(self) -> BinaryIO: + return self.tempfile + + def filename(self) -> str: + return self.tempfile.name diff --git a/apps/cli/executables/delivery/delivery/destinations/sharedweb.py b/apps/cli/executables/delivery/delivery/destinations/sharedweb.py new file mode 100644 index 000000000..bd5b9ee34 --- /dev/null +++ b/apps/cli/executables/delivery/delivery/destinations/sharedweb.py @@ -0,0 +1,37 @@ +import pathlib + +from .interfaces import DestinationDecorator, DeliveryContextIF +from .local import LocalDestination +from pycapo import CapoConfig + + +class SharedWebDestination(DestinationDecorator): + """ + Users who do not specify a local destination receive the standard shared web destination automatically. + This is a location in a shared filesystem where a web service is configured to allow access files. + Otherwise, it behaves like a local destination. + """ + + def __init__(self, context: DeliveryContextIF): + # The implementation trick here is basically that the constructor makes it look like a final destination, + # but internally it is a decorator for a local destination with some slightly different + # initialization parameters, which come from Capo + capo = CapoConfig().settings("edu.nrao.archive.workflow.config.DeliverySettings") + + # eventually, we will need to plumb the username into here instead of anonymous + username = "anonymous" + + # determine the destination directory + destination_dir = pathlib.Path(capo.nraoDownloadDirectory) / username / context.token() + + # LocalDestination won't create a directory, so we'll do that here + destination_dir.mkdir(parents=True) + + # we're ready to build the local destination and thread it through our super constructor + super().__init__(LocalDestination(context, destination_dir)) + + # generate the download URL for later usage + self.download_url = f"{capo.nraoDownloadUrl}/{username}/{context.token()}" + + def result_url(self) -> str: + return self.download_url diff --git a/apps/cli/executables/delivery/delivery/destinations/tar.py b/apps/cli/executables/delivery/delivery/destinations/tar.py new file mode 100644 index 000000000..2e0b12690 --- /dev/null +++ b/apps/cli/executables/delivery/delivery/destinations/tar.py @@ -0,0 +1,49 @@ +import pathlib +import tarfile + +from .interfaces import DestinationDecorator, Destination, DeliveryContextIF + + +class TarArchiver(DestinationDecorator): + """ + This decorator creates a local tar archive. Calls to add_file + are intercepted and instead the file contents are added to the + tar archive. When close() is called, we finalize the tar file + and it is placed in the delivery area. + """ + + def __init__(self, context: DeliveryContextIF, underlying: Destination): + super().__init__(underlying) + self.context = context + self.archive, self.archive_tempfile = None, None + + def add_file(self, file: pathlib.Path, relative_path: str): + # ensure we have created the tar archive + self.ensure_archive_created(relative_path) + + # add the file at the offset + self.archive.add(file, relative_path) + + def ensure_archive_created(self, relative_path: str): + # if we don't have the archive property yet, we must create the archive now + if not self.archive: + # the filename we generate will be the path to the first thing we're delivering with ".tar" appended + # in practice, this is going to be a directory name and it will work out nicely + # in the future, we may need to revisit this practice if we want more control over the filenames + archive_filename = relative_path.split("/")[0] + ".tar" + + # create the archive as a temporary file + self.archive_tempfile = self.underlying.create_file(archive_filename) + + # open the file + self.archive = tarfile.open(mode="w:", fileobj=self.archive_tempfile.file()) + + def close(self): + # the tarfile documentation is explicit that closing the archive does NOT close the underlying file + # therefore, both steps are necessary: the first to finalize the archive, + # the second to flush and close the file + self.archive.close() + self.archive_tempfile.close() + + # now we can proceed + self.underlying.close() diff --git a/apps/cli/executables/delivery/test/test_cli.py b/apps/cli/executables/delivery/test/test_cli.py index c996de475..24357edcf 100644 --- a/apps/cli/executables/delivery/test/test_cli.py +++ b/apps/cli/executables/delivery/test/test_cli.py @@ -1,6 +1,7 @@ # Testing the CLI import filecmp import os +import pathlib import shutil import tarfile from unittest.mock import patch @@ -63,16 +64,22 @@ def test_web_rawdata_no_tar(tmpdir_factory): """ Test that a directory in the source is copied to a directory in the destination in the manner expected. """ - temp_directory = str(tmpdir_factory.mktemp("test_web_rawdata_no_tar")) + temp_directory = pathlib.Path(tmpdir_factory.mktemp("test_web_rawdata_no_tar")) test_data_path = "../../../../shared/workspaces/test/test_data/spool/724126739/" eb_name = "17A-109.sb33151331.eb33786546.57892.65940042824" test_context = DeliveryContext.parse_commandline(["-r", test_data_path]) - with patch("delivery.context.CapoConfig.settings") as mocked_capo_settings: - mocked_capo_settings.return_value.nraoDownloadDirectory = temp_directory - assert temp_directory == mocked_capo_settings().nraoDownloadDirectory - destination_path = Delivery().deliver(test_context) + with patch("delivery.destinations.sharedweb.CapoConfig.settings") as mocked_capo_settings: + mocked_capo_settings.return_value.nraoDownloadDirectory = str(temp_directory) + mocked_capo_settings.return_value.nraoDownloadUrl = "http://testing" + assert str(temp_directory) == mocked_capo_settings().nraoDownloadDirectory + destination_url = Delivery().deliver(test_context) + + # determine the destination by looking at the URL + actual_delivery_dir = temp_directory / destination_url.lstrip("http://testing") + # compare the source and destination - compare_dirs = filecmp.dircmp(destination_path / eb_name, f"{test_data_path}{eb_name}") + compare_dirs = filecmp.dircmp(actual_delivery_dir / eb_name, f"{test_data_path}{eb_name}") + # did the comparison report they are the same assert len(compare_dirs.left_only) == 0 assert len(compare_dirs.right_only) == 0 @@ -80,29 +87,41 @@ def test_web_rawdata_no_tar(tmpdir_factory): def test_web_rawdata_with_tar(tmpdir_factory): - temp_directory = str(tmpdir_factory.mktemp("test_web_rawdata_with_tar")) + temp_directory = pathlib.Path(tmpdir_factory.mktemp("test_web_rawdata_with_tar")) test_data_path = "../../../../shared/workspaces/test/test_data/spool/724126739/" test_context = DeliveryContext.parse_commandline(["-r", "-t", test_data_path]) - with patch("delivery.context.CapoConfig.settings") as mocked_capo_settings: + + with patch("delivery.destinations.sharedweb.CapoConfig.settings") as mocked_capo_settings: mocked_capo_settings.return_value.nraoDownloadDirectory = temp_directory + mocked_capo_settings.return_value.nraoDownloadUrl = "http://testing" assert temp_directory == mocked_capo_settings().nraoDownloadDirectory - destination_path = Delivery().deliver(test_context) + destination_url = Delivery().deliver(test_context) eb_name = "17A-109.sb33151331.eb33786546.57892.65940042824" - tar_path = destination_path / "724126739.tar.gz" + + # determine the destination by looking at the URL + actual_delivery_dir = temp_directory / destination_url.lstrip("http://testing") + # does a tar exist where we think - assert os.path.exists(tar_path) + tar_path = actual_delivery_dir / (eb_name + ".tar") + assert tar_path.exists() + # is it the only thing there (did cleanup work) - assert len(os.listdir(temp_directory)) == 1 + assert len(list(actual_delivery_dir.iterdir())) == 3 + # is it actually a tar assert tarfile.is_tarfile(tar_path) + # lets unpack it - shutil.unpack_archive(tar_path, temp_directory + "/extracted") + shutil.unpack_archive(tar_path, temp_directory / "extracted") + # did it output what we expect - assert os.path.exists(temp_directory + "/extracted/" + eb_name) + assert (temp_directory / "extracted" / eb_name).exists() + # compare the extracted results with the source compare_dirs = filecmp.dircmp( - temp_directory + "/extracted/" + eb_name, (test_data_path + eb_name) + temp_directory / "extracted" / eb_name, (test_data_path + eb_name) ) + # is the source and extracted the same assert len(compare_dirs.left_only) == 0 assert len(compare_dirs.right_only) == 0 -- GitLab