Compare revisions

Changes are shown as if the source revision was being merged into the target revision.
Target project: ssa/workspaces
Commits on Source (54), showing 299 additions and 300 deletions
[paths]
source =
./
/code/
/packages/
/code/packages
\ No newline at end of file
......@@ -65,11 +65,12 @@ services/capability/capability.log
services/capability/workflow.log
services/workflow/workflow.log
services/notification/notification.log
**/.coverage
**/.coverage*
**/htmlcov
**/coverage.xml
**/coverage.json
pyproject.toml
package-lock.json
# Ignore docker volume mount points
services/**/**/apps
......
stages:
- build-base
- push-base
- run-schema
- build-dev
- unit-test-dev
- test-coverage
- push-dev
- clean-images
# - deploy-dev
- deploy-coverage-page
- deploy-dev
- .post
# - deploy-test
variables:
PROJECT_NAME: "workspaces"
DEFAULT_DEPLOY_ENV: "dev"
# Postgres Service Variables
POSTGRES_DB: archive
POSTGRES_USER: "archive"
POSTGRES_PASSWORD: "docker"
# CI Postgres Service
services:
- name: marconi.aoc.nrao.edu/ops/ci/db:workspaces
alias: db
image: docker:19.03.12
workflow:
rules:
- if: $CI_MERGE_REQUEST_TITLE =~ /^WIP:|^Draft:/
when: never
- if: $CI_MERGE_REQUEST_IID
- if: $CI_COMMIT_TAG
- if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
......@@ -38,6 +51,7 @@ build base image:
- if: '$CI_PIPELINE_SOURCE == "merge_request_event" && $CI_COMMIT_MESSAGE =~ /\A(?i)-ops/'
changes:
- Dockerfile.base
- docker.properties
# Push Base Image Stage
push base image:
......@@ -50,24 +64,48 @@ push base image:
- if: '$CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH && $CI_COMMIT_MESSAGE =~ /\A(?i)-ops/'
changes:
- Dockerfile.base
- docker.properties
# Run Schema
run schema:
stage: run-schema
image: ${REGISTRY_URL}/ops/base:${PROJECT_NAME}
script:
- export PGPASSWORD=$POSTGRES_PASSWORD
- cd schema && ./bin/run-migrations.sh "docker"
rules:
- if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
when: always
- if: '$CI_PIPELINE_SOURCE == "merge_request_event"'
when: always
# Build Stages
build dev workflow:
stage: build-dev
variables:
SERVICE_NAME: "workflow"
PATH_PREFIX: "services/"
extends: .build
build dev capability:
stage: build-dev
variables:
SERVICE_NAME: "capability"
PATH_PREFIX: "services/"
extends: .build
build dev notification:
stage: build-dev
variables:
SERVICE_NAME: "notification"
PATH_PREFIX: "services/"
extends: .build
build dev web:
stage: build-dev
variables:
SERVICE_NAME: "web"
PATH_PREFIX: "apps/"
extends: .build
## Test Stages ##
......@@ -97,25 +135,31 @@ unit test dev notification:
needs:
- build dev notification
# disabled stage
.unit test coverage:
# Generate Coverage reports
unit test coverage:
stage: test-coverage
image: python:3.8-slim
variables:
SERVICE_NAME_1: "capability"
SERVICE_NAME_2: "workflow"
SERVICE_NAME_3: "notification"
CONTAINER_NAME_1: ${SERVICE_NAME_1}_${CI_COMMIT_SHORT_SHA}
CONTAINER_NAME_2: ${SERVICE_NAME_2}_${CI_COMMIT_SHORT_SHA}
CONTAINER_NAME_3: ${SERVICE_NAME_3}_${CI_COMMIT_SHORT_SHA}
before_script:
- pip install pytest pytest-cov
script:
- docker container run --name ${CONTAINER_NAME_1} ${REGISTRY_URL}/${PROJECT_NAME}/${SERVICE_NAME_1}:${CI_COMMIT_SHORT_SHA} ./bin/run-tests.sh -cr xml -o ${SERVICE_NAME_1}.xml
- docker container run --name ${CONTAINER_NAME_2} ${REGISTRY_URL}/${PROJECT_NAME}/${SERVICE_NAME_2}:${CI_COMMIT_SHORT_SHA} ./bin/run-tests.sh -cr xml -o ${SERVICE_NAME_2}.xml
- docker container run --name ${CONTAINER_NAME_3} ${REGISTRY_URL}/${PROJECT_NAME}/${SERVICE_NAME_3}:${CI_COMMIT_SHORT_SHA} ./bin/run-tests.sh -cr xml -o ${SERVICE_NAME_3}.xml
- docker cp ${CONTAINER_NAME_1}:coverage.xml ./${CONTAINER_NAME_1}.xml
- docker cp ${CONTAINER_NAME_2}:coverage.xml ./${CONTAINER_NAME_2}.xml
- docker cp ${CONTAINER_NAME_3}:coverage.xml ./${CONTAINER_NAME_3}.xml
- coverage combine --append
- coverage report
- coverage xml
- coverage html
artifacts:
reports:
cobertura: coverage.xml
paths:
- coverage.xml
- htmlcov/
dependencies:
- unit test dev workflow
- unit test dev capability
- unit test dev notification
rules:
- if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
- if: '$CI_PIPELINE_SOURCE == "merge_request_event"'
# Push Stages
push dev workflow:
......@@ -142,35 +186,69 @@ push dev notification:
needs:
- unit test dev notification
push dev web:
stage: push-dev
variables:
SERVICE_NAME: "web"
extends: .push
# UI tests coming soon!
# needs:
# - unit test dev ui
# Cleanup
clean build workflow:
stage: clean-images
stage: .post
variables:
SERVICE_NAME: "workflow"
extends: .cleanup
rules:
- if: '$CI_PIPELINE_SOURCE == "merge_request_event" && $CI_COMMIT_MESSAGE =~ /\A(?i)-debug/'
when: never
allow_failure: true
clean build capability:
stage: clean-images
stage: .post
variables:
SERVICE_NAME: "capability"
extends: .cleanup
rules:
- if: '$CI_PIPELINE_SOURCE == "merge_request_event" && $CI_COMMIT_MESSAGE =~ /\A(?i)-debug/'
when: never
allow_failure: true
clean build notification:
stage: clean-images
stage: .post
variables:
SERVICE_NAME: "notification"
extends: .cleanup
rules:
- if: '$CI_PIPELINE_SOURCE == "merge_request_event" && $CI_COMMIT_MESSAGE =~ /\A(?i)-debug/'
when: never
allow_failure: true
clean build web:
stage: .post
variables:
SERVICE_NAME: "web"
extends: .cleanup
allow_failure: true
# Deploy Stages
pages:
stage: deploy-coverage-page
image: python:3.8-slim
dependencies:
- unit test coverage
script:
- mkdir public
- mv htmlcov/* public/
artifacts:
paths:
- public
expire_in: 2 weeks
rules:
- if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
# Development
deploy dev:
stage: deploy-dev
variables:
ENV: "dev"
script:
- SWARM_NODE_ENV=$ENV TAG_TO_DEPLOY=$CI_DEFAULT_BRANCH docker stack deploy --compose-file docker-compose.${ENV}.yml workspaces-${ENV}
rules:
- if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
# Development
# deploy dev:
......@@ -229,4 +307,4 @@ clean build notification:
# SWARM_NODE_ENV="test" TAG_TO_DEPLOY="${CI_COMMIT_TAG}" docker stack deploy --compose-file docker-compose.dev.yml workspaces-dev
# rules:
# - if: $CI_COMMIT_TAG
# when: manual
\ No newline at end of file
# when: manual
SHELL := /bin/bash
.PHONY: check-build test-dev test dev setup alembic-update docker-base db build coverage clean
all: dev check-build
# Check if local code will pass CI build
.PHONY: check-build
check-build: docker-base docker-dev-images-locally test-dev
# Run tests on Dockerfile.dev images
.PHONY: test-dev
test-dev:
docker run nrao:workflow ./bin/run-tests.sh
docker run nrao:capability ./bin/run-tests.sh
docker run nrao:notification ./bin/run-tests.sh
# Run tests on Dockerfile.local containers
.PHONY: test
test:
docker exec workspaces_workflow_1 ./bin/run-tests.sh
docker exec workspaces_capability_1 ./bin/run-tests.sh
docker exec workspaces_notification_1 ./bin/run-tests.sh
# Setup local development environment
.PHONY: dev
dev: docker-base
.PHONY: setup
setup:
docker exec workspaces_capability_1 /bin/bash -c 'python -m pip install -r requirements.txt'
docker exec workspaces_capability_1 /bin/bash -c 'python -m pip install -e .'
docker exec workspaces_workflow_1 /bin/bash -c 'python -m pip install -r requirements.txt'
docker exec workspaces_workflow_1 /bin/bash -c 'python -m pip install -e .'
docker exec workspaces_notification_1 /bin/bash -c 'python -m pip install -r requirements.txt'
docker exec workspaces_notification_1 /bin/bash -c 'python -m pip install -e .'
docker-compose restart
.PHONY: alembic-update
alembic-update:
cd schema; \
env CAPO_PROFILE=local alembic upgrade head
# Build images from Dockerfile.dev
.PHONY: docker-dev-images-locally
docker-dev-images-locally:
docker build -t nrao:workflow -f services/workflow/Dockerfile.local . --build-arg capo_env=docker
docker build -t nrao:capability -f services/capability/Dockerfile.local . --build-arg capo_env=docker
docker build -t nrao:notification -f services/notification/Dockerfile.local . --build-arg capo_env=docker
# Build base image
.PHONY: docker-base
docker-base:
docker-base: db
docker build -t marconi.aoc.nrao.edu/ops/base:workspaces -f Dockerfile.base .
docker build -t marconi.aoc.nrao.edu/ops/base:nodejs-14 -f apps/web/Dockerfile.base .
# Build DB image
db:
docker build -t marconi.aoc.nrao.edu/ops/ci/db:workspaces -f ./ci/psql/Dockerfile.db .
# Build docker images
build: docker-base db
docker-compose build --no-cache
# Generate HTML coverage report
coverage:
docker exec workspaces_workflow_1 ./bin/run-tests.sh -b
cp services/workflow/.coverage ./.coverage.wf
docker exec workspaces_capability_1 ./bin/run-tests.sh -b
cp services/capability/.coverage ./.coverage.cap
docker exec workspaces_notification_1 ./bin/run-tests.sh -b
cp services/notification/.coverage ./.coverage.no
coverage combine --append
coverage html
# Clean up environment
clean:
# Clean up Docker environment
docker-compose down
-docker images -aq | xargs docker rmi
docker system prune --volumes -af
docker volume prune -f
# Delete Python cache directories
find . \( -name "*.egg-info" -o -name ".pytest_cache" \) | xargs rm -r
......@@ -30,7 +30,6 @@ setup(
install_requires=requires,
keywords=[],
packages=["datafetcher"],
package_dir={"": "src"},
classifiers=["Programming Language :: Python :: 3.8"],
entry_points={"console_scripts": ["datafetcher = datafetcher.commands:main"]},
entry_points={"console_scripts": ["datafetcher = datafetcher.datafetcher:main"]},
)
# datafetcher Dockerfile
#
# TO BUILD the docker image: -don't- "docker build" directly!
# use docker_build.sh:
# from apps/cli/executables/datafetcher,
#
# ./docker_build.sh datafetcher_test[:N]
#
# where '-t' specifies a name and 'N' is the version.
# (If ':N' is omitted, version is 'latest' by default.)
# tag is not required for the build, but without it
# the container name is an unhelpful hexadecimal value.
FROM continuumio/miniconda3:latest
COPY environment.yml .
ENV PATH $HOME/miniconda3/bin/conda:$PATH
# docker_build.sh should have copied environment.yml from data/;
# it will be used in the command below
RUN conda env update
# get what we'll need for the build
COPY . .
# get application files and tests
COPY src/ .
COPY test/ .
# install the application
RUN ["conda", "run", "-n", "data", "python", "setup.py", "develop"]
# we'll need a Capo profile
ENV CAPO_PROFILE local
ENV CAPO_PATH test/
# finally, run the tests. be verbose. log stuff.
# (for more detailed output, use "-vv" and/or "--log-level=DEBUG";
# to quit after first failure, use "-x")
ENTRYPOINT ["conda", "run", "-n", "data", "pytest", "-vv", "--log-level=DEBUG", "--showlocals", "test/"]
......@@ -24,7 +24,7 @@ def get_project_root() -> Path:
"""
my_path = Path(__file__)
path = my_path
while not path.name.endswith("workspaces") and not path.name.endswith("code"):
while not path.name.endswith("workspaces") and not path.name.endswith("packages"):
path = path.parent
return path
......@@ -53,7 +53,7 @@ from datafetcher.utilities import (
RetrievalMode,
)
TEST_PROFILE = "local"
TEST_PROFILE = "docker"
MISSING_SETTING = ReturnCode.MISSING_SETTING.value["code"]
MISSING_PROFILE = ReturnCode.MISSING_PROFILE.value["code"]
RUN_ALL = True
......
#!/bin/bash
# Building a Docker image in which to execute tests
# will require a copy of the local Capo properties
# file, which can be found at /home/casa/capo
# on boxes that can see /home, but which on boxes
# that can't is likely to be at ~/.capo for
# any given user. Find local.properties and
# copy it to our test directory. Dockerfiles
# do not support conditional logic; hence this script.
# Execute script from apps/executables/cli/datafetcher/
FILENAME=local.properties
CONTAINER_NAME=$1;shift
CACHE_FLAG=$1;shift
USAGE="Usage: $0 <container_name> [--NO-CACHE]"
if [[ -z "${CONTAINER_NAME}" ]]
then
echo "${USAGE}"
exit 1
fi
if [ -n "${CACHE_FLAG}" ]
then
shopt -s nocasematch
if [[ "${CACHE_FLAG}" =~ ^(--)?NO[-_]CACHE$ ]]
then
USE_CACHE=0
else
echo 'invalid cache flag: '"${CACHE_FLAG}"
exit 1
fi
else
USE_CACHE=1
fi
# conda will need the environment.yml
export ENV_YML=environment.yml
export YML_DIR=../../../../
cp $YML_DIR${ENV_YML} ${ENV_YML}
# The preferred version of Capo .properties files is always
# the one at /home/casa/capo, -if- this is visible
# (i.e., NRAO internal system). If not (i.e., developer laptop),
# get the one in the user's .capo directory
if [ -e /home/casa/capo/${FILENAME} ]
then
SOURCE=/home/casa/capo/${FILENAME}
elif [ -e ~/.capo/${FILENAME} ]
then
SOURCE=~/.capo/${FILENAME}
else
echo "${FILENAME} not found!"
exit 1
fi
NEW_FILE=./test/${FILENAME}
cp ${SOURCE} ${NEW_FILE}
# remove extended attributes, which would cause Capo to balk
/usr/bin/xattr -c ${NEW_FILE}
## where the magic happens
if [ "${USE_CACHE}" == 1 ]
then
echo '>>>> Using cache, if possible'
docker build . -f test/Dockerfile -t ${CONTAINER_NAME}
else
echo '>>>> no cache'
docker build . -f test/Dockerfile --no-cache -t ${CONTAINER_NAME}
fi
# now get rid of the properties file; containing sensitive info, it must NOT be saved or committed
rm -f ${NEW_FILE}
# get rid of the .yml, too
rm -f ${ENV_YML}
# to run the image: docker run ${CONTAINER_NAME}[:latest]
......@@ -66,12 +66,22 @@ def test_omitted_profile_returns_expected_code(make_tempdir, settings):
:return:
"""
# store existing CAPO_PROFILE
existing_capo_profile = os.environ["CAPO_PROFILE"]
# remove existing CAPO_PROFILE
os.environ["CAPO_PROFILE"] = ""
# omit --profile arg
args = ['--product-locator', settings.test_data['product_locator'],
'--output-dir', str(make_tempdir)]
return_code = launch_datafetcher(args, settings.capo_settings)
assert return_code == MISSING_PROFILE
# restore the existing CAPO_PROFILE
os.environ["CAPO_PROFILE"] = existing_capo_profile
@pytest.mark.skipif(not RUN_ALL, reason='debug')
def test_omitted_capo_value_returns_expected_code(make_tempdir, settings):
......
# Delivery Architecture
What is delivery? Delivery is what happens after the active processing portion of the workflow concludes. It is the
step that moves the retrieved or generated products from the processing area to a place where they can be accessed by
the requesting user.
Most workflows proceed by retrieving some files from NGAS and running CASA on those files to produce new products. The
files are large and CASA is quite heavy, so we retrieve the files into a spool area on the Lustre filesystem and then
launch the CASA jobs on the cluster. Once CASA is finished, the files the user wants are still sitting in that spool
area on Lustre. Delivery is what gets the files from there to where the user can retrieve them.
The simplest kind of delivery is just copying files from the spool area to another location—a mere `cp`. However, we
have several complications:
- CASA mandates a certain filesystem layout for the spool area
- The filesystem layout of the delivery destination varies based on the _type_ of the product
- Users can request `tar` archives, optionally
- Users can request delivery to their own areas in Lustre
- Not specifying a delivery location implies creating a unique location under a web root
We also want to be somewhat flexible in case new streaming kinds of deliveries are mandated in the future, such as
Globus (formerly GridFTP).
The result is that the behavior of the delivery process, which is fundamentally `cp`, varies according to both the
options given by the user and various facts about the data we happen to be delivering.
## Handling files
At the bottom of every delivery process is a process of being supplied files and told to deliver them. The
_Destination_ system is the core of this portion of the process. The goal here is to decouple the idea of "here is a
file to deliver" from the details of how that delivery happens. We have one concrete class here, `LocalDestination`,
which represents the common `cp` case of copying a file into the destination. If the simplest delivery
is `cp source dest`, you can think of `LocalDestination` as embodying the idea of `cp ... dest`.
The _Destination_ classes make no sense on their own; their purpose is to be passed around to other objects in the
system that know about files that need to be delivered. The _Destination_ classes just hide the details about where
those files are actually going and how they're getting there.
If we were going to support something like Globus, I expect it would appear as a peer of `LocalDestination`, as another
concrete implementation of `Destination`.
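
To make the shape of this concrete, here is a minimal sketch of what a `Destination` hierarchy along these lines could
look like. Only the names `Destination`, `LocalDestination`, and the `close()` method mentioned later come from this
document; the `add_file()` method name, its signature, and the constructor argument are illustrative assumptions, not
the actual interface.

```python
# Hypothetical sketch of the Destination idea; add_file() is an assumed method name.
import abc
import pathlib
import shutil


class Destination(abc.ABC):
    """Something that accepts files and hides where (and how) they end up."""

    @abc.abstractmethod
    def add_file(self, file: pathlib.Path, relative_path: str) -> None:
        """Deliver a single file under the given relative path."""

    def close(self) -> None:
        """Hook for wrappers that need a finalization step; plain destinations do nothing."""


class LocalDestination(Destination):
    """The common case: `cp ... dest` into a local directory."""

    def __init__(self, directory: pathlib.Path):
        self.directory = directory

    def add_file(self, file: pathlib.Path, relative_path: str) -> None:
        target = self.directory / relative_path
        target.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(file, target)
```
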
### Checksums and compression
Thinking along these lines, one can treat a checksum file as just another file to be added to the destination. Because
`Destination` is 1) handed every file to be delivered and 2) knows where the files are ultimately going to be placed,
we can see a way to handle creating a checksum file as a kind of "pass-through" step that happens automatically. The
algorithm would look something like this:
1. Make a checksum wrapper for the local destination
2. For every file we get asked to deliver, calculate its checksum before handing it off to the wrapped destination for
delivery
3. After we are done delivering files, pass a fake file containing the checksums to the wrapped destination
This kind of "wrapper" or "pass-through" thing happens often enough in object-oriented programming that it is called
the "Decorator pattern." We can handle compression the same way:
1. Make a tar archive in a scratch area somewhere
2. For every file we get asked to deliver, instead place it in the archive in the scratch area
3. After we are done delivering files, finalize the archive and pass it to the wrapped destination
The key idea here is that the next part of the system which finds files to deliver has _no idea_ about whether we are
using compression or calculating checksums or not—in fact, these wrappers are stackable. The part of the system that
finds files to deliver just passes them to the destination, and as long as the stack of wrappers and destinations has
been constructed by someone in the right order, everything will happen as it should.
The purpose of the `DestinationBuilder` is to ensure that the stack is constructed in the right way. The reason
`Destination` has a `close()` method is for these wrappers to know when we are done delivering files so they can take
their finalization steps.
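
As an illustration of the wrapper idea, here is a hedged sketch of a checksum decorator. It duck-types the same
`add_file()`/`close()` methods assumed in the earlier sketch; the hash algorithm and the `SHA1SUMS` file name are
placeholders rather than the project's actual choices.

```python
# Hypothetical checksum wrapper; the wrapped object only needs add_file() and close().
import hashlib
import pathlib
import tempfile


class ChecksumDecorator:
    """Passes every file through to the wrapped destination, then emits a checksum file on close()."""

    def __init__(self, underlying):
        self.underlying = underlying
        self.sums = []

    def add_file(self, file: pathlib.Path, relative_path: str) -> None:
        digest = hashlib.sha1(file.read_bytes()).hexdigest()
        self.sums.append(f"{digest}  {relative_path}")
        self.underlying.add_file(file, relative_path)

    def close(self) -> None:
        # The "fake file containing the checksums", handed to the wrapped destination last.
        with tempfile.TemporaryDirectory() as scratch:
            sums_file = pathlib.Path(scratch) / "SHA1SUMS"
            sums_file.write_text("\n".join(self.sums) + "\n")
            self.underlying.add_file(sums_file, "SHA1SUMS")
        self.underlying.close()
```

A tar wrapper would have the same shape, writing into a scratch archive in `add_file()` and handing the finished
archive to the wrapped destination in `close()`. Stacking them in whatever order is wanted, for example
`ChecksumDecorator(TarDecorator(LocalDestination(...)))`, is exactly the construction job the `DestinationBuilder`
exists to get right.
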
## Handling products
If you look at
the [delivery directory requirements](https://open-confluence.nrao.edu/display/SPR/Delivery+Directory+Improvements),
you'll see that there are a number of requirements to group things together based on their project or their telescope,
and the directory names are based on the type of product. Knowing what you have in hand affects the layout in the
delivery directory. This means that we are not always going to have a straightforward `cp` command, because the way
files rest in the spool area doesn't necessarily match the way that they need to be laid out in the delivery directory.
The key idea here is that somebody, eventually, knows what these products _are_, and the knowledge about how that
_type_ is delivered should live with that _type_, rather than being spread around the system. Execution blocks should know what
execution blocks are supposed to look like when they get delivered; images should know what images should look like when
they are delivered, and so forth. If a new type of product is invented, supporting a wacky delivery format for that
product should be a matter of defining that product type and adding the logic just to that product. This is why we have
a `SpooledProduct` with a single method: `deliver_to(Destination)`. We expect to have a driver that at some level is
passing a destination to each of these products and saying, "write yourself to this destination."
This suggests that "deliver from here to there" does not mean quite the same thing as `cp`. `cp` says "copy these files
from here to there"; we are actually saying "copy all the products from here to there, according to how each of these
products _should_ be copied." In the beginning, a simple product like an execution block _will_ simply deliver the
files in its directory directly, but as we support more complex products, like OUS requests with images, more
interesting things will happen.
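
A sketch of what the product side might look like follows. Only `SpooledProduct` and its single
`deliver_to(Destination)` method are named in this document; `ExecutionBlock`, its constructor, and the directory walk
are assumptions about the simplest case.

```python
# Hypothetical product types; `destination` is anything exposing the assumed add_file() method.
import abc
import pathlib


class SpooledProduct(abc.ABC):
    """A product sitting in the spool area that knows how its own type gets delivered."""

    @abc.abstractmethod
    def deliver_to(self, destination) -> None:
        """Write this product to the destination, in the layout this product type requires."""


class ExecutionBlock(SpooledProduct):
    """Simplest case: deliver the files under this product's directory as they are."""

    def __init__(self, path: pathlib.Path):
        self.path = path

    def deliver_to(self, destination) -> None:
        for file in sorted(self.path.rglob("*")):
            if file.is_file():
                destination.add_file(file, str(file.relative_to(self.path.parent)))
```
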
## Finding products
How will we know what the products are that need to be delivered? We can assume we are given a source directory with
products in it, but how do we enumerate them in order to deliver them? The most straightforward answer is that we can
simply iterate over the entire directory and match filename patterns to product types: if it ends with `.ms` it's a measurement
set, if it looks like `PPR.xml` it's a pipeline request, etc. Doing this amounts to having a dispatch table of common
filename patterns, which is tedious, but exhaustive and gives our code a fair amount of control.
There is a second way to figure out the products, which is by examining CASA's `piperesults` output file. This file
isn't necessarily present (after all, CASA is not _required_ for every workflow) so this method cannot ever be the
_only_ means of determining the products. But it may eventually be a requirement that we support using the
`piperesults` file. So rather than having a single class here called `ProductFinder`, we instead have an interface
called `ProductFinder` and a `HeuristicProductFinder` that does the filename dispatch approach and a
`PiperesultsProductFinder` that uses the `piperesults` file to figure it out.
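
The split described above could look roughly like the sketch below. The class names `ProductFinder`,
`HeuristicProductFinder`, and `PiperesultsProductFinder` come from this document; the dispatch table contents and the
placeholder product type are invented for illustration.

```python
# Hypothetical finder sketch; the patterns and product constructors are placeholders.
import abc
import pathlib
from typing import Callable, Dict, Iterator


class MeasurementSet:
    """Stand-in for a real SpooledProduct subclass."""

    def __init__(self, path: pathlib.Path):
        self.path = path


class ProductFinder(abc.ABC):
    @abc.abstractmethod
    def find_products(self) -> Iterator:
        """Yield the products found in the spool area."""


class HeuristicProductFinder(ProductFinder):
    """Filename-pattern dispatch: the tedious-but-exhaustive approach."""

    # glob pattern -> product constructor; a real table would be much longer
    DISPATCH: Dict[str, Callable] = {
        "*.ms": MeasurementSet,
        # "PPR.xml": PipelineRequest, ...and so on for the other known patterns
    }

    def __init__(self, spool_dir: pathlib.Path):
        self.spool_dir = spool_dir

    def find_products(self) -> Iterator:
        for pattern, make_product in self.DISPATCH.items():
            for match in self.spool_dir.glob(pattern):
                yield make_product(match)
```

A `PiperesultsProductFinder` would implement the same `find_products()` by parsing CASA's `piperesults` file instead of
globbing the spool directory.
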
## Bringing it all together
So we have a system that finds products, products that know how to write themselves to a destination, and
destinations that know how to handle local filesystem writes, compression and checksumming. This is most of what is
needed. We can see now that we want to have a main loop that looks like this:
```python
for product in finder.find_products():
    product.deliver_to(destination)
```
What is still missing is a small amount of plumbing to get us from here to there. We need a device for processing
the command line arguments. Some aspects of delivery are based on user-supplied options: whether or not we are building
tar archives, whether we are delivering the raw data retrieved by the data fetcher or the products generated by
CASA. Eventually we will have to support a local delivery command line option. Basically, anything the user chooses
in the archive UI that affects delivery is going to arrive to us through the command line options. So we have to add
a command line parser, which we have in `Context`.
A few lessons learned from the legacy delivery system are also baked into the `Context`. We assume that a few
"services" are available in `Context` to the `Destination` and `ProductFinder` schemes. For web delivery, we will
eventually need to be able to generate random codes for the URL, but we want those random codes to be stable
throughout the delivery process, so there is a way to do that in the `Context`. Creating temporary files is also
provided via the `Context`, which is something the tar and checksum wrappers will eventually need. So the `Context`
is available to these classes at construction time so they can call these services as needed, or peek at command
line arguments they may care about.
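
A guess at the shape of the `Context` follows: argparse-backed options plus the two "services" mentioned above, a
stable random code and temporary-file creation. The flag names are illustrative only, not the actual command line.

```python
# Hypothetical Context sketch; the option names are not the real CLI.
import argparse
import secrets
import tempfile


class Context:
    def __init__(self, argv):
        parser = argparse.ArgumentParser(description="deliver products from the spool area")
        parser.add_argument("--tar", action="store_true", help="deliver tar archives")
        parser.add_argument("--local-destination", help="deliver to this directory instead of the web root")
        self.args = parser.parse_args(argv)
        self._token = None  # generated once, stable for the whole delivery

    def token(self) -> str:
        """Random code for web delivery URLs, stable across the delivery run."""
        if self._token is None:
            self._token = secrets.token_hex(16)
        return self._token

    def create_tempfile(self, suffix: str = ""):
        """Scratch file for wrappers like tar and checksum to use."""
        return tempfile.NamedTemporaryFile(suffix=suffix, delete=False)
```
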
And that's the theory behind delivery in a nutshell.
......@@ -8,138 +8,4 @@ This is the delivery thing.
https://open-confluence.nrao.edu/display/AAT/Proposed+Delivery+Redesign
https://open-confluence.nrao.edu/display/SPR/Delivery+Directory+Improvements
-->