# Workspaces Annihilator
A command-line utility that removes old, unused directories from the Workspaces Lustre areas.
Annihilator requires the capo profile for the environment to be cleaned (dsoc-dev, dsoc-test, or dsoc-prod).
## Usage
usage: ws_annihilator [-h] -p PROFILE [-a] [-c]
Workspaces Directory Annihilator
optional arguments:
-h, --help            show this help message and exit
-p PROFILE, --profile PROFILE
Specify which environment to run on. Required. Example: dsoc-dev
-a, --all             Run annihilation on workspaces spool, staging, and storage areas.
-c, --cache           Run annihilation on workspaces cache area.
from pathlib import Path
from setuptools import find_packages, setup
# The version string is the last whitespace-separated token on the last line of
# _version.py, with surrounding quotes stripped. read_text() opens and closes the
# file itself, so no file handle is left dangling.
VERSION = Path("ws_annihilator/_version.py").read_text().splitlines()[-1].split()[-1].strip("\"'")
README = Path("README.md").read_text()
requires = [
"argparse",
"pycapo",
"requests",
name="ssa-" + Path().absolute().name,
version=VERSION,
description="Workspaces Directory Annihilator; Clean up generated products from lustre!",
long_description=README,
author="NRAO SSA Team",
author_email="dms-ssa@nrao.edu",
url="TBD",
license="GPL",
install_requires=requires,
keywords=[],
packages=find_packages(),
classifiers=["Programming Language :: Python :: 3.8"],
entry_points={"console_scripts": ["ws_annihilator = ws_annihilator.annihilator:main"]},
___version___ = "4.0.0a1.dev1"
+Facility for removing old or QAFailed files and directories from the Workspaces lustre areas
import argparse
import logging
import os
import pathlib
import shutil
from enum import Enum
import requests
+import requests
from typing import Dict, List
import sys
+import sys
logger = logging.getLogger(__name__)
class Areas(Enum):
CACHE = "cache"
SPOOL = "spool"
STAGING = "staging"
STORAGE = "storage"
+    STORAGE = "storage"
"""
Get annihilator capo settings
:return:
"""
+    """
+    Get annihilator capo settings
+    :return:
+    """
+    config = CapoConfig(profile=profile)
spool_directory = config.settings("edu.nrao.workspaces.ProcessingSettings").rootDirectory
staging_directory = ingestion_settings.stagingDirectory
storage_directory = ingestion_settings.storageDirectory
return {
"workflow_url": workflow_url,
"cache": cache_directory,
"spool": spool_directory,
+    return {
"staging": staging_directory,
+        "cache": cache_directory,
+        "spool": spool_directory,
"staging_days": annihilator_settings.keepStagingForDays,
"storage_days": annihilator_settings.keepStorageForDays,
}
def _arg_parser() -> ArgumentParser:
"""
Create parser for annihilator
:return:
"""
arg_parser = ArgumentParser(
+    }
formatter_class=argparse.RawTextHelpFormatter,
)
arg_parser.add_argument(
"-p",
"--profile",
action="store",
required=True,
help="Specify which environment to run on. Required. Example: dsoc-dev",
)
arg_parser.add_argument(
"-a",
"--all",
action="store_true",
required=False,
help="Run annihilation on workspaces spool, staging, and storage areas.",
)
arg_parser.add_argument(
"-c",
"--cache",
action="store_true",
required=False,
help="Run annihilation on workspaces cache area.",
)
return arg_parser
class Annihilator:
+        "--cache",
def __init__(self):
+        required=False,
self._settings = _get_settings(self._args.profile)
+    )
+    return arg_parser
+class Annihilator:
+    """
+    Class for removing workspaces directories that are no longer in use and past a given date
key = area_to_clean.lower() + "_days"
return self._settings.get(key)
def get_stale_directories(self, area_to_clean: str) -> List[str]:
+    - If directory exists in the workspaces' area for the specified profile, annihilates directory
annihilation_url = (
self._settings.get("workflow_url") + "/workflows/requests/stale/" + self.get_days_to_keep(area_to_clean)
)
+    def __init__(self):
dir_list = response.json()["stale_dirs"]
name_list = []
# catch empty string response
if dir_list:
for stale in dir_list.split(","):
name_list.append(pathlib.Path(str(stale)).stem)
return name_list
+        """
+        return self._settings.get(area_to_clean.lower())
shutil.rmtree(directory)
+        """
if area == Areas.SPOOL.value:
logger.info(f"Annihilation complete for {directory}, setting 'cleaned' flag on workflow request...")
+        :return:
else:
logger.info(f"Annihilation complete for {directory} in {area}.")
def set_cleaned(self, directory: str):
+    def get_stale_directories(self, area_to_clean: str) -> List[str]:
+        """
json_payload = {"directory": directory, "update_to": "True"}
return requests.put(clean_url, json=json_payload)
def annihilate(self, area_to_clean: str):
"""
Remove stale directories from targeted workspaces areas
:return:
"""
stale = self.get_stale_directories(area_to_clean)
# If stale directories were found, annihilate them
if stale:
+        """
# change to area to clean
os.chdir(path)
+        )
dir_path = path + "/" + directory
if not pathlib.Path(dir_path).exists():
+        name_list = []
+        # catch empty string response
self.set_cleaned(dir_path)
else:
logger.info(f"directory {dir_path} not found in {area_to_clean}, skipping...")
else:
logger.info(f"found directory {directory}, annihilating...")
+        """
def run(self):
if self._args.all:
logger.info
+        :return:
+        """
+        shutil.rmtree(directory)
+        # After annihilation set cleaned flag...
+        if area == Areas.SPOOL.value:
+            logger.info(f"Annihilation complete for {directory}, setting 'cleaned' flag on workflow request...")
+            self.set_cleaned(directory)
+        else:
+            logger.info(f"Annihilation complete for {directory} in {area}.")
+    def set_cleaned(self, directory: str):
+        """
+        Ask the workflow service to mark the request owning a directory as cleaned
+
+        Issues a PUT to the "/workflows/requests/cleaned" route below the configured
+        "workflow_url" setting, with "update_to" sent as the string "True".
+
+        :param directory: the directory that was annihilated
+        :return: the response returned by requests.put
+        """
+        payload = {"directory": directory, "update_to": "True"}
+        endpoint = self._settings.get("workflow_url") + "/workflows/requests/cleaned"
+        return requests.put(endpoint, json=payload)
+    def annihilate(self, area_to_clean: str):
+        """
+        Remove stale directories from the targeted workspaces area
+
+        Asks the workflow service for the stale directory names, then for each one:
+        - if it exists under the area's path, annihilates it
+        - if it is missing and the area is spool, only sets the 'cleaned' flag
+        - if it is missing in any other area, logs and skips it
+
+        :param area_to_clean: the area name, compared against Areas.SPOOL.value
+        :return:
+        """
+        stale = self.get_stale_directories(area_to_clean)
+        # Nothing reported as stale: leave the area and the working directory alone
+        if not stale:
+            return
+
+        path = self.determine_path(area_to_clean)
+        # change to area to clean
+        os.chdir(path)
+        for directory in stale:
+            dir_path = path + "/" + directory
+            if pathlib.Path(dir_path).exists():
+                logger.info(f"found directory {directory}, annihilating...")
+                # Hand over the full path, not the bare name: annihilate_directory forwards
+                # its argument to set_cleaned, and the not-found branch below already reports
+                # the full path, so both branches now identify the request the same way.
+                self.annihilate_directory(area_to_clean, dir_path)
+            elif area_to_clean == Areas.SPOOL.value:
+                logger.info(f"directory {dir_path} not found, setting 'cleaned' flag on workflow request...")
+                self.set_cleaned(dir_path)
+            else:
+                logger.info(f"directory {dir_path} not found in {area_to_clean}, skipping...")
+    def run(self):
+        """
+        Clean the workspaces areas selected by the parsed command-line arguments
+
+        - all: spool, staging, and storage
+        - cache: the cache area (may be combined with all)
+        - neither flag: spool only
+
+        :return:
+        """
+        if self._args.all:
+            logger.info(f"Starting cleaning of Workspaces Spool for profile {self._args.profile}")
+            self.annihilate(Areas.SPOOL.value)
+            logger.info(f"Starting cleaning of Workspaces Staging for profile {self._args.profile}")
+            self.annihilate(Areas.STAGING.value)
+            logger.info(f"Starting cleaning of Workspaces Storage for profile {self._args.profile}")
+            self.annihilate(Areas.STORAGE.value)
+            logger.info(
+                f"Finished cleaning Workspaces Spool, Staging, and Storage areas for profile {self._args.profile}!"
+            )
+        if self._args.cache:
+            logger.info(f"Starting cleaning of Workspaces Cache for profile {self._args.profile}")
+            self.annihilate(Areas.CACHE.value)
+            logger.info(f"Finished cleaning Workspaces Cache for profile {self._args.profile}!")
+        # Spool-only is the default when no area flag is given. Tying it to "not cache"
+        # alone would make an "all" run without "cache" clean spool a second time.
+        if not (self._args.all or self._args.cache):
+            logger.info(f"Starting cleaning of Workspaces Spool for profile {self._args.profile}")
+            self.annihilate(Areas.SPOOL.value)
+            logger.info(f"Finished cleaning Workspaces Spool for profile {self._args.profile}!")
+def main():
+    """
+    Entry point for the ws_annihilator console script: build an Annihilator and run it
+    """
+    annihilator = Annihilator()
+    annihilator.run()
         Pyramid view that returns a list of workflow request processing directories that
         have completed processing and are marked for annihilation
-        :return: HTTP Response with list of directories to annihilate
+        :return:  HTTP Response with list of directories to annihilate
+        dir_list = []
         keep = self.request.matchdict["days"]
-        return self.request.info.lookup_stale_requests(int(keep))
+        req_list = self.request.info.lookup_stale_requests(int(keep))
+        for r in req_list:
+            dir_list.append(r.results_dir)
+        # return list as comma separated string
+        return Response(json_body={"stale_dirs": str(",".join(dir_list))})
+    @view_config(request_method="PUT", route_name="set_request_cleaned")
+    def set_request_cleaned(self):
+        """
+        Pyramid view to set the workflow request cleaned flag upon directory annihilation
+
+        Reads "directory" and "update_to" from the JSON request body. "update_to" must be
+        the string form of a Python literal, for example "True".
+
+        :return: HTTP Response; 404 when no workflow request matches the directory
+        """
+        # Local import keeps this view self-contained; ast is part of the standard library
+        import ast
+
+        directory = self.request.json_body["directory"]
+        # SECURITY: the request body is caller-controlled, so it must never reach eval().
+        # literal_eval accepts only Python literals and raises ValueError or SyntaxError
+        # for anything else instead of executing it.
+        update_to = ast.literal_eval(self.request.json_body["update_to"])
+        req = self.request.info.lookup_workflow_request_by_directory(directory)
+        # The lookup returns the first match, so it yields None for an unknown directory
+        if req is None:
+            return Response(
+                status_code=http.HTTPStatus.NOT_FOUND,
+                body=f"FAILURE: No workflow request found for directory {directory}",
+            )
+        self.request.info.update_request_cleaned(req, update_to)
+        return Response(
+            status_code=http.HTTPStatus.OK,
+            body=f"SUCCESS: Updated workflow request #{req.workflow_request_id} to {update_to}",
+        )
 @view_defaults(route_name="workflow_request_files", renderer="json")
@@ -756,6 +779,11 @@ def main(global_config, **settings):
+        config.add_route(
+            "set_request_cleaned",
+            "/workflows/requests/cleaned",
+        )
         # Route for healthcheck when bringing up containers
     def update_status(self, status: str):
         self.state = status
+    def update_cleaned(self, cleaned: bool):
+        """
+        Store the given value in this object's cleaned attribute
+
+        :param cleaned: the new value of the cleaned flag
+        """
+        self.cleaned = cleaned
     # TODO: create following fields in table
     def set_start_time(self, time: str):
         self.start_time = time
         return self.session.query(WorkflowTemplate).filter_by(workflow_name=workflow_name).all()
-    def lookup_stale_requests(self, keep_days: int) -> List[str]:
+    def lookup_stale_requests(self, keep_days: int) -> List[WorkflowRequest]:
-        Queries the workflow_requests table for all requests that have completed, have not
-        been cleaned up, and are older that the number of days specified by keep_days
+        Queries the workflow_requests table for all requests that have not been
+        cleaned up, and are older than the number of days specified by keep_days
         :param keep_days: the number of days to keep directories
         :return: a list of strings representing processing directories
@@ -66,10 +66,20 @@ class WorkflowInfo(WorkflowInfoIF):
         return (
             .filter((WorkflowRequest.updated_at + datetime.timedelta(days=keep_days)) < datetime.datetime.now())
+            .filter_by(htcondor_job_id=None)
+    def lookup_workflow_request_by_directory(self, results_dir: str) -> WorkflowRequest:
+        """
+        Find the workflow request whose results directory equals the one given
+
+        :param results_dir: the results directory to match against workflow_requests
+        :return: the first matching WorkflowRequest
+        """
+        all_requests = self.session.query(WorkflowRequest)
+        matching = all_requests.filter_by(results_dir=results_dir)
+        return matching.first()
     def all_workflows(self) -> List[Workflow]:
         return self.session.query(Workflow).all()
@@ -146,3 +156,14 @@ class WorkflowInfo(WorkflowInfoIF):
         return wrf
+    def update_request_cleaned(self, request: WorkflowRequest, update_flag: bool):
+        """
+        Update an existing workflow request when its results directory is annihilated
+
+        Sets the flag on the request object and then saves the request.
+
+        :param request: The workflow request to update
+        :param update_flag: the boolean state to set the cleaned flag to
+        :return:
+        """
+        request.update_cleaned(update_flag)
+        self.save_request(request)