Commit 8feb7f26 authored by Sam Kagan

Added support for delivering restore's flagtemplate.txt

parent cf124bf5
Tags end-of-sprint/5
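The change below stops looking for `*flagtemplate.txt` loose in `products/` and instead pulls it out of the `*auxproducts.tgz` tarball before delivery. A minimal standalone sketch of that extraction pattern, assuming an illustrative helper name and example path (neither is part of the commit):

import pathlib
import re
import tarfile

def extract_matching_members(tar_path: pathlib.Path, regexes: list[str]) -> list[pathlib.Path]:
    """Extract only the members of a gzipped tar whose names match one of `regexes` (illustrative helper)."""
    extracted = []
    with tarfile.open(tar_path, mode="r:gz") as tar:
        for member in tar.getmembers():
            if any(re.match(regex, member.name) for regex in regexes):
                # The tarfile docs recommend extractall() even for single members:
                # https://docs.python.org/3.10/library/tarfile.html#tarfile.TarFile.extract
                tar.extractall(path=tar_path.parent, members=[member])
                extracted.append(tar_path.parent / member.name)
    return extracted

# e.g. extract_matching_members(pathlib.Path("products/unknown.auxproducts.tgz"), [".*flagtemplate\\.txt"])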
@@ -17,8 +17,9 @@
# along with Workspaces. If not, see <https://www.gnu.org/licenses/>.
import pathlib
import re
import tarfile
from dataclasses import dataclass
from typing import Tuple, Union
from typing import Optional, Tuple, Union
import chevron
@@ -201,6 +202,25 @@ class ArchiveProduct(SpooledProduct):
class RestoreProduct(SpooledProduct):
WEBLOG_REGEX = "^pipeline-[0-9]+T[0-9]+$"
SUBDIR_FILENAME_REGEXES = {
"products": [
"casa_pipescript\\.py",
"casa_commands\\.log",
WEBLOG_REGEX,
"PPR_calibration\\.xml",
".*calapply\\.txt",
".*caltables\\.tgz",
".*flagtsystemplate\\.txt",
],
"working": [
"flux\\.csv",
".*\\.ms",
"casa_piperestorescript\\.py",
WEBLOG_REGEX,
"PPR\\.xml",
],
}
AUXPRODUCTS_FILENAME_REGEXES = [".*flagtemplate\\.txt"]
def __init__(self, path: pathlib.Path, metadata: ProductMetadataIF):
super().__init__(path)
@@ -211,7 +231,7 @@ class RestoreProduct(SpooledProduct):
return self._metadata
@classmethod
def _get_delivered_filename(cls, file: pathlib.Path, is_weblog: bool, subdir: str) -> str:
def _get_delivered_filename(cls, filename: str, is_weblog: bool, subdir: Optional[str]) -> str:
"""Get the filename to use when delivering the given file
:param filename: The name of the file to be delivered
@@ -219,7 +239,6 @@ class RestoreProduct(SpooledProduct):
:param subdir: The subdirectory of the restore's spool in which the file was found, or None for files extracted from the auxproducts tarball
:return: The name to use for the file when it's delivered
"""
filename = file.name
# Rename the weblogs
if is_weblog:
if subdir == "products":
@@ -232,35 +251,30 @@ class RestoreProduct(SpooledProduct):
return filename
def deliver_to(self, destination: Destination):
# Locations of to-be-delivered items in an EVLA restore
# * `*flagtemplate.txt` and `*flagtsystemplate.txt` should be in `./rawdata`, **need to verify this**
SUBDIR_FILENAME_REGEXES = {
"products": [
"casa_pipescript\\.py",
"casa_commands\\.log",
self.WEBLOG_REGEX,
"PPR_calibration\\.xml",
".*calapply\\.txt",
".*caltables\\.tgz",
".*flagtemplate\\.txt",
".*flagtsystemplate\\.txt",
],
"working": [
"flux\\.csv",
".*\\.ms",
"casa_piperestorescript\\.py",
self.WEBLOG_REGEX,
"PPR\\.xml",
],
}
# `*flagtsystemplate.txt` should be in `./products`, **need to verify this**
pipedir = destination / self.metadata.project / f"{self.metadata.pipeline_spec}"
for subdir, filename_regexes in SUBDIR_FILENAME_REGEXES.items():
auxproducts_paths = list((self.path / "products").glob("*auxproducts.tgz"))
if len(auxproducts_paths) == 1:
with tarfile.open(auxproducts_paths[0], mode="r:gz") as auxproducts_tar:
for member in auxproducts_tar.getmembers():
for filename_regex in self.AUXPRODUCTS_FILENAME_REGEXES:
if re.match(filename_regex, member.name) is not None:
# tarfile library recommends using extractall even for individual members
# Source: https://docs.python.org/3.10/library/tarfile.html#tarfile.TarFile.extract
auxproducts_tar.extractall(path=auxproducts_paths[0].parent, members=[member])
pipedir.add_path_entry(
auxproducts_paths[0].parent / member.name,
self._get_delivered_filename(member.name, filename_regex == self.WEBLOG_REGEX, None),
)
break
for subdir, filename_regexes in self.SUBDIR_FILENAME_REGEXES.items():
for file in (self.path / subdir).iterdir():
for filename_regex in filename_regexes:
if re.match(filename_regex, file.name) is not None:
pipedir.add_path_entry(
file, self._get_delivered_filename(file, filename_regex == self.WEBLOG_REGEX, subdir)
file, self._get_delivered_filename(file.name, filename_regex == self.WEBLOG_REGEX, subdir)
)
break
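As a side note on the class-level regex tables added above, here is an abridged sketch of how `re.match` routes filenames to subdirectories in `deliver_to`'s inner loop. The tables are shortened, `matching_subdirs` is an illustrative helper rather than part of the code, and the weblog directory name is made up; the other two filenames come from the restore test fixtures below:

import re

WEBLOG_REGEX = "^pipeline-[0-9]+T[0-9]+$"
# Abridged copy of RestoreProduct.SUBDIR_FILENAME_REGEXES, for illustration only
SUBDIR_FILENAME_REGEXES = {
    "products": ["casa_pipescript\\.py", WEBLOG_REGEX, ".*caltables\\.tgz", ".*flagtsystemplate\\.txt"],
    "working": ["flux\\.csv", ".*\\.ms", WEBLOG_REGEX, "PPR\\.xml"],
}

def matching_subdirs(filename: str) -> list[str]:
    """Return the subdirs whose regex lists match `filename`, as deliver_to's inner loop checks."""
    return [
        subdir
        for subdir, regexes in SUBDIR_FILENAME_REGEXES.items()
        if any(re.match(regex, filename) for regex in regexes)
    ]

assert matching_subdirs("unknown.session_1.caltables.tgz") == ["products"]
assert matching_subdirs("flux.csv") == ["working"]
assert matching_subdirs("pipeline-20230914T123456") == ["products", "working"]  # hypothetical weblog dir name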
@@ -24,9 +24,12 @@ from typing import Counter
from delivery.products import ProductMetadata
TEST_RESTORE_METADATA = ProductMetadata("EVLA", "17B-403", "restored_cms", None, None, "60468.6349189817")
RESTORE_AUXPRODUCTS_NAME = "unknown.auxproducts.tgz"
def get_expected_files_and_dirs_for_restore(containing_dir: str) -> tuple[set[pathlib.Path], Counter[str]]:
def get_expected_files_and_dirs_for_restore(
containing_dir: str, do_include_flagtemplate: bool = False
) -> tuple[set[pathlib.Path], Counter[str]]:
expected_dirs_to_file_counts = Counter(
{
f"{containing_dir}/17B-403.sb34574962.eb34577590_000.58035.544146898144.ms": 119,
@@ -45,5 +48,7 @@ def get_expected_files_and_dirs_for_restore(containing_dir: str) -> tuple[set[pa
f"{containing_dir}/flux.csv",
f"{containing_dir}/unknown.session_1.caltables.tgz",
}
if do_include_flagtemplate:
expected_files.add(f"{containing_dir}/17B-403.sb34574962.eb34577590_000.58035.544146898144.flagtemplate.txt")
expected_files = {pathlib.Path(f) for f in expected_files}
return expected_files, expected_dirs_to_file_counts
@@ -130,6 +130,40 @@ def test_local_restore_no_tar(restore_directory: pathlib.Path, tmpdir_factory, c
capsys.readouterr()
def test_local_restore_no_tar_with_flagtemplate(
restore_directory: pathlib.Path, tmp_path_factory: pytest.TempPathFactory, capsys
):
"""
Test that local delivery works without tar when the restore's auxproducts tarball (containing a flagtemplate) is present
"""
auxproducts_path = restore_directory.parent / RESTORE_AUXPRODUCTS_NAME
assert auxproducts_path.is_file()
# To avoid possibly polluting the repo,
# use a tmp dir for the restore since the auxproducts tar needs to be copied into it
src_dir = str(tmp_path_factory.mktemp("test_basic_restore_no_tar_flagtemplate_src"))
shutil.copytree(restore_directory, src_dir, dirs_exist_ok=True)
shutil.copy2(auxproducts_path, src_dir + "/products")
dest_dir = str(tmp_path_factory.mktemp("test_basic_restore_no_tar_flagtemplate_dest"))
main(["--restore", "-l", dest_dir, src_dir])
deliver_rel_path_root = f"{dest_dir}/{TEST_RESTORE_METADATA.project_code}/{TEST_RESTORE_METADATA.pipeline_spec}"
expected_files, expected_dirs_to_file_counts = get_expected_files_and_dirs_for_restore(
deliver_rel_path_root, do_include_flagtemplate=True
)
expected_files.add(pathlib.Path(dest_dir + "/SHA1SUMS"))
expected_files = {pathlib.Path(file).resolve() for file in expected_files}
actual_files = list(p.resolve() for p in pathlib.Path(dest_dir).rglob("*"))
assert_restore_delivered_only_expected_files(actual_files, expected_files, expected_dirs_to_file_counts)
# ensure that we actually got a delivery file with the proper contents
with open("delivery.json", "r") as delivery_results_file:
results = json.load(delivery_results_file)
assert len(results.keys()) == 3
assert results["delivered_to"] == dest_dir
capsys.readouterr()
def test_local_rawdata_with_tar(resource_path_root, tmpdir_factory, capsys):
"""
Test that local delivery works with tar
@@ -98,6 +98,30 @@ def test_restore(restore_directory: pathlib.Path):
assert_restore_delivered_only_expected_files(actual_files, expected_files, expected_dirs_to_fcs, files_to_is_file)
def test_restore_with_flagtemplate(restore_directory: pathlib.Path, tmp_path: pathlib.Path):
auxproducts_path = restore_directory.parent / RESTORE_AUXPRODUCTS_NAME
assert auxproducts_path.is_file()
# To avoid possibly polluting the repo,
# use a tmp dir for the restore since the auxproducts tar needs to be copied into it
shutil.copytree(restore_directory, tmp_path, dirs_exist_ok=True)
shutil.copy2(auxproducts_path, tmp_path / "products")
restore = RestoreProduct(tmp_path, TEST_RESTORE_METADATA)
# OK now let's make sure that we're delivering properly
destination = FakeDestination()
restore.deliver_to(destination)
deliver_rel_path_root = f"{TEST_RESTORE_METADATA.project_code}/{TEST_RESTORE_METADATA.pipeline_spec}"
# Just check the file counts in each dir since they contain a lot of files
actual_files = list(pathlib.Path(f) for f in destination.files_added.keys())
files_to_is_file = {pathlib.Path(key): value.is_file() for key, value in destination.files_added.items()}
expected_files, expected_dirs_to_fcs = get_expected_files_and_dirs_for_restore(
deliver_rel_path_root, do_include_flagtemplate=True
)
assert_restore_delivered_only_expected_files(actual_files, expected_files, expected_dirs_to_fcs, files_to_is_file)
def test_restore_product_finder_parse_metadata(restore_directory: pathlib.Path):
expected_metadata = TEST_RESTORE_METADATA
finder = RestoreProductFinder(restore_directory)
File added