Skip to content
Snippets Groups Projects
Commit 43f60251 authored by Janet Goldstein's avatar Janet Goldstein Committed by Andrew Kapuscinski
Browse files

SSA-6383: Refactoring suggested by documenting this test framework

parent cf58eba7
No related branches found
No related tags found
No related merge requests found
{
"files": [
{
"ngas_file_id": "uid____ALMA_continuum_image_4d1b66da-84b2-41fa-8a30-5d3d341246ec.fits",
"subdirectory": "uid___A001_X1465_X201e.Per_18-21_sci.spw25_27_29_31.cont.I.tt0.pbcor.fits",
"relative_path": "uid___A001_X1465_X201e.Per_18-21_sci.spw25_27_29_31.cont.I.tt0.pbcor.fits",
"checksum": "534126887",
"checksum_type": "ngamsGenCrc32",
"version": 1,
"size": 161280,
"server": {
"server": "cvngas01.cv.nrao.edu:7777",
"location": "NAASC",
"cluster": "DSOC"
}
}
],
"aggregate_size": 161280
}
{
"files": [
{
"ngas_file_id": "uid____ALMA_continuum_image_71595054-a06a-4b15-af78-e458557a7597.fits",
"subdirectory": "uid___A001_X1465_X201e.Per_18-21_sci.spw25_27_29_31.cont.I.tt1.pbcor.fits",
"relative_path": "uid___A001_X1465_X201e.Per_18-21_sci.spw25_27_29_31.cont.I.tt1.pbcor.fits",
"checksum": "-840283436",
"checksum_type": "ngamsGenCrc32",
"version": 1,
"size": 161280,
"server": {
"server": "cvngas01.cv.nrao.edu:7777",
"location": "NAASC",
"cluster": "DSOC"
}
}
],
"aggregate_size": 161280
}
{
"files": [
{
"ngas_file_id": "uid____ALMA_image_cube_daf0d76e-dc93-4193-ad29-6dc53692ba41.fits",
"subdirectory": "uid___A001_X1465_X201e.Per_18-21_sci.spw25.cube.I.pbcor.fits",
"relative_path": "uid___A001_X1465_X201e.Per_18-21_sci.spw25.cube.I.pbcor.fits",
"checksum": "-770206062",
"checksum_type": "ngamsGenCrc32",
"version": 1,
"size": 112907520,
"server": {
"server": "cvngas01.cv.nrao.edu:7777",
"location": "NAASC",
"cluster": "DSOC"
}
}
],
"aggregate_size": 112907520
}
......@@ -11,7 +11,14 @@ from ...utilities import get_file_info_from_json_file
EXEC_BLOCK = 'execblock'
CALIBRATED = 'CALIBRATED'
_WEBLOG = 'weblog.tgz'
WEBLOG = 'weblog.tgz'
PIPELINE_MANIFEST = 'unknown.download_alma_ms.pipeline_manifest.xml'
PIPELINE_AQUAREPORT = 'pipeline_aquareport.xml'
CASA_PIPESCRIPT = 'casa_pipescript.py'
AUX_PRODUCTS = 'unknown.download_alma_ms.auxproducts.tgz'
MS_PP_REQUEST = 'unknown.download_alma_ms.pprequest.xml'
# pylint: disable=R0201, R0801, R0902, R0903
class AlmaPublicProductSet:
......@@ -21,9 +28,10 @@ class AlmaPublicProductSet:
self.project = self.build_project()
self.exec_blocks = self.build_exec_blocks()
self.science_products = self.build_science_products()
self.file_info = get_file_info_from_json_file('alma_A001_X1296_Xa93_rawdata.json')
self.measurement_set = self.build_basic_ms()
self.restored_ms = self.build_restored_ms()
self.file_info = get_file_info_from_json_file('A001_X1296_Xa93_RAW.json')
self.rawdata_total_size = self._compute_rawdata_size()
self.measurement_set, self.basic_ms_size = self.build_basic_ms()
self.restored_ms, self.rest_ms_size = self.build_restored_ms()
def build_project(self) -> Project:
""" Returns project metadata for 2017.1.00297.S,
......@@ -157,53 +165,59 @@ class AlmaPublicProductSet:
),
]
def build_basic_ms(self) -> dict:
def build_basic_ms(self) -> tuple:
""" Returns basic measurement set metadata """
return {
ms_info = {
'uid___A002_Xcd8029_Xa892': {
'uid___A002_Xcd8029_Xa892.400887655.tar.gz': 504369560,
'unknown.download_alma_ms.pipeline_manifest.xml': 804,
'casa_pipescript.py': 680,
PIPELINE_MANIFEST: 804,
CASA_PIPESCRIPT: 680,
PPR_FILENAME: 4584,
_WEBLOG: 1592335,
'unknown.download_alma_ms.pprequest.xml': 4584,
'unknown.download_alma_ms.auxproducts.tgz': 839,
WEBLOG: 1592335,
MS_PP_REQUEST: 4584,
AUX_PRODUCTS: 839,
CASA_LOG: 1729,
'pipeline_aquareport.xml': 3472,
PIPELINE_AQUAREPORT: 3472,
'uid___A002_Xcd8029_Xa892.ms.tgz': 519234359,
},
'uid___A002_Xcd8029_Xb0a4': {
'uid___A002_Xcd8029_Xb0a4.400887655.tar.gz': 503501127,
'unknown.download_alma_ms.pipeline_manifest.xml': 804,
PIPELINE_MANIFEST: 804,
CASA_LOG: 1729,
_WEBLOG: 1576475,
'casa_pipescript.py': 680,
'pipeline_aquareport.xml': 3465,
WEBLOG: 1576475,
CASA_PIPESCRIPT: 680,
PIPELINE_AQUAREPORT: 3465,
'uid___A002_Xcd8029_Xb0a4.ms.tgz': 518582184,
'unknown.download_alma_ms.auxproducts.tgz': 844,
AUX_PRODUCTS: 844,
PPR_FILENAME: 4584,
'unknown.download_alma_ms.pprequest.xml': 4584,
MS_PP_REQUEST: 4584,
},
'uid___A002_Xcd8029_Xfdd': {
'uid___A002_Xcd8029_Xfdd.400887655.tar.gz': 489883578,
'unknown.download_alma_ms.pipeline_manifest.xml': 802,
'pipeline_aquareport.xml': 3460,
PIPELINE_MANIFEST: 802,
PIPELINE_AQUAREPORT: 3460,
PPR_FILENAME: 4582,
'unknown.download_alma_ms.pprequest.xml': 4582,
_WEBLOG: 1562579,
MS_PP_REQUEST: 4582,
WEBLOG: 1562579,
'uid___A002_Xcd8029_Xfdd.ms.tgz': 504128830,
CASA_LOG: 1723,
'unknown.download_alma_ms.auxproducts.tgz': 836,
'casa_pipescript.py': 679,
AUX_PRODUCTS: 836,
CASA_PIPESCRIPT: 679,
}
}
total_size = 0
for mous in ms_info.keys():
for _, size in ms_info[mous].items():
total_size += size
return ms_info, total_size
def build_restored_ms(self):
""" Returns restored measurement set metadata """
return {
ms_info = {
'uid___A002_Xcd8029_Xb0a4': {
'uid___A002_Xcd8029_Xb0a4.ms.tgz': 1325422495,
'uid___A002_Xcd8029_Xa892.ms.tgz': 1331336708,
......@@ -212,35 +226,69 @@ class AlmaPublicProductSet:
},
}
total_size = 0
for _, size in ms_info['uid___A002_Xcd8029_Xb0a4'].items():
total_size += size
return ms_info, total_size
def _compute_rawdata_size(self):
    """ Return the combined size of every file listed in self.file_info.

    self.file_info is read as a two-level mapping: each top-level value is
    itself a mapping, and the values of those inner mappings are the sizes
    that get added together. An empty mapping yields 0.

    :return: sum of all inner-mapping values
    """
    # Only the values matter at both levels, so walk the values directly
    # instead of looking each key up again or unpacking (name, size) pairs.
    return sum(
        size
        for metadata in self.file_info.values()
        for size in metadata.values()
    )
class AlmaImageProduct:
""" proprietary project with image data """
class AlmaProprietaryImageProductSet:
""" proprietary project 2019.1.00914.S, with image data """
def __init__(self):
self.project_code = '2019.1.00914.S'
self.file_info = self.get_image_file_info()
self.file_info, self.total_size = self._build_file_info()
def get_image_file_info(self) -> list:
""" there's just the one image file """
def _build_file_info(self) -> tuple:
"""
TODO
:return:
"""
return [
{'filename':
'uid___A001_X1465_X201e.Per_18-21_sci.spw25.cube.I.pbcor.fits/uid___A001_X1465_X201e.Per_18-21_sci.spw25.cube.I.pbcor.fits',
'size': 112907520
}
img_files_metadata = [
get_file_info_from_json_file('ALMA_CONT_IMG_4d1b66da.json'),
get_file_info_from_json_file('ALMA_CONT_IMG_71595054.json'),
get_file_info_from_json_file('ALMA_IMG_CUBE.json'),
]
file_info = {}
total_size = 0
for metadata in img_files_metadata:
files = metadata['files']
for file in files:
file_info[file['ngas_file_id']] = file['size']
total_size += file['size']
return file_info, total_size
class AlmaAudiProductSet:
""" AUDI products for project 2019.1.00914.S """
def __init__(self):
self.science_products = self.build_science_products()
self.file_info, self.total_size = get_file_info_from_json_file('alma_A001_X1296_Xa93_rawdata')
self.file_info = get_file_info_from_json_file(
'A001_X1296_Xa93_RAW.json')
self.total_size = self._compute_total_size()
def _compute_total_size(self):
    """ Add up the sizes recorded in self.file_info.

    self.file_info is expected to map each file type to a mapping whose
    values are sizes; every one of those values contributes to the total.

    :return: the grand total, or 0 when there is nothing to add
    """
    grand_total = 0
    # Keys at either level are irrelevant to the total; only sizes are used.
    for sizes_by_name in self.file_info.values():
        grand_total += sum(sizes_by_name.values())
    return grand_total
def build_science_products(self) -> list:
""" Returns 2019.1.00914.S science product in list """
return [
ScienceProduct(external_name='uid___A002_Xe5aacf_Xdde3',
filegroup_id=398280,
......
......@@ -382,7 +382,7 @@ class CalibrationProduct:
class VlbaProductSet:
""" Encapsulates a set of products for a VLBA project. """
""" Encapsulates a set of products for VLBA project BT142. """
def __init__(self):
self.project = self.build_project()
......
......@@ -6,10 +6,11 @@
import pytest
from .test_data.products.expected_values_alma import AlmaPublicProductSet, \
AlmaImageProduct
AlmaProprietaryImageProductSet, AlmaAudiProductSet
from .test_data.products.expected_values_evla import VlaProductSet, \
VlassImageProductSet, VlbaProductSet, CalibrationProduct, PPR_FILENAME
from .utilities import get_locations_report, Deliverable, DeliverableProduct
VlbaProductSet, CalibrationProduct, PPR_FILENAME, VlassImageProductSet
from .test_data.products.expected_values_gbt import GbtProductSet
from .utilities import Deliverable, DeliverableProduct
MS_TGZ = '.ms.tgz'
WEBLOG = 'weblog.tgz'
......@@ -31,12 +32,12 @@ def test_gets_evla_eb_products():
assert len(product_set.science_products) == len(exec_blocks)
# TODO Some Fine Day: refactor to reduce cognitive complexity
def test_gets_evla_ms():
"""
Check our basic measurement set test data
:return:
"""
product_set = VlaProductSet()
basic_ms = product_set.measurement_set
expected_file_count = 9
......@@ -69,7 +70,6 @@ def test_gets_evla_ms():
assert actual_file_count == expected_file_count
# TODO Some Fine Day: refactor to reduce cognitive complexity
def test_gets_evla_cms():
"""
Check our calibrated measurement set ('restore') test data
......@@ -152,7 +152,6 @@ def test_gets_calibration():
assert product.total_size == 27668480
# TODO Some Fine Day: refactor to reduce cognitive complexity
def test_gets_alma_raw_data():
""" Grab ALMA SDM+BDF test data and confirm we got what we expect """
......@@ -161,6 +160,7 @@ def test_gets_alma_raw_data():
file_info = product_set.file_info
assert isinstance(file_info, dict)
assert len(file_info) == 3
assert product_set.rawdata_total_size == 1368988450
file_sizes = dict()
delivered_products = list()
......@@ -208,7 +208,6 @@ def test_gets_alma_raw_data():
f'{delivery.type}')
# TODO Some Fine Day: refactor to reduce cognitive complexity
def test_gets_alma_basic_ms():
"""
Analyze our ALMA basic MS test data and ensure the values are as we expect
......@@ -265,6 +264,7 @@ def test_gets_alma_restored_ms():
restored_ms = product_set.restored_ms
assert isinstance(restored_ms, dict)
assert len(restored_ms) == 1
assert product_set.rest_ms_size == 3947540021
for name in restored_ms.keys():
assert name == 'uid___A002_Xcd8029_Xb0a4'
......@@ -276,115 +276,45 @@ def test_gets_alma_restored_ms():
assert values[PPR_FILENAME] == 5158
def test_gets_alma_image():
def test_gets_alma_images():
""" Expected download results for a proprietary ALMA project
with image data
"""
product = AlmaImageProduct()
delivered = product.file_info
assert len(delivered) == 1
file_info = delivered[0]
assert 'uid___A001_X1465_X201e' in file_info['filename']
assert file_info['filename'].endswith(
'Per_18-21_sci.spw25.cube.I.pbcor.fits')
assert file_info['size'] == 112907520
def test_gets_vla_report_files():
    """ Make sure we're getting the VLA test data values we expect
    for the two '17a-109_fg_*' locations reports of interest.

    Each report must be a dict holding exactly two entries, 'files' and
    'aggregate_size', with the expected file count and aggregate size.
    """
    # (report basename, expected number of files, expected aggregate size)
    expected_reports = (
        ('17a-109_fg_18468', 79, 45572444823),
        ('17a-109_fg_41979', 70, 50076899992),
    )

    for basename, expected_file_count, expected_agg_size in expected_reports:
        locations_dict = get_locations_report(basename)
        assert locations_dict is not None
        assert isinstance(locations_dict, dict)
        assert len(locations_dict) == 2
        assert 'files' in locations_dict.keys()
        assert 'aggregate_size' in locations_dict.keys()

        assert len(locations_dict['files']) == expected_file_count
        assert locations_dict['aggregate_size'] == expected_agg_size
def test_gets_vlba_report_file():
    """ Confirm the 'vlba_eb' locations report holds the values we expect """
    report = get_locations_report('vlba_eb')

    # Shape of the report: a dict with exactly the two expected entries.
    assert report is not None
    assert isinstance(report, dict)
    assert len(report) == 2
    for required_key in ('files', 'aggregate_size'):
        assert required_key in report.keys()

    # Contents of the report.
    file_count = len(report['files'])
    assert file_count == 16
    assert report['aggregate_size'] == 2140560000
def test_gets_img_report_file():
""" make sure we're getting the image test data values we expect """
locations_dict = get_locations_report('img')
assert locations_dict is not None
assert isinstance(locations_dict, dict)
assert len(locations_dict) == 2
assert 'files' in locations_dict.keys()
assert 'aggregate_size' in locations_dict.keys()
files = locations_dict['files']
assert len(files) == 2
agg_size = locations_dict['aggregate_size']
assert agg_size == 110851200
product = AlmaProprietaryImageProductSet()
file_info = product.file_info
assert len(file_info) == 3
total_size = 0
for filename, size in file_info.items():
total_size += size
if '_71595054' in filename or '_4d1b66da' in filename:
assert size == 161280
else:
assert '_image_cube_daf0d76e' in filename
assert size == 112907520
assert total_size == product.total_size
def test_gets_cal_report_file():
""" make sure we're getting the calibration test data values we expect """
locations_dict = get_locations_report('calibration')
assert locations_dict is not None
assert isinstance(locations_dict, dict)
def test_gets_audi_product():
"""
Make sure we're getting the product and files we expect for our
AlmaAudiProductSet
assert len(locations_dict) == 2
assert 'files' in locations_dict.keys()
assert 'aggregate_size' in locations_dict.keys()
:return:
"""
files = locations_dict['files']
assert len(files) == 1
agg_size = locations_dict['aggregate_size']
assert agg_size == 27668480
product = AlmaAudiProductSet()
assert product.total_size == 1368988450
file_info = product.file_info
assert len(file_info) == 3
for file_type in ['SDM', 'BDF', 'TAR']:
assert file_type in file_info.keys()
def test_gets_gbt_product():
""" make sure we're getting the GBT test data values we expect """
locations_dict = get_locations_report('AGBT17B_044_02')
assert locations_dict is not None
assert isinstance(locations_dict, dict)
assert len(locations_dict) == 2
assert 'files' in locations_dict.keys()
assert 'aggregate_size' in locations_dict.keys()
files = locations_dict['files']
assert len(files) == 4
agg_size = locations_dict['aggregate_size']
assert agg_size == 118571622400
product_set = GbtProductSet()
assert product_set.project.project_code == 'AGBT17B_044'
assert product_set.total_size == 948572979200
assert len(product_set.file_info) == 4
......@@ -40,12 +40,20 @@ def get_exec_block_details_from_loc_report(prefix: str, exec_blocks: list):
total_size = 0
for exec_block in exec_blocks:
basename = prefix + str(exec_block.filegroup_id)
locations_report = get_locations_report(basename)
locations_report = None
try:
locations_report = get_locations_report(basename)
except FileNotFoundError:
# special case: GBT product
if basename.startswith('AGBT17B_044'):
locations_report = get_locations_report('AGBT17B_044_02')
total_size += locations_report['aggregate_size']
for file_spec in locations_report['files']:
filename = file_spec['ngas_file_id']
size = file_spec['size']
file_info[filename] = size
return file_info, total_size
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment