# Source code for crikit.io.lazy5.tests.test_inspect

""" Test inspection of HDF5 files """
import os
import time

import h5py
import pytest

import numpy as np
from numpy.testing import assert_array_almost_equal

from crikit.io.lazy5.inspect import (get_groups, get_datasets, get_hierarchy,
                                     get_attrs_dset, valid_dsets, valid_file)

from crikit.io.lazy5.utils import hdf_is_open

@pytest.fixture(scope="module")
def hdf_dataset():
    """ Sets up and tears down a sample HDF5 file

    The file 'temp_test.h5' is created in the current working directory and
    is shared by every test in this module (scope="module").

    Contents written during set-up:

    * dataset 'base' at the root, carrying string, bytes, int, float and
      NumPy-array attributes
    * groups 'Group1', 'Group2/Group3' and 'Group4/Group5/Group6'
    * datasets 'Group1/ingroup1_1', 'Group1/ingroup1_2', 'Group2/ingroup2',
      'Group2/Group3/ingroup3' and 'Group4/Group5/Group6/ingroup6'

    Every dataset holds the same random (20, 22, 24) array.

    Yields
    ------
    tuple
        (filename, fid): the file name and the h5py.File object opened in
        write mode.
    """
    filename = 'temp_test.h5'
    fid = h5py.File(filename, 'w')

    # try/finally guarantees the tear-down below also runs if set-up raises
    # part-way through, so the file is never left open or on disk.
    try:
        data_m, data_n, data_p = [20, 22, 24]
        data = np.random.randn(data_m, data_n, data_p)

        fid.create_dataset('base', data=data)

        grp1 = fid.create_group('Group1')
        grp3 = fid.create_group('Group2/Group3')
        grp6 = fid.create_group('Group4/Group5/Group6')

        grp1.create_dataset('ingroup1_1', data=data)
        grp1.create_dataset('ingroup1_2', data=data)
        fid.create_dataset('Group2/ingroup2', data=data)
        grp3.create_dataset('ingroup3', data=data)
        grp6.create_dataset('ingroup6', data=data)

        # Attributes of 'base': string-like values are assigned by item,
        # numeric values go through attrs.create (as in the original set-up).
        base_attrs = fid['base'].attrs
        base_attrs['Attribute_str'] = 'Test'
        base_attrs['Attribute_bytes'] = b'Test'
        base_attrs['Attribute_np_bytes'] = np.bytes_('Test')  # pylint: disable=no-member
        base_attrs['Attribute_np_bytes_inarray'] = np.array(b'Test')  # pylint: disable=no-member
        base_attrs['Attribute_np_bytes_inarray2'] = np.array([b'Test'])  # pylint: disable=no-member
        base_attrs.create('Attribute_int', 1)
        base_attrs.create('Attribute_float', 1.1)
        base_attrs.create('Attribute_np_1d', np.array([1, 2, 3]))
        base_attrs.create('Attribute_np_2d', np.array([[1, 2, 3], [4, 5, 6]]))
        base_attrs.create('Attribute_np_sgl_int', np.array([1]))
        base_attrs.create('Attribute_np_sgl_float', np.array([1.0]))
        base_attrs.create('Attribute_np_array_float', np.array([1.0, 2.0]))
        base_attrs.create('Attribute_np_sgl_complex', np.array([1.0 + 1j]))

        yield filename, fid
    finally:
        # Tear-down
        if hdf_is_open(fid):
            fid.close()

        # NOTE(review): presumably the pause gives the OS time to release the
        # file handle before deletion -- confirm it is still needed.
        time.sleep(1)
        try:
            os.remove(filename)
        except OSError:
            # Best-effort clean-up: report the problem, do not fail the run.
            print('Could not delete {}'.format(filename))
def test_valid_file(hdf_dataset):  # pylint:disable=redefined-outer-name
    """ valid_file accepts the fixture file and rejects a missing one """

    # Only the filename half of the (filename, fid) fixture tuple is used
    existing_file = hdf_dataset[0]
    missing_file = 'NOT_A_REAL_FILE.XYZ'

    assert valid_file(existing_file, verbose=True)
    assert not valid_file(missing_file, verbose=True)
def test_valid_dsets(hdf_dataset):  # pylint:disable=redefined-outer-name
    """ valid_dsets on single names, lists of names and wrong-typed input """

    filename, fid = hdf_dataset

    # (file or fid, dataset name(s), expected truthiness) -- checked in order
    cases = (
        # NOT valid filename
        ('NOTAFILE.XYZ', 'base', False),
        # Single dataset
        (filename, 'base', True),
        # Single dataset by fid
        (fid, 'base', True),
        # Single dataset is WRONG
        (filename, 'NOTADSET', False),
        # Single dataset in list
        (filename, ['base'], True),
        # Datasets in list -- NOTE some have leading slashes, some don't
        (filename,
         ['base', '/Group1/ingroup1_1', '/Group1/ingroup1_2', 'Group2/ingroup2'],
         True),
        # Datasets in list -- 1 IS WRONG
        (filename,
         ['base', '/Group1/ingroup1_1', '/Group1/ingroup1_2', 'Group2/DOESNOTEXIST'],
         False),
    )

    for h5_source, dsets, should_be_valid in cases:
        outcome = valid_dsets(h5_source, dsets, verbose=True)
        if should_be_valid:
            assert outcome
        else:
            assert not outcome

    # Wrong-typed dataset names (bare int, then an int inside a list)
    for bad_dsets in (1, ['base', 1]):
        with pytest.raises(TypeError):
            valid_dsets(filename, bad_dsets, verbose=True)
def test_get_groups(hdf_dataset):  # pylint:disable=redefined-outer-name
    """ get_groups lists every group, root included """

    filename, fid = hdf_dataset

    expected_groups = {'/', '/Group1', '/Group2', '/Group2/Group3',
                       '/Group4', '/Group4/Group5', '/Group4/Group5/Group6'}

    # Passing fid first, then filename
    for h5_source in (fid, filename):
        found_groups = get_groups(h5_source)
        assert set(found_groups) == expected_groups
def test_get_datasets_fullpath(hdf_dataset):  # pylint:disable=redefined-outer-name
    """ get_datasets with fulldsetpath=True: names carry their group path """

    filename, fid = hdf_dataset

    expected_dsets = {'/base',
                      '/Group1/ingroup1_1',
                      '/Group1/ingroup1_2',
                      '/Group2/ingroup2',
                      '/Group2/Group3/ingroup3',
                      '/Group4/Group5/Group6/ingroup6'}

    # Passing fid first, then filename
    for h5_source in (fid, filename):
        found_dsets = get_datasets(h5_source, fulldsetpath=True)
        assert set(found_dsets) == expected_dsets
def test_get_datasets_nopath(hdf_dataset):  # pylint:disable=redefined-outer-name
    """ get_datasets with fulldsetpath=False: bare dataset names only """

    filename, fid = hdf_dataset

    expected_dsets = {'base', 'ingroup1_1', 'ingroup1_2',
                      'ingroup2', 'ingroup3', 'ingroup6'}

    # Passing fid first, then filename
    for h5_source in (fid, filename):
        found_dsets = get_datasets(h5_source, fulldsetpath=False)
        assert set(found_dsets) == expected_dsets
def test_get_hierarchy_fullpath(hdf_dataset):  # pylint:disable=redefined-outer-name
    """
    get_hierarchy with full paths in the dataset names.

    The result maps each group (key) to its datasets (value); with
    grp_w_dset=False, groups that hold no dataset map to an empty list.
    """

    filename, fid = hdf_dataset

    expected_hierarchy = {
        '/': ['base'],
        '/Group1': ['/Group1/ingroup1_1', '/Group1/ingroup1_2'],
        '/Group2': ['/Group2/ingroup2'],
        '/Group2/Group3': ['/Group2/Group3/ingroup3'],
        '/Group4': [],
        '/Group4/Group5': [],
        '/Group4/Group5/Group6': ['/Group4/Group5/Group6/ingroup6'],
    }

    # Passing fid first, then filename
    for h5_source in (fid, filename):
        found = get_hierarchy(h5_source, fulldsetpath=True, grp_w_dset=False)
        assert found == expected_hierarchy
def test_get_hierarchy_grp_w_dset(hdf_dataset):  # pylint:disable=redefined-outer-name
    """
    get_hierarchy with grp_w_dset=True: dataset-less groups are left out.

    The result maps each group (key) to its datasets (value); '/Group4' and
    '/Group4/Group5' hold no dataset directly and must not appear.
    """

    filename, fid = hdf_dataset

    expected_hierarchy = {
        '/': ['base'],
        '/Group1': ['/Group1/ingroup1_1', '/Group1/ingroup1_2'],
        '/Group2': ['/Group2/ingroup2'],
        '/Group2/Group3': ['/Group2/Group3/ingroup3'],
        '/Group4/Group5/Group6': ['/Group4/Group5/Group6/ingroup6'],
    }

    # Passing fid first, then filename
    for h5_source in (fid, filename):
        found = get_hierarchy(h5_source, fulldsetpath=True, grp_w_dset=True)
        assert found == expected_hierarchy
def test_get_hierarchy_nopath(hdf_dataset):  # pylint:disable=redefined-outer-name
    """
    get_hierarchy with bare dataset names (fulldsetpath=False).

    The result maps each group (key) to its datasets (value); with
    grp_w_dset=False, groups that hold no dataset map to an empty list.
    """

    filename, fid = hdf_dataset

    expected_hierarchy = {
        '/': ['base'],
        '/Group1': ['ingroup1_1', 'ingroup1_2'],
        '/Group2': ['ingroup2'],
        '/Group2/Group3': ['ingroup3'],
        '/Group4': [],
        '/Group4/Group5': [],
        '/Group4/Group5/Group6': ['ingroup6'],
    }

    # Passing fid first, then filename
    for h5_source in (fid, filename):
        found = get_hierarchy(h5_source, fulldsetpath=False, grp_w_dset=False)
        assert found == expected_hierarchy
def test_get_dset_attrs(hdf_dataset):  # pylint:disable=redefined-outer-name
    """
    Get the attributes of dataset 'base' with get_attrs_dset.

    Checked for an open file object and for a filename, with convert_to_str
    off and on, and (filename only) with convert_sgl_np_to_num on and off.
    """

    filename, fid = hdf_dataset

    def check_numeric_attrs(attrs):
        """ Numeric-attribute checks shared by every convert_to_str case """
        assert attrs['Attribute_int'] == 1
        assert attrs['Attribute_float'] == 1.1
        assert np.allclose(attrs['Attribute_np_1d'], np.array([1, 2, 3]))
        assert np.allclose(attrs['Attribute_np_2d'], np.array([[1, 2, 3], [4, 5, 6]]))
        assert attrs['Attribute_np_sgl_int'] == np.array([1])
        assert attrs['Attribute_np_sgl_float'] == np.array([1.0])
        assert attrs['Attribute_np_sgl_complex'] == np.array([1.0 + 1j])
        assert_array_almost_equal(attrs['Attribute_np_array_float'],
                                  np.array([1.0, 2.0]))

    # Passing fid first, then filename
    for h5_source in (fid, filename):
        # DO NOT CONVERT-to-STR
        raw_attrs = get_attrs_dset(h5_source, 'base', convert_to_str=False)
        assert raw_attrs['Attribute_str'] == 'Test'
        # NOTE: 'Attribute_bytes' is deliberately not compared with b'Test'
        # here; it looks like HDF5 may have changed how plain byte strings
        # are dealt with.
        assert raw_attrs['Attribute_np_bytes'] == b'Test'
        assert np.allclose(raw_attrs['Attribute_np_bytes_inarray'] == b'Test', True)
        assert np.allclose(raw_attrs['Attribute_np_bytes_inarray2'] == b'Test', True)
        check_numeric_attrs(raw_attrs)

        # DO CONVERT-to-STR
        str_attrs = get_attrs_dset(h5_source, 'base', convert_to_str=True)
        assert str_attrs['Attribute_str'] == 'Test'
        assert str_attrs['Attribute_bytes'] == 'Test'
        assert str_attrs['Attribute_np_bytes'] == 'Test'
        assert np.allclose(str_attrs['Attribute_np_bytes_inarray'] == 'Test', True)
        assert np.allclose(str_attrs['Attribute_np_bytes_inarray2'] == 'Test', True)
        check_numeric_attrs(str_attrs)

    # (attribute name, expected value, built-in numeric type)
    single_value_cases = (
        ('Attribute_np_sgl_int', 1, int),
        ('Attribute_np_sgl_float', 1.0, float),
        ('Attribute_np_sgl_complex', 1.0 + 1j, complex),
    )

    # CONVERT Single Numpy Value to Numeric
    converted = get_attrs_dset(filename, 'base', convert_sgl_np_to_num=True)
    for attr_name, expected, py_type in single_value_cases:
        assert converted[attr_name] == expected
        assert isinstance(converted[attr_name], py_type)
        assert not isinstance(converted[attr_name], np.ndarray)
    # A multi-element array must stay an array
    assert_array_almost_equal(converted['Attribute_np_array_float'],
                              np.array([1.0, 2.0]))
    assert isinstance(converted['Attribute_np_array_float'], np.ndarray)

    # DO NOT CONVERT Single Numpy Value to Numeric
    unconverted = get_attrs_dset(filename, 'base', convert_sgl_np_to_num=False)
    for attr_name, expected, py_type in single_value_cases:
        assert unconverted[attr_name] == expected
        assert not isinstance(unconverted[attr_name], py_type)
        assert isinstance(unconverted[attr_name], np.ndarray)
    assert_array_almost_equal(unconverted['Attribute_np_array_float'],
                              np.array([1.0, 2.0]))
    assert isinstance(unconverted['Attribute_np_array_float'], np.ndarray)