import os
import pickle
import numpy as np
from hdf5storage import loadmat, savemat
import h5py
from naplib import logger
from ..data import Data
[docs]
def import_data(filepath, strict=True, useloadmat=True, varname='out'):
    '''
    Import Data object from MATLAB (.mat) format. This will
    automatically transpose the 'resp' and 'aud' fields
    so that they are shape (time, channels) for each trial. The
    MATLAB equivalent structure is a 1xN struct with N trials and
    some number of fields, and this is stored in the .mat file
    under the variable name given by ``varname`` ("out" by default).

    Parameters
    ----------
    filepath : string
        Path to .mat file.
    strict : bool, default=True
        If True, requires strict adherence to the following standards:
        1) Each trial must contain at least the following fields:
        ['name','sound','soundf','resp','dataf']
        2) Each trial must contain the exact same set of fields
    useloadmat : boolean, default=True
        If True, use hdf5storage.loadmat, else use custom h5py loader
    varname : string, default='out'
        Name of the variable containing the out structure to load.
        Honored by both the loadmat and the h5py loader.

    Returns
    -------
    data : naplib.Data object

    Raises
    ------
    ValueError
        If ``strict`` is True and one of the required fields is missing
        from the loaded structure.

    Notes
    -----
    Given the highly-specific nature of the Data object Matlab format, this
    function is mostly used internally by Neural Acoustic Processing
    Lab members.
    '''
    req = ['name','sound','soundf','resp','dataf']
    data = []
    if useloadmat:
        loaded = loadmat(filepath)[varname]
        if loaded.ndim > 1:
            loaded = loaded.squeeze(0)
            fieldnames = loaded[0].dtype.names
        else:
            fieldnames = loaded.squeeze().dtype.names
            # a single struct, rather than struct array,
            # was saved originally, so make it an 'array' list
            loaded = [loaded.squeeze().item()]
        for tt, trial in enumerate(loaded):
            trial_dict = {}
            for f, t in zip(fieldnames, trial):
                logger.debug(f'Loading trial #{tt}: {f}')
                tmp_t = t.squeeze()
                if f in ('resp', 'aud') and tmp_t.ndim > 1:
                    # only switch the first 2 dimensions if there are more than 2
                    tmp_t = tmp_t.transpose(1, 0, *range(2, tmp_t.ndim))
                # Unwrap single-element arrays into plain Python scalars;
                # ndarray.item() raises ValueError for any other size, in
                # which case the array is kept as-is.
                try:
                    tmp_t = tmp_t.item()
                except ValueError:
                    pass
                trial_dict[f] = tmp_t
            data.append(trial_dict)
    else:
        # Open read-only and close the handle deterministically; everything
        # read below is copied into memory before the file is closed.
        with h5py.File(filepath, 'r') as h5file:
            struct = h5file[varname]
            fieldnames = list(struct.keys())
            n_trial = struct[fieldnames[0]].shape[0]
            for trial in range(n_trial):
                trial_dict = {}
                for fld in fieldnames:
                    logger.debug(f'Loading trial #{trial}: {fld}')
                    # Each struct entry is stored as an object reference that
                    # has to be dereferenced through the file handle.
                    tmp = np.array(h5file[struct[fld][trial][0]])
                    # Pull out scalars
                    if np.prod(tmp.shape) == 1:
                        tmp = tmp[0,0]
                    else:
                        try:
                            # Try to interpret the entry as a character array
                            tmp = ''.join([chr(c[0]) for c in tmp])
                        except Exception:
                            # Not a string. Read cell arrays within entries
                            if isinstance(tmp[0,0], h5py.h5r.Reference):
                                shp = tmp.shape
                                tmp_flat = np.ravel(tmp)
                                for tt in range(len(tmp_flat)):
                                    cell = h5file[tmp_flat[tt]][:]
                                    # Handle cell arrays containing strings
                                    try:
                                        tmp_flat[tt] = ''.join([chr(c[0]) for c in cell])
                                    except Exception:
                                        tmp_flat[tt] = cell
                                tmp = np.reshape(tmp_flat, shp)
                            # Remove lists with single item
                            try:
                                while len(tmp) == 1:
                                    tmp = tmp[0]
                            except Exception:
                                pass
                            tmp = np.squeeze(tmp)
                    trial_dict[fld] = tmp
                data.append(trial_dict)
    if strict:
        for r in req:
            if r not in fieldnames:
                raise ValueError(f'Missing required field: {r}')
    out = Data(data=data, strict=strict)
    return out
def _matlab_valid_fieldnames(fields):
'''
Convert fieldnames so they are matlab struct compliant (e.g. no spaces, hyphens)
'''
new_fields = []
for field in fields:
tmp = field.replace(' ', '_')
tmp = tmp.replace('-', '_')
new_fields.append(tmp)
return new_fields
[docs]
def export_data(filepath, data, fmt='7.3'):
    '''
    Export a naplib.Data instance to the MATLAB-compatible
    equivalent (.mat file).

    The MATLAB equivalent structure is a 1xN struct with N trials and
    some number of fields, and this is stored in the .mat file
    under the variable name "out". This function will
    automatically transpose the 'resp' and 'aud' fields for
    each trial in the .mat file, thus undoing the actions of
    import_data.

    Parameters
    ----------
    filepath : string or path-like
        Filename or path-like specifying where to save the file.
    data : Data instance
        Data to export.
    fmt : str, default='7.3'
        MATLAB file format. Options are {'7.3','7','6'}

    Raises
    ------
    ValueError
        If ``fmt`` is not one of the supported format strings.
    TypeError
        If ``data`` is not a naplib.Data instance.
    '''
    # Normalize path-like objects (e.g. pathlib.Path) to a plain string so
    # the extension check below works for them as well.
    filepath = os.fspath(filepath)
    if not filepath.endswith('.mat'):
        logger.warning('The filepath does not end with ".mat". Saving anyway. However, the .mat extension may be needed to open the file in MATLAB.')
    FORMAT_OPTIONS = ['7.3','7','6']
    if fmt not in FORMAT_OPTIONS:
        raise ValueError(f"format must be one of ['7.3','7','6'] but got {fmt}")
    if not isinstance(data, Data):
        raise TypeError(f'data must be a naplib.Data instance but got {type(data)}')
    fieldnames = data.fields
    # Spaces and hyphens are replaced in the names written to the file;
    # lookups into each trial still use the original field names.
    matlab_fieldnames = _matlab_valid_fieldnames(fieldnames)
    dt = np.dtype([(field, 'O') for field in matlab_fieldnames])
    # construct a numpy void array which contains multiple dtypes
    void_data = []
    for trial in data:
        trial_data = []
        for field in fieldnames:
            trial_tmp = trial[field]
            expand_dimension = 0
            if isinstance(trial_tmp, np.ndarray):
                # Arrays with 2+ dimensions are written as they are; 0-d and
                # 1-d arrays get an extra axis added below.
                expand_dims = trial_tmp.ndim <= 1
                if trial_tmp.ndim == 1:
                    expand_dimension = 1 # column vec for matlab
                if field in ('resp', 'aud') and trial_tmp.ndim > 1:
                    # Swap only the first two axes, leaving any others in place
                    trial_tmp = trial_tmp.transpose(1, 0, *range(2, trial_tmp.ndim))
            else:
                expand_dims = True
                # check for other object types
                if isinstance(trial_tmp, str):
                    trial_tmp = np.array(trial_tmp, dtype='str')
                elif isinstance(trial_tmp, list):
                    trial_tmp = np.array(trial_tmp)
                elif isinstance(trial_tmp, int):
                    # Python ints are stored as a one-element float array
                    trial_tmp = np.array(trial_tmp, dtype='float').reshape((1,))
                else:
                    trial_tmp = np.array(trial_tmp)
            if expand_dims:
                trial_tmp = np.expand_dims(trial_tmp, expand_dimension)
            trial_data.append(trial_tmp)
        void_data.append(tuple(trial_data))
    # Shape (1, N): one row holding one struct element per trial
    void_data = np.array(void_data, dtype=dt).reshape(1,-1)
    savemat(filepath, {'out': void_data}, appendmat=False, format=fmt)
[docs]
def load(filename):
    '''
    Read a pickled object back from disk.

    Parameters
    ----------
    filename : string
        File to read. When the base name contains no '.' at all
        (i.e. it has no extension), '.pkl' is appended before opening.

    Returns
    -------
    output : Object
        The unpickled object.

    Raises
    ------
    FileNotFoundError
        Can't find file.

    Examples
    --------
    >>> from naplib.io import save, load
    >>> arr = [1, 2, 3]
    >>> save('data.pkl', arr)
    >>> arr_loaded = load('data.pkl')
    >>> arr_loaded
    [1, 2, 3]
    '''
    has_dot = '.' in os.path.basename(filename)
    if not (filename.endswith('.pkl') or has_dot):
        filename += '.pkl'
    with open(filename, 'rb') as handle:
        return pickle.load(handle)
[docs]
def save(filename, obj, makedirs=False):
    '''
    Save object with pickle.

    Parameters
    ----------
    filename : string
        File to save to. When the base name contains no '.' at all
        (i.e. it has no extension), '.pkl' is appended automatically.
    obj : Object
        Data to save.
    makedirs : bool, default=False
        Whether to create parent directories if they do not exist.

    Examples
    --------
    >>> from naplib.io import save, load
    >>> arr = [1, 2, 3]
    >>> save('data.pkl', arr)
    >>> arr_loaded = load('data.pkl')
    >>> arr_loaded
    [1, 2, 3]
    '''
    if not filename.endswith('.pkl') and '.' not in os.path.basename(filename):
        filename = filename + '.pkl'
    if makedirs:
        parent = os.path.dirname(filename)
        # A bare file name has an empty dirname, and os.makedirs('') raises
        # FileNotFoundError; there is nothing to create in that case.
        if parent:
            os.makedirs(parent, exist_ok=True)
    with open(filename, 'wb') as f:
        pickle.dump(obj, f)