Source code for naplib.io.fileio

import os
import pickle
import numpy as np
from hdf5storage import loadmat, savemat
import h5py

from naplib import logger
from ..data import Data


[docs]
def import_data(filepath, strict=True, useloadmat=True, varname='out'):
    '''
    Import Data object from MATLAB (.mat) format. This will
    automatically transpose the 'resp' and 'aud' fields
    so that they are shape (time, channels) for each trial. The
    MATLAB equivalent structure is a 1xN struct with N trials and
    some number of fields, and this is stored in the .mat file
    under the variable name given by ``varname`` ("out" by default).

    Parameters
    ----------
    filepath : string
        Path to .mat file.
    strict : bool, default=True
        If True, requires strict adherence to the following standards:
        1) Each trial must contain at least the following fields:
        ['name','sound','soundf','resp','dataf']
        2) Each trial must contain the exact same set of fields
    useloadmat : boolean, default=True
        If True, use hdf5storage.loadmat, else use custom h5py loader
    varname : string, default='out'
        Name of the variable containing the out structure to load.
        It is honored by both the loadmat and the h5py loader.

    Returns
    -------
    data : naplib.Data object

    Raises
    ------
    ValueError
        If ``strict`` is True and one of the required fields is missing
        from the loaded structure.

    Notes
    -----
    Given the highly-specific nature of the Data object Matlab format, this
    function is mostly used internally by Neural Acoustic Processing
    Lab members.
    '''
    req = ['name','sound','soundf','resp','dataf']
    data = []
    if useloadmat:
        loaded = loadmat(filepath)[varname]
        if loaded.ndim > 1:
            loaded = loaded.squeeze(0)
            fieldnames = loaded[0].dtype.names
        else:
            fieldnames = loaded.squeeze().dtype.names
            # a single struct, rather than struct array,
            # was saved originally, so make it an 'array' list
            loaded = [loaded.squeeze().item()]

        for tt, trial in enumerate(loaded):
            trial_dict = {}
            for f, t in zip(fieldnames, trial):
                logger.debug(f'Loading trial #{tt}: {f}')
                tmp_t = t.squeeze()
                if f in ('resp', 'aud') and tmp_t.ndim > 1:
                    # only switch the first 2 dimensions if there are more than 2
                    tmp_t = tmp_t.transpose(1, 0, *range(2, tmp_t.ndim))
                try:
                    # unwrap single-element arrays into plain Python scalars
                    tmp_t = tmp_t.item()
                except ValueError:
                    # more than one element: keep it as an array
                    pass
                trial_dict[f] = tmp_t
            data.append(trial_dict)
    else:
        # Open read-only and close the file deterministically; every value
        # kept below is copied into memory before the handle is released.
        with h5py.File(filepath, 'r') as f:
            struct = f[varname]
            fieldnames = list(struct.keys())
            n_trial = struct[fieldnames[0]].shape[0]

            for trial in range(n_trial):
                trial_dict = {}
                for fld in fieldnames:
                    logger.debug(f'Loading trial #{trial}: {fld}')
                    # each struct entry is an object reference into the file
                    tmp = np.array(f[struct[fld][trial][0]])
                    # Pull out scalars
                    if tmp.size == 1:
                        tmp = tmp[0,0]
                    else:
                        try:
                            # first try to decode the entry as a character
                            # array (one character code per row)
                            tmp = ''.join([chr(c[0]) for c in tmp])
                        except Exception:
                            # Not decodable as text.
                            # Read cell arrays within entries
                            if isinstance(tmp[0,0], h5py.h5r.Reference):
                                shp = tmp.shape
                                tmp_flat = np.ravel(tmp)
                                for tt in range(len(tmp_flat)):
                                    cell = f[tmp_flat[tt]][:]
                                    # Handle cell arrays containing strings
                                    try:
                                        tmp_flat[tt] = ''.join([chr(c[0]) for c in cell])
                                    except Exception:
                                        tmp_flat[tt] = cell
                                tmp = np.reshape(tmp_flat, shp)
                                # Remove lists with single item
                                try:
                                    while len(tmp) == 1:
                                        tmp = tmp[0]
                                except TypeError:
                                    # reached an unsized object; nothing left to unwrap
                                    pass
                            tmp = np.squeeze(tmp)

                    trial_dict[fld] = tmp
                data.append(trial_dict)

    if strict:
        for r in req:
            if r not in fieldnames:
                raise ValueError(f'Missing required field: {r}')

    out = Data(data=data, strict=strict)
    return out


def _matlab_valid_fieldnames(fields):
    '''
    Convert fieldnames so they are matlab struct compliant (e.g. no spaces, hyphens)
    '''
    new_fields = []
    for field in fields:
        tmp = field.replace(' ', '_')
        tmp = tmp.replace('-', '_')
        new_fields.append(tmp)
    return new_fields


[docs]
def export_data(filepath, data, fmt='7.3'):
    '''
    Export a naplib.Data instance to the MATLAB-compatible
    equivalent (.mat file).
    The MATLAB equivalent structure is a 1xN struct with N trials and
    some number of fields, and this is stored in the .mat file
    under the variable name "out". This function will
    automatically transpose the 'resp' and 'aud' fields for
    each trial in the .mat file, thus undoing the actions of
    import_data.

    Parameters
    ----------
    filepath : string or path-like
        Filename or path-like specifying where to save the file.
    data : Data instance
        Data to export.
    fmt : str, default='7.3'
        MATLAB file format. Options are {'7.3','7','6'}

    Raises
    ------
    ValueError
        If ``fmt`` is not one of the supported formats.
    TypeError
        If ``data`` is not a naplib.Data instance.

    '''
    # Normalize path-like objects (e.g. pathlib.Path) to a plain string so
    # that the extension check below works on them too.
    filepath = os.fspath(filepath)
    if not filepath.endswith('.mat'):
        logger.warning('The filepath does not end with ".mat". Saving anyway. However, the .mat extension may be needed to open the file in MATLAB.')

    FORMAT_OPTIONS = ['7.3','7','6']
    if fmt not in FORMAT_OPTIONS:
        raise ValueError(f"format must be one of ['7.3','7','6'] but got {fmt}")
    if not isinstance(data, Data):
        raise TypeError(f'data must be a naplib.Data instance but got {type(data)}')

    fieldnames = data.fields

    # field names are sanitized (spaces/hyphens -> underscores) for MATLAB
    matlab_fieldnames = _matlab_valid_fieldnames(fieldnames)
    dt = np.dtype([(field, 'O') for field in matlab_fieldnames])

    # construct a numpy void array which contains multiple dtypes
    void_data = []
    for trial in data:
        trial_data = []
        for field in fieldnames:
            value = trial[field]

            if isinstance(value, np.ndarray):
                if value.ndim > 1:
                    # swap back the first two axes that import_data swapped
                    if field in ('resp', 'aud'):
                        value = value.transpose(1, 0, *range(2, value.ndim))
                elif value.ndim == 1:
                    # column vec for matlab
                    value = np.expand_dims(value, 1)
                else:
                    # 0-d array becomes a length-1 array
                    value = np.expand_dims(value, 0)
            else:
                # convert other object types to arrays
                if isinstance(value, str):
                    value = np.array(value, dtype='str')
                elif isinstance(value, int):
                    # integers are stored as floating point values
                    value = np.array(value, dtype='float').reshape((1,))
                else:
                    # lists and any remaining types
                    value = np.array(value)
                # every non-array value gets a leading singleton dimension
                value = np.expand_dims(value, 0)

            trial_data.append(value)
        void_data.append(tuple(trial_data))
    # one row, one column per trial
    void_data = np.array(void_data, dtype=dt).reshape(1,-1)

    savemat(filepath, {'out': void_data}, appendmat=False, format=fmt)




[docs]
def load(filename):
    '''
    Load object from saved file.

    Parameters
    ----------
    filename : string or path-like
        File to load. If the file name has no extension at all,
        '.pkl' is appended automatically; a name that already has an
        extension is used unchanged.

    Returns
    -------
    output : Object
        Loaded object.

    Raises
    ------
    FileNotFoundError
        Can't find file.

    Notes
    -----
    The file is read with ``pickle.load``, which can execute arbitrary
    code while unpickling. Only load files from sources you trust.

    Examples
    --------
    >>> from naplib.io import save, load
    >>> arr = [1, 2, 3]
    >>> save('data.pkl', arr)
    >>> arr_loaded = load('data.pkl')
    >>> arr_loaded
    [1, 2, 3]

    '''
    # accept pathlib.Path and other path-like objects as well as strings
    filename = os.fspath(filename)

    # a basename without any dot has no extension (so it cannot end in '.pkl')
    if '.' not in os.path.basename(filename):
        filename = filename + '.pkl'

    # SECURITY: pickle must not be used on untrusted input
    with open(filename, 'rb') as inp:
        output = pickle.load(inp)

    return output




[docs]
def save(filename, obj, makedirs=False):
    '''
    Save object with pickle.

    Parameters
    ----------
    filename : string or path-like
        File to save to. If the file name has no extension at all,
        '.pkl' is appended automatically; a name that already has an
        extension is used unchanged.
    obj : Object
        Data to save.
    makedirs : bool, default=False
        Whether to create parent directories if they do not exist.

    Examples
    --------
    >>> from naplib.io import save, load
    >>> arr = [1, 2, 3]
    >>> save('data.pkl', arr)
    >>> arr_loaded = load('data.pkl')
    >>> arr_loaded
    [1, 2, 3]

    '''
    # accept pathlib.Path and other path-like objects as well as strings
    filename = os.fspath(filename)

    # a basename without any dot has no extension (so it cannot end in '.pkl')
    if '.' not in os.path.basename(filename):
        filename = filename + '.pkl'

    if makedirs:
        parent = os.path.dirname(filename)
        # A bare file name has an empty parent; os.makedirs('') would raise,
        # and there is nothing to create in that case anyway.
        if parent:
            os.makedirs(parent, exist_ok=True)

    with open(filename, 'wb') as f:
        pickle.dump(obj, f)