Source code for dyconnmap.ts.ci

""" Complexity Index


The complexity index [Janson2004]_, [Rapp2007]_ computes the `l`-subword complexity (`l`-subword spectrum) of a one-dimensional,
symbolic (integer) time series by finding the number of distinct subwords
of length `l`. The total complexity is the sum of the number of distinct subwords over all lengths.
The unnormalized measure can be used to compare sequences of equal length.
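For example (a sketch of the underlying counting, independent of this
module's implementation), for `x = [0, 1, 0, 1, 1]` the distinct subwords
of length 2 are `01`, `10` and `11`, and the full spectrum over lengths
1 to 4 is `[2, 3, 3, 2]`, giving a total complexity of 10::

    x = [0, 1, 0, 1, 1]
    spectrum = [len({tuple(x[i:i + l]) for i in range(len(x) - l + 1)})
                for l in range(1, len(x))]
    ci = sum(spectrum)  # spectrum == [2, 3, 3, 2], ci == 10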


Notes
-----
* This is a direct translation of the Complexity toolbox available at http://users.auth.gr/~stdimitr/files/software/complexitiy.rar
* The original author is Stavros Dimitriadis <stidimitriadis@gmail.com>

|

.. [Janson2004] Janson, S., Lonardi, S., & Szpankowski, W. (2004). On average sequence complexity. Theoretical Computer Science, 326(1-3), 213-227.
.. [Rapp2007] Rapp, P. E. (2007). Quantitative characterization of animal behavior following blast exposure. Cognitive neurodynamics, 1(4), 287-293.

"""
# Author: Avraam Marimpis <avraam.marimpis@gmail.com>

import numpy as np


def complexity_index(x, sub_len=-1, normalize=False, iterations=100):
    """ Complexity Index

    Parameters
    ----------
    x : array-like, shape(n_samples)
        Input symbolic time series.

    sub_len : int
        Maximum subword length. Default is `len(x) - 1`.

    normalize : bool
        Normalize the result. Default is `False`.

    iterations : int
        Number of iterations to perform randomization. Default is `100`.

    Returns
    -------
    normal_ci : float
        The computed complexity index after normalization against the
        randomization procedure; returned only when `normalize` is `True`.

    ci : float
        The computed complexity index.

    spectrum : array-like
        A list of the number of distinct subwords for each length, from 1
        up to the maximum subword length.
    """
    x = np.int32(x)
    x = x.flatten()
    len_x = len(x)

    ci, spectrum = __compute_complexity_index(x, sub_len)

    if normalize:
        rng = np.random.RandomState(0)

        mean_ci = 0.0
        num_letters = spectrum[0]

        # Average the complexity index of random symbolic series drawn from
        # an alphabet of the same size, and normalize against that average.
        for _ in range(iterations):
            new_x = np.int32(np.floor(rng.rand(len_x) * num_letters))
            new_ci, _ = __compute_complexity_index(new_x, sub_len)
            mean_ci += new_ci / iterations

        normal_ci = ci / mean_ci

        return normal_ci, ci, spectrum
    else:
        return ci, spectrum
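# A hypothetical usage sketch (not part of the original module): the series
# `x` below is made up, and `complexity_index` is assumed to be re-exported
# by the `dyconnmap.ts` package; importing it from `dyconnmap.ts.ci`, where
# it is defined, also works.
#
#     import numpy as np
#     from dyconnmap.ts import complexity_index
#
#     rng = np.random.RandomState(0)
#     x = rng.randint(0, 4, size=128)  # symbolic series over a 4-letter alphabet
#     normal_ci, ci, spectrum = complexity_index(x, normalize=True)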
def __compute_complexity_index(x, sub_len=-1):
    """ Complexity Index

    Parameters
    ----------
    x : array-like, shape(n_samples)
        Input symbolic time series.

    sub_len : int
        Maximum subword length. Default is `len(x) - 1`.

    Returns
    -------
    ci : float
        The computed complexity index.

    spectrum : array-like
        A list of the number of distinct subwords for each length, from 1
        up to the maximum subword length.
    """
    ci = 0.0

    x = np.int32(x)
    x = x.flatten()
    len_x = len(x)

    max_subword_len = len_x - 1
    if sub_len >= 2:
        max_subword_len = sub_len

    # Shift the symbols so that the smallest letter is 0.
    min_x = np.min(x)
    x = x - min_x

    letters = np.unique(x)

    max_len_word = np.min([max_subword_len, len_x - 1])

    spectrum = np.ones(max_len_word)
    spectrum[0] = len(letters)

    all_num_words = list()

    for word_len in range(1, max_len_word):
        real_word_len = word_len + 1

        cumulative_words = None

        # Slice the series at every possible offset so that, taken together,
        # the non-overlapping windows cover all subwords of this length.
        for shift in range(real_word_len):
            num_words = np.int32(np.floor((len_x - shift) / real_word_len))
            all_num_words.append(num_words)

            if num_words > 0:
                idx1 = shift
                idx2 = real_word_len * num_words + shift

                sliced = x[idx1:idx2]
                words = np.reshape(sliced, (num_words, real_word_len)).T

                if cumulative_words is None:
                    cumulative_words = words
                else:
                    cumulative_words = np.hstack([cumulative_words, words])

        # Encode each word as a single integer and count the distinct ones.
        conv_cumulative_words = __rowsBaseConv(cumulative_words.T)
        u_cumulative_words = np.unique(conv_cumulative_words)

        spectrum[word_len] = len(u_cumulative_words)

        cumulative_words = None

    ci = np.sum(spectrum)

    # Collected for bookkeeping; not returned.
    all_num_words = np.array(all_num_words).flatten()

    return ci, spectrum


def __rowsBaseConv(x, base=None):
    """ Convert each row of `x` from a positional representation in the
    given base to a single integer.

    Parameters
    ----------
    x : array-like, shape(n_words, word_len)
        Rows of digits.

    base : int
        The base of the representation. Default is `np.max(x) + 1`.

    Returns
    -------
    result : array-like, shape(n_words)
        One integer per row of `x`.
    """
    if base is None:
        base = np.max(x) + 1

    _, p = np.shape(x)

    bases = np.ones(p) * base
    indices = list(range(p - 1, -1, -1))
    base = np.power(bases, indices)

    result = x.dot(base)

    return result
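# A standalone sketch of the row-to-base conversion performed by
# `__rowsBaseConv` above (the example words are made up). Each row of digits
# `[a, b]` maps to `a * base + b`, so distinct words map to distinct integers
# and `np.unique` can count them.
if __name__ == "__main__":
    words = np.array([[0, 1], [1, 0], [0, 1]])
    codes = __rowsBaseConv(words)  # base defaults to np.max(words) + 1 == 2
    print(codes)                   # [1. 2. 1.]
    print(np.unique(codes))        # [1. 2.], i.e. two distinct words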