Source code for bcselector.information_theory.basic_approximations

import numpy as np

__all__ = [
    'entropy',
    'conditional_entropy',
    'mutual_information',
    'conditional_mutual_information'
]


def entropy(vector, base=None):
    """This estimator computes the entropy of the empirical probability distribution.

    Parameters
    ----------
    vector : list or np.array
        Vector of which entropy is calculated.
    base : int or float (default=np.e)
        Base of the logarithm in the entropy approximation. If None, np.e is
        selected and entropy is returned in nats.

    Returns
    -------
    vector_entropy : float
        Approximated entropy.

    Examples
    --------
    >>> from bcselector.information_theory.basic_approximations import entropy
    >>> foo = [1, 4, 1, 2, 5, 6, 3]
    >>> entropy(foo)
    """
    assert isinstance(vector, list) or (isinstance(vector, np.ndarray) and len(vector.shape) == 1), \
        "Argument 'vector' not in the right shape. Use list or numpy (n,) shape instead."
    assert len(vector) > 0, "Argument 'vector' can't be empty"

    vector = np.array(vector)
    if len(vector) == 1:
        # Entropy of a single observation is zero.
        return 0.0
    _, counts = np.unique(vector, return_counts=True)
    norm_counts = counts / counts.sum()
    base = np.e if base is None else base
    # H(X) = -sum(p * log(p)); dividing by log(base) converts from nats.
    return -(norm_counts * np.log(norm_counts) / np.log(base)).sum()
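
A quick usage sketch (illustrative, not part of the module itself): passing base=2 reports entropy in bits, so a balanced binary sample gives exactly 1 bit, while the default base np.e reports the same quantity in nats.

from bcselector.information_theory.basic_approximations import entropy

coin = [0, 1, 0, 1]           # two equally likely outcomes
print(entropy(coin, base=2))  # 1.0 bit: the maximum for a binary variable
print(entropy(coin))          # ~0.693 nats (np.e is the default base)
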
def conditional_entropy(vector, condition, base=None):
    """This estimator computes the conditional entropy of the empirical probability distribution.

    Parameters
    ----------
    vector : list or np.array
        Vector of which entropy is calculated.
    condition : list or np.array
        Vector of condition for entropy.
    base : int or float
        Base of the logarithm in the entropy approximation. If None, np.e is
        selected and entropy is returned in nats.

    Returns
    -------
    cond_entropy : float
        Approximated conditional entropy.
    """
    assert isinstance(vector, list) or (isinstance(vector, np.ndarray) and len(vector.shape) == 1), \
        "Argument 'vector' not in the right shape. Use list or numpy (n,) shape instead."
    assert isinstance(condition, list) or (isinstance(condition, np.ndarray) and len(condition.shape) == 1), \
        "Argument 'condition' not in the right shape. Use list or numpy (n,) shape instead."
    assert len(vector) > 0, "Argument 'vector' can't be empty"
    assert len(condition) > 0, "Argument 'condition' can't be empty"

    vector = np.array(vector)
    condition = np.array(condition)
    assert vector.shape == condition.shape, "Argument 'vector' must be the same length as 'condition'"

    if len(vector) == 1:
        # Entropy of a single observation is zero.
        return 0.0

    # Sort by condition so np.split can slice out one group per condition value.
    order = condition.argsort()
    vector_sorted = vector[order]
    condition_sorted = condition[order]
    binvalues = np.split(vector_sorted, np.unique(condition_sorted, return_index=True)[1][1:])
    _, counts = np.unique(condition_sorted, return_counts=True)
    binprobas = counts / counts.sum()

    # H(X|Y) = sum over y of P(Y=y) * H(X|Y=y)
    cond_entropy = 0.0
    for values, proba in zip(binvalues, binprobas):
        cond_entropy += entropy(values, base=base) * proba
    return cond_entropy
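
A minimal sketch of the two extremes (example values are mine, not from the package): when the condition fully determines the vector, H(X|Y) drops to 0; when it tells us nothing, H(X|Y) equals H(X).

from bcselector.information_theory.basic_approximations import conditional_entropy

x = [1, 1, 2, 2]
y_informative = [0, 0, 1, 1]  # each y value pins down x exactly
y_useless = [0, 1, 0, 1]      # each y group still contains both x values

print(conditional_entropy(x, y_informative, base=2))  # 0.0
print(conditional_entropy(x, y_useless, base=2))      # 1.0, same as entropy(x, base=2)
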
def mutual_information(vector_1, vector_2, base=None):
    """This estimator computes the mutual information of two vectors with the method of the empirical probability distribution.

    Parameters
    ----------
    vector_1 : list or np.array
        Vector of one variable.
    vector_2 : list or np.array
        Vector of one variable.
    base : int or float
        Base of the logarithm in the entropy approximation. If None, np.e is
        selected and entropy is returned in nats.

    Returns
    -------
    variables_mutual_information : float
        Approximated mutual information between variables.
    """
    # I(X;Y) = H(X) - H(X|Y)
    vector_1_entropy = entropy(vector=vector_1, base=base)
    cond_entropy = conditional_entropy(vector=vector_1, condition=vector_2, base=base)
    return vector_1_entropy - cond_entropy
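
A minimal sketch (illustrative data): since mutual information is H(X) minus H(X|Y), a perfectly predictive second variable recovers the full entropy of the first, and an uninformative one yields 0.

from bcselector.information_theory.basic_approximations import mutual_information

x = [1, 1, 2, 2]
print(mutual_information(x, [0, 0, 1, 1], base=2))  # 1.0 bit: y determines x
print(mutual_information(x, [0, 1, 0, 1], base=2))  # 0.0: y carries no information about x
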
def conditional_mutual_information(vector_1, vector_2, condition, base=None):
    """This estimator computes the conditional mutual information of two vectors given a condition vector, with the method of the empirical probability distribution.

    Parameters
    ----------
    vector_1 : list or np.array
        Vector of one variable.
    vector_2 : list or np.array
        Vector of one variable.
    condition : list or np.array
        Vector of condition for mutual information.
    base : int or float
        Base of the logarithm in the entropy approximation. If None, np.e is
        selected and entropy is returned in nats.

    Returns
    -------
    variables_conditional_mutual_information : float
        Approximated conditional mutual information between variables.
    """
    assert isinstance(vector_1, list) or (isinstance(vector_1, np.ndarray) and len(vector_1.shape) == 1), \
        "Argument 'vector_1' not in the right shape. Use list or numpy (n,) shape instead."
    assert isinstance(vector_2, list) or (isinstance(vector_2, np.ndarray) and len(vector_2.shape) == 1), \
        "Argument 'vector_2' not in the right shape. Use list or numpy (n,) shape instead."
    assert isinstance(condition, list) or (isinstance(condition, np.ndarray) and len(condition.shape) == 1), \
        "Argument 'condition' not in the right shape. Use list or numpy (n,) shape instead."
    assert len(vector_1) > 0, "Argument 'vector_1' can't be empty"
    assert len(vector_2) > 0, "Argument 'vector_2' can't be empty"
    assert len(condition) > 0, "Argument 'condition' can't be empty"

    vector_1 = np.array(vector_1)
    vector_2 = np.array(vector_2)
    condition = np.array(condition)
    assert vector_1.shape == vector_2.shape == condition.shape, \
        "Arguments 'vector_1' and 'vector_2' must be the same length as 'condition'"

    if len(condition) == 1:
        # Mutual information for a single observation is zero.
        return 0.0

    # Sort by condition so np.split can slice out one group per condition value.
    order = condition.argsort()
    vector_1_sorted = vector_1[order]
    vector_2_sorted = vector_2[order]
    condition_sorted = condition[order]
    split_indices = np.unique(condition_sorted, return_index=True)[1][1:]
    binvalues_1 = np.split(vector_1_sorted, split_indices)
    binvalues_2 = np.split(vector_2_sorted, split_indices)
    _, counts = np.unique(condition_sorted, return_counts=True)
    binprobas = counts / counts.sum()

    # I(X;Y|Z) = sum over z of P(Z=z) * I(X;Y|Z=z)
    cond_mutual_info = 0.0
    for value_1, value_2, proba in zip(binvalues_1, binvalues_2, binprobas):
        cond_mutual_info += mutual_information(value_1, value_2, base=base) * proba
    return cond_mutual_info
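
A minimal sketch (illustrative data): conditional mutual information averages I(X;Y|Z=z) over the groups defined by the condition, so if X and Y are perfectly coupled within every group, the result equals their full within-group entropy.

from bcselector.information_theory.basic_approximations import conditional_mutual_information

x = [0, 1, 0, 1]
y = [0, 1, 0, 1]  # y mirrors x inside each condition group
z = [0, 0, 1, 1]
print(conditional_mutual_information(x, y, z, base=2))  # 1.0 bit
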