from operator import attrgetter
from typing import List, Optional, Tuple, Type, Union
import numpy as np
from anml.data.component import Component
from anml.data.validator import NoNans
from anml.prior.main import Prior
from anml.prior.utils import filter_priors, get_prior_type
from numpy.typing import NDArray
from pandas import DataFrame
[docs]class Variable:
"""Variable class that contains information of variable, including name and
priors. It provides functions of create design matrix and gather prior
information for likelihood building.
Parameters
----------
component
You can pass in the name of the variable corresponding to the column
name in the data frame. It will be automatically converted into an
instance of :class:`Component` with :class:`NoNans` as the validator.
Alternatively, you can also pass in an instance of :class:`Component`,
with your own set of validators.
priors
A list of priors corresponding to the variable.
"""
component = property(attrgetter("_component"))
"""Data compoent for the variable.
Raises
------
TypeError
Raised when the input component are not a string nor an instance of
:class:`Component`.
"""
priors = property(attrgetter("_priors"))
"""A list of priors corresponding to the variable.
Raises
------
TypeError
Raised when the input priors are not a list of given prior types.
Different classes have different legal prior types that is stored in a
protected class variable `_prior_types`.
"""
_prior_types: Tuple[Type, ...] = (Prior,)
"""A Tuple of all allowed prior types.
"""
def __init__(self,
component: Union[str, Component],
priors: Optional[List[Prior]] = None):
self.component = component
self.priors = priors
@component.setter
def component(self, component: Union[str, Component]):
if not isinstance(component, (str, Component)):
raise TypeError("Variable input component has to be a string or "
"an instance of Component.")
if isinstance(component, str):
component = Component(component, validators=[NoNans()])
self._component = component
@priors.setter
def priors(self, priors: Optional[List[Prior]]):
priors = list(priors) if priors is not None else []
if not all(isinstance(prior, self._prior_types) for prior in priors):
raise TypeError("Variable input priors must be a list of "
"instances of Prior.")
self._priors = priors
@property
def size(self) -> Optional[int]:
"""Size of the variable.
"""
return 1
[docs] def attach(self, df: DataFrame):
"""Attach the data to variable. It will attach data to the component.
Parameters
----------
df
The data frame contains the corresponding data column.
"""
self.component.attach(df)
[docs] def get_design_mat(self, df: DataFrame) -> NDArray:
"""Get design matrix.
Parameters
----------
df
The data frame contains the corresponding data column.
Returns
-------
NDArray
The design matrix as a numpy array.
"""
self.attach(df)
return self.component.value[:, np.newaxis]
[docs] def get_direct_prior_params(self, prior_type: str) -> NDArray:
"""Get the direct prior parameters. The direct prior refers to the
priors that do not have a linear map and direct act on the variable.
We require that one variable can only have one direct prior for a given
prior type. If there is no direct prior in the prior list, we will use
the default prior parameters of the given prior type.
Parameters
----------
prior_type
Given name of the prior type.
Returns
-------
NDArray
The prior parameters as an array.
Raises
------
ValueError
Raised when have more than one direct priors.
ValueError
Raised when the size of the prior parameters doesn't match with the
size of the variable.
"""
direct_priors = filter_priors(self.priors, prior_type, with_mat=False)
prior_type = get_prior_type(prior_type)
if len(direct_priors) == 0:
return np.repeat(prior_type.default_params, self.size, axis=1)
if len(direct_priors) >= 2:
raise ValueError("Variable can only have one direct prior.")
prior = direct_priors[0]
if prior.shape[1] != self.size:
raise ValueError("Variable and prior size don't match.")
return prior.params
[docs] def get_linear_prior_params(self, prior_type: str) -> Tuple[NDArray, NDArray]:
"""Get the linear prior parameters. The linear prior refers to the
priors that contain a linear map. If there is no linear prior in the
prior list, we will return empty arrays that match the size of the
variable.
Parameters
----------
prior_type
Given name of the prior type.
Returns
-------
Tuple[NDArray, NDArray]
The prior parameters as an array. The first one is the distribution
parameters and the second one is the linear map.
Raises
------
ValueError
Raised when the size of the prior parameters doesn't match with the
size of the variable.
"""
linear_priors = filter_priors(self.priors, prior_type, with_mat=True)
if len(linear_priors) == 0:
return np.empty((2, 0)), np.empty((0, self.size))
if not all(prior.shape[1] == self.size for prior in linear_priors):
raise ValueError("Variable and prior size don't match.")
params = np.hstack([prior.params for prior in linear_priors])
mat = np.vstack([prior.mat for prior in linear_priors])
return params, mat
def __repr__(self) -> str:
return f"{type(self).__name__}(component={self.component}, priors={self.priors})"