Source code for PyFoam.Wrappers.Pandas

#  ICE Revision: $Id$
"""Extended version of the Pandas-Dataframe
"""

from pandas import DataFrame, Series
from numpy import hstack, unique
from math import isnan

from PyFoam.Error import error, warning, PyFoamException

from PyFoam.ThirdParty.six import string_types, text_type, u

import pandas.api.types as pdtypes
import numpy as np
import pandas as pd

class PyFoamDataFrame(DataFrame):
    """A pandas DataFrame with some convenience functions added.

    The constructor enforces three invariants on non-empty frames:

    - all column names are strings
    - the index has a numeric dtype
    - the index is monotonic increasing

    On top of the regular DataFrame this class offers merging of other
    data with optional interpolation (:meth:`addData`), trapezoid-rule
    statistics (:meth:`integrate`, :meth:`validLength`,
    :meth:`weightedAverage`, :meth:`describe`) and nearest-row lookup via
    the ``[]``-operator when the key is numeric.
    """

    # Types accepted as ``other`` by addData
    validOtherTypes = (DataFrame, Series)

    def __init__(self, *args, **kwargs):
        """Adds no data. Just passes the arguments to the super-class
        and validates the result.

        :raises PandasWrapperPyFoamException: if a column name is not a string
        :raises TypeError: if the index is not numeric or not monotonic
            increasing (not checked for a completely empty frame)"""
        super(PyFoamDataFrame, self).__init__(*args, **kwargs)
        if not self.__allStrings():
            raise PandasWrapperPyFoamException("Columns must be strings")
        if self.shape == (0, 0):
            # nothing to validate on a completely empty frame
            return
        if not pdtypes.is_numeric_dtype(self.index.dtype):
            raise TypeError(
                "Index '{}' is of type {} which is not a numberic type".format(
                    self.index.name, self.index.dtype
                )
            )
        if not self.index.is_monotonic_increasing:
            raise TypeError(
                "Index '{}' should be monothinc increasing. It is not".format(
                    self.index.name
                )
            )

    def __allStrings(self, keys=None):
        """Check whether all column keys are strings.

        :param keys: the keys to check. If unset the keys of this frame
            are used. For a MultiIndex every level of every key is checked
        :return: True if every key (or key component) is a string"""
        if keys is None:
            keys = self.keys()
        if isinstance(keys, pd.MultiIndex):
            # each key is a tuple with one entry per level
            return all(
                isinstance(part, string_types) for key in keys for part in key
            )
        return all(isinstance(key, string_types) for key in keys)

    @staticmethod
    def __maskOutside(frame, column, lower, upper):
        """Set the values of a column to NaN where the index of the frame
        lies outside of the closed interval [lower, upper].

        Writes through a single ``.loc`` call instead of chained indexing
        (``frame[column][mask] = ...``): the chained form may operate on a
        temporary copy and is a silent no-op with copy-on-write."""
        outside = (frame.index < lower) | (frame.index > upper)
        if outside.any():
            frame.loc[outside, column] = float("NaN")

    def addData(
        self,
        other,
        sameIndex=True,
        mergeIndex=False,
        prefix=None,
        suffix=None,
        allowExtrapolate=False,
        interpolationMethod="values",
    ):
        """Add data from another DataFrame or Series

        :param other: data as Pandas-DataFrame or Series
        :param sameIndex: assume both have the same indices. If False the
            other data will be interpolated to the current indices
        :param mergeIndex: make the result indices a mixture of the indices
        :param prefix: string that is prepended to the column names of
            the other data
        :param suffix: string that is appended to the column names of
            the other data
        :param allowExtrapolate: if False interpolated values that lie
            outside of the index range of the data they come from are
            set to NaN
        :param interpolationMethod: passed as ``method`` to the
            ``interpolate``-method of pandas
        :return: a new PyFoamDataFrame. This frame is not modified
        :raises PandasWrapperPyFoamException: for contradicting options,
            an unsupported type of ``other``, non-string or clashing
            column names or differing indices without permission to
            interpolate"""
        if not sameIndex and mergeIndex:
            raise PandasWrapperPyFoamException(
                "Can't specify sameIndex=False and mergeIndex=True at the same time"
            )
        if not isinstance(other, self.validOtherTypes):
            raise PandasWrapperPyFoamException(
                "Other data is of type",
                type(other),
                "should be one of",
                self.validOtherTypes,
            )
        o = other if isinstance(other, DataFrame) else DataFrame(other)

        oldNames = o.keys()
        if not self.__allStrings(oldNames):
            raise PandasWrapperPyFoamException("Added data with non-string columns")
        newNames = oldNames.copy()
        if prefix:
            newNames = [prefix + n for n in newNames]
        if suffix:
            newNames = [n + suffix for n in newNames]
        clash = set(newNames) & set(self.keys())
        if clash:
            raise PandasWrapperPyFoamException(
                "Keys of this",
                self.keys(),
                "and other",
                newNames,
                "intersect",
                clash,
            )
        keys = dict(zip(oldNames, newNames))

        interpolate = False  # only interpolate if necessary
        if len(self.index) != len(o.index) or (self.index != o.index).any():
            if sameIndex and not mergeIndex:
                raise PandasWrapperPyFoamException(
                    "Other data has different index. \nSpecify sameIndex=False or mergeIndex=True"
                )
            ni = unique(hstack([self.index, o.index]))
            interpolate = True
            if mergeIndex:
                minOld = min(self.index)
                maxOld = max(self.index)
                # reindex already returns a new object, so no explicit
                # copy is requested (the copy-keyword is deprecated)
                result = self.reindex(index=ni).interpolate(
                    method=interpolationMethod, limit=1
                )
                if not allowExtrapolate:
                    for s in result:
                        self.__maskOutside(result, s, minOld, maxOld)
            else:
                # make sure we have values at the current position
                o = o.reindex(index=ni, columns=o.columns).interpolate(
                    method=interpolationMethod
                )
                result = self.copy()
        else:
            result = self.copy()

        # NOTE(review): if sameIndex=False then o has been reindexed to the
        # union of both indices at this point, so these bounds are the
        # bounds of the union and not of the original other data. The
        # masking below then presumably never removes anything in that
        # case. Behaviour kept as is - confirm whether this is intended
        minOld = min(o.index)
        maxOld = max(o.index)
        for oldName, newName in keys.items():
            result[newName] = o[oldName]
            if interpolate:
                result[newName] = result[newName].interpolate(
                    method=interpolationMethod, limit=1
                )
                if not allowExtrapolate:
                    self.__maskOutside(result, newName, minOld, maxOld)
        return PyFoamDataFrame(result)

    def integrate(self, columns=None):
        """Integrate by using the trapezoid rule. Return a dictionary with values.

        :param columns: list of column names. If unset all are integrated
        :return: dictionary that maps the column names to the integrals.
            The integral is NaN if no weighting is possible"""
        return self.__integrateInternal(columns)[0]

    def validLength(self, columns=None):
        """Length were the values are valid (not NaN). Return a dictionary
        with values.

        :param columns: list of column names. If unset all are used
        :return: dictionary that maps the column names to the part of the
            index range that is covered by non-NaN values"""
        return self.__integrateInternal(columns)[1]

    def weightedAverage(self, columns=None):
        """Weighted average. Return a dictionary with values.

        :param columns: list of column names. If unset all are used
        :return: dictionary that maps the column names to the integral
            divided by the valid length (NaN if that length is not
            positive)"""
        integral, length = self.__integrateInternal(columns)
        return self.__averages(integral, length)

    @staticmethod
    def __averages(integral, length):
        """Divide the integrals by the valid lengths.

        :param integral: dictionary with the integrals
        :param length: dictionary with the valid lengths (same keys)
        :return: dictionary with the quotients. NaN for entries where the
            length is not a positive number"""
        result = {}
        for k in integral:
            if length[k] > 0 and not isnan(length[k]):
                result[k] = integral[k] / length[k]
            else:
                result[k] = float("NaN")
        return result

    def __integrateInternal(self, columns):
        """Calculate integrals and valid lengths in one go.

        Every row gets half of the distance to each of its neighbours in
        the index as a weight. Rows with a NaN-value contribute neither
        to the integral nor to the length.

        :param columns: list of column names. If None all columns are used
        :return: tuple with two dictionaries (integrals and valid lengths)
            that have the column names as keys"""
        if columns is None:
            columns = self.keys()

        # The weights only depend on the index: calculate them once
        # instead of once per column
        positions = np.asarray(self.index)
        halfGaps = 0.5 * np.diff(positions)
        weights = np.zeros(len(positions))
        weights[1:] += halfGaps  # half of the distance to the previous row
        weights[:-1] += halfGaps  # half of the distance to the next row

        integrals = {}
        lengths = {}
        for k in columns:
            if len(positions) < 2:
                # no weighting possible
                integrals[k] = float("NaN")
                lengths[k] = 0
                continue
            total = 0
            covered = 0
            for w, value in zip(weights, self[k].values):
                if not isnan(value):
                    covered += w
                    total += w * value
            lengths[k] = covered
            integrals[k] = float("NaN") if covered == 0 else total
        return integrals, lengths

    def describe(self, *args, **kwargs):
        """Adds our own statistics to the regular describe

        The arguments are passed to the describe-method of the
        super-class. The rows "integral", "valid length" and "weighted
        average" are added to its result.

        :return: a DataFrame with the statistics"""
        d = super(PyFoamDataFrame, self).describe(*args, **kwargs)
        integral, length = self.__integrateInternal(self.keys())
        average = self.__averages(integral, length)
        # DataFrame.append was removed in pandas 2.0. pd.concat gives the
        # same result (union of the columns, rows in the given order)
        return pd.concat(
            [
                d,
                DataFrame(data=integral, index=["integral"]),
                DataFrame(data=length, index=["valid length"]),
                DataFrame(data=average, index=["weighted average"]),
            ]
        )

    def __nearestPosition(self, value):
        """Position (not label) of the row whose index is nearest to value"""
        # The Series gets a default index, so idxmin is a position
        return Series(abs(self.index - value)).idxmin()

    def __getitem__(self, key):
        """If this gets a number as the key it tries to get the row that
        is nearest to this number.

        If it is something list-like and the elements of the lists are
        numbers then all the elements of the list are looked up, sorted
        and made unique. Afterwards it gets the rows that are nearest to
        the numbers.

        Otherwise it defaults to the []-operator of the DataFrame-class
        but converts the result to a PyFoamDataFrame
        """
        idx = None
        if isinstance(key, (float, int)):
            idx = [self.__nearestPosition(key)]
        elif pdtypes.is_list_like(key):
            try:
                k = np.array(key)
                # boolean keys are masks and go to the regular []-operator
                if pdtypes.is_numeric_dtype(k) and not pdtypes.is_bool_dtype(k):
                    idx = sorted({self.__nearestPosition(i) for i in k})
            except TypeError:
                # not usable as numbers: let the regular []-operator decide
                pass
        if idx is not None:
            return PyFoamDataFrame(self.iloc[idx])

        val = DataFrame.__getitem__(self, key)
        if isinstance(val, DataFrame):
            return PyFoamDataFrame(val)
        return val
class PandasWrapperPyFoamException(PyFoamException):
    """Exception for problems in the PyFoam wrapper to the pandas-library.

    The constructor hands a fixed description followed by all the
    arguments it was given to the constructor of PyFoamException."""

    def __init__(self, *text):
        # The base-class constructor is called explicitly because, according
        # to the original author, super() does not work with Python 2.4
        PyFoamException.__init__(
            self, "Problem in wrapper to pandas-library", *text
        )