from . import evaluation as evl
from . import cross_validation as cv
import numpy as np
import matplotlib.pyplot as plt
import functools
def baselineplot(regression,unit=None,marker='x',marker_color='r',marker_y=1,marker_shape=100,fontsize=20,**kw):
    r"""
    Draw a histogram of the target property and mark its mean value.

    Arguments:
        regression (evaluation.Regression):
            regression result; its ``property`` and ``property_name``
            attributes are read.
        unit (string):
            unit of the property. When given, it is appended to the x-axis
            label in square brackets.
        marker (char):
            marker type of the mean value, same meaning as in matplotlib.
        marker_color (char):
            marker color of the mean value, same meaning as in matplotlib.
        marker_y (float):
            vertical coordinate at which the mean-value marker is drawn.
        marker_shape (int):
            forwarded to ``scatter()`` as ``s``, i.e. the marker size.
        fontsize (int):
            font size of the axis labels and of the title.
        kw:
            extra keyword arguments forwarded to ``hist()`` in matplotlib.
    """
    values = regression.property
    name = regression.property_name

    # Histogram goes below (zorder=0), the mean marker on top (zorder=1).
    plt.hist(values, zorder=0, **kw)
    plt.scatter(
        values.mean(),
        marker_y,
        marker=marker,
        c=marker_color,
        s=marker_shape,
        zorder=1,
        label='Mean Value',
    )

    x_label = name+'['+unit+']' if unit else name
    plt.xlabel(x_label, fontsize=fontsize)
    plt.ylabel('Counts', fontsize=fontsize)

    title = 'Histogram of %s over training data\nMean = %.5f\nStandard deviation = %.5f'%(
        name, values.mean(), values.std())
    plt.title(title, fontsize=fontsize)
    plt.legend()
def error_hist(dimension,*regressions,training=True,absolute=False,unit=None,fontsize=20,**kw):
    r"""
    Draw a histogram of the regression errors at one descriptor dimension.

    The errors of all given regressions are pooled into a single histogram.

    Arguments:
        dimension (int):
            dimension of the descriptor (1-based; row ``dimension-1`` of the
            error array is used).
        regressions (evaluation.Regression or evaluation.RegressionCV):
            regression results.
        training (bool):
            forwarded to ``regression.errors()``; selects training errors
            or prediction errors.
        absolute (bool):
            plot absolute errors instead of signed errors.
        unit (string):
            unit of the property, appended to the x-axis label in brackets.
        fontsize (int):
            font size of the axis labels.
        kw:
            extra keyword arguments forwarded to ``hist()`` in matplotlib.
    """
    row = dimension-1

    pooled = []
    for regression in regressions:
        model_errors = regression.errors(training=training)
        if absolute:
            model_errors = np.abs(model_errors)
        pooled.append(model_errors[row, :])
    collect_data = np.hstack(pooled)

    if absolute:
        label_with_unit, label_plain = 'Absolute error %s', 'Absolute error'
    else:
        label_with_unit, label_plain = 'Signed error %s', 'Signed error'
    x_label = label_with_unit%('['+unit+']') if unit else label_plain

    plt.xlabel(x_label, fontsize=fontsize)
    plt.hist(collect_data, **kw)
    plt.ylabel('Counts', fontsize=fontsize)
def prediction_vs_property(dimension,*regressions,training=True,unit=None,fontsize=20,**kw):
    r"""
    Scatter the signed error (prediction minus property) against the property.

    Arguments:
        dimension (int):
            dimension of the descriptor (1-based; row ``dimension-1`` of the
            prediction array is used).
        regressions (evaluation.Regression or evaluation.RegressionCV):
            regression results. The type of the first one decides whether
            the cross-validation layout is used for all of them.
        training (bool):
            use training data or validation (prediction) data.
        unit (string):
            unit of the property, appended to both axis labels in brackets.
        fontsize (int):
            font size of the axis labels.
        kw:
            extra keyword arguments forwarded to ``scatter()`` in matplotlib.
    """
    row = dimension-1

    if isinstance(regressions[0], evl.RegressionCV):
        # Cross-validated results store one property set per fold.
        per_fold = []
        for regression in regressions:
            for fold in range(0, regression.n_cv):
                if training:
                    per_fold.append(regression.property[fold].values)
                else:
                    # Column 1 of the validation table is read as the property.
                    per_fold.append(regression.validation_data[fold].iloc[:, 1].values.tolist())
        property_values = np.hstack(per_fold)
    else:
        property_values = np.hstack([regression.property.values for regression in regressions])

    predicted = np.hstack([
        regression.predictions(training=training)[row, :] for regression in regressions
    ])
    prediction_value = predicted-property_values

    plt.scatter(property_values, prediction_value, **kw)

    property_name = regressions[0].property_name
    if not unit:
        plt.xlabel('%s in data set'%(property_name), fontsize=fontsize)
        plt.ylabel('Signed error', fontsize=fontsize)
    else:
        bracketed = '['+unit+']'
        plt.xlabel('%s in data set %s'%(property_name, bracketed), fontsize=fontsize)
        plt.ylabel('Signed error %s'%(bracketed), fontsize=fontsize)
def hist_with_markers(dimension,*regressions,training=True,unit=None,fontsize=20,selected_errors=None,marker_x=0,marker=None,**kw):
    r"""
    Plot a horizontal histogram of absolute errors with error-measure markers.

    The absolute errors of all given regressions at the chosen descriptor
    dimension are pooled, drawn as a horizontal histogram, and the selected
    summary errors are pinpointed on the error axis with scatter markers.

    Arguments:
        dimension (int):
            dimension of the descriptor (1-based; row ``dimension-1`` of the
            error array is used).
        regressions (evaluation.Regression or evaluation.RegressionCV):
            regression results.
        training (bool):
            forwarded to ``regression.errors()``; selects training errors
            or prediction errors.
        unit (string):
            unit of the property, appended to the y-axis label in brackets.
        fontsize (int):
            font size of the axis labels.
        selected_errors (None or list):
            which errors to pinpoint in the plot. Available names are
            'RMSE','MAE','25%ile AE','50%ile AE','75%ile AE','95%ile AE','MaxAE'.
            If it is ``None``, all of them appear in the plot.
        marker_x (float):
            horizontal coordinate (on the counts axis) of the markers.
        marker (None, dict, list or string):
            marker types, same meaning as in matplotlib.
            A mapping is looked up by error name; a list/tuple is matched
            position by position with ``selected_errors``; a single string
            is used for every error. If it is ``None``, default types are used.
        kw:
            extra keyword arguments forwarded to ``hist()`` in matplotlib.
            ``orientation`` and ``zorder`` are set by this function and must
            not be passed.

    Raises:
        ValueError: if ``marker`` is a list/tuple whose length differs from
            the number of selected errors.
    """
    row = dimension-1
    collect_data = np.hstack([
        np.abs(regression.errors(training=training))[row, :] for regression in regressions
    ])

    plt.hist(collect_data, **kw, orientation='horizontal', zorder=0)
    if unit:
        plt.ylabel('Absolute error %s'%('['+unit+']'), fontsize=fontsize)
    else:
        plt.ylabel('Absolute error', fontsize=fontsize)
    plt.xlabel('Counts', fontsize=fontsize)

    errors = evl.compute_errors(collect_data)

    # Identity checks: ``== None`` is evaluated element-wise for array-like
    # arguments and then fails inside ``if``.
    if selected_errors is None:
        selected_errors = ('RMSE','MAE','25%ile AE','50%ile AE','75%ile AE','95%ile AE','MaxAE')
    else:
        selected_errors = tuple(selected_errors)

    if marker is None:
        marker = {'RMSE':'s','MAE':'x','25%ile AE':'p','50%ile AE':'X','75%ile AE':'D','95%ile AE':'+','MaxAE':'.'}
    elif isinstance(marker, str):
        # One marker type shared by every selected error.
        marker = {name: marker for name in selected_errors}
    elif isinstance(marker, (list, tuple)):
        # Documented list form: markers aligned with ``selected_errors``.
        if len(marker) != len(selected_errors):
            raise ValueError(
                'marker has %d entries but %d errors are selected'
                % (len(marker), len(selected_errors)))
        marker = dict(zip(selected_errors, marker))
    # Any other object (e.g. a dict) is indexed by error name as before.

    for selected_error in selected_errors:
        plt.scatter(marker_x, errors[selected_error], s=50, zorder=1,
                    marker=marker[selected_error], label=selected_error)
    plt.legend()
def abs_errors_vs_dimension(*regressions,training=True,unit=None,fontsize=20,selected_errors=None,display_baseline=False,label='',**kw):
    r"""
    Plot summary absolute errors as a function of the descriptor dimension.

    The absolute errors of all given regressions are pooled, summarized with
    ``evaluation.compute_errors`` and each selected error measure is drawn as
    a line with scatter points, one point per descriptor dimension.

    Arguments:
        regressions (evaluation.Regression or evaluation.RegressionCV):
            regression results.
        training (bool):
            forwarded to ``regression.errors()``; selects training errors
            or prediction errors.
        unit (string):
            unit of the property, appended to the y-axis label in brackets.
        fontsize (int):
            font size of the axis labels.
        selected_errors (None or list):
            which errors should appear in the plot. Available names are
            'RMSE','MAE','25%ile AE','50%ile AE','75%ile AE','95%ile AE','MaxAE'.
            If it is ``None``, all of them appear in the plot.
        display_baseline (bool):
            whether to draw a dashed horizontal line at
            ``regressions[0].baseline[1]`` spanning dimensions 1 to
            ``regressions[0].dimension``.
        label (string):
            prefix of the legend entries; each entry reads
            ``'<label> <error name>'``, or just the error name when
            ``label`` is empty.
        kw:
            extra keyword arguments forwarded to the ``plot()`` method of
            each error series.
    """
    if display_baseline:
        reference = regressions[0]
        plt.plot([1, reference.dimension],
                 [reference.baseline[1], reference.baseline[1]],
                 '--', label='Baseline')

    collect_data = np.hstack([np.abs(regression.errors(training=training)) for regression in regressions])
    # NOTE(review): ``errors`` is used like a table indexed by dimension
    # (``errors[name].plot``, ``errors.index``) -- confirm against compute_errors.
    errors = evl.compute_errors(collect_data)

    # Identity check: ``== None`` is evaluated element-wise for array-like
    # arguments and then fails inside ``if``.
    if selected_errors is None:
        selected_errors = ('RMSE','MAE','25%ile AE','50%ile AE','75%ile AE','95%ile AE','MaxAE')

    for selected_error in selected_errors:
        # Avoid a stray leading space in the legend when no prefix is given.
        series_label = label+' '+selected_error if label else selected_error
        errors[selected_error].plot(label=series_label, **kw)
        plt.scatter(errors.index.values, errors[selected_error].values)
    plt.legend()

    if unit:
        plt.ylabel('Errors %s'%('['+unit+']'), fontsize=fontsize)
    else:
        plt.ylabel('Errors', fontsize=fontsize)
    plt.xlabel('Dimension of the descriptor', fontsize=fontsize)

    # One tick per dimension, with half a step of padding on each side.
    n_dimensions = len(errors)
    plt.xlim(0, n_dimensions+1)
    plt.xticks(range(1, n_dimensions+1))
def boxplot(regression,training=True,unit=None,fontsize=20,**kwargs):
    r"""
    Draw one box plot of absolute errors per descriptor dimension.

    Arguments:
        regression (evaluation.Regression or evaluation.RegressionCV):
            regression result; its ``errors()`` method and ``dimension``
            attribute are used.
        training (bool):
            forwarded to ``regression.errors()``; selects training errors
            or prediction errors.
        unit (string):
            unit of the property, appended to the y-axis label in brackets.
        fontsize (int):
            font size of the axis labels.
        kwargs:
            extra keyword arguments forwarded to ``boxplot()`` in matplotlib.
    """
    # Compute the absolute errors once instead of once per dimension.
    abs_errors = np.abs(regression.errors(training=training))
    plt.boxplot([abs_errors[row] for row in range(regression.dimension)],
                **kwargs)
    if unit:
        plt.ylabel('Errors %s'%('['+unit+']'), fontsize=fontsize)
    else:
        plt.ylabel('Errors', fontsize=fontsize)
    plt.xlabel('Dimension', fontsize=fontsize)
def errors_details(regression,training=True):
    r"""
    Draw a grid of diagnostic plots, one row per descriptor dimension.

    Each row holds three panels: the histogram of signed errors, the signed
    error versus property scatter, and the horizontal histogram of absolute
    errors with error-measure markers.

    Arguments:
        regression (evaluation.Regression or evaluation.RegressionCV):
            regression result.
        training (bool):
            training errors or prediction errors.
    """
    n_rows = regression.dimension
    n_cols = 3
    plt.figure(figsize=(20, 6*n_rows))

    for dim in range(1, n_rows+1):
        # Subplot indices are 1-based and run row by row.
        first_panel = (dim-1)*n_cols+1

        plt.subplot(n_rows, n_cols, first_panel)
        error_hist(dim, regression, absolute=False, training=training, rwidth=0.8)

        plt.subplot(n_rows, n_cols, first_panel+1)
        prediction_vs_property(dim, regression, training=training)

        plt.subplot(n_rows, n_cols, first_panel+2)
        hist_with_markers(dim, regression, training=training,
                          bins=20, alpha=0.5, rwidth=0.8, marker_x=10)