# Source code for Project.Prediction.ClassModel

import pandas as pd
import numpy as np
import datetime as dt
import xgboost as xgb
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm


# Apply seaborn's default theme at import time; this is a module-level side
# effect that changes matplotlib's global style for every figure drawn
# afterwards, including the one produced by Dos.plot.
sns.set()


class Dos():
    """Daily forecaster for one of three energy series.

    The class builds calendar and lag features from a time-indexed
    dataframe, fits a boosted-trees regressor on them, and predicts/plots
    one full day at 15-minute resolution (96 points).

    :param data: Training data, the output of
        :ref:`Data Collection <DataCol>`. Its index must expose the datetime
        accessors used by :meth:`createFeatures` (``minute``, ``dayofweek``,
        ``month``, ...), i.e. behave like a ``DatetimeIndex``.
    :type data: Dataframe
    :param energy: Position of the column to forecast in ``data.columns``.

        0 : Electricity Consumption

        1 : Gaz

        2 : Nuclear
    :type energy: int
    :param year: Year of the day to predict.
    :type year: int
    :param month: Month of the day to predict.
    :type month: int
    :param day: Day of the month of the day to predict.
    :type day: int
    """

    # Columns fed to the regressor. Training (fitModel) and prediction
    # (DayPred) must present them in the same order, so the list lives in
    # one place.
    # NOTE(review): there is no hour-of-day feature; position within the day
    # is only carried by ``minute`` and the lag columns -- confirm intended.
    _FEATURES = ['dayofyear', 'minute', 'dayofweek', 'month', 'year',
                 'lag1', 'lag2', 'lag3']

    # (column name, look-back offset) for each lag feature. 364 days is
    # exactly 52 weeks, so ``lag1`` lands on the same weekday one year back.
    _LAG_OFFSETS = (('lag1', '364 days'),
                    ('lag2', '30 days'),
                    ('lag3', '7 days'))

    def __init__(self, data, energy, year, month, day):
        self.data = data
        # Attribute name kept as ``energie`` for backward compatibility.
        self.energie = energy
        self.year = year
        self.month = month
        self.day = day

    def _target_name(self):
        """Return the label of the column selected by ``self.energie``."""
        return self.data.columns[self.energie]

    def _lag_columns(self, index):
        """Return ``{lag name: lagged target values}`` for *index*.

        Each timestamp of *index* is shifted back by the lag offset and
        looked up in the target series; timestamps with no match yield NaN.
        """
        target_map = self.data[self._target_name()].to_dict()
        return {
            name: (index - pd.Timedelta(offset)).map(target_map)
            for name, offset in self._LAG_OFFSETS
        }

    def createFeatures(self):
        """Create time series features based on the time series index.

        ``self.data`` is replaced by a copy (the dataframe passed to the
        constructor is left untouched) extended with the calendar columns
        ``minute``, ``dayofweek``, ``month``, ``year``, ``dayofyear``,
        ``dayofmonth`` and the lag columns ``lag1`` (364 days), ``lag2``
        (30 days) and ``lag3`` (7 days) of the target.

        :return: The extended dataframe (also stored in ``self.data``).
        :rtype: Dataframe
        """
        ind = self.data.index
        lags = self._lag_columns(ind)

        self.data = self.data.copy()
        self.data['minute'] = ind.minute
        self.data['dayofweek'] = ind.dayofweek
        self.data['month'] = ind.month
        self.data['year'] = ind.year
        self.data['dayofyear'] = ind.dayofyear
        self.data['dayofmonth'] = ind.day
        for name, values in lags.items():
            self.data[name] = values
        return self.data

    def fitModel(self):
        """Fit the regressor on every row of ``self.data``.

        :meth:`createFeatures` must have been called first so that the
        feature columns exist. The model is **Boosted Trees**; for more
        information please see this `reference
        <https://xgboost.readthedocs.io/en/stable/tutorials/model.html>`_.
        The training set is also passed as evaluation set, so the metric
        printed every 100 rounds is the training error.

        :return: The fitted model.
        :rtype: xgboost.sklearn.XGBRegressor
        """
        X_all = self.data[self._FEATURES]
        y_all = self.data[self._target_name()]

        reg = xgb.XGBRegressor(base_score=0.5,
                               booster='gbtree',
                               n_estimators=500,
                               # 'reg:linear' is the deprecated name of this
                               # same squared-error objective.
                               objective='reg:squarederror',
                               max_depth=3,
                               learning_rate=0.01)
        reg.fit(X_all, y_all, eval_set=[(X_all, y_all)], verbose=100)
        return reg

    def DayPred(self, reg):
        """Predict the target for every quarter hour of the requested day.

        The 96 timestamps of ``year-month-day`` (00:00 to 23:45, 15-minute
        step) are turned into the same features used for training; the lag
        features are looked up in ``self.data`` and are NaN where the
        shifted timestamp is not in its index.

        :param reg: The output of **fitModel** (any object with a compatible
            ``predict`` method).
        :type reg: xgboost.sklearn.XGBRegressor
        :return: ``(day_pred, DayDate)`` where ``day_pred`` is a dataframe
            with the columns ``Date`` (``YYYY-MM-DD``), ``Heure``
            (``HH:MM``) and the target name holding the predictions, and
            ``DayDate`` is the index of the 96 predicted timestamps.
        :rtype: tuple
        """
        midnight = dt.datetime(self.year, self.month, self.day, 0, 0)
        DayDate = pd.to_datetime(
            [midnight + dt.timedelta(minutes=15 * i) for i in range(96)])

        dayFeaturs = pd.DataFrame()
        dayFeaturs['dayofyear'] = DayDate.dayofyear
        dayFeaturs['minute'] = DayDate.minute
        dayFeaturs['dayofweek'] = DayDate.dayofweek
        dayFeaturs['month'] = DayDate.month
        dayFeaturs['year'] = DayDate.year
        for name, values in self._lag_columns(DayDate).items():
            dayFeaturs[name] = values
        # Guarantee the column order the model was trained with.
        dayFeaturs = dayFeaturs[self._FEATURES]

        pred = reg.predict(dayFeaturs)

        day_pred = pd.DataFrame()
        day_pred["Date"] = DayDate.strftime('%Y-%m-%d')
        day_pred["Heure"] = DayDate.strftime('%H:%M')
        day_pred[self._target_name()] = pred
        return day_pred, DayDate

    def plot(self, day_pred, DayDate):
        """Plot the daily energy prediction indexed by time.

        :param day_pred: First element returned by :meth:`DayPred`.
        :type day_pred: Dataframe
        :param DayDate: Second element returned by :meth:`DayPred`; used as
            the x axis.
        """
        target = self._target_name()
        series = pd.DataFrame(day_pred[target].values,
                              index=DayDate,
                              columns=[str(target)])

        fig, ax = plt.subplots(figsize=(12, 6), dpi=200)
        fig.suptitle('{}-{}-{}, forecasting {}'.format(self.year,
                                                       self.month,
                                                       self.day,
                                                       target),
                     fontsize=24)
        # ``legend`` only accepts a bool or 'reverse'; the legend entry is
        # the column name.
        series.plot(ax=ax, rot=90, ylabel='MW', legend=True)
        plt.show()