import numpy as np
import pandas as pd
import joblib
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from typing import List, Union, Optional
from data_utils import plot_schedule, bootstrap_mean, print_time_format, discretize_sched, activity_colors
from collections import defaultdict
class Results():
    """Class to handle optimization results, plot schedules and compute statistics.

    A schedule is a dataframe with (at least) a ``label`` and a ``duration`` column.
    In the single-day case each solution is one schedule; in the multiday case each
    solution is a list of schedules (one per day).

    Attributes:
    ---------------
    - solutions: list of optimized schedules (one entry per iteration)
    - runtimes: list containing the runtimes of each iteration
    - objective_values: list containing the objective value of each iteration
    - multiday: True if each solution is a list of daily schedules
    - day_index: list of day numbers (1 = Monday, ..., 7 = Sunday) matching the daily schedules
    - n_days: number of days in the horizon (1 if day_index is not provided)
    - day_names: dictionary mapping day numbers (1 to 7) to day names
    - n_iter: number of simulation iterations.

    Methods:
    ---------------
    - plot: plots the given schedules
    - plot_multiday: plots the daily schedules of one multiday solution
    - compute_statistics: compute average duration and frequency for each activity in the optimal schedules
    - plot_distribution: plots the aggregate time of day distribution of the activities
    - get_solutions: returns list of optimized schedules
    - get_runtimes: returns list of runtimes for each iteration
    - get_objective_values: returns list of objective values for each iteration.
    """

    def __init__(self, solutions: Optional[List[pd.DataFrame]] = None, runtimes: Optional[List[float]] = None, objective_values: Optional[List[float]] = None, multiday: bool = False, day_index: Optional[List] = None) -> None:
        """
        Parameters:
        ---------------
        - solutions: list of dataframes containing the optimized schedules
        - runtimes: list containing the runtimes of each iteration
        - objective_values: list containing the objective value of each iteration
        - multiday: True if each solution is a list of daily schedules
        - day_index: list of day numbers (1 = Monday, ..., 7 = Sunday), one per daily schedule
        """
        self.solutions = solutions
        self.runtimes = runtimes
        self.objective_values = objective_values
        self.multiday = multiday
        self.day_index = day_index
        self.n_days = len(self.day_index) if self.day_index else 1
        self.day_names = dict(enumerate(['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'], 1))
        self.n_iter = len(self.solutions) if self.solutions else 0

    def __str__(self) -> str:
        # runtimes defaults to None: report a total of 0 instead of failing in sum()
        total_runtime = sum(self.runtimes) if self.runtimes else 0
        return f'Results object for {self.n_iter} iterations. Total runtime:' + print_time_format(total_runtime)

    # ------------------------------------------------------------------ helpers

    @staticmethod
    def _is_empty(sol) -> bool:
        """True if the solution is missing (None) or contains no rows/schedules."""
        # explicit test: the truth value of a dataframe is ambiguous and raises ValueError
        return sol is None or len(sol) == 0

    @staticmethod
    def _activity_label(label: str) -> str:
        """Strips the trailing digits of a label and maps 'dawn'/'dusk' to 'home'."""
        base = label.rstrip('0123456789')
        return 'home' if base in ('dawn', 'dusk') else base

    def _with_activity_labels(self, sched: pd.DataFrame) -> pd.DataFrame:
        """Returns a copy of the schedule with an added 'act_label' column."""
        # assign() works on a copy, so the stored solutions are left untouched
        return sched.assign(act_label=sched.label.apply(self._activity_label))

    def _select_schedules(self, days: Optional[List] = None):
        """Returns (flat list of non-empty schedules, days actually used for the selection)."""
        if not self.solutions:
            return [], days
        if not self.multiday:
            candidates = list(self.solutions)
        else:
            if not days:
                days = self.day_index  # aggregate all days
            if days and self.day_index is None:
                raise ValueError('day_index is required to select specific days.')
            candidates = []
            for solution in self.solutions:
                if self._is_empty(solution):
                    continue  # failed iteration: nothing to aggregate
                for i, day_sol in enumerate(solution):
                    if self.day_index is None or self.day_index[i] in days:
                        candidates.append(day_sol)
        return [s for s in candidates if not self._is_empty(s)], days

    def _describe_days(self, days: List) -> str:
        """Formats a non-empty list of day numbers, e.g. '(Monday to Friday)'."""
        first, last = self.day_names[min(days)], self.day_names[max(days)]
        if len(days) > 1:
            con = "and" if len(days) == 2 else "to"
            return f'({first} {con} {last})'
        return f'({first})'

    @staticmethod
    def _mean_with_ci(values: List, bootstrap: int):
        """Returns (mean, bootstrapped mean, confidence interval); NaN everywhere for an empty sample."""
        if len(values) == 0:
            nan = float('nan')
            return nan, nan, (nan, nan)
        # single bootstrap call so that the reported mean and interval come from the same resamples
        bs_mean, ci = bootstrap_mean(values, bootstrap)
        return np.mean(values), bs_mean, ci

    @staticmethod
    def _save_or_show(stem: str, save_fig: Optional[str]) -> None:
        """Saves the current figure as '<stem>.<save_fig>', or shows it if save_fig is None/empty."""
        if save_fig:
            filename = f'{stem}.{save_fig}'
            plt.savefig(filename, format=save_fig)
            print(f'Figure saved at {filename}.')
        else:
            plt.show()

    def _plot_single(self, sol, index: int, colors: str, title: Optional[str], save_fig: Optional[str]) -> bool:
        """Plots one solution; returns False (and draws nothing) if there is no schedule."""
        if self._is_empty(sol):
            return False
        if self.multiday:
            self.plot_multiday(sol, colors)
        else:
            _, ax = plt.subplots(figsize=[20, 3])
            plot_schedule(sol, ax, colors)
        if title:
            plt.title(title, fontsize=14, fontweight='bold')
        self._save_or_show(f'schedule{index}', save_fig)
        return True

    # ----------------------------------------------------------------- plotting

    def plot(self, plot_every: int = 1, plot_iter: Optional[int] = None, colors: str = 'colorblind', title: Optional[str] = None, save_fig: Optional[str] = 'png') -> None:
        """
        Plots a given schedule.

        Parameters:
        ---------------
        - plot_every: plotting frequency as a number of iterations
        - plot_iter: index of iteration to plot. If None, every plot_every-th iteration is plotted.
        - colors: name of matplotlib/seaborn compatible palette, see options here: https://seaborn.pydata.org/tutorial/color_palettes.html
        - title: plot title as a string
        - save_fig: export format (png/pdf/svg) as string. if None, the figure is not saved.

        Return:
        ---------------
        None. Each figure is either shown or saved to 'schedule<iteration>.<save_fig>' if save_fig is not None.
        """
        if self.n_iter == 0:
            print('There is no schedule to plot.')
            return None

        if isinstance(plot_iter, int):
            if not self._plot_single(self.solutions[plot_iter], plot_iter, colors, title, save_fig):
                print('There is no schedule to plot. The optimisation might not have been successful.')
            return None

        for i, sol in enumerate(self.solutions):
            if i % plot_every == 0:
                if not self._plot_single(sol, i, colors, title, save_fig):
                    print(f'There is no schedule to plot for iteration {i}. The optimisation might not have been successful.')
        return None

    def plot_multiday(self, multi_schedules: List, colors: str = 'colorblind'):
        '''
        Plots schedules for the multiday case.

        Parameters:
        ---------------
        - multi_schedules: list of schedules in the multiday time horizon
        - colors: name of matplotlib/seaborn compatible palette, see options here: https://seaborn.pydata.org/tutorial/color_palettes.html

        Returns:
        ---------------
        - Matplotlib figure
        '''
        # never fewer rows than schedules, otherwise indexing the axes below would fail
        n_rows = max(self.n_days, len(multi_schedules))
        # squeeze=False: always get a 2D array of axes, even for a single row
        fig, axs = plt.subplots(n_rows, 1, figsize=[20, (3*n_rows + 2)], squeeze=False)
        axs = axs[:, 0]

        for j, day_sched in enumerate(multi_schedules):
            plot_schedule(day_sched, axs[j], colors)
            if self.day_index and j < len(self.day_index):
                axs[j].set_title(f'{self.day_names[self.day_index[j]]}', fontweight='bold', fontsize=14)

        plt.tight_layout()
        return fig

    # --------------------------------------------------------------- statistics

    def compute_statistics(self, activities: List = ['education', 'leisure', 'work', 'shopping'], days: Optional[List] = None, bootstrap: int = 100, verbose: bool = True, save: Union[bool, str] = 'out_stats.joblib') -> None:
        """
        Compute aggregate statistics for the optimized schedules.

        Parameters:
        ---------------
        - activities: list of activities of interest for the computations.
        - days: list of days of interest for the computations (only defined in the multiday case)
        - bootstrap: number of bootstrap samples to generate, to compute the confidence intervals.
        - verbose: if True, prints computed statistics.
        - save: if filename is provided, save statistics to file

        Return:
        ---------------
        None. The statistics are printed (verbose) and/or saved as a dictionary with joblib (save).
        Missing or empty schedules are ignored; statistics without any observation are NaN.
        """
        all_solutions, days = self._select_schedules(days)
        if not all_solutions:
            print('There is no schedule to compute statistics from.')
            return None

        labelled = [self._with_activity_labels(sol) for sol in all_solutions]
        sol_ooh = [s for s in labelled if len(s.act_label.unique()) > 1]  # only out of home solutions
        ooh_parts = [d[d.act_label != 'home'] for d in sol_ooh]  # out-of-home rows of each schedule

        # ------------------------------Proportion of out-of-home schedules-----------------------------------------
        f_ooh = 100 * len(sol_ooh) / len(labelled)

        # ----------------------------- Average total time out of home (for out of home schedules) -----------------
        mean_time_ooh, mean_time_ooh_bs, ci_time = self._mean_with_ci([p.duration.sum() for p in ooh_parts], bootstrap)

        # ------------------------- Average number of activities out of home (for out of home schedules)-------------
        mean_act_ooh, mean_act_ooh_bs, ci_act = self._mean_with_ci([len(p.index) for p in ooh_parts], bootstrap)

        # --------------------- Average time spent in each activity (for out of home schedules)---------------------
        mean_time_per_act, mean_time_per_act_bs, ci_time_act = [], [], []
        for act in activities:
            # only schedules in which the activity is actually performed
            durations = [d.loc[d.act_label == act, 'duration'].sum() for d in sol_ooh if (d.act_label == act).any()]
            mean, bs_mean, ci = self._mean_with_ci(durations, bootstrap)
            mean_time_per_act.append(mean)
            mean_time_per_act_bs.append(bs_mean)
            ci_time_act.append(ci)

        if verbose:
            addition = self._describe_days(days) if days else ' '

            # the day description is only printed in the multiday case (bool * str)
            print('Summary of collected statistics ' + self.multiday*addition)
            print('------------------------------------------------\n')
            print(f'Total number of schedules: {len(labelled)}')
            print(f'Proportion of out-of-home schedules: {f_ooh:.2f} %')
            print(f'Average time spent out-of-home: {mean_time_ooh:.2f}, CI: [{ci_time[0]:.3f},{ci_time[1]:.3f}] hours')
            print(f'Average number of out-of-home activities: {mean_act_ooh:.2f}, CI: [{ci_act[0]:.3f}, {ci_act[1]:.3f}]')
            print('------------------------------------------------\n')
            print('Average duration of each activity:')
            for i, act in enumerate(activities):
                print(f'{act.capitalize()}: {mean_time_per_act[i]:.2f}, CI: [{ci_time_act[i][0]:.3f}, {ci_time_act[i][1]:.3f}] hours')
            print('------------------------------------------------\n')

        if save and isinstance(save, str):
            save_dict = {'frequency_ooh': f_ooh,
                         'mean_time_ooh': mean_time_ooh,
                         'mean_time_ooh_bs': mean_time_ooh_bs,
                         'mean_act_ooh': mean_act_ooh,
                         'mean_act_ooh_bs': mean_act_ooh_bs,
                         'mean_time_per_act': mean_time_per_act,
                         'mean_time_per_act_bs': mean_time_per_act_bs}
            joblib.dump(save_dict, save)
            print(f'Saved statistics to: {save}')

        return None

    def plot_distribution(self, exclude: Optional[List] = ["escort", "business_trip", "errands_services"], block_size: float = 5/60, days: Optional[List] = None, figure_size: List = [7, 4], save_fig: Optional[str] = 'png') -> None:
        """
        Plots aggregate time of day distribution.

        Parameters:
        ---------------
        - exclude: list of activities to exclude from the visualization (None: exclude nothing)
        - block_size: size of the discretization in hours. Default: 5/60 hours.
        - days: list of days of interest for the aggregation (only defined in the multiday case)
        - figure_size: size of figure
        - save_fig: export format (png/pdf/svg) as string. if None, the figure is not saved.

        Return:
        ---------------
        None. The figure is either shown or saved to 'time_of_day_dist.<save_fig>' if save_fig is not None.
        """
        exclude = list(exclude) if exclude else []
        all_solutions, days = self._select_schedules(days)

        disc_list = []
        legend_acts = set()  # activities of all the plotted schedules, for the legend

        for sol in all_solutions:
            sched = self._with_activity_labels(sol)
            present = sched.act_label.unique()
            if len(present) == 1:
                continue  # single-activity schedule (no out-of-home activity): skipped

            legend_acts.update(a for a in present if a not in exclude)
            sched = sched[~sched.act_label.isin(exclude)]
            disc_list.append(discretize_sched(sched, block_size=block_size))

        if not disc_list:
            print('There is no out-of-home schedule to plot.')
            return None

        final_dict = defaultdict(list)  # activities observed in each time slot, over all schedules
        for t in range(int(24/block_size)):
            for slots in disc_list:
                if t in slots:
                    final_dict[t].append(slots[t])

        disc_df = pd.DataFrame.from_dict(final_dict, orient='index').transpose().melt(var_name='time', value_name='activity')
        # convert slot numbers to (rounded) hours, so that frequencies are aggregated per hour
        disc_df['time'] = disc_df.time.apply(lambda x: round(x * block_size))
        disc_grouped = (disc_df.groupby(['time', 'activity'])['activity'].count()/disc_df.groupby('time')['activity'].count())

        colors = activity_colors(palette="colorblind")
        colors['home'] = 'gainsboro'

        fig, ax = plt.subplots(figsize=figure_size)
        ax.set_facecolor('gainsboro')  # home is not drawn as a bar, it is the background

        # errors='ignore': 'home' may be absent (e.g. if it was excluded)
        shares = disc_grouped.unstack().drop(columns='home', errors='ignore')
        shares.plot.bar(stacked=True, color=colors, edgecolor='white', width=1, ax=ax, legend=False, rot=1)

        # LEGEND
        other_patches = [mpatches.Patch(color=colors[a], label=f'{a}') for a in sorted(legend_acts)]
        plt.legend(handles=other_patches, loc='upper right', fontsize=10)

        ax.set_ylabel("Frequency ", fontsize=12)
        ax.set_xlabel("Time [h]", fontsize=12)
        ax.set_xticks(range(0, 25, 4))
        ax.set_xticklabels(range(0, 25, 4))
        ax.set_xlim([0, 25])
        ax.set_ylim([0, 1])

        title = "Time of day distribution"
        if days:
            title = f'Time of day distribution, {self._describe_days(days)}'
        plt.title(title, fontsize=12)

        self._save_or_show('time_of_day_dist', save_fig)
        return None

    # ------------------------------------------------------------------ getters

    def get_solutions(self) -> List:
        """Returns list of optimized schedules"""
        return self.solutions

    def get_runtimes(self) -> List:
        """Returns list of runtimes for each iteration."""
        return self.runtimes

    def get_objective_values(self) -> List:
        """Returns list of objective_values for each iteration."""
        return self.objective_values