Source code for Histogram

import numpy as np
import csv
import warnings


class Histogram:
    """
    Defines a histogram object.

    The histograms can be initialized either with a tuple
    (hist_min,hist_max,num_bins) or a list/numpy.ndarray containing the bin
    boundaries, which allows for different bin widths.
    Multiple histograms can be added and averaged.

    .. note::
        It may be important to keep in mind that each bin contains its left
        edge but not the right edge. If a value is added to a histogram the
        bin assignment therefore follows:

        .. raw:: html

            <p><code style="color:red; background-color:transparent;
            font-size: 0.9em;">bins[i-1] &lt;= value &lt; bins[i]</code></p>

        As a consequence a value equal to the uppermost bin edge is not
        counted in any bin.

    Parameters
    ----------
    bin_boundaries : tuple
        A tuple with three values :code:`(hist_min, hist_max, num_bins)`.
        This creates an evenly sized histogram in the range
        :code:`[hist_min, hist_max]` divided into :code:`num_bins` bins.
    bin_boundaries : list/numpy.ndarray
        A list or numpy array that contains all bin edges. This allows for
        non-uniform bin sizes. The edges must be one-dimensional, strictly
        increasing and contain at least two values.

    Attributes
    ----------
    number_of_bins_: int
        Number of histogram bins.
    bin_edges_: numpy.ndarray
        Array containing the edges of the bins.
    number_of_histograms_: int
        Number of created histograms.
    histograms_: numpy.ndarray
        Array containing the histograms (might be scaled).
    histograms_raw_count_: numpy.ndarray
        Array containing the raw counts of the histograms.
    error_: numpy.ndarray
        Array containing the histogram error.
    scaling_: numpy.ndarray
        Array containing scaling factors for each bin.

    Methods
    -------
    histogram:
        Get the created histogram(s)
    histogram_raw_counts:
        Get the raw bin counts of the histogram(s).
    number_of_histograms:
        Get the number of current histograms.
    bin_centers: numpy.ndarray
        Get the bin centers.
    bin_width: numpy.ndarray
        Get the bin widths.
    bin_bounds_left: numpy.ndarray
        Extract the lower bounds of the individual bins.
    bin_bounds_right: numpy.ndarray
        Extract the upper bounds of the individual bins.
    bin_boundaries: numpy.ndarray
        Get the bin boundaries.
    add_value:
        Add one or multiple values to the latest histogram.
    add_histogram:
        Add a new histogram.
    average:
        Averages over all histograms.
    average_weighted:
        Performs a weighted average over all histograms.
    standard_error:
        Get the standard deviation for each bin.
    statistical_error:
        Statistical error of all histogram bins in all histograms.
    scale_histogram:
        Multiply latest histogram with a factor.
    set_error:
        Set the error for the histogram by hand.
    print_histogram:
        Print the histogram(s) to the terminal.
    write_to_file:
        Write one histogram to a csv file.

    Examples
    --------
    **Creating histograms**

    To create multiple histograms with the same number and size of bins we
    can initialize the Histogram object with a tuple (otherwise initialize
    with a list containing all bin edges). To fill these histograms and to
    store them in a variable, the methods ``add_value(value)``,
    ``add_histogram()`` and ``histogram()`` can be used.

    .. highlight:: python
    .. code-block:: python
        :linenos:

        >>> from Histogram import Histogram
        >>>
        >>> # Initialize a histogram in the range [0,10] with 10 bins
        >>> histObj = Histogram((0, 10, 10))
        >>>
        >>> # Add the values [1, 1.2, 3, 5, 6.5, 9, 9] to the histogram
        >>> histObj.add_value([1, 1.2, 3, 5, 6.5, 9, 9])
        >>> print(histObj.histogram())
        [0. 2. 0. 1. 0. 1. 1. 0. 0. 2.]
        >>>
        >>> # Add a second histogram and add the values [2, 7, 7.2, 9]
        >>> histObj.add_histogram()
        >>> histObj.add_value([2, 7, 7.2, 9])
        >>> print(histObj.histogram())
        [[0. 2. 0. 1. 0. 1. 1. 0. 0. 2.]
         [0. 0. 1. 0. 0. 0. 0. 2. 0. 1.]]
        >>>
        >>> # Store the histograms in hist as numpy.ndarray
        >>> hist = histObj.histogram()
    """

    def __init__(self, bin_boundaries):
        self.number_of_bins_ = None
        self.bin_edges_ = None
        self.number_of_histograms_ = 1
        self.histograms_ = None
        self.histograms_raw_count_ = None
        self.error_ = None
        self.scaling_ = None

        if isinstance(bin_boundaries, tuple) and len(bin_boundaries) == 3:
            hist_min, hist_max, num_bins = bin_boundaries

            if hist_min >= hist_max:
                raise ValueError('hist_min must be smaller than hist_max')
            if not isinstance(num_bins, (int, np.integer)) or num_bins <= 0:
                raise ValueError('Number of bins must be a positive integer')

            self.number_of_bins_ = int(num_bins)
            self.bin_edges_ = np.linspace(hist_min, hist_max,
                                          num=self.number_of_bins_ + 1)

        elif isinstance(bin_boundaries, (list, np.ndarray)):
            edges = np.asarray(bin_boundaries)
            if edges.ndim != 1 or edges.size < 2:
                raise ValueError('The bin edges must be a one-dimensional '
                                 'sequence with at least two entries')
            # np.digitize (used in add_value) and the half-open bin
            # convention both rely on increasing edges.
            if np.any(np.diff(edges) <= 0):
                raise ValueError('The bin edges must be strictly increasing')

            self.number_of_bins_ = len(edges) - 1
            self.bin_edges_ = edges

        else:
            raise TypeError('Input must be a tuple (hist_min, hist_max, num_bins) '
                            'or a list/numpy.ndarray containing the bin edges!')

        self.histograms_ = np.zeros(self.number_of_bins_)
        self.histograms_raw_count_ = np.zeros(self.number_of_bins_)
        self.scaling_ = np.ones(self.number_of_bins_)
        self.error_ = np.zeros(self.number_of_bins_)

    @staticmethod
    def _latest(array):
        """Return the latest histogram row of `array` (the array itself if 1D)."""
        return array if array.ndim == 1 else array[-1]

    def histogram(self):
        """
        Get the current histogram(s).

        Returns
        -------
        `histograms_`: numpy.ndarray
            Array containing the histogram(s).
        """
        return self.histograms_

    def histogram_raw_counts(self):
        """
        Get the raw bin counts of the histogram(s), even after the original
        histograms are scaled or averaged.

        Returns
        -------
        `histograms_raw_count_`: numpy.ndarray
            Array containing the raw counts of the histogram(s)
        """
        return self.histograms_raw_count_

    def number_of_histograms(self):
        """
        Get the number of current histograms.

        Returns
        -------
        `number_of_histograms_`: int
            Number of histograms.
        """
        return self.number_of_histograms_

    def bin_centers(self):
        """
        Get the bin centers.

        Returns
        -------
        numpy.ndarray
            Array containing the bin centers.
        """
        return (self.bin_edges_[:-1] + self.bin_edges_[1:]) / 2.0

    def bin_width(self):
        """
        Get the bin widths.

        Returns
        -------
        numpy.ndarray
            Array containing the bin widths.
        """
        return self.bin_edges_[1:] - self.bin_edges_[:-1]

    def bin_bounds_left(self):
        """
        Extract the lower bounds of the individual bins.

        Returns
        -------
        numpy.ndarray
            Array containing the lower bin boundaries.
        """
        return self.bin_edges_[:-1]

    def bin_bounds_right(self):
        """
        Extract the upper bounds of the individual bins.

        Returns
        -------
        numpy.ndarray
            Array containing the upper bin boundaries.
        """
        return self.bin_edges_[1:]

    def bin_boundaries(self):
        """
        Get the bin boundaries.

        Returns
        -------
        numpy.ndarray
            Array containing the bin boundaries.
        """
        return np.asarray(self.bin_edges_)

    def add_value(self, value):
        """
        Add value(s) to the latest histogram.

        A single number is sorted into its bin; a list/array is processed
        element by element. Values that do not fall into any bin are not
        counted. A warning is issued for values below the lowest or above
        the highest bin edge.

        Parameters
        ----------
        value: int, float, np.number, list, numpy.ndarray
            Value(s) which are supposed to be added to the histogram instance.

        Raises
        ------
        TypeError
            if the input is not a number or numpy.ndarray or list
        """
        # A list/array of numbers: add every element on its own
        if isinstance(value, (list, np.ndarray)):
            for element in value:
                self.add_value(element)
            return

        if not isinstance(value, (int, float, np.number)):
            err_msg = 'Invalid input type! Input value must have one of the '+\
                      'following types: (int, float, np.number, list, np.ndarray)'
            raise TypeError(err_msg)

        if value < self.bin_edges_[0] or value > self.bin_edges_[-1]:
            warn_msg = 'One or more values lie outside the histogram '+\
                       'range ['+str(self.bin_edges_[0])+','+str(self.bin_edges_[-1])+\
                       ']. Exceeding values are ignored. Increase histogram range!'
            warnings.warn(warn_msg)

        bin_index = np.digitize(value, self.bin_edges_) - 1

        # bin_index == -1 marks a value below the lowest edge. It must be
        # rejected explicitly, otherwise it would be interpreted as a
        # negative index and the value would end up in the last bin.
        if bin_index < 0 or bin_index > self.number_of_bins_ - 1:
            return

        self._latest(self.histograms_)[bin_index] += 1
        self._latest(self.histograms_raw_count_)[bin_index] += 1

    def add_histogram(self):
        """
        Add a new histogram to the Histogram class instance.

        If new values are added to the histogram afterwards, these are added
        to the last histogram.

        Returns
        -------
        Histogram
            Returns the Histogram object itself.
        """
        empty_histogram = np.zeros(self.number_of_bins_)
        self.histograms_ = np.vstack((self.histograms_, empty_histogram))
        self.histograms_raw_count_ = np.vstack((self.histograms_raw_count_,
                                                empty_histogram))
        self.scaling_ = np.vstack((self.scaling_,
                                   np.ones(self.number_of_bins_)))
        self.error_ = np.vstack((self.error_,
                                 np.zeros(self.number_of_bins_)))
        self.number_of_histograms_ += 1

        return self

    def average(self):
        """
        Average over all histograms.

        When this function is called the previously generated histograms are
        averaged with the same weights and they are overwritten by the
        averaged histogram. The error is set to the square root of the
        bin-wise sum over all histograms divided by the number of histograms.

        Returns
        -------
        Histogram
            Returns a Histogram object.

        Raises
        ------
        TypeError
            if there is only one histogram
        """
        if self.histograms_.ndim == 1:
            raise TypeError('Cannot average an array of dim = 1')

        self.error_ = (np.sqrt(np.sum(self.histograms_, axis=0))
                       / self.number_of_histograms_)
        self.histograms_ = np.mean(self.histograms_, axis=0)
        self.number_of_histograms_ = 1

        return self

    def average_weighted(self, weights):
        """
        Weighted average over all histograms.

        When this function is called the previously generated histograms are
        averaged with the given weights and they are overwritten by the
        averaged histogram. The error is set to the (unweighted) standard
        deviation over the histograms divided by the square root of the
        number of histograms.

        Parameters
        ----------
        weights: numpy.ndarray
            Array containing a weight for each histogram.

        Returns
        -------
        Histogram
            Returns a Histogram object.

        Raises
        ------
        TypeError
            if there is only one histogram
        """
        #TODO: correct error as in average()
        if self.histograms_.ndim == 1:
            raise TypeError('Cannot average an array of dim = 1')

        self.error_ = (np.std(self.histograms_, axis=0)
                       / np.sqrt(self.number_of_histograms_))
        self.histograms_ = np.average(self.histograms_, axis=0,
                                      weights=weights)
        self.number_of_histograms_ = 1

        return self

    def standard_error(self):
        """
        Get the currently stored error for each bin.

        Returns
        -------
        numpy.ndarray
            Array containing the stored error for each bin.
        """
        return self.error_

    def statistical_error(self):
        """
        Compute the statistical error of all histogram bins for all histograms.

        The error of each bin is the square root of its current (possibly
        scaled) content. The stored error is overwritten by the result.

        Returns
        -------
        numpy.ndarray
            Array containing the statistical error for each bin (and
            histogram, if there is more than one).
        """
        self.error_ = np.sqrt(self.histograms_)
        return self.error_

    def scale_histogram(self, value):
        """
        Scale the latest histogram by a factor.

        Multiplies the latest histogram by a number or a list/numpy array with
        a scaling factor for each bin. The same factor is applied to the
        stored scaling of the latest histogram.

        Parameters
        ----------
        value: int, float, np.number, list, numpy.ndarray
            Scaling factor for the histogram.

        Raises
        ------
        TypeError
            if the input is not a number or numpy.ndarray or list
        """
        if isinstance(value, (int, float, np.number)):
            factor = value
        elif isinstance(value, (list, np.ndarray)):
            factor = np.asarray(value)
        else:
            # Previously an unsupported type was silently ignored
            err_msg = 'Invalid input type! Input value must have one of the '+\
                      'following types: (int, float, np.number, list, np.ndarray)'
            raise TypeError(err_msg)

        # _latest returns the array itself or a view on its last row, so the
        # in-place multiplication modifies the stored data.
        latest_histogram = self._latest(self.histograms_)
        latest_histogram *= factor
        latest_scaling = self._latest(self.scaling_)
        latest_scaling *= factor

    def set_error(self, own_error):
        """
        Sets the histogram error by hand.

        This is helpful for weighted histograms where the weight has also an
        uncertainty. This function has to be called after averaging,
        otherwise the error will be overwritten by the standard error.

        Parameters
        ----------
        own_error: list, numpy.ndarray
            Values for the uncertainties of the individual bins.

        Raises
        ------
        ValueError
            if the input is not a list/numpy.ndarray or if its length differs
            from the number of bins
        """
        # The type is checked first so that len() is only called on sequences
        if not isinstance(own_error, (list, np.ndarray)) or\
           len(own_error) != self.number_of_bins_:
            error_message = "The input error has a different length than the"\
                     + " number of histogram bins or it is not a list/numpy.ndarray"
            raise ValueError(error_message)

        self.error_ = np.asarray(own_error)

    def print_histogram(self):
        """Print the lower edge, upper edge and content of each bin to the terminal."""
        print("bin_low,bin_high,bin_value")
        histograms = np.atleast_2d(self.histograms_)
        for hist_index in range(self.number_of_histograms_):
            print(f"{hist_index}. histogram:")
            for bin_index in range(self.number_of_bins_):
                print(f'{self.bin_edges_[bin_index]},'
                      f'{self.bin_edges_[bin_index + 1]},'
                      f'{histograms[hist_index][bin_index]}')

    def write_to_file(self, filename, label_bin_center, label_bin_low,
                      label_bin_high, label_distribution, label_error,
                      comment=''):
        """
        Write one histogram to a csv file.

        Parameters
        ----------
        filename: string
            Name for the output file
        label_bin_center: string
            Label for the bin center column.
        label_bin_low: string
            Label for the lower boundary of the bins.
        label_bin_high: string
            Label for the upper boundary of the bins.
        label_distribution: string
            Label for the histogram / distribution.
        label_error: string
            Label for the statistical error.
        comment: string
            Additional comment at the beginning of the file. It is possible
            to give a multi line comment, where each line should start with
            a '#'.

        Raises
        ------
        ValueError
            if there is more than one histogram
        """
        if self.number_of_histograms_ > 1:
            raise ValueError("At the moment only a single histogram can be"+\
                             " written to a file")

        header = [label_bin_center, label_bin_low, label_bin_high,
                  label_distribution, label_error]
        bin_centers = self.bin_centers()
        bin_low = self.bin_bounds_left()
        bin_high = self.bin_bounds_right()
        distribution = self.histograms_
        error = self.error_

        # The context manager closes the file even if writing fails;
        # newline='' is what the csv module expects for its file objects.
        with open(filename, 'w', newline='') as f:
            if comment != '':
                f.write(comment)
                f.write('\n')

            writer = csv.writer(f)
            writer.writerow(header)
            for i in range(self.number_of_bins_):
                writer.writerow([bin_centers[i], bin_low[i], bin_high[i],
                                 distribution[i], error[i]])