Source code for listmode.data

import json
from pathlib import Path

import datetime as dt
import time
import types
import sys
import numpy as np

import listmode.loaders as ldr
import listmode.exceptions as ex
from listmode import utils as ut


[docs]class TimeCache:
    """
    TimeCache manages timing data: it provides the dead/live time of the detector and maintains a
    list of index-time pairs marking where timing information was inserted, so that time periods can
    be retrieved quickly. Each interval holds a variable number of events. Because both indices and
    timestamps are monotonically increasing, either one can be used to find intervals in the data.

    The timing datafile is saved with the data and should be treated as read-only. Each row holds the
    insertion index ('idx', uint64), the timestamp ('t', uint64) and the dead-time delta of the
    interval for each channel (float32). The first row always points to the first event with zero
    dead times for all channels; the first real dead time value is stored in the second row.
    """

    def __init__(self, parent):
        """
        A component for time caching and dead time handling. Takes the calling Data instance as
        parent for data access.

        :param parent: Parent Data instance.
        """
        self.parent = parent
        self.dlen = len(parent.ch_list)
        type_list = [('idx', '<u8'), ('t', '<u8')]
        for x in range(self.dlen):
            type_list.append(('dt{}'.format(x), '<f4'))
        self.timing = np.zeros((0,), dtype=type_list)
[docs]    def set(self, timing):
        """
        :param timing: an opened np.memmap instance containing the timing data (retrieved by
                       read_binary_data)
        """
        self.timing = timing
[docs]    def get_timing(self, t_slice=None):
        """
        Return dead time data for a slice. The first entry is zeros and the second one is
        interpolated to start from t_slice[0]. The last one is an extra row interpolated to
        t_slice[1]. If t_slice is not defined this method returns the timing data as it is.

        :param t_slice: Time slice (in ns)
        :return: (interpolated) timing data for the time slice
        """
        if t_slice is None:
            return self.timing
        indices = np.zeros((self.dlen, 2), dtype='u8')
        dts = np.zeros((self.dlen, 2))
        for ch in range(self.dlen):
            # first interp the first index
            indices[ch, :] = self.find(t_slice, ch)
            dts[ch, 0] = self._interp(ch, t_slice[0], indices[ch, 0], prev=True)[1]  # pick the part after t_val
            if indices[ch, 1] == indices[ch, 0] + 1:
                # both ends of t_slice fit within a single timing interval
                dts[ch, 1] = dts[ch, 0] - self._interp(ch, t_slice[1], int(indices[ch, 1]), prev=False)[1]  # subtract the rest
            else:
                dts[ch, 1] = self._interp(ch, t_slice[1], int(indices[ch, 1]), prev=False)[0]  # pick the part before t_val
        timing_idx_0 = indices[:, 0].min()
        timing_idx_1 = int(indices[:, 1].max())
        retval = self.timing[timing_idx_0:timing_idx_1 + 1].copy()  # include the last index
        ch_list = ['dt{}'.format(x) for x in range(self.dlen)]
        retval[ch_list][0] = 0
        retval[ch_list][1] = tuple(dts[:, 0])
        retval[ch_list][-1] = tuple(dts[:, 1])
        return retval
[docs]    def get_dead_time(self, t_slice=None):
        """
        Return the dead time of the time slice.

        :param t_slice:
        :return: All dead times in a numpy array. Live and dead times are float values of seconds.
        """
        timing = self.get_timing(t_slice)
        ch_list = ['dt{}'.format(x) for x in range(self.dlen)]
        dead_time = np.zeros((self.dlen,))
        for ch_idx in range(self.dlen):
            dead_time[ch_idx] = timing[ch_list[ch_idx]].sum()
        return dead_time
[docs]    def get_live_time(self, t_slice=None):
        """
        Return live time of the slice.

        :param t_slice:
        :return: All live times in a numpy array. Live and dead times are float values of seconds.
        """
        livetime = self.get_total_time(t_slice) * 1e-9 - self.get_dead_time(t_slice)
        return livetime
[docs]    def get_total_time(self, t_slice=None):
        """
        Return total time of the time slice, or total time of the measurement if t_slice is None.

        :param t_slice:
        :return: Total time value in nanoseconds
        """
        if t_slice is None:
            tottime = self.parent.data_dict['time'][-1]
        else:
            tottime = t_slice[1] - t_slice[0]
        return tottime
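The three accessors above are meant to be used together. A minimal usage sketch follows; the `data` object is a hypothetical, already loaded Data instance, not part of this module:

# Usage sketch (hypothetical `data`, an already loaded Data instance):
t_slice = (0, int(60e9))                       # first 60 s of the measurement, in ns
dead = data.t_cache.get_dead_time(t_slice)     # per-channel dead time [s]
live = data.t_cache.get_live_time(t_slice)     # per-channel live time [s]
total = data.t_cache.get_total_time(t_slice)   # scalar total time [ns]
# per channel: live == total * 1e-9 - dead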
def _interp(self, ch, t_val, index, prev=True): """ Interpolates dead time values for time slices. Timing array lists the dead time of each channel by some interval that depends on the loader. If a time_slice does not hit the interval boundary the value has to be somehow interpolated for the slice. This is especially true if the dead time itself is being plotted and the time bin is smaller than the interval. :param ch: The channel in question. :param t_val: A nanosecond value defining a point in time. :param index: The last timing index that is earlier than t_val for the channel. For start point of a time slice this is just the start idx returned by find(t_slice, channel), for the endpoint it is the stop idx - 1. :param prev: whether index points to timing index previous to t_val or after it. :return: Return a tuple with a dead time value for the part before the t_val and after it so that an interval in timing array can be sliced. """ t_min = self.parent.data_dict['time'][0] # cannot to go before the first event print('t_min, t_val', t_min, t_val) t_val = max(t_min, t_val) if prev: # idx0 is given idx0 = self.timing['idx'][index] t0 = self.parent.data_dict['time'][idx0] # find next nonzero index (index is uint64, so it automatically casts itself to float on a mathematical # operation, hence the int() mask = self.timing['dt{}'.format(ch)][int(index + 1):] != 0 index1 = int(index + 1 + np.argmax(mask)) idx1 = int(self.timing['idx'][index1]) t1 = self.parent.data_dict['time'][idx1] dt = self.timing['dt{}'.format(ch)][index1] else: # idx1 is given idx1 = int(self.timing['idx'][index]) print('idx of end', self.timing[index-1:], self.parent.data_dict['time'].shape[0]) print('time of idx', self.parent.data_dict['time'][idx1:]) print('time of end', self.parent.data_dict['energy'][idx1:, :]) print('channel', ch) t1 = self.parent.data_dict['time'][idx1] # find previous nonzero index mask = self.timing['dt{}'.format(ch)][int(index - 1)::-1] != 0 index0 = int(index - 1 - np.argmax(mask)) idx0 = int(self.timing['idx'][index0]) t0 = self.parent.data_dict['time'][idx0] dt = self.timing['dt{}'.format(ch)][index] if t0 > t_val or t_val > t1: # sanity check print('t0\t', t0) print('t_val\t', t_val) print('t1\t', t1) t_val = t1 #raise ValueError('Invalid t_val in get_timing.') return dt*(t_val - t0)/(t1 - t0), dt*(t1 - t_val)/(t1 - t0)
[docs] def find(self, t_slice, ch=None): """ Finding indices in self.timing that contain the t_slice time. :param t_slice: tuple of nanosecond values defining a slice in time :param ch: If specified will return only indices in which dead time has been given for ch. This is mainly used by get_timing to interpolate the dead time. :return: indices to self.timing containing the time slice. """ if ch is None: mask = np.ones((self.timing.shape[0],), dtype='bool') else: mask = np.zeros((self.timing.shape[0],), dtype='bool') mask[0] = True # include zero in the beginning nonz = self.timing['dt{}'.format(ch)] > 0 mask[nonz] = True # last index under t_slice[0] or 0 idx1 = 0 prev = 0 for idx, t_idx in enumerate(self.timing['idx']): if mask[idx]: if self.parent.data_dict['time'][t_idx] >= t_slice[0]: #went over, now need to go back to previous event with mask True idx1 = prev break prev = idx # first index over t_slice[1] or last index in mask try: for idxn, t_idx2 in enumerate(self.timing[idx1+1:]['idx']): if self.parent.data_dict['time'][t_idx2] >= t_slice[1] and mask[idx1 + 1 + idxn]: break except: print('Error in TCache') print(self.parent.data_dict['time'].shape, t_idx2, t_slice, mask.shape) raise idx2 = idx1 + 1 + idxn return idx1, idx2
[docs]    def get_indices(self, t_slice=None):
        """
        Return start and stop event indices (endpoint not inclusive) that fully contain the time
        slice, using the timing info as a hash.

        :param t_slice: tuple of nanosecond values defining a slice in time. If None, full data is
                        returned.
        :return: indices to event data containing the time slice
        """
        if t_slice is None:
            return 0, self.parent.data_dict['time'].shape[0]
        if self.timing.shape[0] == 0:
            start = 0
            stop = self.parent.data_dict['time'].shape[0]
            print('Time cache not set!')
            return start, stop
        idx1, idx2 = self.find(t_slice)
        start = self.timing['idx'][idx1]
        stop = int(self.timing['idx'][idx2] + 1)  # idx2 is included in the range
        return start, stop
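Because timing rows are only written at interval boundaries, the returned index range is a superset of the requested slice. A sketch of trimming it to exact bounds, continuing the hypothetical `data` and `t_slice` from the sketch above:

# Sketch: slicing the event arrays via the time cache (hypothetical `data` instance):
start, stop = data.t_cache.get_indices(t_slice)
times = data.data_dict['time'][start:stop]
exact = np.logical_and(times >= t_slice[0], times < t_slice[1])  # trim to the exact slice
energies = data.data_dict['energy'][start:stop][exact]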
[docs]class Metadata: """ Metadata is responsible for the saving, loading and generation of metadata within data. Under normal circumstances the metadata is present in a json file, and is loaded by the metadata class. If, however, metadata is missing or needs to be changed the metadata class provides methods for updating, validating and saving the changes. """ def __init__(self, parent): """ As a component the metadata needs reference to the calling Data class instance as parent for data access. :param parent: Parent Data instance. """ self.parent = parent # is this needed anywhere? It is the prototype of minimal metadata fields. self._run_data = [] for idx in parent.ch_list: # definition of the (per channel) metadata is here self._run_data.append({'start': None, 'stop': None, 'input_counts': 0, 'counts': 0, 'events': 0, 'total_time': 0, 'live_time': 0., 'dead_time': 0., 'name': '', 'run_id': '', 'notes': ''}) # Few of the metadata items can be easily implemented as properties, so why not. # start, stop, total time and events are the same for every channel so they are simple. counts are handled # as np.arrays. @property def start(self): return self._run_data[0]['start'] @start.setter def start(self, value: dt.timedelta): for i in range(len(self._run_data)): self._run_data[i]['start'] = value @property def stop(self): return self._run_data[0]['stop'] @stop.setter def stop(self, value: dt.timedelta): for i in range(len(self._run_data)): self._run_data[i]['stop'] = value @property def total_time(self): if not isinstance(self._run_data[0]['total_time'], (int, np.integer)): raise ex.ListModeMetadataSetError('Invalid type for total time in metadata!') return self._run_data[0]['total_time'] @total_time.setter def total_time(self, value): if not isinstance(value, (int, np.integer)): raise ex.ListModeMetadataSetError('Invalid type given for total time!') dt = self.parent.get_dead_time() for i in range(len(self._run_data)): self._run_data[i]['total_time'] = value self._run_data[i]['dead_time'] = dt[i] self._run_data[i]['live_time'] = value * 1e-9 - dt[i] @property def events(self): return self._run_data[0]['events'] @events.setter def events(self, value: np.uint64): for i in range(len(self._run_data)): self._run_data[i]['events'] = value @property def input_counts(self): ldata = len(self._run_data) i_c = np.zeros((ldata,)) for i in range(ldata): i_c[i] = self._run_data[i]['input_counts'] return i_c @input_counts.setter def input_counts(self, value): ldata = len(self._run_data) for i in range(ldata): self._run_data[i]['input_counts'] = value[i] @property def counts(self): ldata = len(self._run_data) i_c = np.zeros((ldata,)) for i in range(ldata): i_c[i] = self._run_data[i]['counts'] return i_c @counts.setter def counts(self, value): ldata = len(self._run_data) for i in range(ldata): self._run_data[i]['counts'] = value[i] @property def run_id(self): return self._run_data[0]['run_id'] @run_id.setter def run_id(self, value: str): for i in range(len(self._run_data)): self._run_data[i]['run_id'] = value @property def name(self): return self._run_data[0]['name'] @name.setter def name(self, value: str): for i in range(len(self._run_data)): self._run_data[i]['name'] = value @property def notes(self): return self._run_data[0]['notes'] @notes.setter def notes(self, value: str): for i in range(len(self._run_data)): self._run_data[i]['notes'] = value
[docs] def load(self): """ Loads metadata from json files. If incomplete metadata is loaded it is updated from the data. :return: None """ try: required_fields = set(self._run_data[0].keys()) for ch_idx in range(len(self.parent.ch_list)): ch_meta = ut.read_channel_metadata(self.parent.config.path['home'], self.parent.base_name, ch_idx) if required_fields <= ch_meta.keys(): # required fields is subset of what is loaded self._run_data[ch_idx] = ch_meta else: print('Metadata load fails.') print('path:', self.parent.config.path['home'], self.parent.base_name, ch_idx) raise ex.ListModeMetadataSetError('Incomplete metadata!') except FileNotFoundError as e: print('No metadata found, please run calculate.') raise
[docs]    def save(self):
        """
        Save metadata back to json.

        :return: None
        """
        for ch_idx in range(len(self.parent.ch_list)):
            ut.write_channel_metadata(self.parent.config.path['home'], self.parent.base_name, ch_idx,
                                      self._run_data)
[docs] def calculate(self): """ Generates metadata from parent data and members. Calculate should not touch values that are set by the loader. :return: """ # Start and stop times need to be set ddict = self.parent.data_dict if self.total_time == 0: self.total_time = ddict['time'][-1] if self.start is None: # # If no start time, stop time is checked for calculation if self.stop is None: # if both are None we are in trouble: print('No start or stop defined!') self.stop = dt.datetime.fromtimestamp(time.time()) self.notes = self.notes + ' No start or stop defined. Timestamps calculated from load time.' self.start = self.stop - dt.timedelta(seconds=self.total_time * 1e-9) #raise ListModeMetadataSetError('No start or stop defined!') elif self.stop is None: # If start time is defined it is used to calculate stop self.stop = (self.start + dt.timedelta(seconds=self.total_time * 1e-9)) self.input_counts = np.count_nonzero(ddict['energy'] >= 0, axis=0) self.counts = np.count_nonzero(ddict['energy'] > 0, axis=0) self.events = ddict['time'].shape[0] self.name = self.parent.config.det['name'] self.run_id = self.parent.base_name self.save()
[docs] def set(self, key, value, channel): """ Set a metadata item for one or all channels. For example some sample related information can be retrieved from database and added to metadata after the data is created. This method exists to give easy access to metadata for the loader functions of vendor specific data. This method should not be used to set the minimal metadata handled by the properties of Metadata class. ListModeMetadataSetError is raised if even tried. :param key: Key to _run_data dict :param value: A value to set. :param channel: Channel to modify. If ch is less than 0, then all channels are updated. :return: """ if key in ['start', 'stop', 'total_time', 'dead_time', 'live_time', 'events', 'input_counts', 'counts', 'run_id', 'name', 'notes']: raise ex.ListModeMetadataSetError('Set method received a key that is handled as a property!') if channel < 0: for ch_idx in range(len(self.parent.ch_list)): self._run_data[ch_idx][key] = value else: self._run_data[channel][key] = value
[docs]    def get(self, key, channel):
        """
        Get a metadata item that is not one of the properties. This is simply wrapping the dict
        indexing.

        :param key: keyword to get
        :param channel: channel
        :return: value
        """
        return self._run_data[channel][key]
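For orientation, the per-channel record that Metadata reads and writes contains at least the fields initialised in __init__ above. A sketch of one record, expressed as a Python dict; all values are illustrative only, and the on-disk representation is whatever ut.write_channel_metadata produces:

# Illustrative per-channel metadata record (values are made up):
example_run_data = {'start': '2020-01-01T12:00:00', 'stop': '2020-01-01T12:10:00',
                    'input_counts': 12543, 'counts': 12101, 'events': 11987,
                    'total_time': 600000000000,            # ns
                    'live_time': 598.2, 'dead_time': 1.8,  # s
                    'name': 'demo_detector', 'run_id': 'run_001', 'notes': ''}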
[docs]class Data: """ Sort of generic data class, with plug-in dataloader for vendor specific raw data and extra data items configurable via configuration file. Timestamp is always present in all kinds of data. It stores the event time in nanoseconds since the start of the data. It is always 64-bit unsigned integer and is handled in a special way by listmode. All other data is defined by info-dictionaries that are of the form: info_dict = {"name": "some_data", "type": "u1", "num_col": 2, "aggregate": "col", "ch_mask": [1, 1, 0, 0], "multi": "mean"} Data is held in a data dictionary, with data name as key and memmap of the data as the value. (Currently the data is also held as members with same name as the data for backward compatibility using _update method). All data is stored in numpy arrays in the data dict. The data dict always contains time_vec and data_mat: time and energy information of events. data_mat is defined by 'events' info dict in the configuration file. Data dict can also contain extra data, defined by 'extras' list of info dicts in the configuration file. Few types of extras are hardcoded into Listmode and are handled in a special way if they are present: coord: coordinate information. Correspondence of channels to coordinate columns is given by config.det['coordinates']. This is used for data selection and plots. latency:timing information. Each column is the time difference between 'main' channel and other channels in the event. Used to tune the latency and coincidence window. multihit: A flag that is raised if a channel has several hits per event. A type of nondestuctive pileup. The energy value of a multihit event is calculated using a function defined by the 'multi' keyword. All other extras are just carried along with the data and can be plotted (not quite yet) or used for event selection (not there either). """ def __init__(self, config): """ Configuration file contains a recipe to the data and is loaded on the beginning of the creation of the data. All data structures, time cache and metadata are created here, but the data is still empty and has to be loaded with load_data-method. :param config: A pathlib path to detector configuration file or the configuration dict itself. """ # do init stuff self.config = config self.data_ext = self.config.det['extension'] self.data_type = self.config.det['data_type'] # select raw data loader detector_type = {'pixie4': ldr.pixie4_loader, 'g4': ldr.g4_loader, 'dspec': ldr.dspec_loader, 'caen': ldr.caen_loader, 'PANDA': ldr.panda_loader, 'standard': None} print('Loading data', config.det['data_type']) # initializing correct raw data loader, if not standard data temp = detector_type[self.config.det['data_type']] if temp is not None: self._read_raw_data = temp.__get__(self) # detector init self.num_ch = len(self.config.det['ch_cfg']) self.ch_list = self.config.det['ch_list'] #self.events = 0 # Stuff defined per event. 
self.data_dict = {} # self.name_list = [] self.data_dict['time'] = np.zeros((0,), dtype='uint64') # self.name_list.append('time') # create the main event type # todo: this should be just one of the extras or 'datas' self.event_info = {"name": "energy", "num_col": self.num_ch, "aggregate": "col", "ch_mask": np.ones((self.num_ch,), dtype='bool')} self.event_info.update(self.config.det["events"]) self.data_dict['energy'] = np.zeros((0, self.num_ch), dtype=self.event_info['type']) #self.name_list.append('energy') try: for extra in self.config.det['extras']: self.data_dict[extra['name']] = np.zeros((0, extra['num_col']), dtype=extra['type']) # self.name_list.append(extra['name']) except KeyError: pass self._update() # init time cache self.t_cache = TimeCache(self) # init metadata self.metadata = Metadata(self) # For calibration a dummy is initialized here and can be overwritten if a cal file is defined #self.cal = np.zeros((len(self.ch_list), 3)) #self._load_calibration() self.chunk_idx = 0 # for block based data processing def _update(self): """ All data, including the extras, is set as a direct member of data class. This is for compatibility with the old code and will be deprecated in the future. More harmful than useful. Deprecated. :return: """ pass # old variable names for compatibility and easy referral: #for name, value in self.data_dict.items(): # setattr(self, name, value)
[docs] def load_data(self, data_path_str, name=None, reset=False): """ Loads data preferably from event mode .dat files. If this fails, then channel data is searched for. (Channel data may be saved as intermediary step when doing slow conversion from other data formats.) Otherwise _read_raw_data method is called. Native format has no raw data and will fail. :param data_path_str: Path to data directory. It has to be either a string or a pathlib Path object. :param name: Optional name, if data file does not share the same base_name as the directory. :param reset: The raw data parsing can be forced with reset=True. :type reset: Bool :return: """ data_path = Path(data_path_str) save_metadata = False # for old datasets with no metadata present # check that path and files exist loaded = False if not data_path.is_dir(): raise ValueError("Invalid datapath in read data!") if name is None: self.base_name = data_path.name else: self.base_name = name if not reset: try: # check if base_name has an event mode datafile and load it. print('Trying to read parsed data', data_path, self.base_name) self.data_dict, timing_data = read_binary_data(data_path, self.base_name, cfg=self.config, mode='event') self.t_cache.set(timing_data) self._update() loaded = True except ex.ListModeDataNotFoundError: print('Cannot find parsed data') except: print('Event data load failed!') raise if not loaded: try: # check if base_name has channel datafiles and load them. (Rerunning coincidence # parsing can be done quickly by deleting the event file!) # Otherwise set reset to True and proceed. print('Trying to read channel data', data_path, self.base_name) self._load_channel_data(data_path) #if len(self.ch_list) != self.data_mat.shape[1]: # raise ValueError('Invalid number of channels!') loaded = True except ex.ListModeDataNotFoundError: print('Cannot find channel data') raise except: print('Channel data load failed!') raise if not loaded: if self.data_type == 'standard': # there is no raw data for standard data raise FileNotFoundError('No datafiles') reset = True if reset: print('Data reset!') # Read raw data try: print('Reading raw data') # _read_raw_data is responsible for converting raw data to channel data, parsing it and providing # metadata if it is missing self._read_raw_data(data_path) except FileNotFoundError: print('No raw data!') print(data_path) print('Exit!') raise try: self.metadata.load() except FileNotFoundError: self.metadata.calculate() self._update()
[docs]    def get_data_block(self, t_slice=None):
        """
        Get data and time vectors, processed in chunks of 1M events to save memory. Optionally,
        define a slice in time. The method should be called in a loop to read everything. All data
        including extras is returned. The last return value, isdata, indicates whether there is more
        data to come. On a False the loop should be stopped, but the last data is still valid.

        :param t_slice: A tuple of start and stop times in nanoseconds. Full data is read if this is
                        None. The time slice should never be changed while reading the data in a loop.
        :return: A tuple of (data_dict, isdata) for the current chunk.
        """
        chunk_size = 1000000
        events = self.data_dict['time'].shape[0]
        start, stop = self.t_cache.get_indices(t_slice)
        num_chunks = (stop - start) // chunk_size
        if self.chunk_idx < num_chunks:
            isdata = True  # data left
        else:  # self.chunk_idx == num_chunks - 1: # last chunk
            isdata = False  # last chunk
        idx1 = int(start + self.chunk_idx * chunk_size)
        idx2 = min(stop, int(start + (self.chunk_idx + 1) * chunk_size), int(events))
        if t_slice is not None:
            mask = np.logical_and(self.data_dict['time'][idx1:idx2] > t_slice[0],
                                  self.data_dict['time'][idx1:idx2] < t_slice[1])
        else:
            mask = np.ones((self.data_dict['time'][idx1:idx2].shape[0],), dtype='bool')
        retdict = dict()
        for name, item in self.data_dict.items():
            retdict[name] = item[idx1:idx2, ...][mask, ...]
        if isdata:
            self.chunk_idx += 1
        else:
            self.chunk_idx = 0
        return retdict, isdata
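A minimal read loop over this chunked interface, as a sketch; `data` is a loaded Data instance and `process` stands in for any user function, neither is part of this module:

# Sketch: reading the whole dataset in 1M-event chunks (hypothetical `data` and `process`):
while True:
    chunk, isdata = data.get_data_block(t_slice=None)
    process(chunk['time'], chunk['energy'])   # extras are present in `chunk` as well
    if not isdata:
        break                                 # the last chunk has already been processed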
[docs] def get_dead_time(self, t_slice=None): """ Get dead time for the data or a time_slice of data. :param t_slice: a tuple of start and stop times in nanoseconds. Full dead time is retrieved if this is set to None. :return: The dead times in [s] for all channels as a vector of floats. """ deltat = np.zeros((len(self.ch_list))) if t_slice is None: # currently implemented timing = self.t_cache.timing else: # not implemented timing = self.t_cache.get_timing(t_slice) for ch in range(len(self.ch_list)): deltat[ch] = timing['dt{}'.format(ch)].sum() return deltat
[docs]    def get_end_time(self):
        return self.data_dict['time'][-1]
def _parse_on_load(self, data): """ For parsing events from channel mode files in batches to conserve memory. :param data: a tuple of (list of data_dicts, list of timing_datas). :return: """ batch_size = 100000 data_list, timing_data = data print('Allocating event file streamers!!') evnt_streamer = StreamData(self.config.path['home'], self.base_name, raw=False, method='event') timing_streamer = StreamData(self.config.path['home'], self.base_name, raw=False, method='timing') # ugly fix for the stupid zero entries in the beginning of channel timing data for td_idx in range(len(timing_data)): if timing_data[td_idx]['idx'][0] == 0: # if this fires, we are at the beginning of data. It should! timing_data[td_idx] = timing_data[td_idx][1:] timing_streamer.write(np.zeros((1,), dtype=[('idx', 'u8'), ('t', 'u8')] + [('dt{}'.format(x), 'f4') for x in range(len(timing_data))])) try: extras = self.config.det['extras'] except KeyError: extras = None extra_streamers = dict() for extra in self.config.det['extras']: # self.name_dict.keys: extra_streamers[extra['name']] = StreamData(self.config.path['home'], self.base_name, raw=False, method='extra', extra_name=extra['name']) # Time vector, or data_tuple[ch_idx]['time'], is used for parsing. The data is pushed to EventBuilder in # batches of constant time, but not exceeding max_datasize in length. chlist = np.array(range(self.num_ch), dtype='u1') # used to retrieve indices through boolean indexing idx0_front = np.zeros((len(self.ch_list),), dtype='u8') # start indices for batch idx1_front = np.zeros_like(idx0_front) # stop indices for batch # set data_left for beginning idx_max = np.array([x['time'].shape[0] for x in data_list], dtype='u8') # the last indices of data data_left = idx0_front < idx_max # current timestamps # Empty channels must be handled separately or hilariousness ensues. t_front = np.zeros((self.num_ch,), dtype='u8') for live_ch in chlist[data_left]: t_front[data_left] = data_list[live_ch]['time'][idx1_front[live_ch]] # event builder is supposed to be an online function, so doing it posthumously is unnecessarily complicated... if self.num_ch > 1: # todo: What happens if only 1 channel in data? ev_bldr = EventBuilder(len(self.ch_list), self.config.det['coinc_win'], self.config.det['latency'], extras, self.event_info, max_datasize=batch_size) # Loop through the data. Chop everything into equal time chunks by finding channel with highest rate and # selecting max_datasize events from that channel. Include other channels up to same time while np.any(data_left): try: # next batch end idx in all channels idx1_front[data_left] = [min(idx_max[x], idx0_front[x] + batch_size) for x in chlist[data_left]] print('idx1_front', idx1_front) # corresponding timestamps, idx1_front points to one past the last index in the chunk t_front[data_left] = [data_list[x]['time'][int(idx1_front[x]-1)] for x in chlist[data_left]] except: print('exception in t_front calc') raise events_left = idx_max - idx0_front print('left :', events_left) print(t_front) mask = events_left > batch_size # mask the channels which have more counts than the current batch if np.any(mask): # pick the active channel with smallest timestamp at batch end. Here we have to take into account # that some channels may already be done for. Hence the data_left tricks. lead_ch = chlist[data_left][t_front[data_left].argmin()] else: # when data is about to end we take one more step. 
We pick the channel with biggest timestamp #lead_ch = idx_max.argmax() lead_ch = t_front.argmax() print('Last batch!') lead_t = t_front[lead_ch] print('channel with last event in batch is', lead_ch, 'and t is', lead_t, 'ns.') full_data = [None for _x in range(len(self.ch_list))] ch_timing = [[] for _x in range(len(self.ch_list))] # Then find the same (or smaller) time for all active channels and cut the batch there for ch_idx in chlist: # we go through empty channels too to provide empty data for event builder. ch_data = dict() if data_left[ch_idx]: if ch_idx != lead_ch: # easy way of finding the last timestamp under lead_t. Return value is number of events # to include from the chunk, or the index +1 # of the last event in the chunk that passes the test <= lead_t. # The test fails if all timestamps are smaller (0 returned, should not happen as lead_ch # is smallest) # or the last timestamps are equal (0 returned, unlikely but possible). Zero is returned # also when all time stamps are more than lead_t. This is correct behaviour. temp = np.argmin(data_list[ch_idx]['time'][idx0_front[ch_idx]:idx1_front[ch_idx]] <= lead_t) # debug if temp == 0: if data_list[ch_idx]['time'][idx0_front[ch_idx]] > lead_t: # the time of the first event in the chunk is bigger than the end time # of the chunk. Empty chunk so temp is correct! print('!"!"!"!"!"!"! correct temp 0') pass elif data_list[ch_idx]['time'][int(idx1_front[ch_idx] - 1)] == lead_t: # last event in batch is shared between several channels temp = int(idx1_front[ch_idx] - idx0_front[ch_idx]) print('%%%%%%% incorrect temp0') elif data_list[ch_idx]['time'][int(idx1_front[ch_idx] - 1)] < lead_t: # last index is less than lead_t -> crash! temp = int(idx1_front[ch_idx] - idx0_front[ch_idx]) if idx1_front[ch_idx] < idx_max[ch_idx]: # Check if data left raise ex.ListModeTimestampError('Last timestamp is less than lead_t but data is left!') else: raise # correct idx1 front idx1_front[ch_idx] = idx0_front[ch_idx] + temp else: print('Empty channel!') print('tfronts', idx0_front, idx1_front) print('idx max', idx_max) print('data left', data_left) # correct idx1 front #idx1_front[ch_idx] = idx0_front[ch_idx] #raise # timing data sliced by (idx) timing_mask = np.logical_and(timing_data[ch_idx]['idx'] >= idx0_front[ch_idx], timing_data[ch_idx]['idx'] <= idx1_front[ch_idx]) temp_extra = [] # build batch. for name, value in data_list[ch_idx].items(): ch_data[name] = value[idx0_front[ch_idx]:idx1_front[ch_idx]] full_data[ch_idx] = ch_data ch_timing[ch_idx] = timing_data[ch_idx][timing_mask] ev_data_dict, ev_timing = ev_bldr.run_batch(full_data, ch_timing) # STOP evnt_streamer.write((ev_data_dict['time'], ev_data_dict['energy'])) for name in extra_streamers.keys(): extra_streamers[name].write(ev_data_dict[name]) timing_streamer.write(ev_timing) print('timing', ev_timing) print('streamed', ev_data_dict['time'].shape[0]) idx0_front[data_left] = idx1_front[data_left] # recalculate data_left data_left = idx0_front < idx_max print('debug', data_left) else: #todo: not modified yet. Should the raw data be directly copied or just renamed pass # print('last timing indices', [timing_data[ch]['idx'][-1] for ch in chlist]) # print(ev_timing['idx'][-1]) evnt_streamer.close() timing_streamer.close() for es in extra_streamers.values(): es.close() def _load_channel_data(self, data_path): """ Used to read channel data and parse into events. Channel data for each channel can be just measurement or zero-suppressed strip detector data, with 1-d coordinates on a separate file. 
Coordinate data is aggregated into final coordinate information (forming an n-d coord-data) in the order of channel_cfg vector. :param data_path: self evident :param delete: delete channel files on exit :return: """ # ch_list is a tuple of (list of ch_data_dicts, list of ch_timing_datas) ch_list = read_binary_data(data_path, self.base_name, mode='channel', cfg=self.config) self._parse_on_load(ch_list) del(ch_list) # free the files data_dict, timing_data = read_binary_data(data_path, self.base_name, mode='event', cfg=self.config) # data_tuple is made of data_dict and timing data self.data_dict = data_dict self.t_cache.set(timing_data) try: delete_chfiles = not self.config.det['debug'] except KeyError: delete_chfiles = True if delete_chfiles: ut.delete_channel_data(data_path, self.base_name, self.config)
[docs]def poly2(x, *p):
    """
    Model function for a 2nd degree polynomial fit for energy calibration.

    :param x: A channel value or a numpy list of channel values.
    :param p: Calibration coefficients, starting from the 0th degree coefficient.
    :return: Calibrated x.
    """
    a, b, c = p
    return a + b * x + c * x ** 2
[docs]def ipoly2(y, *p):
    """
    Inverse of the 2nd degree calibration polynomial above: returns ~x given y. The root (branch) is
    chosen from the range of y so that the inverse is single valued over that range; if the 2nd
    degree coefficient is effectively zero the linear inverse is used instead.

    :param y: An energy value or a numpy list of energy values.
    :param p: Calibration coefficients, starting from the 0th degree coefficient.
    :return: Channel values
    """
    y = np.asarray(y)  # cast single numbers to array if needed
    ylim = np.array((y.min(), y.max()))
    c, b, a = p
    if np.abs(a) > 1e-8:  # 2nd degree case
        disc = b**2 - 4*a*c
        xapex = -b/(2*a)
        if disc <= 0:
            # no intersection of axis. Only valid if a>0 and all y > xapex or all y < xapex
            if a > 0 and np.all(ylim >= xapex):
                branch = 1
            elif a > 0 and np.all(ylim < xapex):
                branch = -1
            else:
                raise ex.ListModeCalibrationError('No real solution for inverse y calculation!')
        else:
            # Two roots case
            x0 = (-b - np.sqrt(disc))/(2 * a)
            x1 = (-b + np.sqrt(disc))/(2 * a)
            if a > 0:
                # Only valid if positive and all y over x1 or all y under x0
                if np.all(ylim > x1):
                    branch = 1
                elif np.all(ylim < x0):
                    branch = -1
                else:
                    raise ex.ListModeCalibrationError('Inverse energy calibration is not unambiguous over the range!')
            else:
                # only valid if positive and between x0 to xapex or xapex to x1
                if np.all(np.logical_and(ylim >= x1, ylim < xapex)):
                    branch = 1
                elif np.all(np.logical_and(ylim >= xapex, ylim < x0)):
                    branch = -1
                else:
                    print(ylim, xapex, x0, x1)
                    raise ex.ListModeCalibrationError('No real solution for inverse y calculation!')
        x = (-b + branch*np.sqrt(b**2 - 4*a*(c-y)))/(2 * a)
    else:  # linear case
        x = (y - c) / b
    return x
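A quick round-trip check of the two calibration helpers; the coefficients below are purely illustrative:

# Sketch: forward and inverse calibration round trip (illustrative coefficients):
p = (5.0, 0.3, 2e-6)                      # 0th, 1st, 2nd degree coefficients
chans = np.array([100.0, 500.0, 1000.0])
energies = poly2(chans, *p)               # channel -> energy
chans_back = ipoly2(energies, *p)         # energy -> channel, ~equal to `chans`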
[docs]class EventBuilder: """ Painful way of walking through the data and trying to build events by seeking coincidences between channel times. Ideally works on shortish arrays of data returned by the digitizer, but should manage big savefiles in chunks. """ def __init__(self, num_ch, coinc_win, latency, extras, event_info, max_datasize=8192): """ :param num_ch: Number of input channels. This is the number of active channels in the data. :param coinc_win: in ns. Time window for coincidence search after a trigger in a channel. :param latency: for every channel, in ns. Number of ns to add to channel time to make it fit to the coincidence window. Can be negative. :param extras: list of dictionaries holding information for extra data. :param extras: dictionary holding information for energy data. :param max_datasize: size for the internal arrays during parsing, should be the size of the output buffer of the digitizer. """ self.coinc_win = coinc_win # coincidence window length in nanoseconds #self.event_info = event_info # data type and aggregation of energy data self.latency = np.array(latency, dtype='int') # per channel latencies self.maxwin = self.coinc_win - self.latency # end of coincidence window in each channel self.num_ch = num_ch # number of channels in the data self.chan_list = np.arange(self.num_ch, dtype='int32') self.bit_list = 2**self.chan_list self.chmax = np.zeros((self.num_ch,), dtype='uint64') # max data idx per channel self.timing_chmax = np.zeros((self.num_ch,), dtype='uint64') # max timing entries per channel #self.data_mat = np.zeros((int(max_datasize*self.num_ch), # Worst case scenario has no coincidences # self.num_ch), dtype='int16') # time vec is recorded self.time_vec = np.zeros((max_datasize * self.num_ch,), dtype='uint64') # big time holds timestamp and index of every event in time order self.big_time = np.zeros((max_datasize * self.num_ch, 2), dtype='uint64') self.timing_data_sz = 2000 type_list = [('idx', '<u8'), ('t', '<u8')] for x in range(self.num_ch): type_list.append(('dt{}'.format(x), '<f4')) self.timing_data = np.zeros((self.timing_data_sz,), dtype=type_list) self.timing_idx = 0 # idx of current timing data leading edge (first always zeros) self.t0 = np.zeros((self.num_ch,), dtype='uint64') #self.E0 = -1*np.ones((self.num_ch,), dtype='int16') self.timing0 = np.zeros((self.num_ch,), dtype='uint64') # idx values of timing front # construct the processors and outputs for processor pipeline self.proc_list = [] self.out_list = [] self.name_list = [] self.ev_count = np.zeros((self.num_ch,), dtype='uint64') # energy ch_mask = np.ones((self.num_ch,), dtype='bool') self.defaults = [] # this is ordered like this due to historical reasons.. Not worth the trouble to change. self.proc_list.append(ColProcessor(event_info)) self.out_list.append(event_info['empty_val'] * np.ones((int(max_datasize * event_info['num_col']), # Worst case event_info['num_col']), dtype=event_info['type'])) # scenario has no coincidences self.name_list.append('energy') self.defaults.append(event_info['empty_val']) # add extras. Timing is not in the pipeline. 
if extras is not None: for e_idx, extra in enumerate(extras): self.proc_list.append(process_dict[extra['aggregate']](extra)) self.defaults.append(extra['empty_val']) self.out_list.append(np.zeros((int(max_datasize*self.num_ch), extra['num_col']), dtype=extra['type'])) self.name_list.append(extra['name']) self.t_front = np.zeros((self.num_ch,), dtype='uint64') # indices that are compared currently self.timing_front = np.zeros((self.num_ch,), dtype='uint64') # indices to timing front self.total_sum = 0 # total accumulated events self.ch_total_sum = np.zeros((self.num_ch,), dtype='uint64') # total number of input counts self.first = True # Extra timing entry needs to be written on the first run of run_batch
[docs] def run_batch(self, data_dict, timing_list): """ The time front is a list of the lowest unbuilt indices for each channel. (The t0 is the times, E0 the energies) The channel which has lowest time in the front is put to an event and if other channels in the front have time within the window, then they are included. The front is incremented for all the channels that were included and the iteration is started again. :param data_dict: list of data_dicts for each channel :param timing_list: list holding timing information for each channel :return: data_dict, timing_data """ # zero all data and indices coincsum = 0 # self.data_mat.fill(-1) self.time_vec.fill(0) self.t_front.fill(0) self.timing_front.fill(0) self.timing_data.fill(0) # reset energy # self.out_list[0].fill(-1) # reset energy and extras for idx in range(len(self.out_list)): self.out_list[idx].fill(self.defaults[idx]) tot_counts = 0 for ch in range(self.num_ch): data_dict[ch]['time'] = data_dict[ch]['time'] + self.latency[ch] try: self.chmax[ch] = data_dict[ch]['time'].shape[0] except IndexError: self.chmax[ch] = 0 try: self.timing_chmax[ch] = timing_list[ch].shape[0] if self.timing_chmax[ch] == 0: # very clumsy way around empty timing lists. If set to max value the write is never triggered. self.timing0[ch] = np.iinfo('uint64').max else: # otherwise we use the first index entry in timing_list self.timing0[ch] = timing_list[ch][0]['idx'] except IndexError: # Missing timing list self.timing0[ch] = np.iinfo('uint64').max except: raise tot_counts += int(self.chmax[ch]) print('Counts in eventbuilder', self.chmax) print('Counts in timing', self.timing_chmax) chan_mask = self.t_front < self.chmax # the channels that actually have any data timing_chan_mask = self.timing_front < self.timing_chmax # current_ev_inds = self.chan_list[chan_mask] # on first iteration all channels are inspected count = 0 timing_idx = 0 if self.first: # on first run we include the first row of zeros into timing data timing_idx = 1 self.first = False cnum = 0 oldt = 0 while count < tot_counts: # go through the hits one by one and insert earliest into big time. # Channels that can have data ev_indices = self.chan_list[chan_mask] # go through active channels and record current values of time for each channel for ch in ev_indices: self.t0[ch] = data_dict[ch]['time'][self.t_front[ch]] # find channel with smallest t and insert into big list chan = ev_indices[self.t0[ev_indices].argmin()] self.big_time[count, :] = [self.t0[chan], chan] if oldt > self.t0[chan]: print(oldt, self.t0[chan]) print('Gotcha! Timestamp error!') raise ex.ListModeTimestampError("Previous timestamp was bigger!") oldt = self.t0[chan] self.t_front[chan] += 1 chan_mask = self.t_front < self.chmax count += 1 self.t_front.fill(0) evnt_num = 0 # this is the most important number in the method. If wrong, the data is incorrectly cropped. big_idx = 0 iterating = tot_counts > 0 while iterating: # through all events # bookkeeping stage. Events are written into matrices. 
Here the list of data should be run # through each corresponding processor function ev_sz = 0 self.ev_count.fill(0) t_end = int(self.big_time[big_idx, 0] + self.coinc_win) # single event is looped always here, as first tstamp is guaranteed to be under while self.big_time[big_idx + ev_sz, 0] < t_end: # mark the channel to the event self.ev_count[self.big_time[big_idx + ev_sz, 1]] += 1 if big_idx + ev_sz + 1 == tot_counts: iterating = False break ev_sz += 1 if ev_sz > 1: coincsum += 1 # set time of the event (time of trigger modified by latencies) self.time_vec[evnt_num] = self.big_time[big_idx, 0] # Set the event data. The data of the event is run through the processors and each processor fills # corresponding indices in self.out_list. e.g. a hit in ch0 and ch2 will cause energy processor to fill # values corresponding self.t_front[0,2] into to self.out_list[0][evnt_num, (0,2)] for idx, proc in enumerate(self.proc_list): # proc.process(data_dict, self.out_list[idx][evnt_num, :], self.t_front, self.ev_count) proc.process(data_dict, self.out_list[idx][evnt_num, :], self.t_front, self.ev_count) # handle timing # If the data block has any timing data left then # the current timing value is set according to the front changed = False for ch in self.chan_list[np.logical_and(self.ev_count > 0, timing_chan_mask)]: # if current timing index is smaller than the current idx. Current idx per channel is equal to already # written channel hits (self.ch_total_sum) t_front of current chunk and the ev_count of current event. # if self.timing0[ch] <= int(self.t_front[ch] + self.ch_total_sum[ch] + self.ev_count[ch]): print('JIIIHAAAA!!!') self.timing_data['dt{}'.format(ch)][timing_idx] = timing_list[ch]['dt0'][self.timing_front[ch]] self.timing_front[ch] += 1 changed = True # calculate new timing0 if self.timing_chmax[ch] > self.timing_front[ch]: self.timing0[ch] = timing_list[ch]['idx'][self.timing_front[ch]] else: timing_chan_mask[ch] = False if changed: # The event idx to write, on the other hand, is equal to already written events (self.total_sum) # plus event num of current event. # Somehow this is still 1 event less than it should. A loader problem? self.timing_data['idx'][timing_idx] = evnt_num + 1 + self.total_sum # evnt_num not incremented yet self.timing_data['t'][timing_idx] = self.big_time[big_idx, 0] # evnt_num not incremented yet timing_idx += 1 # increment output timing index if one or more channels were updated # update event self.t_front += self.ev_count evnt_num += 1 big_idx += ev_sz # update running values in the end of the processed chunk self.total_sum += evnt_num self.ch_total_sum += self.chmax # build output data_dict = dict() data_dict['time'] = self.time_vec[:evnt_num] for idx in range(len(self.out_list)): data_dict[self.name_list[idx]] = self.out_list[idx][:evnt_num, ...] print('Parsed', evnt_num, 'events with', coincsum, 'coincidences.') print('last idx', self.total_sum, 'timing', self.timing_data[:timing_idx]) return data_dict, self.timing_data[:timing_idx]
[docs]def strip_cal(data_mat, coord, strip_cal, coord_ch): """ Calculates strip calibration for coordinate data. :param data_mat: data :param coord: coordinates :param strip_cal: calibration matrix :param coord_ch: order of coordinate channels :return: """ for idx, cc in enumerate(coord_ch): mask = data_mat[:, cc] > 0 data_mat[mask, cc] = (strip_cal[idx, coord[mask, idx], 0] + strip_cal[idx, coord[mask, idx], 1] * data_mat[mask, cc] + strip_cal[idx, coord[mask, idx], 2] * data_mat[mask, cc] ** 2)
[docs]def generate_timing(chfile, pulse_dead_time, t_vec):
    """
    Utility function to generate a timing vector if it does not exist. Takes a pathlib type filename,
    the pulse dead time for the channel and t_vec. Returns nothing, just writes the data.
    """
    chunk_size = 100000
    count = t_vec.shape[0]
    if count % chunk_size != 0:
        sz = int(count // chunk_size)
    else:
        sz = int(count // chunk_size - 1)
    t_data = np.zeros((sz + 2,), dtype=[('idx', '<u8'), ('t', '<u8'), ('dt0', '<f4')])
    # first row stays as zeros; each record is (event index, timestamp, dead time of the interval)
    for idx in range(sz):
        t_data[idx + 1] = ((idx + 1) * chunk_size,
                           t_vec[(idx + 1) * chunk_size],
                           chunk_size * pulse_dead_time * 1e-9)
    leftover = t_vec[sz * chunk_size:].shape[0]  # events after the last full chunk
    t_data[-1] = (t_vec.shape[0] - 1, t_vec[-1], leftover * pulse_dead_time * 1e-9)
    with chfile.open('wb') as f:
        f.write(t_data.tobytes())
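Calling it is straightforward; a sketch with made-up numbers (the file name is illustrative, and pulse_dead_time is taken to be in ns based on the 1e-9 scaling above):

# Sketch: writing a synthetic single-channel timing file (path and values are illustrative):
rng = np.random.default_rng(0)
t_vec = np.sort(rng.integers(0, int(600e9), size=250000)).astype('uint64')  # 250k events over 600 s
generate_timing(Path('run_001_timing_ch0.dat'), pulse_dead_time=5000.0, t_vec=t_vec)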
[docs]def fill_default_data(cfg): """ Will generate reasonable defaults for parameters omitted for 'events' and 'extras' data_info dictionaries. It will overwrite incompatible parameters. Does not work yet. :param cfg: Configuration of the detector. :return: data_info dictionary """ pass
#dict = {'name': name} #if name == 'energy': # dict['num_col'] = len(cfg['ch_mask'])
[docs]class ColProcessor: """ Simple class for aggregating data in event building. It is initialized with the data info (like in extras definition) including what happens when multiple events are found within the same time window. The process method is given input events, channel mask and output data structure. Output data is modified in-place. Each instance of a class is only updating its own part of the data (energy, timing, coord, etc.) and is supposed to be run in a pipeline for every event. """ def __init__(self, info): """ :param info: The data info dict containing information of the data: "name": name of the output datafile: "basename_name.dat" "type": datatype (u1, i2, u4 ..) "num_col": number of columns in the output "aggregate": aggregate type of the data. Accepted aggregate types are: "col": each input channel is aggregated as a column to the output matrix "bit": each input channel is cast to bool and added to a bitmask "multihit": No inputs. Outputs a bitmask of multiple hits per event on a multi- channel detector. "latency": No inputs. Outputs the time difference of coincident signals between a single main channel and all the others. Needs "main" parameter to be set. In the future add: "sum": Sum of the data defined by "type" and "channel" parameters where "type" denotes data type to sum and "channel" is a list of channels. This extra is associated to the first channel in the list. "multi": What to do if multiple hits to a channel in single event: "sum": sum all to a single value "max": take the maximum value "max_e": take value on the hit with maximum energy "min": take the minimum value "mean": calculate arithmetic mean and round to fit "type" "kill": set to 0 "ch_mask": Some data is only valid for some channels. Boolean channel mask is used to define valid channels for the data. Must be np array with shape[0]=num_ch "main": Used by the "latency" aggregate to define which channel is compared against the others. In the future add: "type": Type of data to sum up as extra. "channel": List of channels to sum up as extra. :param in_idx: Index of the input in the data pipeline. This is needed to fill correct output. """ self.info = info # self.ch_mask = np.array(self.info['ch_mask'], dtype='i4') self.ch_mask = np.array(self.info['ch_mask'], dtype='bool') self.ch_ind = np.arange(self.ch_mask.shape[0]) # used to map from extra data index to channel idx # channel map maps input channel index into output. It is used for data that can be missing from some # channels, such as coordinate data. The cumulative sum works because channel mask masks the incorrect # indices. self.ch_map = self.ch_mask.cumsum() - 1 # With the new dictionary input one needs to use the name of the data to index it from the input instead of # in_idx. This is also true for 'energy' # self.in_idx = in_idx self.name = self.info['name'] self.op = multi_dict[self.info['multi']] self.template = np.zeros(self.ch_mask.shape[0], self.info['type']) self.template.fill(self.info['empty_val'])
[docs] def process(self, in_list, out, t_front, ev_count): """ :param in_list: list of data_dicts, one per channel :param out: list of initialized output data arrays :param t_front: current position in data :param ev_count: number of hits per channel in the event :param ev_num: number of hits per channel in the event :return: """ #result = self.template.copy() #in_mask = ev_count > 0 for ch in self.ch_ind[np.logical_and(ev_count > 0, self.ch_mask)]: # out[ self.ch_map[ch]] = in_list[ch][self.name][t_front[ch]] # fill valid hits # ch is index to detector channel #try: if ev_count[ch] == 1: out[self.ch_map[ch]] = in_list[ch][self.name][t_front[ch]] # #result[self.ch_map[ch]] = in_list[ch][self.name][t_front[ch]] else: # # out[self.ch_map[ch]] = self.op(in_list[ch][self.name][t_front[ch]:t_front[ch] + ev_count[ch]]) out[self.ch_map[ch]] = self.op(in_list[ch], t_front[ch], ev_count[ch], self.name)
# # result[self.ch_map[ch]] = self.op(in_list[ch], t_front[ch], ev_count[ch], self.name) #except: # print('Exception in col processor!') # print(ch, self.name, in_list[ch][self.name].shape, t_front[ch], ev_count) # raise # out[:] = result
[docs]class BitProcessor (ColProcessor): def __init__(self, info): super().__init__(info) self.bitvals = 2 ** np.array(range(len(self.ch_mask)), dtype=info['type'])
[docs] def process(self, in_list, out, t_front, ev_count): # in_mask = ev_count > 0 #single_hits = self.ch_ind[np.logical_and(ev_count == 1, self.ch_mask)] #multi_hits = self.ch_ind[np.logical_and(ev_count > 1, self.ch_mask)] # temp = [in_list[ch][self.name][t_front[ch]]*self.bitvals[ch] for ch in single_hits] # temp.extend([self.op(in_list[ch], t_front[ch], ev_count[ch], self.name)*self.bitvals[ch] for ch in multi_hits]) #temp = 0 for ch in self.ch_ind[np.logical_and(ev_count > 0, self.ch_mask)]: # fill valid hits # ch is index to detector channel #try: if ev_count[ch] == 1: #temp[ch] += in_list[ch][self.name][t_front[ch]]*self.bitvals out += in_list[ch][self.name][t_front[ch]] * self.bitvals[ch] else: # will this ever be used? out += self.op(in_list[ch], t_front[ch], ev_count[ch], self.name) * self.bitvals[ch]
#for ch in self.ch_ind[np.logical_and(ev_count > 1, self.ch_mask)]: #temp[ch] += self.op(in_list[ch], t_front[ch], ev_count[ch], self.name)*self.bitvals # out[ev_num] += self.op(in_list[ch], t_front[ch], ev_count[ch], self.name)*self.bitvals[ch] # if temp > 0: #except: # print(ch, self.name, in_list[ch][self.name].shape, t_front[ch], ev_count) # raise # out[ev_num] = temp
[docs]class LatencyProcessor (ColProcessor): """ LatencyProcessor is a specialized processor used to visualize the timing properties of the input data. Each output column is equal to time difference between event in main channel and event in each other channel (so output of main channel is always zeros) calculated from latency corrected time data. Smallest possible value is returned if there was no coincidence between the channels. All channels should show zero-centered distributions in a properly tuned detector. Width of the distributions will show how big coincidence window is needed. """ def __init__(self, info): super().__init__(info) self.main_ch = info['main'] self.ch_mask[self.main_ch] = 0 # set to zero as self delta is constant 0
[docs] def process(self, in_list, out, t_front, ev_count): in_mask = ev_count > 0 if in_mask[self.main_ch]: out[self.main_ch] = 0 main_t = int(in_list[self.main_ch]['time'][t_front[self.main_ch]]) for ch in self.ch_ind[np.logical_and(in_mask, self.ch_mask)]: # fill valid hits # ch is index to detector channel try: out[self.ch_map[ch]] = int(in_list[ch]['time'][t_front[ch]]) - main_t except: print('out', out[self.ch_map[ch]]) print(ch, t_front[ch], ev_count) raise
[docs]class MultiHitProcessor (BitProcessor): """ MultiHitProcessor calculates a bitmask where channels with multiple hits per event are set to 1. """
[docs] def process(self, in_list, out, t_front, ev_count): multi = ev_count > 1 for ch in self.ch_ind[np.logical_and(multi, self.ch_mask)]: # fill valid hits out[:] += self.bitvals[ch]
# out[:] = self.bitvals[self.ch_ind[np.logical_and(multi, self.ch_mask)]].sum()
[docs]def max_combinator(in_dict, idx, ev_count, name):
    """
    Returns the hit that has the highest value.

    :param in_dict: A dictionary including all datas of the channel.
    :param idx: Index of the first hit in the event
    :param ev_count: Number of hits in the event
    :param name: Name of the data
    :return: A single value for the hit
    """
    return np.max(in_dict[name][idx:idx + ev_count])

[docs]def max_e_combinator(in_dict, idx, ev_count, name):
    """
    Returns the hit that has the highest energy value.

    :param in_dict: A dictionary including all datas of the channel.
    :param idx: Index of the first hit in the event
    :param ev_count: Number of hits in the event
    :param name: Name of the data
    :return: A single value for the hit
    """
    # argmax is relative to the start of the event, so offset it by idx
    idx2 = idx + in_dict['energy'][idx:idx + ev_count].argmax()
    return in_dict[name][idx2]

[docs]def min_combinator(in_dict, idx, ev_count, name):
    """
    Returns the hit that has the smallest value.

    :param in_dict: A dictionary including all datas of the channel.
    :param idx: Index of the first hit in the event
    :param ev_count: Number of hits in the event
    :param name: Name of the data
    :return: A single value for the hit
    """
    return np.min(in_dict[name][idx:idx + ev_count])

[docs]def mean_combinator(in_dict, idx, ev_count, name):
    """
    Returns the mean of all hits in the event.

    :param in_dict: A dictionary including all datas of the channel.
    :param idx: Index of the first hit in the event
    :param ev_count: Number of hits in the event
    :param name: Name of the data
    :return: A single value for the hit
    """
    return np.mean(in_dict[name][idx:idx + ev_count])

[docs]def sum_combinator(in_dict, idx, ev_count, name):
    """
    Returns the sum of all hits in the event.

    :param in_dict: A dictionary including all datas of the channel.
    :param idx: Index of the first hit in the event
    :param ev_count: Number of hits in the event
    :param name: Name of the data
    :return: A single value for the hit
    """
    return np.sum(in_dict[name][idx:idx + ev_count])

[docs]def kill_combinator(in_dict, idx, ev_count, name):
    """
    The event is set to zero.

    :param in_dict: A dictionary including all datas of the channel.
    :param idx: Index of the first hit in the event
    :param ev_count: Number of hits in the event
    :param name: Name of the data
    :return: A single value for the hit
    """
    return 0
multi_dict = {'max': max_combinator, 'min': min_combinator, 'mean': mean_combinator, 'sum': sum_combinator, 'kill': kill_combinator, 'max_e': max_e_combinator} process_dict = {'col': ColProcessor, 'bit': BitProcessor, 'multihit': MultiHitProcessor, 'latency': LatencyProcessor}
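A tiny illustration of what the combinators return for a channel that recorded three hits in one event; the input values are made up:

# Sketch: resolving a 3-hit pile-up in one channel (illustrative values):
ch_data = {'energy': np.array([120, 80, 300], dtype='int16')}
sum_combinator(ch_data, idx=0, ev_count=3, name='energy')      # -> 500
max_combinator(ch_data, idx=0, ev_count=3, name='energy')      # -> 300
mean_combinator(ch_data, idx=0, ev_count=3, name='energy')     # -> 166.66..., cast to the output dtype by the processor
multi_dict['kill'](ch_data, idx=0, ev_count=3, name='energy')  # -> 0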
[docs]class StreamData: """ Stream_data is a manager that pushes list mode data into disk as it comes available. Every kind of data (time + energy for channel, time + energy matrix for events, timing data and extra data) needs to have its own streamer. Channel mode data is stored as raw binary files, with one file holding time (uint64), one the energy (uint16). Note: there is no reason to save data in channel mode after latency and coincidence window are set. Event data is stored as raw binary with timestamps (uint64), energy matrix (uint16 x num_ch) Timing data is a row of timing info (uint32 idx + 2xuint32 x num_ch). Extra data can can be given via the extras dictionary (keys: 'name', 'type', 'num_col'). Extras can include pile-up flags (Type x num_ch) or coordinates (Type x N), where N is number of coordinates. """ def __init__(self, path, data_name, method='event', raw=False, channels=None, extra_name=None): """ Initialize the write method, coincidence window and number of channels. :param path: path to the data. String or a pathlib Path :param data_name: string, the base filename :param method: * event: have e and t data as input, output timestamps and events * timing: have index to event plus dead time float * extra: have some other data, such as coordinates or tags as input, stream to 'name' in the extra dict :param raw: raw data is defined separately for each channel. :param channels: Used if raw = True. This is a list of channel numbers that are saved. The time and energy files will be appended with '_ch{channels[idx]}.dat' :param extra_name:Used if mode is 'extra'. This is a name of the extra data. Filename will be 'data_name_{extra_name}.dat' """ self.raw = raw self.path = Path(path) self.data_name = data_name self.channels = channels self.method = method self.extra_name = extra_name if raw: if self.channels is None: raise ValueError('channels must be defined for raw stream mode!') if self.method == 'event': self.write = self._write_event elif self.method == 'timing': self.write = self._write_extra elif self.method == 'extra': if extra_name is None: raise ValueError('extra_name must be defined for extra mode!') self.write = self._write_extra else: print(self.method) raise ValueError('Invalid method for disk write') self.file_idx = 0 # the index of files in case the file size has exceeded 2GB and the data has been split # Not in use at all self.new_files()
[docs] def new_files(self): self.time_files = [] self.data_files = [] if self.file_idx == 0: suffix = 'dat' else: suffix = 'b{:02}'.format(self.file_idx) if self.raw: for ch in self.channels: if self.method == 'event': self.time_files.append((self.path / '{}_timestamps_ch{}.{}'.format(self.data_name, ch, suffix)).open('wb')) self.data_files.append((self.path / '{}_events_ch{}.{}'.format(self.data_name, ch, suffix)).open('wb')) elif self.method == 'timing': self.data_files.append((self.path / '{}_timing_ch{}.{}'.format(self.data_name, ch, suffix)).open('wb')) elif self.method == 'extra': self.data_files.append((self.path / '{}_{}_ch{}.{}'.format(self.data_name, self.extra_name, ch, suffix)).open('wb')) else: if self.method == 'event': self.time_files.append((self.path / '{}_timestamps.{}'.format(self.data_name, suffix)).open('wb')) self.data_files.append((self.path / '{}_events.{}'.format(self.data_name, suffix)).open('wb')) elif self.method == 'timing': self.data_files.append((self.path / '{}_timing.{}'.format(self.data_name, suffix)).open('wb')) elif self.method == 'extra': self.data_files.append((self.path / '{}_{}.{}'.format(self.data_name, self.extra_name, suffix)).open('wb'))
def _write_event(self, data): """ Writes a chunk of data to file. For raw mode the data is given in a list of len num_ch, but only channels specified in self.channels are actually written into file. :param data: Tuple of (Timestamp as np vector, Energies as np matrix) or a tuple of lists of such if self.raw==True. :return: """ t_data, e_data = data if self.raw: for idx, ch in enumerate(self.channels): if len(t_data[ch]) > 1: self.time_files[idx].write(t_data[ch].tobytes()) self.data_files[idx].write(e_data[ch].tobytes()) else: self.time_files[0].write(t_data.tobytes()) self.data_files[0].write(e_data.tobytes()) def _write_extra(self, data): """ Same as _write_event. :param data: numpy matrix or list of matrices if raw == True :return: """ if self.raw: for idx, ch in enumerate(self.channels): if len(data[ch]) > 1: self.data_files[idx].write(data[ch].tobytes()) else: self.data_files[0].write(data.tobytes())
[docs] def close(self): for fil in self.time_files + self.data_files: fil.close()
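A minimal usage sketch of StreamData in event mode; the path, base name and array contents are placeholders, and the dtypes follow the uint64/uint16 layout described in the class docstring:

import numpy as np
from listmode.data import StreamData

# Event-mode streamer writing one timestamp file and one event-matrix file.
sd = StreamData('/tmp', 'testrun', method='event', raw=False)

# A chunk of two events for a hypothetical two-channel detector.
timestamps = np.array([1000, 2500], dtype='uint64')         # ns
energies = np.array([[120, 0], [35, 410]], dtype='uint16')  # one row per event

sd.write((timestamps, energies))  # dispatches to _write_event
sd.close()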
[docs]def read_binary_data(data_path, base_name, cfg, mode='event'): """ Reads list mode data files from disk as numpy memmaps. :param data_path: Path to the data directory :param base_name: Base name of the data :param cfg: The detector config object. The detector configuration is needed for defining the extras: a list of dicts defining extra data files, type and number of columns, extras = {"name":'x', "type":'t', "num_col":'n'}, where type is a numpy type string of the data. Several extras can be defined in det_cfg (coord, ch_flags). These are handled automatically if they are present. Some extras, such as coord, need additional definitions in the config. For coord, it is the 'coordinates' list which defines the number of coordinates, the channels the data is found in and the order of the coordinates in i, j notation. :param mode: What mode of data to read: 'event' or 'channel'. :return: In event mode a single data dict (time, energy and extras) and its timing data; in channel mode a list of such dicts and a list of timing data, one entry per channel. """ event_info = cfg.det['events'] # Find the data and determine num_ch and ev_sz extras = cfg.det['extras'] timenames, enames, tnames, xnames = ut.find_data_files(data_path, base_name, cfg, mode) for fname in timenames + enames: # not checking against timing data as sometimes it has to be generated afterwards if not fname.exists(): print('fname is:', fname) raise ex.ListModeDataNotFoundError('Could not find all data files') for chnamelist in xnames: for fname in chnamelist: if (fname is not None) and (not fname.exists()): print('fname is:', fname) raise ex.ListModeDataNotFoundError('Could not find all data files') # Now all files in the name lists are loaded # For channel data this is one item per ch, for events there is only one item. Each item of full_data_list # is a ch_data_dict containing time, energy and extras. (Some extras are not included in channel mode read.) # with time vector, energy and individual extras as items full_data_list = [] timing_list = [] # build return tuple. Empty channels are given zeros vector instead of a memmap to prevent crashing the loader. print('Read binary', len(timenames)) for idx in range(len(timenames)): ch_data_dict = dict() timename = timenames[idx] ename = enames[idx] # First the timing ev_sz = timename.stat().st_size // 8 # number of events if ev_sz > 0: try: ch_data_dict['time'] = np.memmap(timename, dtype='uint64', mode='r', shape=(ev_sz,)) except FileNotFoundError: print(timename, 'not found!') raise ex.ListModeDataNotFoundError except: print('Data load fails!') raise else: #num_ch = len(cfg.det['ch_cfg']) ch_data_dict['time'] = np.zeros((0,), dtype='uint64') # empty channel is just empty # now for events and extras if mode == 'channel': # First channel mode if ev_sz > 0: print(event_info['type']) ch_data_dict['energy'] = np.memmap(ename, dtype=event_info['type'], mode='r', shape=(ev_sz,)) else: ch_data_dict['energy'] = np.zeros((0,), dtype=event_info['type']) # for some data types the timing info is missing from channel data. Geant4 for example, but also # appended Caen files are dumped without timing. 
tname = tnames[idx] try: timing_sz = tname.stat().st_size // 20 except FileNotFoundError: print('No tdata for ch', idx) # need to generate timing data generate_timing(tname, cfg.det['ch_cfg'][idx]['pdeadtime'], ch_data_dict['time']) timing_sz = tname.stat().st_size // 20 # Actually loading the data here try: timing_list.append(np.memmap(tname, dtype=[('idx', '<u8'), ('t', '<u8'), ('dt0', '<f4')], mode='r', shape=(timing_sz, 1))) except: print('Fails on load of timing data for channel', idx) raise # finally all the extras that are defined in channel mode if extras is not None: try: for e_idx, extra in enumerate(extras): # if this extra has channel info if extra['name'] not in ['multihit', 'latency']: # if info for this channel exists xname = xnames[e_idx][idx] if xname: # an extra may be defined for a subset channels. Skip if empty. # single channel extra always 1 column wide if ev_sz > 0: ch_data_dict[extra['name']] = np.memmap(xname, dtype=extra['type'], mode='r', shape=(ev_sz,)) else: ch_data_dict[extra['name']] = np.zeros((ev_sz,), dtype=extra['type']) except: print('Channel mode extras fail!') print(xnames) print(e_idx, idx) raise # event mode else: tname = tnames[0] num_ch = len(cfg.det['ch_cfg']) # need num_ch to shape the data if ev_sz > 0: try: ch_data_dict['energy'] = np.memmap(ename, dtype=event_info['type'], mode='r', shape=(ev_sz, num_ch)) except: print('Fails when loading events!') raise else: ch_data_dict['energy'] = np.zeros((0, num_ch), dtype=event_info['type']) timing_sz = tname.stat().st_size // (16 + num_ch*4) try: type_list = [('idx', '<u8'), ('t', '<u8')] for x in range(num_ch): type_list.append(('dt{}'.format(x), '<f4')) timing_list.append(np.memmap(tname, dtype=type_list, mode='r', shape=(timing_sz,))) except: print('Fails on load of timing data!', tnames[idx]) raise # loop through extra data of the channel if extras is not None: for e_idx, extra in enumerate(extras): # if this extra has info xname = xnames[e_idx][idx] if ev_sz > 0: try: ch_data_dict[extra['name']] = np.memmap(xname, dtype=extra['type'], mode='r', shape=(ev_sz, extra['num_col'])) except: print('Loading extras fail!') raise else: ch_data_dict[extra['name']] = np.zeros((0, extra['num_col']), dtype=extra['type']) full_data_list.append(ch_data_dict) print(timing_list) if mode == 'event': print('Read binary data in event mode') #print(timing_list[0]) return full_data_list[0], timing_list[0] else: return full_data_list, timing_list
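Assuming a configuration and matching data files already exist on disk (all names below are placeholders), reading event-mode data could look like this:

from listmode.data import load_config, read_binary_data

# local_cfg would normally come from a local configuration file.
local_cfg = {'cfg_dir': '/path/to/cfg', 'data_dir': '/path/to/data'}
cfg = load_config('my_detector', local_cfg, data_name='run001')

# In event mode a single data dict and a single timing memmap are returned;
# channel mode would return lists with one entry per channel.
data, timing = read_binary_data('/path/to/data/run001', 'run001', cfg, mode='event')
print(data['time'].shape)    # uint64 timestamps in ns
print(data['energy'].shape)  # (num_events, num_ch)
print(timing['idx'][:5])     # insertion indices into the event stream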
[docs]def data_info(info, ch_list): """ Fills data_info dict with defaults for parts that are missing. Hardcoded settings for energy, multihit and latency data overwrite any conflicting values defined in the config. A warning is printed when a value is overwritten. :param info: info dict :param ch_list: list of the channels of the detector :return: dict with missing keys filled with defaults. """ # channel mask defines num_col so it has to be checked and calculated first try: ch_mask = info['ch_mask'] except KeyError: ch_mask = list(np.ones((len(ch_list)), dtype='u1')) info['ch_mask'] = ch_mask # hardcoded values for different datatypes. e_hardcoded = {'aggregate': 'col', 'empty_val': -1} mh_hardcoded = {'type': 'u1', 'num_col': 1, 'aggregate': 'multihit', 'multi': 'max', 'empty_val': 0} lat_hardcoded = {'type': 'i2', 'aggregate': 'latency', 'multi': 'min', 'unit': 'ns', 'raw_unit': 'ns'} default = {'multi': 'max', 'empty_val': 0} e_default = {'type': 'i2', 'raw_unit': 'ch', 'unit': 'keV'} lat_default = {'main': 0} # hardcoded values are written over ones defined in info. num_col is calculated and defaults are applied. if info['name'] == 'energy': for key in info: if key in e_hardcoded: print('Warning, {} in energy data is incompatible and will be overwritten!'.format(key)) info.update(e_hardcoded) info['num_col'] = sum(ch_mask) default.update(e_default) for key, value in default.items(): if key not in info: info[key] = value elif info['name'] == 'multihit': for key in info: if key in mh_hardcoded: print('Warning, {} in multihit extra is incompatible and will be overwritten!'.format(key)) info.update(mh_hardcoded) elif info['name'] == 'latency': for key in info: if key in lat_hardcoded: print('Warning, {} in latency extra is incompatible and will be overwritten!'.format(key)) info.update(lat_hardcoded) default.update(lat_default) for key, value in default.items(): if key not in info: info[key] = value info['num_col'] = sum(ch_mask) info['empty_val'] = -32768 else: agg = info['aggregate'] if issubclass(process_dict[agg], process_dict['bit']): info['num_col'] = 1 else: info['num_col'] = sum(ch_mask) for key, value in default.items(): if key not in info: info[key] = value return info
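A short sketch of what data_info fills in for a bare energy definition of a hypothetical four-channel detector:

from listmode.data import data_info

info = data_info({'name': 'energy'}, ch_list=[0, 1, 2, 3])

# The energy branch forces 'aggregate' to 'col' and 'empty_val' to -1,
# derives num_col from the defaulted channel mask and applies the remaining
# defaults ('i2' type, raw unit 'ch', unit 'keV').
print(info['aggregate'], info['num_col'])            # col 4
print(info['type'], info['raw_unit'], info['unit'])  # i2 ch keV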
[docs]def load_calibration(config): """ Loads calibration for the detector. Calibration gives the 2nd degree polynomial coefficients for each channel and for each data type. The data is organized as a dictionary with data types as keys and each data as numpy arrays with channel in first axis and three coefficients (a, b and c) in second axis. Missing data is fixed with dummy calibration ([0,1,0] coefficients), but incompatible data (e.g. wrong number of channels) will raise an exception. Old calibration data had keys for peaks used for calibration, but they have been dropped. :param config: The detector config object (obviously missing the calibration info) :return: The calibration dictionary read from disk. """ cal_name = config.det['cal_name'] try: with ut.find_path(config, cal_name, '_ecal.json').open('r') as fil: temp = json.load(fil) cal = temp except FileNotFoundError: print('Calibration file not found!') raise ex.ListModeConfigurationError('Calibration file not found!') # Convert to numpy arrays and check that data is complete: try: if len(cal['energy']) == len(config.det['ch_list']): cal['energy'] = np.asarray(cal['energy']) else: #print('cal loaded', cal) #print(config.det['ch_list']) raise ex.ListModeConfigurationError('Incompatible calibration data for energy!') except KeyError: temp = np.zeros((len(config.det['ch_list']), 3)) temp[:, 1] = 1 cal['energy'] = temp for extra in config.det['extras']: #('Extra cal', extra['name']) data = extra['name'] #if not issubclass(dat.process_dict[extra['aggregate']], dat.process_dict['bit']): # bitmasks are not calibrated try: #print('Check data') if len(cal[data]) == extra['num_col']: cal[data] = np.asarray(cal[data]) else: raise ex.ListModeConfigurationError('Incompatible calibration data for extra data!') except KeyError: # missing calibration data is just generated here temp = np.zeros((extra['num_col'], 3)) temp[:, 1] = 1 cal[data] = temp return cal
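Since each calibration entry holds second-degree coefficients per channel, applying the calibration is a plain polynomial evaluation. A sketch with made-up coefficients, assuming the order is constant, linear, quadratic, which is consistent with the [0, 1, 0] dummy calibration:

import numpy as np

# Hypothetical [a, b, c] row for one channel: E_keV = a + b*ch + c*ch**2.
coeff = np.array([1.2, 0.35, 1.5e-6])

raw = np.array([100.0, 2000.0, 8000.0])  # raw channel values
energy_kev = coeff[0] + coeff[1] * raw + coeff[2] * raw**2
print(energy_kev)  # approximately [36.215, 707.2, 2897.2]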
[docs]def load_config(det_name, local_cfg, data_name=None): """ Detector configuration object is a namespace with: paths into configuration directories and optionally to data. Contents of the detector configuration file. Calibration for the detector. Calibration gives the 2nd degree polynomial coefficients for each channel and for each data type. The data is organized as a dictionary with data types as keys and each data as numpy arrays with channel in first axis and three coefficients (a, b and c) in second axis. Omitted calibration data is replaced with [0,1,0] coefficients. :param det_name: Name of the detector configuration file without the _cfg.json :param local_cfg: Paths needed to find configurations and data :param data_name: Optional path to data that will be added as "home" into config.path :return: detector configuration object """ # load path information # with(open('local_cfg.json', 'r')) as fil: # path_cfg = json.load(fil) path_cfg = local_cfg.copy() if data_name is not None: # home is useful when saving data and looking for plot_name_list data_name = Path(data_name) if data_name.is_absolute(): path_cfg['home'] = data_name else: path_cfg['home'] = Path(path_cfg['data_dir']) / data_name else: path_cfg['home'] = path_cfg['data_dir'] # and detector config cfg_dir = path_cfg['cfg_dir'] with (Path(cfg_dir) / (det_name + '_cfg.json')).open('r') as fil: det_cfg = json.load(fil) # detector config needs sensible values set for data. This line forces load config into data module. det_cfg['events'] = data_info(det_cfg['events'], det_cfg['ch_list']) det_cfg['extras'] = [data_info(extra, det_cfg['ch_list']) for extra in det_cfg['extras']] # The rest is only needed for DAQ and can be None if det_cfg['readout_cfg'] is not None: with (Path(cfg_dir) / (det_cfg['readout_cfg'] + '_boardcfg.json')).open('r') as fil: readout_cfg = json.load(fil) else: readout_cfg = None ch_cfg = [] for chdata in det_cfg['ch_cfg']: if chdata['cfg_file'] is not None: with (Path(cfg_dir) / (chdata['cfg_file'] + '_chcfg.json')).open('r') as fil: ch_cfg.append(json.load(fil)) else: ch_cfg.append(None) # added calibrations config = types.SimpleNamespace(path=path_cfg, det=det_cfg, readout=readout_cfg, ch=ch_cfg, cal=None) config.cal = load_calibration(config) return config
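A minimal sketch of loading a configuration and inspecting the resulting namespace; the detector name and paths are placeholders and the corresponding JSON files are assumed to exist:

from listmode.data import load_config

local_cfg = {'cfg_dir': '/path/to/cfg', 'data_dir': '/path/to/data'}
config = load_config('my_detector', local_cfg)

# config is a SimpleNamespace bundling the path dict, the detector dict,
# optional readout/channel DAQ settings and the calibration dict.
print(config.path['home'])         # falls back to data_dir when data_name is None
print(config.det['ch_list'])
print(config.cal['energy'].shape)  # expected (num_ch, 3) coefficient array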