import json
from pathlib import Path
import datetime as dt
import time
import types
import sys
import numpy as np
import listmode.loaders as ldr
import listmode.exceptions as ex
from listmode import utils as ut
class TimeCache:
"""
TimeCache manages timing data and provides the dead/live time of the detector. It maintains a list of index-time
pairs recording when timing information was inserted, so that time periods can be retrieved quickly. Each interval
holds a variable number of events. Because both indices and timestamps are monotonically increasing, either can be
used to find intervals in the data.
The timing datafile is saved with the data and should be treated as read only. Each row contains the insertion
index (uint64), the timestamp (uint64) and the dead-time delta of the interval for each channel as float32. The
first row always points to the first event, with zero dead times for all channels. The first real dead time value
is stored in the second row.
"""
def __init__(self, parent):
"""
A component for time caching and dead time handling. Takes calling Data instance as parent for data access.
:param parent: Parent Data instance.
"""
self.parent = parent
self.dlen = len(parent.ch_list)
type_list = [('idx', '<u8'), ('t', '<u8')]
for x in range(self.dlen):
type_list.append(('dt{}'.format(x), '<f4'))
self.timing = np.zeros((0,), dtype=type_list)
def set(self, timing):
"""
:param timing: an opened np.memmap instance containing the timing data (retrieved by read_binary_data)
"""
self.timing = timing
def get_timing(self, t_slice=None):
"""
Return dead time data for a slice. The first entry is all zeros, the second one is interpolated to start
from t_slice[0] and the last one is an extra row interpolated to end at t_slice[1].
If t_slice is not given, this method returns the timing data as it is.
:param t_slice: Time slice (in ns)
:return: (interpolated) timing data for time slice
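Example (illustrative sketch, assuming ``cache`` is the TimeCache of a loaded Data instance)::

    rows = cache.get_timing(t_slice=(0, 10_000_000_000))  # first 10 s of the run, in ns
    # rows[0] is all zeros, rows[1] and rows[-1] carry the dead times interpolated to the
    # slice edges; the rows in between are returned as stored.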
"""
if t_slice is None:
return self.timing
indices = np.zeros((self.dlen, 2), dtype='u8')
dts = np.zeros((self.dlen, 2))
for ch in range(self.dlen):
# first interp the first index
indices[ch, :] = self.find(t_slice, ch)
dts[ch, 0] = self._interp(ch, t_slice[0], indices[ch, 0], prev=True)[1] # pick the part after t_val
if indices[ch, 1] == indices[ch, 0] + 1: # if both ends of t_slice fit within a single timing interval
dts[ch, 1] = dts[ch, 0] - self._interp(ch, t_slice[1], int(indices[ch, 1]), prev=False)[1] # subtract the rest
else:
dts[ch, 1] = self._interp(ch, t_slice[1], int(indices[ch, 1]), prev=False)[0] # pick the part before t_val
timing_idx_0 = indices[:, 0].min()
timing_idx_1 = int(indices[:, 1].max())
retval = self.timing[timing_idx_0:timing_idx_1 + 1].copy() # include the last index
ch_list = ['dt{}'.format(x) for x in range(self.dlen)]
retval[ch_list][0] = 0
retval[ch_list][1] = tuple(dts[:, 0])
retval[ch_list][-1] = tuple(dts[:, 1])
return retval
def get_dead_time(self, t_slice=None):
"""
Return the dead time of the time slice.
:param t_slice: Time slice in nanoseconds, or None for the full measurement.
:return: All dead times in a numpy array. Live and dead times are float values of seconds.
"""
timing = self.get_timing(t_slice)
ch_list = ['dt{}'.format(x) for x in range(self.dlen)]
dead_time = np.zeros((self.dlen,))
for ch_idx in range(self.dlen):
dead_time[ch_idx] = (timing[ch_list[ch_idx]].sum())
return dead_time
def get_live_time(self, t_slice=None):
"""
Return live time of the slice.
:param t_slice: Time slice in nanoseconds, or None for the full measurement.
:return: All live times in a numpy array. Live and dead times are float values of seconds.
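Example (sketch, assuming a loaded Data instance ``d``; values are illustrative)::

    t_slice = (0, int(1e9))                   # first second of the run, in ns
    live = d.t_cache.get_live_time(t_slice)   # live time [s] per channel
    dead = d.t_cache.get_dead_time(t_slice)   # dead time [s] per channel
    # live + dead equals the 1.0 s span of the slice for every channel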
"""
livetime = self.get_total_time(t_slice)*1e-9 - self.get_dead_time(t_slice)
return livetime
def get_total_time(self, t_slice=None):
"""
Return total time of the time slice or total time of measurement, if t_slice is None.
:param t_slice: Time slice in nanoseconds, or None for the full measurement.
:return: Total time value in nanoseconds
"""
if t_slice is None:
tottime = self.parent.data_dict['time'][-1]
else:
tottime = t_slice[1] - t_slice[0]
return tottime
def _interp(self, ch, t_val, index, prev=True):
"""
Interpolates dead time values for time slices. Timing array lists the dead time of each channel by some
interval that depends on the loader. If a time_slice does not hit the interval boundary the value has to be
somehow interpolated for the slice. This is especially true if the dead time itself is being plotted and the
time bin is smaller than the interval.
:param ch: The channel in question.
:param t_val: A nanosecond value defining a point in time.
:param index: The last timing index that is earlier than t_val for the channel. For start point of a time slice
this is just the start idx returned by find(t_slice, channel), for the endpoint it is the
stop idx - 1.
:param prev: whether index points to timing index previous to t_val or after it.
:return: Return a tuple with a dead time value for the part before the t_val and after it so that an interval
in timing array can be sliced.
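The interpolation itself is linear in time: for a timing interval [t0, t1] carrying dead time dt, the part
before t_val is dt*(t_val - t0)/(t1 - t0) and the part after it is dt*(t1 - t_val)/(t1 - t0), which is
exactly the tuple returned below.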
"""
t_min = self.parent.data_dict['time'][0] # cannot go before the first event
print('t_min, t_val', t_min, t_val)
t_val = max(t_min, t_val)
if prev: # idx0 is given
idx0 = self.timing['idx'][index]
t0 = self.parent.data_dict['time'][idx0]
# find next nonzero index (index is uint64, so it automatically casts itself to float on a mathematical
# operation, hence the int())
mask = self.timing['dt{}'.format(ch)][int(index + 1):] != 0
index1 = int(index + 1 + np.argmax(mask))
idx1 = int(self.timing['idx'][index1])
t1 = self.parent.data_dict['time'][idx1]
dt = self.timing['dt{}'.format(ch)][index1]
else: # idx1 is given
idx1 = int(self.timing['idx'][index])
print('idx of end', self.timing[index-1:], self.parent.data_dict['time'].shape[0])
print('time of idx', self.parent.data_dict['time'][idx1:])
print('time of end', self.parent.data_dict['energy'][idx1:, :])
print('channel', ch)
t1 = self.parent.data_dict['time'][idx1]
# find previous nonzero index
mask = self.timing['dt{}'.format(ch)][int(index - 1)::-1] != 0
index0 = int(index - 1 - np.argmax(mask))
idx0 = int(self.timing['idx'][index0])
t0 = self.parent.data_dict['time'][idx0]
dt = self.timing['dt{}'.format(ch)][index]
if t0 > t_val or t_val > t1: # sanity check
print('t0\t', t0)
print('t_val\t', t_val)
print('t1\t', t1)
t_val = t1
#raise ValueError('Invalid t_val in get_timing.')
return dt*(t_val - t0)/(t1 - t0), dt*(t1 - t_val)/(t1 - t0)
def find(self, t_slice, ch=None):
"""
Find the indices in self.timing that contain the t_slice time range.
:param t_slice: tuple of nanosecond values defining a slice in time
:param ch: If specified will return only indices in which dead time has been given for ch. This is mainly used
by get_timing to interpolate the dead time.
:return: indices to self.timing containing the time slice.
"""
if ch is None:
mask = np.ones((self.timing.shape[0],), dtype='bool')
else:
mask = np.zeros((self.timing.shape[0],), dtype='bool')
mask[0] = True # include zero in the beginning
nonz = self.timing['dt{}'.format(ch)] > 0
mask[nonz] = True
# last index under t_slice[0] or 0
idx1 = 0
prev = 0
for idx, t_idx in enumerate(self.timing['idx']):
if mask[idx]:
if self.parent.data_dict['time'][t_idx] >= t_slice[0]:
#went over, now need to go back to previous event with mask True
idx1 = prev
break
prev = idx
# first index over t_slice[1] or last index in mask
try:
for idxn, t_idx2 in enumerate(self.timing[idx1+1:]['idx']):
if self.parent.data_dict['time'][t_idx2] >= t_slice[1] and mask[idx1 + 1 + idxn]:
break
except:
print('Error in TCache')
print(self.parent.data_dict['time'].shape, t_idx2, t_slice, mask.shape)
raise
idx2 = idx1 + 1 + idxn
return idx1, idx2
def get_indices(self, t_slice=None):
"""
Return start and stop event indices (endpoint not inclusive) that contain the time slice fully using timing
info as a hash.
:param t_slice: tuple of nanosecond values defining a slice in time. If None full data is returned.
:return: indices to event data containing the time slice
"""
if t_slice is None:
return 0, self.parent.data_dict['time'].shape[0]
if self.timing.shape[0] == 0:
start = 0
stop = self.parent.data_dict['time'].shape[0]
print('Time cache not set!')
return start, stop
idx1, idx2 = self.find(t_slice)
start = self.timing['idx'][idx1]
stop = int(self.timing['idx'][idx2] + 1) # idx2 is included in the range
return start, stop
class Data:
"""
Sort of generic data class, with plug-in dataloader for vendor specific raw data
and extra data items configurable via configuration file.
Timestamp is always present in all kinds of data. It stores the event time in nanoseconds since the start
of the data. It is always 64-bit unsigned integer and is handled in a special way by listmode. All other
data is defined by info-dictionaries that are of the form:
info_dict = {"name": "some_data",
"type": "u1",
"num_col": 2,
"aggregate": "col",
"ch_mask": [1, 1, 0, 0],
"multi": "mean"}
Data is held in a data dictionary, with data name as key and memmap of the data as the value. (Currently
the data is also held as members with same name as the data for backward compatibility using _update method).
All data is stored in numpy arrays in the data dict. The data dict always contains 'time' and 'energy': the time
and energy information of events. The energy data is defined by the 'events' info dict in the configuration file.
Data dict can also contain extra data, defined by 'extras' list of info dicts in the configuration file.
A few types of extras are hardcoded into Listmode and are handled in a special way if they are present:
coord: coordinate information. Correspondence of channels to coordinate columns is given by
config.det['coordinates']. This is used for data selection and plots.
latency: timing information. Each column is the time difference between the 'main' channel and the other channels in the
event. Used to tune the latency and coincidence window.
multihit: A flag that is raised if a channel has several hits per event. A type of non-destructive pileup. The
energy value of a multihit event is calculated using a function defined by the 'multi' keyword.
All other extras are just carried along with the data and can be plotted (not quite yet) or used for event
selection (not there either).
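As an illustrative sketch (shapes and extras depend on the configuration), a loaded two-channel detector with a
``coord`` extra could hold something like::

    data_dict = {'time':   uint64 array of shape (N,),      # ns since start of data
                 'energy': int16  array of shape (N, 2),
                 'coord':  array of shape (N, num_col)}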
"""
def __init__(self, config):
"""
The configuration file contains a recipe for the data and is loaded at the beginning of the creation of the data.
All data structures, the time cache and metadata are created here, but the data itself is still empty and has to be
loaded with the load_data method.
:param config: A pathlib path to detector configuration file or the configuration dict itself.
"""
# do init stuff
self.config = config
self.data_ext = self.config.det['extension']
self.data_type = self.config.det['data_type']
# select raw data loader
detector_type = {'pixie4': ldr.pixie4_loader,
'g4': ldr.g4_loader,
'dspec': ldr.dspec_loader,
'caen': ldr.caen_loader,
'PANDA': ldr.panda_loader,
'standard': None}
print('Loading data', config.det['data_type'])
# initializing correct raw data loader, if not standard data
temp = detector_type[self.config.det['data_type']]
if temp is not None:
self._read_raw_data = temp.__get__(self)
# detector init
self.num_ch = len(self.config.det['ch_cfg'])
self.ch_list = self.config.det['ch_list']
#self.events = 0
# Stuff defined per event.
self.data_dict = {}
# self.name_list = []
self.data_dict['time'] = np.zeros((0,), dtype='uint64')
# self.name_list.append('time')
# create the main event type
# todo: this should be just one of the extras or 'datas'
self.event_info = {"name": "energy",
"num_col": self.num_ch,
"aggregate": "col",
"ch_mask": np.ones((self.num_ch,), dtype='bool')}
self.event_info.update(self.config.det["events"])
self.data_dict['energy'] = np.zeros((0, self.num_ch), dtype=self.event_info['type'])
#self.name_list.append('energy')
try:
for extra in self.config.det['extras']:
self.data_dict[extra['name']] = np.zeros((0, extra['num_col']), dtype=extra['type'])
# self.name_list.append(extra['name'])
except KeyError:
pass
self._update()
# init time cache
self.t_cache = TimeCache(self)
# init metadata
self.metadata = Metadata(self)
# For calibration a dummy is initialized here and can be overwritten if a cal file is defined
#self.cal = np.zeros((len(self.ch_list), 3))
#self._load_calibration()
self.chunk_idx = 0 # for block based data processing
def _update(self):
"""
All data, including the extras, is set as a direct member of data class. This is for compatibility with
the old code and will be deprecated in the future.
More harmful than useful. Deprecated.
:return:
"""
pass
# old variable names for compatibility and easy referral:
#for name, value in self.data_dict.items():
# setattr(self, name, value)
def load_data(self, data_path_str, name=None, reset=False):
"""
Loads data, preferably from event mode .dat files. If this fails, channel data is searched for. (Channel
data may be saved as an intermediary step when doing a slow conversion from other data formats.) Otherwise the
_read_raw_data method is called. Native format has no raw data and will fail.
:param data_path_str: Path to data directory. It has to be either a string or a pathlib Path object.
:param name: Optional name, if data file does not share the same base_name as the directory.
:param reset: The raw data parsing can be forced with reset=True.
:type reset: Bool
:return:
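Example (sketch; the path is hypothetical and ``cfg`` is assumed to be a loaded configuration)::

    d = Data(cfg)
    d.load_data('/path/to/run01')        # the directory name doubles as the base_name
    print(d.t_cache.get_dead_time())     # per-channel dead time of the full run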
"""
data_path = Path(data_path_str)
save_metadata = False # for old datasets with no metadata present
# check that path and files exist
loaded = False
if not data_path.is_dir():
raise ValueError("Invalid datapath in read data!")
if name is None:
self.base_name = data_path.name
else:
self.base_name = name
if not reset:
try:
# check if base_name has an event mode datafile and load it.
print('Trying to read parsed data', data_path, self.base_name)
self.data_dict, timing_data = read_binary_data(data_path, self.base_name,
cfg=self.config, mode='event')
self.t_cache.set(timing_data)
self._update()
loaded = True
except ex.ListModeDataNotFoundError:
print('Cannot find parsed data')
except:
print('Event data load failed!')
raise
if not loaded:
try:
# check if base_name has channel datafiles and load them. (Rerunning coincidence
# parsing can be done quickly by deleting the event file!)
# Otherwise set reset to True and proceed.
print('Trying to read channel data', data_path, self.base_name)
self._load_channel_data(data_path)
#if len(self.ch_list) != self.data_mat.shape[1]:
# raise ValueError('Invalid number of channels!')
loaded = True
except ex.ListModeDataNotFoundError:
print('Cannot find channel data')
raise
except:
print('Channel data load failed!')
raise
if not loaded:
if self.data_type == 'standard': # there is no raw data for standard data
raise FileNotFoundError('No datafiles')
reset = True
if reset:
print('Data reset!')
# Read raw data
try:
print('Reading raw data')
# _read_raw_data is responsible for converting raw data to channel data, parsing it and providing
# metadata if it is missing
self._read_raw_data(data_path)
except FileNotFoundError:
print('No raw data!')
print(data_path)
print('Exit!')
raise
try:
self.metadata.load()
except FileNotFoundError:
self.metadata.calculate()
self._update()
def get_data_block(self, t_slice=None):
"""
Get data and time vectors, processed in chunks of 1M events to save memory. Optionally, define a slice in
time. The method should be called in a loop to read everything; all data, including extras, is returned.
The last return value, isdata, indicates whether there is more data to come. On False the loop should be
stopped, but the last chunk of data is still valid.
:param t_slice: A tuple of start and stop times in nanoseconds. Full data is read if this is None. The
time slice should never be changed while reading the data in a loop.
:return: A tuple of (data_dict, isdata) for current chunk.
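Example of the intended read loop (sketch, assuming a loaded Data instance ``d``)::

    isdata = True
    while isdata:
        chunk, isdata = d.get_data_block(t_slice=None)
        handle(chunk['time'], chunk['energy'])   # ``handle`` is a user-supplied function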
"""
chunk_size = 1000000
events = self.data_dict['time'].shape[0]
start, stop = self.t_cache.get_indices(t_slice)
num_chunks = (stop - start) // chunk_size
if self.chunk_idx < num_chunks:
isdata = True # data left
else: # last chunk
isdata = False
idx1 = int(start + self.chunk_idx * chunk_size)
idx2 = min(stop, int(start + (self.chunk_idx + 1) * chunk_size), int(events))
if t_slice is not None:
mask = np.logical_and(self.data_dict['time'][idx1:idx2] > t_slice[0],
self.data_dict['time'][idx1:idx2] < t_slice[1])
else:
mask = np.ones((self.data_dict['time'][idx1:idx2].shape[0],), dtype='bool')
retdict = dict()
for name, item in self.data_dict.items():
retdict[name] = item[idx1:idx2, ...][mask, ...]
if isdata:
self.chunk_idx += 1
else:
self.chunk_idx = 0
return (retdict, isdata)
def get_dead_time(self, t_slice=None):
"""
Get dead time for the data or a time_slice of data.
:param t_slice: a tuple of start and stop times in nanoseconds. Full dead time is retrieved if this is set to
None.
:return: The dead times in [s] for all channels as a vector of floats.
"""
deltat = np.zeros((len(self.ch_list)))
if t_slice is None: # currently implemented
timing = self.t_cache.timing
else: # not implemented
timing = self.t_cache.get_timing(t_slice)
for ch in range(len(self.ch_list)):
deltat[ch] = timing['dt{}'.format(ch)].sum()
return deltat
def get_end_time(self):
return self.data_dict['time'][-1]
def _parse_on_load(self, data):
"""
For parsing events from channel mode files in batches to conserve memory.
:param data: a tuple of (list of data_dicts, list of timing_datas).
:return:
"""
batch_size = 100000
data_list, timing_data = data
print('Allocating event file streamers!!')
evnt_streamer = StreamData(self.config.path['home'], self.base_name, raw=False,
method='event')
timing_streamer = StreamData(self.config.path['home'], self.base_name, raw=False,
method='timing')
# ugly fix for the stupid zero entries in the beginning of channel timing data
for td_idx in range(len(timing_data)):
if timing_data[td_idx]['idx'][0] == 0: # if this fires, we are at the beginning of data. It should!
timing_data[td_idx] = timing_data[td_idx][1:]
timing_streamer.write(np.zeros((1,), dtype=[('idx', 'u8'), ('t', 'u8')] +
[('dt{}'.format(x), 'f4') for x in range(len(timing_data))]))
try:
extras = self.config.det['extras']
except KeyError:
extras = None
extra_streamers = dict()
for extra in self.config.det['extras']: # self.name_dict.keys:
extra_streamers[extra['name']] = StreamData(self.config.path['home'], self.base_name, raw=False,
method='extra', extra_name=extra['name'])
# Time vector, or data_tuple[ch_idx]['time'], is used for parsing. The data is pushed to EventBuilder in
# batches of constant time, but not exceeding max_datasize in length.
chlist = np.array(range(self.num_ch), dtype='u1') # used to retrieve indices through boolean indexing
idx0_front = np.zeros((len(self.ch_list),), dtype='u8') # start indices for batch
idx1_front = np.zeros_like(idx0_front) # stop indices for batch
# set data_left for beginning
idx_max = np.array([x['time'].shape[0] for x in data_list], dtype='u8') # the last indices of data
data_left = idx0_front < idx_max
# current timestamps
# Empty channels must be handled separately or hilariousness ensues.
t_front = np.zeros((self.num_ch,), dtype='u8')
for live_ch in chlist[data_left]:
t_front[live_ch] = data_list[live_ch]['time'][idx1_front[live_ch]]
# event builder is supposed to be an online function, so doing it posthumously is unnecessarily complicated...
if self.num_ch > 1: # todo: What happens if only 1 channel in data?
ev_bldr = EventBuilder(len(self.ch_list), self.config.det['coinc_win'],
self.config.det['latency'], extras, self.event_info, max_datasize=batch_size)
# Loop through the data. Chop everything into equal time chunks by finding channel with highest rate and
# selecting max_datasize events from that channel. Include other channels up to same time
while np.any(data_left):
try:
# next batch end idx in all channels
idx1_front[data_left] = [min(idx_max[x], idx0_front[x] + batch_size) for x in chlist[data_left]]
print('idx1_front', idx1_front)
# corresponding timestamps, idx1_front points to one past the last index in the chunk
t_front[data_left] = [data_list[x]['time'][int(idx1_front[x]-1)] for x in chlist[data_left]]
except:
print('exception in t_front calc')
raise
events_left = idx_max - idx0_front
print('left :', events_left)
print(t_front)
mask = events_left > batch_size # mask the channels which have more counts than the current batch
if np.any(mask):
# pick the active channel with smallest timestamp at batch end. Here we have to take into account
# that some channels may already be done for. Hence the data_left tricks.
lead_ch = chlist[data_left][t_front[data_left].argmin()]
else:
# when data is about to end we take one more step. We pick the channel with biggest timestamp
#lead_ch = idx_max.argmax()
lead_ch = t_front.argmax()
print('Last batch!')
lead_t = t_front[lead_ch]
print('channel with last event in batch is', lead_ch, 'and t is', lead_t, 'ns.')
full_data = [None for _x in range(len(self.ch_list))]
ch_timing = [[] for _x in range(len(self.ch_list))]
# Then find the same (or smaller) time for all active channels and cut the batch there
for ch_idx in chlist: # we go through empty channels too to provide empty data for event builder.
ch_data = dict()
if data_left[ch_idx]:
if ch_idx != lead_ch:
# easy way of finding the last timestamp under lead_t. Return value is number of events
# to include from the chunk, or the index +1
# of the last event in the chunk that passes the test <= lead_t.
# The test fails if all timestamps are smaller (0 returned, should not happen as lead_ch
# is smallest)
# or the last timestamps are equal (0 returned, unlikely but possible). Zero is returned
# also when all time stamps are more than lead_t. This is correct behaviour.
temp = np.argmin(data_list[ch_idx]['time'][idx0_front[ch_idx]:idx1_front[ch_idx]] <= lead_t)
# debug
if temp == 0:
if data_list[ch_idx]['time'][idx0_front[ch_idx]] > lead_t:
# the time of the first event in the chunk is bigger than the end time
# of the chunk. Empty chunk so temp is correct!
print('!"!"!"!"!"!"! correct temp 0')
pass
elif data_list[ch_idx]['time'][int(idx1_front[ch_idx] - 1)] == lead_t:
# last event in batch is shared between several channels
temp = int(idx1_front[ch_idx] - idx0_front[ch_idx])
print('%%%%%%% incorrect temp0')
elif data_list[ch_idx]['time'][int(idx1_front[ch_idx] - 1)] < lead_t:
# last index is less than lead_t -> crash!
temp = int(idx1_front[ch_idx] - idx0_front[ch_idx])
if idx1_front[ch_idx] < idx_max[ch_idx]: # Check if data left
raise ex.ListModeTimestampError('Last timestamp is less than lead_t but data is left!')
else:
raise
# correct idx1 front
idx1_front[ch_idx] = idx0_front[ch_idx] + temp
else:
print('Empty channel!')
print('tfronts', idx0_front, idx1_front)
print('idx max', idx_max)
print('data left', data_left)
# correct idx1 front
#idx1_front[ch_idx] = idx0_front[ch_idx]
#raise
# timing data sliced by (idx)
timing_mask = np.logical_and(timing_data[ch_idx]['idx'] >= idx0_front[ch_idx],
timing_data[ch_idx]['idx'] <= idx1_front[ch_idx])
temp_extra = []
# build batch.
for name, value in data_list[ch_idx].items():
ch_data[name] = value[idx0_front[ch_idx]:idx1_front[ch_idx]]
full_data[ch_idx] = ch_data
ch_timing[ch_idx] = timing_data[ch_idx][timing_mask]
ev_data_dict, ev_timing = ev_bldr.run_batch(full_data, ch_timing)
# STOP
evnt_streamer.write((ev_data_dict['time'], ev_data_dict['energy']))
for name in extra_streamers.keys():
extra_streamers[name].write(ev_data_dict[name])
timing_streamer.write(ev_timing)
print('timing', ev_timing)
print('streamed', ev_data_dict['time'].shape[0])
idx0_front[data_left] = idx1_front[data_left]
# recalculate data_left
data_left = idx0_front < idx_max
print('debug', data_left)
else: #todo: not modified yet. Should the raw data be directly copied or just renamed
pass
# print('last timing indices', [timing_data[ch]['idx'][-1] for ch in chlist])
# print(ev_timing['idx'][-1])
evnt_streamer.close()
timing_streamer.close()
for es in extra_streamers.values():
es.close()
def _load_channel_data(self, data_path):
"""
Used to read channel data and parse into events. Channel data for each
channel can be just measurement or zero-suppressed strip detector data,
with 1-d coordinates on a separate file.
Coordinate data is aggregated into final coordinate information (forming
an n-d coord-data) in the order of channel_cfg vector.
:param data_path: Path to the data directory. Channel files are deleted on exit unless config.det['debug'] is set.
:return:
"""
# ch_list is a tuple of (list of ch_data_dicts, list of ch_timing_datas)
ch_list = read_binary_data(data_path, self.base_name, mode='channel', cfg=self.config)
self._parse_on_load(ch_list)
del(ch_list) # free the files
data_dict, timing_data = read_binary_data(data_path, self.base_name, mode='event', cfg=self.config)
# data_tuple is made of data_dict and timing data
self.data_dict = data_dict
self.t_cache.set(timing_data)
try:
delete_chfiles = not self.config.det['debug']
except KeyError:
delete_chfiles = True
if delete_chfiles:
ut.delete_channel_data(data_path, self.base_name, self.config)
def poly2(x, *p):
"""
Model function for 2nd degree polynomial fit for energy calibration.
:param x: A channel value or a numpy list of channel values.
:param p: Calibration coefficients, starting from 0th degree coefficient.
:return: Calibrated x.
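Example (sketch with made-up calibration coefficients)::

    cal = (0.5, 1.2, 1.0e-5)                           # a, b, c
    energies = poly2(np.array([100, 500, 1000]), *cal)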
"""
a, b, c = p
return a + b * x + c * x ** 2
def ipoly2(y, *p):
"""
Computes the inverse of the 2nd degree polynomial above: returns x given y. The root (branch) is chosen so
that the inverse is unambiguous over the given range of y; otherwise a ListModeCalibrationError is raised.
:param y: An energy value or a numpy list of energy values.
:param p: Calibration coefficients, starting from 0th degree coefficient.
:return: Channel values
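Example (sketch, continuing the poly2 example above)::

    channels = ipoly2(energies, *cal)   # recovers approximately [100, 500, 1000]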
"""
y = np.asarray(y) # cast single numbers to array if needed
ylim = np.array((y.min(), y.max()))
c, b, a = p
if np.abs(a) > 1e-8: # if it is 2nd deg
disc = b**2 - 4*a*c
xapex = -b/(2*a)
if disc <= 0:
# no intersection of axis. Only valid if a>0 and all y > xapex or all y < xapex
if a > 0 and np.all(ylim >= xapex):
branch = 1
elif a > 0 and np.all(ylim < xapex):
branch = -1
else:
raise ex.ListModeCalibrationError('No real solution for inverse y calculation!')
else:
# Two roots case
x0 = (-b - np.sqrt(disc))/(2 * a)
x1 = (-b + np.sqrt(disc))/(2 * a)
if a > 0:
# Only valid if positive and all y over x1 or all y under x0
if np.all(ylim > x1):
branch = 1
elif np.all(ylim < x0):
branch = -1
else:
raise ex.ListModeCalibrationError('Inverse energy calibration is not unambiguous over the range!')
else:
# only valid if positive and between x0 to xapex or xapex to x1
if np.all(np.logical_and(ylim >= x1, ylim < xapex)):
branch = 1
elif np.all(np.logical_and(ylim >= xapex, ylim < x0)):
branch = -1
else:
print(ylim, xapex, x0, x1)
raise ex.ListModeCalibrationError('No real solution for inverse y calculation!')
x = (-b + branch*np.sqrt(b**2 - 4*a*(c-y)))/(2 * a)
else:
# linear case
x = (y - c) / b
return x
class EventBuilder:
"""
Painful way of walking through the data and trying to build events
by seeking coincidences between channel times.
Ideally works on shortish arrays of data returned by the digitizer, but should manage big savefiles in chunks.
"""
def __init__(self, num_ch, coinc_win, latency, extras, event_info, max_datasize=8192):
"""
:param num_ch: Number of input channels. This is the number of active channels in the data.
:param coinc_win: in ns. Time window for coincidence search after a trigger in a channel.
:param latency: for every channel, in ns. Number of ns to add to channel time to make it fit to the coincidence
window. Can be negative.
:param extras: list of dictionaries holding information for extra data.
:param event_info: dictionary holding information for the energy data.
:param max_datasize: size for the internal arrays during parsing, should be the size of the output buffer of the
digitizer.
"""
self.coinc_win = coinc_win # coincidence window length in nanoseconds
#self.event_info = event_info # data type and aggregation of energy data
self.latency = np.array(latency, dtype='int') # per channel latencies
self.maxwin = self.coinc_win - self.latency # end of coincidence window in each channel
self.num_ch = num_ch # number of channels in the data
self.chan_list = np.arange(self.num_ch, dtype='int32')
self.bit_list = 2**self.chan_list
self.chmax = np.zeros((self.num_ch,), dtype='uint64') # max data idx per channel
self.timing_chmax = np.zeros((self.num_ch,), dtype='uint64') # max timing entries per channel
#self.data_mat = np.zeros((int(max_datasize*self.num_ch), # Worst case scenario has no coincidences
# self.num_ch), dtype='int16')
# time vec is recorded
self.time_vec = np.zeros((max_datasize * self.num_ch,), dtype='uint64')
# big time holds timestamp and index of every event in time order
self.big_time = np.zeros((max_datasize * self.num_ch, 2), dtype='uint64')
self.timing_data_sz = 2000
type_list = [('idx', '<u8'), ('t', '<u8')]
for x in range(self.num_ch):
type_list.append(('dt{}'.format(x), '<f4'))
self.timing_data = np.zeros((self.timing_data_sz,), dtype=type_list)
self.timing_idx = 0 # idx of current timing data leading edge (first always zeros)
self.t0 = np.zeros((self.num_ch,), dtype='uint64')
#self.E0 = -1*np.ones((self.num_ch,), dtype='int16')
self.timing0 = np.zeros((self.num_ch,), dtype='uint64') # idx values of timing front
# construct the processors and outputs for processor pipeline
self.proc_list = []
self.out_list = []
self.name_list = []
self.ev_count = np.zeros((self.num_ch,), dtype='uint64')
# energy
ch_mask = np.ones((self.num_ch,), dtype='bool')
self.defaults = []
# this is ordered like this due to historical reasons.. Not worth the trouble to change.
self.proc_list.append(ColProcessor(event_info))
self.out_list.append(event_info['empty_val'] * np.ones((int(max_datasize * event_info['num_col']), # Worst case
event_info['num_col']), dtype=event_info['type'])) # scenario has no coincidences
self.name_list.append('energy')
self.defaults.append(event_info['empty_val'])
# add extras. Timing is not in the pipeline.
if extras is not None:
for e_idx, extra in enumerate(extras):
self.proc_list.append(process_dict[extra['aggregate']](extra))
self.defaults.append(extra['empty_val'])
self.out_list.append(np.zeros((int(max_datasize*self.num_ch), extra['num_col']),
dtype=extra['type']))
self.name_list.append(extra['name'])
self.t_front = np.zeros((self.num_ch,), dtype='uint64') # indices that are compared currently
self.timing_front = np.zeros((self.num_ch,), dtype='uint64') # indices to timing front
self.total_sum = 0 # total accumulated events
self.ch_total_sum = np.zeros((self.num_ch,), dtype='uint64') # total number of input counts
self.first = True # Extra timing entry needs to be written on the first run of run_batch
def run_batch(self, data_dict, timing_list):
"""
The time front is a list of the lowest unbuilt indices for each channel.
(The t0 is the times, E0 the energies)
The channel which has the lowest time in the front starts an event, and other channels in the front whose
times fall within the coincidence window are included in it. The front is incremented for all channels that
were included and the iteration starts again.
:param data_dict: list of data_dicts for each channel
:param timing_list: list holding timing information for each channel
:return: data_dict, timing_data
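Illustrative sketch (made-up numbers): with two channels, ``coinc_win=100`` ns and zero latencies,
channel 0 times ``[0, 1000]`` ns and channel 1 times ``[40, 2000]`` ns are built into three events:
one coincidence at t=0 containing both channels, and two single-channel events at t=1000 and t=2000.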
"""
# zero all data and indices
coincsum = 0
# self.data_mat.fill(-1)
self.time_vec.fill(0)
self.t_front.fill(0)
self.timing_front.fill(0)
self.timing_data.fill(0)
# reset energy
# self.out_list[0].fill(-1)
# reset energy and extras
for idx in range(len(self.out_list)):
self.out_list[idx].fill(self.defaults[idx])
tot_counts = 0
for ch in range(self.num_ch):
data_dict[ch]['time'] = data_dict[ch]['time'] + self.latency[ch]
try:
self.chmax[ch] = data_dict[ch]['time'].shape[0]
except IndexError:
self.chmax[ch] = 0
try:
self.timing_chmax[ch] = timing_list[ch].shape[0]
if self.timing_chmax[ch] == 0:
# very clumsy way around empty timing lists. If set to max value the write is never triggered.
self.timing0[ch] = np.iinfo('uint64').max
else:
# otherwise we use the first index entry in timing_list
self.timing0[ch] = timing_list[ch][0]['idx']
except IndexError:
# Missing timing list
self.timing0[ch] = np.iinfo('uint64').max
except:
raise
tot_counts += int(self.chmax[ch])
print('Counts in eventbuilder', self.chmax)
print('Counts in timing', self.timing_chmax)
chan_mask = self.t_front < self.chmax # the channels that actually have any data
timing_chan_mask = self.timing_front < self.timing_chmax
# current_ev_inds = self.chan_list[chan_mask] # on first iteration all channels are inspected
count = 0
timing_idx = 0
if self.first:
# on first run we include the first row of zeros into timing data
timing_idx = 1
self.first = False
cnum = 0
oldt = 0
while count < tot_counts:
# go through the hits one by one and insert earliest into big time.
# Channels that can have data
ev_indices = self.chan_list[chan_mask]
# go through active channels and record current values of time for each channel
for ch in ev_indices:
self.t0[ch] = data_dict[ch]['time'][self.t_front[ch]]
# find channel with smallest t and insert into big list
chan = ev_indices[self.t0[ev_indices].argmin()]
self.big_time[count, :] = [self.t0[chan], chan]
if oldt > self.t0[chan]:
print(oldt, self.t0[chan])
print('Gotcha! Timestamp error!')
raise ex.ListModeTimestampError("Previous timestamp was bigger!")
oldt = self.t0[chan]
self.t_front[chan] += 1
chan_mask = self.t_front < self.chmax
count += 1
self.t_front.fill(0)
evnt_num = 0 # this is the most important number in the method. If wrong, the data is incorrectly cropped.
big_idx = 0
iterating = tot_counts > 0
while iterating: # through all events
# bookkeeping stage. Events are written into matrices. Here the list of data should be run
# through each corresponding processor function
ev_sz = 0
self.ev_count.fill(0)
t_end = int(self.big_time[big_idx, 0] + self.coinc_win)
# a single event is always looped here, as the first timestamp is guaranteed to be under t_end
while self.big_time[big_idx + ev_sz, 0] < t_end:
# mark the channel to the event
self.ev_count[self.big_time[big_idx + ev_sz, 1]] += 1
if big_idx + ev_sz + 1 == tot_counts:
iterating = False
break
ev_sz += 1
if ev_sz > 1:
coincsum += 1
# set time of the event (time of trigger modified by latencies)
self.time_vec[evnt_num] = self.big_time[big_idx, 0]
# Set the event data. The data of the event is run through the processors and each processor fills
# corresponding indices in self.out_list. e.g. a hit in ch0 and ch2 will cause energy processor to fill
# values corresponding self.t_front[0,2] into to self.out_list[0][evnt_num, (0,2)]
for idx, proc in enumerate(self.proc_list):
# proc.process(data_dict, self.out_list[idx][evnt_num, :], self.t_front, self.ev_count)
proc.process(data_dict, self.out_list[idx][evnt_num, :], self.t_front, self.ev_count)
# handle timing
# If the data block has any timing data left then
# the current timing value is set according to the front
changed = False
for ch in self.chan_list[np.logical_and(self.ev_count > 0, timing_chan_mask)]:
# if the current timing index is smaller than the current idx. Current idx per channel is equal to the already
# written channel hits (self.ch_total_sum) plus the t_front of the current chunk and the ev_count of the current event.
#
if self.timing0[ch] <= int(self.t_front[ch] + self.ch_total_sum[ch] + self.ev_count[ch]):
print('JIIIHAAAA!!!')
self.timing_data['dt{}'.format(ch)][timing_idx] = timing_list[ch]['dt0'][self.timing_front[ch]]
self.timing_front[ch] += 1
changed = True
# calculate new timing0
if self.timing_chmax[ch] > self.timing_front[ch]:
self.timing0[ch] = timing_list[ch]['idx'][self.timing_front[ch]]
else:
timing_chan_mask[ch] = False
if changed:
# The event idx to write, on the other hand, is equal to already written events (self.total_sum)
# plus event num of current event.
# Somehow this is still 1 event less than it should. A loader problem?
self.timing_data['idx'][timing_idx] = evnt_num + 1 + self.total_sum # evnt_num not incremented yet
self.timing_data['t'][timing_idx] = self.big_time[big_idx, 0] # evnt_num not incremented yet
timing_idx += 1 # increment output timing index if one or more channels were updated
# update event
self.t_front += self.ev_count
evnt_num += 1
big_idx += ev_sz
# update running values in the end of the processed chunk
self.total_sum += evnt_num
self.ch_total_sum += self.chmax
# build output
data_dict = dict()
data_dict['time'] = self.time_vec[:evnt_num]
for idx in range(len(self.out_list)):
data_dict[self.name_list[idx]] = self.out_list[idx][:evnt_num, ...]
print('Parsed', evnt_num, 'events with', coincsum, 'coincidences.')
print('last idx', self.total_sum, 'timing', self.timing_data[:timing_idx])
return data_dict, self.timing_data[:timing_idx]
def strip_cal(data_mat, coord, strip_cal, coord_ch):
"""
Calculates strip calibration for coordinate data.
:param data_mat: data
:param coord: coordinates
:param strip_cal: calibration matrix
:param coord_ch: order of coordinate channels
:return:
"""
for idx, cc in enumerate(coord_ch):
mask = data_mat[:, cc] > 0
data_mat[mask, cc] = (strip_cal[idx, coord[mask, idx], 0] +
strip_cal[idx, coord[mask, idx], 1] * data_mat[mask, cc] +
strip_cal[idx, coord[mask, idx], 2] * data_mat[mask, cc] ** 2)
def generate_timing(chfile, pulse_dead_time, t_vec):
"""
Utility function to generate timing vector if it does not exist. Takes pathlib type
filename, pulse dead time for the channel and t_vec.
Returns nothing, just writes the data.
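Example (sketch; the file name, dead time value and ``time_vec`` are hypothetical)::

    generate_timing(Path('run01_timing_ch0.dat'), pulse_dead_time=2000.0, t_vec=time_vec)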
"""
chunk_size = 100000
count = t_vec.shape[0]
if count % chunk_size != 0:
sz = int(count//chunk_size)
else:
sz = int(count//chunk_size-1)
t_data = np.zeros((sz+2,), dtype=[('idx', '<u8'), ('t', '<u8'), ('dt0', '<f4')])
# The first row is left as zeros. Each following row stores the event index, the timestamp of the last
# event in the chunk (assumed here) and the dead time accumulated over the chunk.
for idx in range(sz):
t_data[idx+1] = ((idx+1)*chunk_size, t_vec[(idx+1)*chunk_size - 1], chunk_size*pulse_dead_time*1e-9)
leftover = t_vec[sz*chunk_size:].shape[0]
t_data[-1] = (t_vec.shape[0]-1, t_vec[-1], leftover*pulse_dead_time*1e-9)
with chfile.open('wb') as f:
f.write(t_data.tobytes())
def fill_default_data(cfg):
"""
Will generate reasonable defaults for parameters omitted for 'events' and 'extras' data_info dictionaries. It will
overwrite incompatible parameters. Does not work yet.
:param cfg: Configuration of the detector.
:return: data_info dictionary
"""
pass
#dict = {'name': name}
#if name == 'energy':
# dict['num_col'] = len(cfg['ch_mask'])
class ColProcessor:
"""
Simple class for aggregating data in event building.
It is initialized with the data info (like in extras definition) including
what happens when multiple events are found within the same time window.
The process method is given input events, channel mask and output data structure.
Output data is modified in place. Each instance of the class only updates its own part of the data (energy,
timing, coord, etc.) and is meant to be run in a pipeline for every event.
"""
def __init__(self, info):
"""
:param info: The data info dict containing information of the data:
"name": name of the output datafile: "basename_name.dat"
"type": datatype (u1, i2, u4 ..)
"num_col": number of columns in the output
"aggregate": aggregate type of the data. Accepted aggregate types are:
"col": each input channel is aggregated as a column to the
output matrix
"bit": each input channel is cast to bool and added to a bitmask
"multihit": No inputs. Outputs a bitmask of multiple hits per event on a multi-
channel detector.
"latency": No inputs. Outputs the time difference of coincident signals between a single
main channel and all the others. Needs "main" parameter to be set.
In the future add:
"sum": Sum of the data defined by "type" and "channel" parameters where "type" denotes data
type to sum and "channel" is a list of channels. This extra is associated to the
first channel in the list.
"multi": What to do if multiple hits to a channel in single event:
"sum": sum all to a single value
"max": take the maximum value
"max_e": take value on the hit with maximum energy
"min": take the minimum value
"mean": calculate arithmetic mean and round to fit "type"
"kill": set to 0
"ch_mask": Some data is only valid for some channels. Boolean channel mask is used to define
valid channels for the data. Must be np array with shape[0]=num_ch
"main": Used by the "latency" aggregate to define which channel is compared against the others.
In the future add:
"type": Type of data to sum up as extra.
"channel": List of channels to sum up as extra.
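Example (hedged sketch of a possible 'coord' extra definition; the values are illustrative)::

    info = {"name": "coord", "type": "u1", "num_col": 2, "aggregate": "col",
            "multi": "max_e", "ch_mask": [1, 1, 0, 0], "empty_val": 0}
    proc = ColProcessor(info)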
"""
self.info = info
# self.ch_mask = np.array(self.info['ch_mask'], dtype='i4')
self.ch_mask = np.array(self.info['ch_mask'], dtype='bool')
self.ch_ind = np.arange(self.ch_mask.shape[0]) # used to map from extra data index to channel idx
# channel map maps input channel index into output. It is used for data that can be missing from some
# channels, such as coordinate data. The cumulative sum works because channel mask masks the incorrect
# indices.
self.ch_map = self.ch_mask.cumsum() - 1
# With the new dictionary input one needs to use the name of the data to index it from the input instead of
# in_idx. This is also true for 'energy'
# self.in_idx = in_idx
self.name = self.info['name']
self.op = multi_dict[self.info['multi']]
self.template = np.zeros(self.ch_mask.shape[0], self.info['type'])
self.template.fill(self.info['empty_val'])
def process(self, in_list, out, t_front, ev_count):
"""
:param in_list: list of data_dicts, one per channel
:param out: the output row of the current event (modified in place)
:param t_front: current position in the data of each channel
:param ev_count: number of hits per channel in the event
:return:
"""
#result = self.template.copy()
#in_mask = ev_count > 0
for ch in self.ch_ind[np.logical_and(ev_count > 0, self.ch_mask)]:
# out[ self.ch_map[ch]] = in_list[ch][self.name][t_front[ch]]
# fill valid hits
# ch is index to detector channel
#try:
if ev_count[ch] == 1:
out[self.ch_map[ch]] = in_list[ch][self.name][t_front[ch]]
# #result[self.ch_map[ch]] = in_list[ch][self.name][t_front[ch]]
else:
# # out[self.ch_map[ch]] = self.op(in_list[ch][self.name][t_front[ch]:t_front[ch] + ev_count[ch]])
out[self.ch_map[ch]] = self.op(in_list[ch], t_front[ch], ev_count[ch], self.name)
# # result[self.ch_map[ch]] = self.op(in_list[ch], t_front[ch], ev_count[ch], self.name)
#except:
# print('Exception in col processor!')
# print(ch, self.name, in_list[ch][self.name].shape, t_front[ch], ev_count)
# raise
# out[:] = result
class BitProcessor (ColProcessor):
def __init__(self, info):
super().__init__(info)
self.bitvals = 2 ** np.array(range(len(self.ch_mask)), dtype=info['type'])
def process(self, in_list, out, t_front, ev_count):
# in_mask = ev_count > 0
#single_hits = self.ch_ind[np.logical_and(ev_count == 1, self.ch_mask)]
#multi_hits = self.ch_ind[np.logical_and(ev_count > 1, self.ch_mask)]
# temp = [in_list[ch][self.name][t_front[ch]]*self.bitvals[ch] for ch in single_hits]
# temp.extend([self.op(in_list[ch], t_front[ch], ev_count[ch], self.name)*self.bitvals[ch] for ch in multi_hits])
#temp = 0
for ch in self.ch_ind[np.logical_and(ev_count > 0, self.ch_mask)]:
# fill valid hits
# ch is index to detector channel
#try:
if ev_count[ch] == 1:
#temp[ch] += in_list[ch][self.name][t_front[ch]]*self.bitvals
out += in_list[ch][self.name][t_front[ch]] * self.bitvals[ch]
else: # will this ever be used?
out += self.op(in_list[ch], t_front[ch], ev_count[ch], self.name) * self.bitvals[ch]
#for ch in self.ch_ind[np.logical_and(ev_count > 1, self.ch_mask)]:
#temp[ch] += self.op(in_list[ch], t_front[ch], ev_count[ch], self.name)*self.bitvals
# out[ev_num] += self.op(in_list[ch], t_front[ch], ev_count[ch], self.name)*self.bitvals[ch]
# if temp > 0:
#except:
# print(ch, self.name, in_list[ch][self.name].shape, t_front[ch], ev_count)
# raise
# out[ev_num] = temp
class LatencyProcessor (ColProcessor):
"""
LatencyProcessor is a specialized processor used to visualize the timing properties of the input data. Each
output column is equal to the time difference between the event in the main channel and the event in each other
channel (so the output of the main channel is always zero), calculated from latency corrected time data. The
smallest possible value is returned if there was no coincidence between the channels. All channels should show
zero-centered distributions in a properly tuned detector. The width of the distributions shows how big a
coincidence window is needed.
"""
def __init__(self, info):
super().__init__(info)
self.main_ch = info['main']
self.ch_mask[self.main_ch] = 0 # set to zero as self delta is constant 0
def process(self, in_list, out, t_front, ev_count):
in_mask = ev_count > 0
if in_mask[self.main_ch]:
out[self.main_ch] = 0
main_t = int(in_list[self.main_ch]['time'][t_front[self.main_ch]])
for ch in self.ch_ind[np.logical_and(in_mask, self.ch_mask)]:
# fill valid hits
# ch is index to detector channel
try:
out[self.ch_map[ch]] = int(in_list[ch]['time'][t_front[ch]]) - main_t
except:
print('out', out[self.ch_map[ch]])
print(ch, t_front[ch], ev_count)
raise
class MultiHitProcessor (BitProcessor):
"""
MultiHitProcessor calculates a bitmask where channels with multiple hits per event are
set to 1.
"""
def process(self, in_list, out, t_front, ev_count):
multi = ev_count > 1
for ch in self.ch_ind[np.logical_and(multi, self.ch_mask)]:
# fill valid hits
out[:] += self.bitvals[ch]
# out[:] = self.bitvals[self.ch_ind[np.logical_and(multi, self.ch_mask)]].sum()
def max_combinator(in_dict, idx, ev_count, name):
"""
Returns the hit that has highest value.
:param in_dict: A dictionary including all data of the channel.
:param idx: Index of the first hit in the event
:param ev_count: Number of hits in the event
:param name: Name of the data
:return: A single value for the hit
"""
return np.max(in_dict[name][idx:idx+ev_count])
def max_e_combinator(in_dict, idx, ev_count, name):
"""
Returns the hit that has highest energy value.
:param in_dict: A dictionary including all data of the channel.
:param idx: Index of the first hit in the event
:param ev_count: Number of hits in the event
:param name: Name of the data
:return: A single value for the hit
"""
idx2 = idx + in_dict['energy'][idx:idx+ev_count].argmax()  # argmax gives the offset within the event slice
return in_dict[name][idx2]
def min_combinator(in_dict, idx, ev_count, name):
"""
Returns the hit that has smallest value.
:param in_dict: A dictionary including all data of the channel.
:param idx: Index of the first hit in the event
:param ev_count: Number of hits in the event
:param name: Name of the data
:return: A single value for the hit
"""
return np.min(in_dict[name][idx:idx+ev_count])
def mean_combinator(in_dict, idx, ev_count, name):
"""
Returns the mean of all hits in the event.
:param in_dict: A dictionary including all data of the channel.
:param idx: Index of the first hit in the event
:param ev_count: Number of hits in the event
:param name: Name of the data
:return: A single value for the hit
"""
return np.mean(in_dict[name][idx:idx+ev_count])
def sum_combinator(in_dict, idx, ev_count, name):
"""
Returns the sum of all hits in the event.
:param in_dict: A dictionary including all data of the channel.
:param idx: Index of the first hit in the event
:param ev_count: Number of hits in the event
:param name: Name of the data
:return: A single value for the hit
"""
return np.sum(in_dict[name][idx:idx+ev_count])
def kill_combinator(in_dict, idx, ev_count, name):
"""
Event is set to zero.
:param in_dict: A dictionary including all data of the channel.
:param idx: Index of the first hit in the event
:param ev_count: Number of hits in the event
:param name: Name of the data
:return: A single value for the hit
"""
return 0
multi_dict = {'max': max_combinator,
'min': min_combinator,
'mean': mean_combinator,
'sum': sum_combinator,
'kill': kill_combinator,
'max_e': max_e_combinator}
process_dict = {'col': ColProcessor,
'bit': BitProcessor,
'multihit': MultiHitProcessor,
'latency': LatencyProcessor}
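# The 'aggregate' key of a data info dict selects the processor class from process_dict and the
# 'multi' key selects the combinator from multi_dict, e.g. (sketch):
#
#     proc_cls = process_dict[info['aggregate']]   # ColProcessor for 'col', BitProcessor for 'bit', ...
#     combine = multi_dict[info['multi']]          # mean_combinator for 'mean', etc.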
class StreamData:
"""
StreamData is a manager that pushes list mode data to disk as it becomes available. Every kind
of data (time + energy for a channel, time + energy matrix for events, timing data and extra data) needs to have its
own streamer.
Channel mode data is stored as raw binary files, with one file holding the time (uint64) and one the energy (uint16).
Note: there is no reason to save data in channel mode after latency and coincidence window are set.
Event data is stored as raw binary with timestamps (uint64) and the energy matrix (uint16 x num_ch).
Timing data is a row of timing info (uint64 idx + uint64 timestamp + float32 dead time x num_ch).
Extra data can be given via the extras dictionary (keys: 'name', 'type', 'num_col'). Extras can include pile-up
flags (Type x num_ch) or coordinates (Type x N), where N is the number of coordinates.
"""
def __init__(self, path, data_name, method='event', raw=False, channels=None, extra_name=None):
"""
Initialize the write method, coincidence window and number of channels.
:param path: path to the data. String or a pathlib Path
:param data_name: string, the base filename
:param method: * event: have e and t data as input, output timestamps and events
* timing: have index to event plus dead time float
* extra: have some other data, such as coordinates or tags as input, stream to 'name' in the
extra dict
:param raw: raw data is defined separately for each channel.
:param channels: Used if raw = True. This is a list of channel numbers that are saved. The time and energy
files will be appended with '_ch{channels[idx]}.dat'
:param extra_name: Used if method is 'extra'. This is the name of the extra data. Filename will be
'data_name_{extra_name}.dat'
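Example (sketch, assuming ``cfg`` is a loaded configuration and ``time_vec``/``energy_mat`` are
event-mode numpy arrays)::

    streamer = StreamData(cfg.path['home'], 'run01', method='event', raw=False)
    streamer.write((time_vec, energy_mat))
    streamer.close()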
"""
self.raw = raw
self.path = Path(path)
self.data_name = data_name
self.channels = channels
self.method = method
self.extra_name = extra_name
if raw:
if self.channels is None:
raise ValueError('channels must be defined for raw stream mode!')
if self.method == 'event':
self.write = self._write_event
elif self.method == 'timing':
self.write = self._write_extra
elif self.method == 'extra':
if extra_name is None:
raise ValueError('extra_name must be defined for extra mode!')
self.write = self._write_extra
else:
print(self.method)
raise ValueError('Invalid method for disk write')
self.file_idx = 0 # the index of files in case the file size has exceeded 2GB and the data has been split
# Not in use at all
self.new_files()
def new_files(self):
self.time_files = []
self.data_files = []
if self.file_idx == 0:
suffix = 'dat'
else:
suffix = 'b{:02}'.format(self.file_idx)
if self.raw:
for ch in self.channels:
if self.method == 'event':
self.time_files.append((self.path / '{}_timestamps_ch{}.{}'.format(self.data_name,
ch,
suffix)).open('wb'))
self.data_files.append((self.path / '{}_events_ch{}.{}'.format(self.data_name,
ch,
suffix)).open('wb'))
elif self.method == 'timing':
self.data_files.append((self.path / '{}_timing_ch{}.{}'.format(self.data_name,
ch,
suffix)).open('wb'))
elif self.method == 'extra':
self.data_files.append((self.path / '{}_{}_ch{}.{}'.format(self.data_name,
self.extra_name,
ch,
suffix)).open('wb'))
else:
if self.method == 'event':
self.time_files.append((self.path / '{}_timestamps.{}'.format(self.data_name, suffix)).open('wb'))
self.data_files.append((self.path / '{}_events.{}'.format(self.data_name, suffix)).open('wb'))
elif self.method == 'timing':
self.data_files.append((self.path / '{}_timing.{}'.format(self.data_name, suffix)).open('wb'))
elif self.method == 'extra':
self.data_files.append((self.path / '{}_{}.{}'.format(self.data_name,
self.extra_name,
suffix)).open('wb'))
def _write_event(self, data):
"""
Writes a chunk of data to file. For raw mode the data is given in a list
of len num_ch, but only channels specified in self.channels are actually written
into file.
:param data: Tuple of (Timestamp as np vector, Energies as np matrix) or a tuple of lists of
such if self.raw==True.
:return:
"""
t_data, e_data = data
if self.raw:
for idx, ch in enumerate(self.channels):
if len(t_data[ch]) > 1:
self.time_files[idx].write(t_data[ch].tobytes())
self.data_files[idx].write(e_data[ch].tobytes())
else:
self.time_files[0].write(t_data.tobytes())
self.data_files[0].write(e_data.tobytes())
def _write_extra(self, data):
"""
Same as _write_event.
:param data: numpy matrix or list of matrices if raw == True
:return:
"""
if self.raw:
for idx, ch in enumerate(self.channels):
if len(data[ch]) > 1:
self.data_files[idx].write(data[ch].tobytes())
else:
self.data_files[0].write(data.tobytes())
def close(self):
for fil in self.time_files + self.data_files:
fil.close()
def read_binary_data(data_path, base_name, cfg, mode='event'):
"""
:param data_path: Path to the data directory
:param base_name: Base name of the data
:param cfg: The detector config dictionary
:param mode: What mode of data to read: 'event' or 'channel'.
:return: A tuple of (data_dict, timing_data) in 'event' mode, or a tuple of (list of per-channel data dicts,
list of per-channel timing data) in 'channel' mode.
The detector configuration is needed for defining the extras:
List of dicts defining extra data files, type and number of
columns. extras = {"name":'x', "type":'t', "num_col":'n'}, where type
is a numpy type string of the data. Several extras can be defined in
det_cfg (coord, ch_flags). These are handled automatically if they are
present.
Some extras, such as coord, need to have additional definitions in the
config. For coord, it is the 'coordinates' list which defines the number
of coordinates, the channels the data is found and the order of the
coordinates in i, j notation.
"""
event_info = cfg.det['events']
# Find the data and determine num_ch and ev_sz
extras = cfg.det['extras']
timenames, enames, tnames, xnames = ut.find_data_files(data_path, base_name, cfg, mode)
for fname in timenames + enames: # not checking against timing data as sometimes it has to be generated afterwards
if not fname.exists():
print('fname is:', fname)
raise ex.ListModeDataNotFoundError('Could not find all data files')
for chnamelist in xnames:
for fname in chnamelist:
if (fname is not None) and (not fname.exists()):
print('fname is:', fname)
raise ex.ListModeDataNotFoundError('Could not find all data files')
# Now all files in the name lists are loaded
# For channel data this is one item per ch, for events there is only one item. Each item of full_data_list
# is a ch_data_dict containing time, energy and extras. (Some extras are not included in channel mode read.)
# with time vector, energy and individual extras as items
full_data_list = []
timing_list = []
# build return tuple. Empty channels are given zeros vector instead of a memmap to prevent crashing the loader.
print('Read binary', len(timenames))
for idx in range(len(timenames)):
ch_data_dict = dict()
timename = timenames[idx]
ename = enames[idx]
# First the timing
ev_sz = timename.stat().st_size // 8 # number of events
if ev_sz > 0:
try:
ch_data_dict['time'] = np.memmap(timename, dtype='uint64', mode='r', shape=(ev_sz,))
except FileNotFoundError:
print(timename, 'not found!')
raise ex.ListModeDataNotFoundError
except:
print('Data load fails!')
raise
else:
#num_ch = len(cfg.det['ch_cfg'])
ch_data_dict['time'] = np.zeros((0,), dtype='uint64') # empty channel is just empty
# now for events and extras
if mode == 'channel':
# First channel mode
if ev_sz > 0:
print(event_info['type'])
ch_data_dict['energy'] = np.memmap(ename, dtype=event_info['type'], mode='r', shape=(ev_sz,))
else:
ch_data_dict['energy'] = np.zeros((0,), dtype=event_info['type'])
# for some data types the timing info is missing from channel data. Geant4 for example, but also
# appended Caen files are dumped without timing.
tname = tnames[idx]
try:
timing_sz = tname.stat().st_size // 20
except FileNotFoundError:
print('No tdata for ch', idx)
# need to generate timing data
generate_timing(tname, cfg.det['ch_cfg'][idx]['pdeadtime'], ch_data_dict['time'])
timing_sz = tname.stat().st_size // 20
# Actually loading the data here
try:
timing_list.append(np.memmap(tname, dtype=[('idx', '<u8'), ('t', '<u8'), ('dt0', '<f4')],
mode='r', shape=(timing_sz, 1)))
except:
print('Fails on load of timing data for channel', idx)
raise
# finally all the extras that are defined in channel mode
if extras is not None:
try:
for e_idx, extra in enumerate(extras):
# if this extra has channel info
if extra['name'] not in ['multihit', 'latency']: # if info for this channel exists
xname = xnames[e_idx][idx]
if xname: # an extra may be defined for a subset of channels. Skip if empty.
# single channel extra always 1 column wide
if ev_sz > 0:
ch_data_dict[extra['name']] = np.memmap(xname, dtype=extra['type'], mode='r',
shape=(ev_sz,))
else:
ch_data_dict[extra['name']] = np.zeros((ev_sz,), dtype=extra['type'])
except:
print('Channel mode extras fail!')
print(xnames)
print(e_idx, idx)
raise
# event mode
else:
tname = tnames[0]
num_ch = len(cfg.det['ch_cfg']) # need num_ch to shape the data
if ev_sz > 0:
try:
ch_data_dict['energy'] = np.memmap(ename, dtype=event_info['type'], mode='r',
shape=(ev_sz, num_ch))
except:
print('Fails when loading events!')
raise
else:
ch_data_dict['energy'] = np.zeros((0, num_ch), dtype=event_info['type'])
timing_sz = tname.stat().st_size // (16 + num_ch*4)
try:
type_list = [('idx', '<u8'), ('t', '<u8')]
for x in range(num_ch):
type_list.append(('dt{}'.format(x), '<f4'))
timing_list.append(np.memmap(tname, dtype=type_list,
mode='r', shape=(timing_sz,)))
except:
                print('Fails on load of timing data!', tname)
raise
            # loop through the extra data defined in the config
if extras is not None:
for e_idx, extra in enumerate(extras):
# if this extra has info
xname = xnames[e_idx][idx]
if ev_sz > 0:
try:
ch_data_dict[extra['name']] = np.memmap(xname, dtype=extra['type'],
mode='r', shape=(ev_sz, extra['num_col']))
except:
print('Loading extras fail!')
raise
else:
ch_data_dict[extra['name']] = np.zeros((0, extra['num_col']), dtype=extra['type'])
full_data_list.append(ch_data_dict)
print(timing_list)
if mode == 'event':
print('Read binary data in event mode')
return full_data_list[0], timing_list[0]
else:
return full_data_list, timing_list
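# Usage sketch for the loader above (the function name and signature here are assumed from the variables it
# uses; the run directory and base name are made-up examples). In 'event' mode a single ch_data_dict and a
# single timing memmap are returned, in 'channel' mode one of each per channel:
#
#     data, timing = read_binary_data(Path('/data/run_001'), 'run_001', cfg, mode='event')  # assumed name
#     data['time'].shape      # -> (n_events,)
#     data['energy'].shape    # -> (n_events, num_ch)
#     timing.dtype.names      # -> ('idx', 't', 'dt0', ...)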
[docs]def data_info(info, ch_list):
"""
Fills data_info dict with defaults for parts that are missing. Hardcoded settings for energy, multihit and latency
data will be overwritten if defined in config. A warning is printed if setup is overwritten.
:param info: info dict
:param ch_list: info dict
:return: dict with missing keys filled with defaults.
"""
# channel mask defines num_col so it has to be checked and calculated first
try:
ch_mask = info['ch_mask']
except KeyError:
ch_mask = list(np.ones((len(ch_list)), dtype='u1'))
info['ch_mask'] = ch_mask
# hardcoded values for different datatypes.
e_hardcoded = {'aggregate': 'col',
'empty_val': -1}
mh_hardcoded = {'type': 'u1',
'num_col': 1,
'aggregate': 'multihit',
'multi': 'max',
'empty_val': 0}
lat_hardcoded = {'type': 'i2',
'aggregate': 'latency',
'multi': 'min',
'unit': 'ns',
'raw_unit': 'ns'}
default = {'multi': 'max',
'empty_val': 0}
e_default = {'type': 'i2',
'raw_unit': 'ch',
'unit': 'keV'}
lat_default = {'main': 0}
# hardcoded values are written over ones defined in info. num_col is calculated and defaults are applied.
if info['name'] == 'energy':
for key in info:
if key in e_hardcoded:
print('Warning, {} in energy data is incompatible and will be overwritten!'.format(key))
info.update(e_hardcoded)
info['num_col'] = sum(ch_mask)
default.update(e_default)
for key, value in default.items():
if key not in info:
info[key] = value
elif info['name'] == 'multihit':
for key in info:
if key in mh_hardcoded:
                print('Warning, {} in multihit extra is incompatible and will be overwritten!'.format(key))
info.update(mh_hardcoded)
elif info['name'] == 'latency':
for key in info:
if key in lat_hardcoded:
                print('Warning, {} in latency extra is incompatible and will be overwritten!'.format(key))
info.update(lat_hardcoded)
default.update(lat_default)
for key, value in default.items():
if key not in info:
info[key] = value
info['num_col'] = sum(ch_mask)
info['empty_val'] = -32768
else:
agg = info['aggregate']
if issubclass(process_dict[agg], process_dict['bit']):
info['num_col'] = 1
else:
info['num_col'] = sum(ch_mask)
for key, value in default.items():
if key not in info:
info[key] = value
return info
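# Minimal sketch of how data_info fills in a generic extra definition (the 'coinc' name and the two-channel
# ch_list are made-up; assumes 'col' is a valid, non-bit aggregate key in process_dict):
#
#     info = data_info({'name': 'coinc', 'type': 'u1', 'aggregate': 'col'}, ch_list=[0, 1])
#     # -> ch_mask=[1, 1], num_col=2, plus the generic defaults multi='max' and empty_val=0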
[docs]def load_calibration(config):
"""
Loads calibration for the detector. Calibration gives the 2nd degree function coefficients for calibration for each
channel and for each data type. The data is organized as a dictionary with data types as keys and each data as
numpy arrays with channel in first axis and three coefficients (a, b and c) in second axis.
Missing data is fixed with dummy calibration ([0,1,0] coefficients), but incompatible data (e.g. wrong number of
channels) will raise an exception.
Old calibration data had keys for peaks used for calibration, but they have been dropped.
    :param config: The detector config object (not yet containing the calibration info)
    :return: The calibration dictionary read from disk. Missing data is fixed with dummy calibration, but
             incompatible data raises an exception.
    """
cal_name = config.det['cal_name']
try:
        with ut.find_path(config, cal_name, '_ecal.json').open('r') as fil:
            cal = json.load(fil)
except FileNotFoundError:
print('Calibration file not found!')
raise ex.ListModeConfigurationError('Calibration file not found!')
# Convert to numpy arrays and check that data is complete:
try:
if len(cal['energy']) == len(config.det['ch_list']):
cal['energy'] = np.asarray(cal['energy'])
else:
raise ex.ListModeConfigurationError('Incompatible calibration data for energy!')
except KeyError:
temp = np.zeros((len(config.det['ch_list']), 3))
temp[:, 1] = 1
cal['energy'] = temp
for extra in config.det['extras']:
data = extra['name']
#if not issubclass(dat.process_dict[extra['aggregate']], dat.process_dict['bit']): # bitmasks are not calibrated
try:
if len(cal[data]) == extra['num_col']:
cal[data] = np.asarray(cal[data])
else:
raise ex.ListModeConfigurationError('Incompatible calibration data for extra data!')
except KeyError: # missing calibration data is just generated here
temp = np.zeros((extra['num_col'], 3))
temp[:, 1] = 1
cal[data] = temp
return cal
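# Sketch of the calibration file contents looked up via ut.find_path(config, cal_name, '_ecal.json')
# (channel count and coefficient values below are illustrative only). Each key maps to one [a, b, c]
# coefficient list per channel; the dummy [0, 1, 0] used above suggests the order (constant, linear, quadratic):
#
#     {
#         "energy": [[0.0, 0.37, 0.0],
#                    [0.0, 0.41, 1.2e-6]]
#     }
#
#     cal = load_calibration(config)
#     cal['energy'].shape   # -> (len(config.det['ch_list']), 3)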
[docs]def load_config(det_name, local_cfg, data_name=None):
"""
    The detector configuration object is a namespace with:
    paths into the configuration directories and optionally to the data,
    the contents of the detector configuration file, and
    the calibration for the detector. Calibration gives the 2nd degree function coefficients for each channel
    and each data type. The data is organized as a dictionary with data types as keys and each entry as a
    numpy array with channel in the first axis and the three coefficients (a, b and c) in the second axis.
    Omitted calibration data is replaced with [0, 1, 0] coefficients.
:param det_name: Name of the detector configuration file without the _cfg.json
:param local_cfg: Paths needed to find configurations and data
    :param data_name: Optional path to the data; it is added as "home" into config.path
:return: detector configuration object
"""
# load path information
path_cfg = local_cfg.copy()
if data_name is not None: # home is useful when saving data and looking for plot_name_list
data_name = Path(data_name)
        if data_name.is_absolute():
            path_cfg['home'] = data_name
        else:
            path_cfg['home'] = Path(path_cfg['data_dir']) / data_name
else:
path_cfg['home'] = path_cfg['data_dir']
# and detector config
cfg_dir = path_cfg['cfg_dir']
with (Path(cfg_dir) / (det_name + '_cfg.json')).open('r') as fil:
det_cfg = json.load(fil)
    # Detector config needs sensible values set for the data definitions; data_info fills in the defaults.
det_cfg['events'] = data_info(det_cfg['events'], det_cfg['ch_list'])
det_cfg['extras'] = [data_info(extra, det_cfg['ch_list']) for extra in det_cfg['extras']]
# The rest is only needed for DAQ and can be None
if det_cfg['readout_cfg'] is not None:
with (Path(cfg_dir) / (det_cfg['readout_cfg'] + '_boardcfg.json')).open('r') as fil:
readout_cfg = json.load(fil)
else:
readout_cfg = None
ch_cfg = []
for chdata in det_cfg['ch_cfg']:
if chdata['cfg_file'] is not None:
with (Path(cfg_dir) / (chdata['cfg_file'] + '_chcfg.json')).open('r') as fil:
ch_cfg.append(json.load(fil))
else:
ch_cfg.append(None)
# added calibrations
config = types.SimpleNamespace(path=path_cfg,
det=det_cfg,
readout=readout_cfg,
ch=ch_cfg,
cal=None)
config.cal = load_calibration(config)
return config
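# Usage sketch for load_config (the detector name, directory paths and run name below are made-up examples;
# local_cfg only needs to provide the path keys used above):
#
#     local_cfg = {'cfg_dir': '/opt/listmode/config', 'data_dir': '/data'}
#     cfg = load_config('mydet', local_cfg, data_name='run_001')
#     cfg.det['ch_list']    # channel list from mydet_cfg.json
#     cfg.cal['energy']     # (n_ch, 3) array of calibration coefficients
#     cfg.path['home']      # points to /data/run_001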