Source code for listmode.loaders

import time
import datetime as dt

from struct import unpack, calcsize

import numpy as np

import listmode.data
from listmode import utils as ut, misc


def pixie4_loader(self, data_path):
    pass


def g4_loader(self, data_path):
    pass


def dspec_loader(self, data_path):
    pass

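# The loaders in this module are plain module-level functions that take ``self`` as their first
# argument; per their docstrings they are attached to a Data instance as methods before use.
# A minimal sketch of one way such a binding could be done (the actual attachment mechanism
# inside listmode.data is an assumption here, it is not shown in this module):
#
#     import types
#     data = listmode.data.Data(config)                  # hypothetical construction
#     data.loader = types.MethodType(caen_loader, data)
#     data.loader(pathlib.Path('/path/to/run_dir'))      # calls caen_loader(data, path)
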
def caen_loader(self, data_path):
    """
    Reads data saved by a Caen MCA. The Caen data format does not record the time and date of
    data taking, so these are calculated from the datafile modification timestamp. If several
    runs have been appended to the same file the results may be unpredictable. In the future all
    appends should be recognized and extracted as channel files, but only the first one is
    loaded as data. The loaded channel list is compared to the one in cfg and an error is
    generated if they do not match.

    The Caen loader supports two kinds of bitmask extra data: pileup and flags. The flags bit is
    set whenever the 'extras' data is nonzero. The pileup bit is set whenever the energy for the
    channel is zero, signaling a pileup event (unless set to return an energy value).

    :param self: Calling Data instance. This is set when the loader is set as a class method.
    :param data_path: path to a file
    :return:
    """
    _DATA_DICT = {0: ('Trigger Time Tag', 't'),
                  1: ('Energy', 'E'),
                  2: ('Extras', 'x'),
                  3: ('Short Energy', 'sE'),
                  4: ('DPP Code', 'DPP'),
                  255: ('Fake', 'f')}
    _TYPE_DICT = {0: '<b', 1: '<B',
                  2: '<H', 3: '<H',
                  4: '<i', 5: '<I',
                  6: '<q', 7: '<Q',
                  8: 'string',  # placeholder for the generic string
                  9: '<l', 10: '<d', 11: '<c',
                  128: '<H',  # placeholder for the 3 byte string
                  255: 'Fake'}

    def _check_time_corruption(vector, last_good_v):
        """
        Check for corrupt timestamps within a readout chunk. Return begin, stop and next good
        indices of vector.

        :param vector: vector of timestamps
        :param last_good_v: last accepted timestamp value
        :return: (begin_idx, stop_idx, next_good)
        """
        stop_idx = vector.shape[0]
        begin_idx = 0
        next_good = -1
        temp = np.zeros_like(vector, dtype='bool')
        # temp[1:] = vector[1:] > last_good_v + 36000000000000  # 10h gap
        temp[1:] = vector[1:] > vector[:-1] + 36000000000000  # 10h gap
        if vector[0] > last_good_v + 72000000000000:
            # First index is corrupt and has to be handled, as this will be true for the second
            # subrange automatically. Search for a good value to begin the iteration.
            temp[0] = True
            print(vector[0], last_good_v)
            good = np.argmin(temp)
            if vector[1] > last_good_v + 72000000000000 and good == 0:
                # all indices are corrupt
                print('All indices are corrupt')
                raise
            begin_idx = good
        # Check for garbage timestamps
        if np.any(temp[begin_idx:]):
            print('Garbage timestamp event')
            good = np.argmax(temp[begin_idx:])  # points to first bad index or zero if all are good
            if good != 0:
                stop_idx = begin_idx + good  # one past the last good idx
                ng = np.argmin(temp[stop_idx:])  # points to next good index or zero if all bad
                if ng != 0:
                    next_good = stop_idx + ng
        retval = (begin_idx, stop_idx, next_good)
        return retval

    def _read_header(fileid):
        PROTOCOL, NWORDS, EMPTY = unpack('<' + 'B' * 2 + 'H', fileid.read(4))
        type_list = []
        type_names = []
        for line in range(NWORDS - 1):
            DATA, TYPE, EMPTY = unpack('<' + 'B' + 'H' + 'B', fileid.read(4))
            if _DATA_DICT[DATA] != _DATA_DICT[4]:
                type_list.append(_TYPE_DICT[TYPE])
                type_names.append(_DATA_DICT[DATA])
        return PROTOCOL, type_list, type_names

    def make_streams(ch_list, data_path, base_name, pileup, flags, partnum):
        """
        Spawn a new set of streams with the file name modified by partnum.

        :param ch_list:
        :param data_path:
        :param base_name:
        :param pileup:
        :param flags:
        :param partnum:
        :return:
        """
        if partnum > 0:
            base_name = base_name + '_part-{}'.format(partnum)
        streams = []
        streams.append(listmode.data.StreamData(data_path, base_name, raw=True,
                                                method='event', channels=ch_list))
        if pileup:
            streams.append(listmode.data.StreamData(data_path, base_name, raw=True,
                                                    method='extra', extra_name='pileup',
                                                    channels=ch_list))
        else:
            streams.append(None)
        if flags:
            streams.append(listmode.data.StreamData(data_path, base_name, raw=True,
                                                    method='extra', extra_name='flags',
                                                    channels=ch_list))
        else:
            streams.append(None)
        streams.append(listmode.data.StreamData(data_path, base_name, raw=True,
                                                method='timing', channels=ch_list))
        return streams

    # First find the number of channels
    names = list(data_path.glob(self.base_name + '_ch???.dat'))
    print(names, self.base_name + '_ch???.dat')
    temp_list = []  # list of channel indices found from data_path
    channel_list = self.ch_list  # list of channel indices expected from config
    for name in names:  # find saved channel data
        print(name)
        chnum = int(name.stem.split('_ch')[1])
        temp_list.append(chnum)
    for ch in channel_list:  # check if needed channels are found from the data
        print(channel_list, temp_list)
        temp_list.pop(temp_list.index(ch))

    chunk_num = 100000
    timing_num = 1000
    extras = self.config.det['extras']
    pileup = False
    flags = False
    for extra in extras:
        if extra['name'] == 'pileup':
            pileup = True
        elif extra['name'] == 'flags':
            flags = True

    streamer, pileup_streamer, flag_streamer, time_streamer = make_streams(self.ch_list, data_path,
                                                                           self.base_name, pileup,
                                                                           flags, 0)
    stream_list = [(streamer, pileup_streamer, flag_streamer, time_streamer)]
    # mod_date = []
    # File modification date is the best estimate of run stop. Start is automatically calculated
    # by metadata.
    self.metadata.stop = dt.datetime.fromtimestamp(names[channel_list[0]].stat().st_mtime)

    # The datafiles are streamed one by one.
    # Caen files have no timing information: the timing file will be created after parsing.
    # Caen data can be corrupted by appending multiple runs into the same file by accident. This
    # is very bad for timing, because the timestamps are not monotonously increasing anymore.
    # These datafiles should be broken into several one-run files when opened. What is done here
    # is that new files with modified names are spawned when a timestamp is smaller than the
    # previous one. Only the first run belongs to this data, the others are named with a _part-N
    # postfix.
    for channel, ch_idx in enumerate(self.ch_list):
        # Empty lists, because StreamData takes the data for all channels. Only the current
        # channel is updated.
        E_vecs = [[] for _x in range(self.num_ch)]
        time_vecs = [[] for _x in range(self.num_ch)]
        p_vecs = [[] for _x in range(self.num_ch)]
        f_vecs = [[] for _x in range(self.num_ch)]
        timeout = [[] for _x in range(len(self.ch_list))]
        # negative is empty so that Caen 0 really corresponds to an event with energy 0
        E_vecs[ch_idx] = np.zeros((chunk_num,), dtype='int16')
        E_vecs[ch_idx].fill(-1)
        time_vecs[ch_idx] = np.zeros((chunk_num,), dtype='uint64')
        p_vecs[ch_idx] = np.zeros((chunk_num,), dtype='uint8')
        f_vecs[ch_idx] = np.zeros((chunk_num,), dtype='uint8')
        timing_vec = np.zeros((timing_num,), dtype=[('idx', '<u8'), ('t', '<u8'), ('dt', '<f4')])
        pdeadtime = self.config.det['ch_cfg'][ch_idx]['pdeadtime']
        sample_ns = self.config.det['sample_ns']
        ch_file = names[channel_list.index(ch_idx)]
        # prev_t = 0  # to check for decreasing timestamp event
        current_split = 0  # counts the decreasing timestamp events -> numbering for split files.
        # make sure we write to the right stream
        streamer, pileup_streamer, flag_streamer, time_streamer = stream_list[current_split]

        with open(ch_file, 'rb') as df:
            PROTOCOL, type_list, type_names = _read_header(df)
            types = [(type_names[x][1], type_list[x]) for x in range(len(type_list))]
            type_string = '<' + ''.join([x[1] for x in type_list])
            evnt_sz = calcsize(type_string[1:])
            chunk_size = chunk_num * evnt_sz  # prepare to read chunk_num events
            isdata = True
            # df.read(evnt_sz)
            chunk_idx = 0
            tf_idx = 1  # timing file starts with a row of zeroes, so the first idx is 1
            eventcounter = 0
            # last accepted time value. If this is suddenly overshot by a large margin the big
            # timestamps are excluded as corrupt.
            last_good_val = 0
            while isdata:
                buffer = df.read(chunk_size)
                buf_len = len(buffer) // evnt_sz  # number of rows in chunk.
                # isdata = buf_len == chunk_num  # Last data if less than chunk size.
                isdata = len(buffer) == chunk_size  # Last data if less than chunk size.
                chunk = np.frombuffer(buffer, dtype=types, count=-1, offset=0)

                # The data chunk can be split into sub ranges if there is a timestamp reset or
                # corrupt time data. Indices relating to these are set here.
                cur_begin_idx = 0  # where we start, usually 0
                cur_end_idx = chunk_num  # one past last idx of range, usually end of chunk
                next_begin_idx = -1  # where to start next range, negative if no next range
                # split_idx = 0  # the first index in the chunk that belongs to current data stream. Usually 0.
                # end_idx = buf_len  # the first index in the chunk that belongs to next data stream or buf_len.

                # We use indices for splitting, but the data is read out to arrays before the loop.
                time_ch = (chunk['t'][cur_begin_idx:cur_end_idx] * sample_ns)
                E_ch = (chunk['E'][cur_begin_idx:cur_end_idx] & 0x7FFF)
                if pileup:
                    # All bad events are included, also those that have an associated energy value
                    # which is for some reason still marked with the pileup bit (like a start of a
                    # saturation event, which is flagged as a pileup event if a trigger is detected
                    # during the veto period). The first part is not needed, because the PU bit is
                    # on if pileup has been seen (e=0). If e = 1 and the PU bit is set there is a
                    # saturation event, otherwise it is a generic bad event for an unknown reason.
                    p_ch = np.logical_or(chunk['E'][cur_begin_idx:cur_end_idx] == 0,
                                         (chunk['E'][cur_begin_idx:cur_end_idx] & 0x8000) != 0)
                if flags:
                    # Flags signify specific events found from EXTRAS. The existence of an extra
                    # does not signify much as such. It is a bitmask with a load of different data.
                    # bit 0: LOST EVENT - Events have been lost due to buffer full or preceding
                    #        saturation. These should still be recorded.
                    # bit 1: ROLL-OVER - The DPP-PHA algorithm creates a fake event with
                    #        Time Stamp = 0, Energy = 0, PU = 1, bit[3] and bit[1] of EXTRAS = 1
                    # bit 2: RESERVED
                    # bit 3: FAKE_EVENT
                    # bit 4: INPUT_SATURATION - An event saturated the input dynamics. The event
                    #        that saturates the dynamics has Energy = 0x7FFFF, while the PU flag is
                    #        set to 1 only if there is also a pile-up event in the trigger veto
                    #        period of 2*rise time.
                    # bit 5: LOST_TRG - Set to 1 whenever 1024 lost events have been detected
                    # bit 6: TOT_TRG - Set to 1 whenever 1024 total events have been detected
                    # bit 7: MATCH_COINC
                    f_ch = chunk['x'][cur_begin_idx:cur_end_idx] != 0

                # Check for corruption and splits
                while True:
                    # Check against timestamp corruption events. If one is found, then the data is
                    # streamed up to last good and the next iteration starts from next good.
                    corrupt_tuple = _check_time_corruption(time_ch[cur_begin_idx:cur_end_idx],
                                                           last_good_val)
                    if corrupt_tuple is not None:
                        cur_begin_idx = cur_begin_idx + corrupt_tuple[0]
                        cur_end_idx = cur_begin_idx + corrupt_tuple[1]
                        next_begin_idx = cur_begin_idx + corrupt_tuple[2]
                        # print('prev good', last_good_val)
                        last_good_val = time_ch[cur_end_idx - 1]  # end idx points to one past last good
                        # print('last good', last_good_val)
                        # print('tuple', corrupt_tuple)
                        # print('prevs', time_ch[cur_begin_idx], time_ch[cur_end_idx - 1])

                    # We need to check the monotonousness of the time vector for every chunk. If it
                    # is not monotonous, then the good data is written, new streamers are spawned
                    # and the rest of the data is iterated again. The while loop only quits after
                    # all data has been streamed. The possibly multiple splits complicate indexing,
                    # so we use split_idx to mark the start position of the current split and
                    # end_idx to mark the end of the current split. Normally these would be 0 and
                    # buf_len respectively.
                    split_idx = misc.check_monotonousness(time_ch[cur_begin_idx:cur_end_idx])
                    if split_idx is not None:
                        cur_end_idx = cur_begin_idx + split_idx
                        next_begin_idx = cur_end_idx

                    # write to disk
                    time_vecs[ch_idx] = time_ch[cur_begin_idx:cur_end_idx]
                    E_vecs[ch_idx] = E_ch[cur_begin_idx:cur_end_idx]
                    streamer.write((time_vecs, E_vecs))  # stream to .dat file to speed up
                    if pileup:
                        p_vecs[ch_idx] = p_ch[cur_begin_idx:cur_end_idx]
                        pileup_streamer.write(p_vecs)
                    if flags:
                        f_vecs[ch_idx] = f_ch[cur_begin_idx:cur_end_idx]
                        flag_streamer.write(f_vecs)

                    # Dead time is just guessed using pdeadtime (rise-time + flat-top + trigger holdoff).
                    counts_in_range = (cur_end_idx - cur_begin_idx)
                    eventcounter += counts_in_range
                    timing_vec[tf_idx] = (eventcounter - 1,
                                          time_vecs[ch_idx][cur_end_idx - 1],
                                          counts_in_range * pdeadtime * 1e-9)
                    tf_idx += 1

                    if split_idx is not None:
                        # New vecs and streamers are initialized if there was a split. The
                        # eventcounter is reset too.
                        current_split += 1
                        print("Timestamps not monotonous!!!", len(stream_list), current_split, split_idx)
                        timeout[ch_idx] = timing_vec[:tf_idx]
                        time_streamer.write(timeout)  # stream old timing data
                        if len(stream_list) <= current_split:
                            print('Spawning new files.')
                            streamer, pileup_streamer, flag_streamer, time_streamer = make_streams(
                                self.ch_list, data_path, self.base_name, pileup, flags, current_split)
                            stream_list.append((streamer, pileup_streamer, flag_streamer, time_streamer))
                        else:
                            print('Writing to existing files')
                            streamer, pileup_streamer, flag_streamer, time_streamer = stream_list[current_split]
                        eventcounter = 0
                        tf_idx = 1  # continue filling timing data from 1 (idx 0 is zeros)

                    if cur_end_idx < buf_len:
                        # still iterating
                        if next_begin_idx < 0:
                            # Here we are in the middle of a chunk, but there are no good events
                            # left. Go to the next chunk.
                            print('End of chunk corruption event!')
                            break
                        else:
                            cur_begin_idx = next_begin_idx
                            cur_end_idx = buf_len
                    else:
                        # Through the chunk
                        if next_begin_idx > 0:
                            # there is a next range after end of chunk!
                            print('Unhandled end of Chunk!')
                        break
                        # else:
                        #     # eventcounter += counts_in_range
                        #     break

                # eventcounter += buf_len
                chunk_idx += 1
                if tf_idx == timing_num:
                    # double the timing vector if it runs out
                    timing_vec = np.concatenate(
                        (timing_vec,
                         np.zeros((timing_num,), dtype=[('idx', '<u8'), ('t', '<u8'), ('dt', '<f4')])),
                        axis=0)
                    timing_num = timing_vec.shape[0]
                if not isdata:
                    print('Operation is normal!')

            timeout[ch_idx] = timing_vec[:tf_idx]
            time_streamer.write(timeout)
        print('ch done')

    for split in stream_list:
        print('In split close')
        for astream in split:
            # Empty extras are None
            if astream is not None:
                astream.close()

    self._load_channel_data(data_path)
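
# A minimal, self-contained sketch of the bitmask logic used by caen_loader above, run on a
# synthetic chunk. The field names 't', 'E' and 'x' follow _DATA_DICT; the dtype itself is an
# assumption for illustration only (the real one is built from the file header):
#
#     import numpy as np
#     demo = np.array([(100, 0x0000, 0), (200, 0x8123, 1), (300, 0x0456, 0)],
#                     dtype=[('t', '<u8'), ('E', '<u2'), ('x', '<u2')])
#     energy = demo['E'] & 0x7FFF                                        # 15-bit energy
#     pileup = np.logical_or(demo['E'] == 0, (demo['E'] & 0x8000) != 0)  # E == 0 or PU bit set
#     flags = demo['x'] != 0                                             # any EXTRAS bit set
#     # energy -> [0, 0x123, 0x456]; pileup -> [True, True, False]; flags -> [False, True, False]
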
def panda_loader(self, data_path):
    """
    Reads PANDA data. Even though PANDA data is already reconstructed in event mode, it will
    still be broken down to channel files for the pipeline. The PANDA clock is used for dead
    time and timing.

    The DSSSD is handled as two detectors with an associated coordinate extra. Due to this and
    the capability of the multi-hit processor to combine data from several channels, the DSSSD
    data is strip calibrated when read from the raw file. If the strip calibration needs to be
    redone later, one has to make a dummy calibration to access the uncalibrated strip values.

    If PANDA data is divided into several files, only one is converted and loaded. In this case
    either start_time, stop_time or both are undefined and will be calculated from the data
    length and, in the worst case, the file modification time.

    :param self: Calling Data instance. This is set when the loader is set as a class method.
    :param data_path: path to a file
    :return:
    """
    BUFFER_TYPES = {1: 'DATABF', 2: 'SCALERBF', 3: 'SNAPSBF', 4: 'STATEVARBF', 5: 'RUNVARBF',
                    6: 'PKTDOCBF', 11: 'BEGRUNBF', 12: 'ENDRUNBF', 13: 'PAUSEBF', 14: 'RESUMEBF',
                    30: 'PARAMDESCRIP'}
    # init vars
    dead_time = 0.
    total_time = 0.
    evsum = 0
    ch_list = np.array(self.config.det['ch_list'])
    # strip calibration
    strip_cal = ut.load_strip_cal(self.config)
    ch_file = data_path / (self.base_name + self.data_ext)
    # init
    f_head_sz = 4  # frame header size of adc buffer
    chunk_over = 2000  # single buffer should have no more events. Max I've seen is ~1500.
    chunk_size = 250000  # Going for fewer array concatenations.
    # array_size = 250000  # current size of array
    big_time = 0  # incremented when the timestamp overflows
    start_time = None
    stop_time = None
    prevtstamp = 0
    min_tstamp = 0
    # path, data_name, method='event', raw=False, channels=None, extra_name=None
    streamer = listmode.data.StreamData(data_path, self.base_name, raw=True, method='event',
                                        channels=self.ch_list)
    time_streamer = listmode.data.StreamData(data_path, self.base_name, raw=True, method='timing',
                                             channels=self.ch_list)
    timing_datas = [np.zeros((2000,), dtype=[('idx', 'u8'), ('dt', 'f4')]) for _x in range(self.num_ch)]
    # defining the out arrays
    time_vecs = [np.zeros((chunk_size + chunk_over,), dtype='uint64') for _x in range(self.num_ch)]  # for timestamp.
    e_mats = [np.zeros((chunk_size + chunk_over,), dtype='uint16') for _x in range(self.num_ch)]  # Energy data
    [e_mats[_x].fill(-1) for _x in range(self.num_ch)]
    # PANDA has a coord extra. Coord should be a signed integer so that we can put empty as -1.
    extras = self.config.det['extras']
    for idx in range(len(extras)):
        ex = listmode.data.data_info(extras[idx], ch_list)
        if ex['name'] == 'coord':
            c_dtype = ex['type']
            c_chmask = np.array(ex['ch_mask'], dtype='bool')
            coord_streamer = listmode.data.StreamData(data_path, self.base_name, raw=True,
                                                      method='extra', extra_name='coord',
                                                      channels=ch_list[c_chmask])
            coord_datas = [np.zeros((chunk_size + chunk_over,), dtype=c_dtype) for _x in range(self.num_ch)]
            [coord_datas[_x].fill(-1) for _x in (0, 1)]
    total_counter = np.zeros((self.num_ch,), dtype='uint64')  # events already written
    empty_counter = np.zeros((self.num_ch,), dtype='uint64')  # zero energy events (using these?)
    events = 0  # total number of recorded accepted events
    chunk_counter = np.zeros((self.num_ch,), dtype='uint64')  # ch indices of event in current chunk
    ev_counter = np.zeros((self.num_ch,), dtype='uint64')  # ch indices of event in current event
    timing_idx = 1  # scalers are the same for every channel, so a single idx instead of a counter
    first = False
    first_time_of_file = 0  # This is needed to reset the timestamps on a continuation file

    with open(ch_file, 'rb') as df:
        while True:
            # reading next buffer
            buf_counter = 0
            buf_idx = 0  # Byte offset of current buffer
            buffer = df.read(26656)
            if len(buffer) != 26656:
                print('Buffer size only {} / 26656'.format(len(buffer)))
                if np.any(chunk_counter > 0):
                    # save whatever is in the buffer
                    streamer.write(([(time_vecs[x][:chunk_counter[x]] - first_time_of_file) *
                                     self.config.det['sample_ns'] for x in self.ch_list],
                                    [e_mats[x][:chunk_counter[x]] for x in self.ch_list]))
                    coord_streamer.write([coord_datas[x][:chunk_counter[x]] for x in (0, 1)])
                    total_counter += chunk_counter  # total counter used to calculate metadata
                print(total_counter, 'events')
                # here the timing index is zero, if a save has just happened
                if timing_idx > 0:
                    if timing_datas[0]['idx'][timing_idx - 1] < int(total_counter[0] - 1):
                        # check if timing data is needed
                        for ch in self.ch_list:
                            print('Data stop without scaler buffer!')
                            timing_datas[ch][timing_idx] = (int(total_counter[ch] - 1),
                                                            chunk_counter[ch] *
                                                            self.config.det['ch_cfg'][ch]['pdeadtime'] * 1e-9)
                        timing_idx += 1
                    time_streamer.write([timing_datas[x][:timing_idx] for x in self.ch_list])
                break

            # data_sz, data_type, num_evt = self._read_header(buffer[:28])
            datatuple = unpack('<' + 'h' * 4 + 'i' + 'h' * 6 + 'i', buffer[:28])
            buf_idx += 28
            data_sz = datatuple[0] * 2  # data size in bytes
            data_type = datatuple[1]
            num_evt = datatuple[5]

            if BUFFER_TYPES[data_type] == 'DATABF':
                buf_idx += f_head_sz  # offset the frame header
                eventno = 0
                evsum += num_evt  # full event size is added to evsum, but rejected events are subtracted later
                while eventno < num_evt:
                    eventno += 1
                    last_ev_idx = buf_idx
                    # First content is the amount of 2-byte words for the event
                    num_words = unpack('<h', buffer[buf_idx:buf_idx + 2])[0]
                    buf_idx += 2
                    # matr_idx = chunk_counter + buf_counter
                    if num_words == 6:
                        # empty event, not counted
                        evsum -= 1  # remove empty events from total sum
                        buf_idx += num_words * 2  # go to end of event
                        continue
                    # read the rest of the event
                    # event = unpack('<' + 'H' * num_words, buffer[buf_idx:buf_idx + num_words * 2])
                    ev_idx = 0  # index in the current event data words
                    tstamp = 0
                    ev_counter.fill(0)
                    while buf_idx < last_ev_idx + num_words * 2:
                        # Looping through the adcs in the event. The adc data is organized as:
                        # word 0: number of hits
                        # word 1: ADC number. 0 for x, 1 for y and 2 for hpge + beta
                        # word 2: Energy
                        # word 3: Channel number
                        # [word 4: next hit energy]
                        # [word 5: next hit channel]
                        # next to last 2 words: adc timestamp 1 and 2
                        # last 2 words: end of adc data (0xFFFF, 0xFFFF)

                        # read number of hits and adc id
                        nhits, adc = unpack('<' + 'H' * 2, buffer[buf_idx:buf_idx + 4])
                        buf_idx += 4
                        if nhits == 0xFFFF:
                            # WTF? Empty ADC frame? Result of digital threshold, I presume. Skip!
                            # print('empty adc frame')
                            continue
                        nhits -= 0x2001  # fourteenth bit is always 1, 1 means 0
                        adc = adc & 0x3  # first two bits code the detector
                        # energy/channel pairs, timestamp and footer
                        event = unpack('<' + 'H' * nhits * 2 + 'I' * 2,
                                       buffer[buf_idx:buf_idx + nhits * 4 + 8])
                        buf_idx += nhits * 4 + 8
                        # Take the first tstamp in the event. This structure completely screws up
                        # the timestamp reset detection so it is checked first...
                        if tstamp == 0:
                            t_val = (event[-2] & 0x3FFFFFFF)  # bits 30 and 31 always on
                            if t_val < (prevtstamp - 1000):
                                # clock overflow when the timestamp goes backwards.
                                print('Clock overflow event!', t_val, prevtstamp - 1000, min_tstamp)
                                big_time += 2 ** 30  # 30 bit clock overflow
                                prevtstamp = 0
                                min_tstamp = 0
                            # t_val can be smaller than the previous tstamp. This is due to
                            # differences between adc clocks. Using min_tstamp to ensure
                            # monotonous time in this case.
                            tstamp = max(min_tstamp, t_val)

                        if adc < 2:
                            # hit to the dsssd, loop through hits.
                            for hit_idx in range(nhits):
                                E = event[2 * hit_idx]
                                ch = (event[2 * hit_idx + 1] & 0x3ff)  # - 0x400
                                try:
                                    if E > 0:
                                        matr_idx = chunk_counter + ev_counter
                                        e_mats[adc][matr_idx[adc]] = (strip_cal[adc, ch, 0] +
                                                                      strip_cal[adc, ch, 1] * E +
                                                                      strip_cal[adc, ch, 2] * E ** 2)
                                        time_vecs[adc][matr_idx[adc]] = tstamp + big_time
                                        coord_datas[adc][matr_idx[adc]] = ch
                                        ev_counter[adc] += 1
                                    else:
                                        print('empty')
                                        empty_counter[adc] += 1
                                except:
                                    print('Error in strip calibration!')
                                    print('matr_idx', matr_idx[adc])
                                    print('ch', ch, 'chunk', chunk_counter, 'ev', ev_counter)
                                    print('shapes', e_mats[adc].shape, time_vecs[adc].shape,
                                          coord_datas[adc].shape)
                                    raise
                        else:
                            # hpge and beta are otherwise straightforward, but the ADC channels for
                            # beta and hpge are 16 channels apart. The detector is incremented for
                            # the beta detector. Timing can get hairy on events with no tstamp in
                            # adc 1 or 2, as adc 3 has smaller ticks which can sometimes overlap
                            # nastily with the next hit in adc 1 or 2.
                            for hit_idx in range(nhits):
                                # loop through hits
                                ch = event[2 * hit_idx + 1] & 0xff
                                E = event[2 * hit_idx]
                                if E > 0:
                                    matr_idx = chunk_counter + ev_counter
                                    if ch == 0:
                                        detector = 2
                                    if ch == 16:
                                        detector = 3
                                    # e_mat[matr_idx, detector] = E
                                    e_mats[detector][matr_idx[detector]] = E
                                    # make sure there is no time overlap with adc 1 or 2
                                    time_vecs[detector][matr_idx[detector]] = tstamp + big_time
                                    ev_counter[detector] += 1
                                else:
                                    print('empty')
                                    empty_counter[detector] += 1

                    buf_counter += 1  # buffer counter incremented once per event
                    if tstamp == 0:
                        print()
                        print('zero time event!')
                        raise
                    min_tstamp = tstamp + 1
                    prevtstamp = t_val
                    # tstamp = 0
                    chunk_counter += ev_counter  # chunk counter incremented for every count in event

                # NOTE! indenting this to the buffer loop to try to fix overflow problems!!!
                # buf_counter = 0
                if np.any(chunk_counter >= chunk_size):
                    # Write data when the chunk overflows, i.e. save whatever is in the buffer.
                    print('save buffer', chunk_counter, 'timing idx', timing_idx)
                    if not first:
                        first_time_of_file = min([time_vecs[x][0] for x in self.ch_list])
                        first = True
                    streamer.write(([(time_vecs[x][:chunk_counter[x]] - first_time_of_file) *
                                     self.config.det['sample_ns'] for x in self.ch_list],
                                    [e_mats[x][:chunk_counter[x]] for x in self.ch_list]))
                    coord_streamer.write([coord_datas[x][:chunk_counter[x]] for x in (0, 1)])
                    time_streamer.write([timing_datas[x][:timing_idx] for x in self.ch_list])
                    [x.fill(0) for x in time_vecs]
                    [x.fill(-1) for x in e_mats]
                    [x.fill(-1) for x in coord_datas]
                    [x.fill(0) for x in timing_datas]
                    total_counter += chunk_counter
                    chunk_counter.fill(0)
                    timing_idx = 0
                events += buf_counter

            elif BUFFER_TYPES[data_type] == 'SCALERBF':
                buf_idx += f_head_sz  # offset the frame header
                sc_header = unpack('<IIhIIh', buffer[buf_idx:buf_idx + 20])
                buf_idx += 20
                # Dead time and total time counts in scalers. The scalers don't signal clock
                # overflow, but seem to track dead time at least.
                sc_data = unpack('<' + 'I' * num_evt, buffer[buf_idx:buf_idx + num_evt * 4])
                dtime = sc_data[0] * 1.0e-6  # scaler data in s (has internal divisor, timeCalibration, of 1000)
                dead_time += dtime
                for ch in self.ch_list:
                    timing_datas[ch][timing_idx] = (int(total_counter[ch] + chunk_counter[ch] - 1), dtime)
                total_time += sc_data[1] * 1.0e-6
                timing_idx += 1

            elif BUFFER_TYPES[data_type] == 'BEGRUNBF':
                print('BEGRUNBF found - start datetime read!')
                # Control buffers (BEG- and ENDRUNBF) have an 80 character title for the run and
                # the date and time.
                buf_idx += f_head_sz  # offset the frame header
                title = bytes(buffer[buf_idx:buf_idx + 80]).decode()  # the text is handled by bytes
                cdata = unpack('<I7h', buffer[buf_idx + 80:buf_idx + 98])
                # There is a possibility that the month begins from 0. Dates seem to be consistently off.
                start_time = dt.datetime(cdata[3] + 1900, cdata[1] + 1, cdata[2], cdata[4],
                                         cdata[5], cdata[6], int(cdata[7] * 1e5))
                first = True  # used to decide whether to cut timestamps by the first time entry

            elif BUFFER_TYPES[data_type] == 'ENDRUNBF':
                print('ENDRUNBF found - stop datetime read!')
                buf_idx += f_head_sz  # offset the frame header
                title = bytes(buffer[buf_idx:buf_idx + 80]).decode()  # the text is handled by bytes
                cdata = unpack('<I7h', buffer[buf_idx + 80:buf_idx + 98])
                stop_time = dt.datetime(cdata[3] + 1900, cdata[1] + 1, cdata[2], cdata[4],
                                        cdata[5], cdata[6], int(cdata[7] * 1e5))

            else:
                print('Unhandled buffer type found!')
                print(BUFFER_TYPES[data_type])

    streamer.close()
    coord_streamer.close()
    time_streamer.close()
    print()
    print('Events read:', total_counter)
    print('Discarded', empty_counter, 'empty events.')
    print()
    print('Starting parsing the events.', data_path, self.base_name)
    # channel data is parsed and events reconstructed.
    self._load_channel_data(data_path)

    # Metadata is created here. We set everything as fully as we can. Metadata is then saved and
    # we are ready.
    self.metadata.total_time = int(total_time * 1e9)
    # live and dead times are automatically got from t_cache. No need to worry.
    self.metadata.run_id = self.base_name
    self.metadata.name = self.config.det['name']
    self.metadata.notes = "Converted from .evt file at {}.".format(dt.datetime.fromtimestamp(time.time()))
    self.metadata.counts = total_counter
    self.metadata.input_counts = total_counter + empty_counter
    self.metadata.events = self.data_dict['time'].shape[0]
    print('events', self.data_dict['time'].shape[0], events)
    if start_time is None and stop_time is None:
        # no start or end run buffers
        print('Calculating start time from file timestamp')
        self.metadata.notes = self.metadata.notes + ' No start or end buffer - calculating time from file timestamp.'
        self.metadata.notes = self.metadata.notes + ' Recorded {} original events.'.format(events)
        start_time = dt.datetime.fromtimestamp(ch_file.stat().st_mtime) - dt.timedelta(seconds=total_time)
    if start_time is None:
        # For some reason there was no BEGRUNBF but an ENDRUNBF exists
        print('Calculating start time from stop time')
        self.metadata.notes = self.metadata.notes + ' No start buffer - calculating time from end time.'
        start_time = stop_time - dt.timedelta(seconds=total_time)
    if stop_time is None:
        # There was no ENDRUNBF
        print('Calculating stop time from start time')
        self.metadata.notes = self.metadata.notes + ' No end buffer - calculating time from start buffer.'
        stop_time = start_time + dt.timedelta(seconds=total_time)
    self.metadata.start = start_time
    self.metadata.stop = stop_time
    # self._update()
    # self.metadata.calculate()
    self.metadata.save()
    print('exit read raw data, load channel data.')
    # self._load_channel_data(data_path)
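
# A minimal sketch of the 30-bit timestamp handling performed in panda_loader above: the raw ADC
# timestamp wraps at 2**30 ticks, an overflow is assumed whenever the value jumps backwards, and
# big_time accumulates the wrapped periods. Standalone illustration only, not the exact loop above:
#
#     def unwrap_timestamps(raw_words):
#         """Return monotonously increasing tick values from raw 30-bit counter words."""
#         big_time = 0
#         prev = 0
#         out = []
#         for word in raw_words:
#             t_val = word & 0x3FFFFFFF      # bits 30 and 31 are always on in the data word
#             if t_val < prev - 1000:        # clock overflow when the timestamp goes backwards
#                 big_time += 2 ** 30
#             out.append(t_val + big_time)
#             prev = t_val
#         return out
#
#     unwrap_timestamps([5, 900, 2**30 - 10, 3])   # -> [5, 900, 1073741814, 1073741827]
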
'''
class Pixie4Data(Data):
    def __init__(self, config, **kwargs):
        super().__init__(config, **kwargs)
        self.data_ext = 'bin'
        self.data_type = 'Pixie-4'

    def _read_raw_data(self, data_path, save_metadata=False):
        # First read the readout scheme
        dtformat = '%H.%M.%S %a, %d %b %Y'
        with (data_path / (self.base_name + '.ifm')).open('r') as df:
            lines = df.readlines()
        for lidx, line in enumerate(lines):
            if line.startswith('Acquisition started at'):
                start = dt.datetime.strptime(line[23:].strip(), dtformat)
            elif line.startswith('stopped at'):
                stop = dt.datetime.strptime(line[11:].strip(), dtformat)
            elif line.startswith('BUFFER_HEAD_LENGTH'):
                buf_head_len = int(line.split()[1])
            elif line.startswith('EVENT_HEAD_LENGTH'):
                ev_head_len = int(line.split()[1])
            elif line.startswith('CHANNEL_HEAD_LENGTH'):
                ch_head_len = int(line.split()[1])
            elif line.startswith('NUMBER_EVENTS'):
                num_ev = int(line.split()[1])
            elif line.startswith('Module Run Time(s)'):
                self.total_time.fill(0)
                self.total_time += int(float(lines[lidx + 1].split()[1]) * 1e9)
            elif line.startswith('Module Channel'):
                for ch_idx in range(self.num_ch):
                    num_str = lines[lidx + 1 + ch_idx].split()[2:5]
                    self.live_time[ch_idx] = int(float(num_str[0]) * 1e9)
                    self.input_rate[ch_idx] = float(num_str[1])
                    self.output_rate[ch_idx] = float(num_str[2])

        self.first = True
        datacounter = 0
        list_size = 100000
        data_mat = np.zeros((list_size, self.num_ch), dtype='uint16')
        time_vec = np.zeros((list_size,), dtype='uint64')
        chunk_size = 11000
        chunk = np.zeros((chunk_size // 4, self.num_ch), dtype='uint16')
        time_chunk = np.zeros((chunk_size // 4,), dtype='uint64')
        buffer = bytearray(chunk_size)
        header = bytearray(buf_head_len)
        self.file_counter = 0
        bufbody = 0
        datafile = data_path / (self.base_name + '.' + self.data_ext)
        file_length_in_bytes = datafile.stat().st_size
        channel_list = bin(0)[2:]  # just empty for printing
        num_of_buffer = 0
        with open(datafile, 'rb') as df:
            while self.file_counter < file_length_in_bytes:
                header = df.read(buf_head_len * 2)
                (BUF_NDATA, BUF_MODNUM, BUF_FORMAT,
                 BUF_TIMEHI, BUF_TIMEMI, BUF_TIMELO) = self._read_header(header, buf_head_len)
                if self.first:
                    self.RunTask = hex(BUF_FORMAT)
                    self.first = False
                self.file_counter += 2 * buf_head_len  # increment counter for the header
                # Pixie4 does occasionally corrupt the buffers. Check
                # that what we read is a header to the next buffer.
                if self.RunTask != hex(BUF_FORMAT):
                    print('Corrupt buffer!', 'bufbody', bufbody)
                    self._err_corr(df)  # Find next header and rewind file to it
                    continue  # go to retrieve next header
                # check run mode
                if BUF_FORMAT == 0x2103:
                    # print('self.RunTask', self.RunTask, TraceInfo)
                    # BUF_FORMAT equals runtask (x103) plus x20T0,
                    # where T equals channel mask for saved traces
                    # (0 in case of x103, so x2103 is always runtask 103)
                    uncompress = self._compr3
                else:
                    print('RunTask', hex(BUF_FORMAT), hex(0x2103))
                    print(self.file_counter)
                    raise ValueError('Only mode 0x103 is supported')
                # calculate amount of data in the buffer
                bufbody = 2 * (BUF_NDATA - buf_head_len)
                bufidx = 0  # index in current buffer
                eventcounter = 0  # index of separate event
                if bufbody == 0:
                    continue  # go to next iteration if buffer is empty
                # todo: handling of chunk length.
                if chunk_size < bufbody:
                    chunk_size = bufbody
                    chunk = np.zeros((chunk_size // 4, self.num_ch), dtype='uint16')
                    time_chunk = np.zeros((chunk_size // 4,), dtype='uint64')
                    buffer = bytearray(chunk_size)
                    print('Chunk size increased to', chunk_size)
                # read the full buffer into memory
                buffer[:bufbody] = df.read(bufbody)
                self.file_counter += bufbody
                if num_of_buffer % 500 == 0:
                    print('Buffer max:', self.file_counter, file_length_in_bytes, datacounter)
                while bufidx < bufbody:
                    # Event header
                    (EVT_PATTERN, EVT_TIMEHI, EVT_TIMELO) = st.unpack('<' + 'H' * ev_head_len,
                                                                      # buffer.read(ev_head_len * 2))
                                                                      buffer[bufidx:bufidx + ev_head_len * 2])
                    bufidx += ev_head_len * 2
                    channel_list = bin(EVT_PATTERN)[2:][-1:-5:-1]
                    time_chunk[eventcounter] = self._parse_time(BUF_TIMEHI, EVT_TIMEHI, EVT_TIMELO)
                    # times, energies = uncompress(buffer[bufidx:bufidx + 4], sum(channel_list))
                    for idx in range(self.num_ch):
                        if channel_list[idx] == '1':
                            time, energy = uncompress(buffer[bufidx:bufidx + 4])
                            bufidx += 4
                            chunk[eventcounter, idx] = energy
                        else:
                            chunk[eventcounter, idx] = 0
                    eventcounter += 1
                if datacounter + eventcounter > list_size:
                    # flush if the matrix would overflow
                    self.data_mat = np.concatenate((self.data_mat, data_mat[:datacounter, :].copy()), axis=0)
                    self.data_dict['time'] = np.concatenate((self.data_dict['time'],
                                                             ((time_vec[:datacounter]) * 1000 / 75).astype('uint64')))
                    datacounter = 0
                # copy to data matrix
                data_mat[datacounter: datacounter + eventcounter, :] = chunk[:eventcounter, :]
                time_vec[datacounter: datacounter + eventcounter] = time_chunk[:eventcounter]
                datacounter += eventcounter
                num_of_buffer += 1

        self.data_dict['time'] -= self.data_dict['time'].min()  # need to zero
        self.counts = np.count_nonzero(self.data_mat, axis=0).astype('uint64')
        self.input_counts += (self.output_rate * self.total_time * 1e-9).astype('uint64')
        self.start_time = [start for _x in range(self.num_ch)]
        self.stop_time = [stop for _x in range(self.num_ch)]

    @staticmethod
    def _read_header(buff, buflen):
        (BUF_NDATA, BUF_MODNUM, BUF_FORMAT,
         BUF_TIMEHI, BUF_TIMEMI, BUF_TIMELO) = st.unpack('<' + 'H' * 3 + 'H' * (buflen - 3), buff[:buflen * 2])
        """
        if self.first:
            self.RunTask = hex(BUF_FORMAT)
            self.first = False
        """
        return (BUF_NDATA, BUF_MODNUM, BUF_FORMAT, BUF_TIMEHI, BUF_TIMEMI, BUF_TIMELO)

    @staticmethod
    def _parse_time(BUF_TIMEHI, EVT_TIMEHI, EVT_TIMELO):
        """
        From the PIXIE-4 User's Manual V2.54:

            EventTime = EVT_TIMELO;
            EventTime += EVT_TIMEHI*pow(2,16);
            EventTime += BUF_TIMEHI*pow(2,32);
            EventTime *= 1e-6/75 s;

        For best precision, use 1 us/75 in the conversion from clock ticks to seconds; 13.33e-9 s
        may lead to rounding errors. This EventTime can be used to match events.
        """
        time_in_ticks = (EVT_TIMELO + EVT_TIMEHI * pow(2, 16) + BUF_TIMEHI * pow(2, 32))
        # time_in_s *= 1e-6/75
        return time_in_ticks

    def _err_corr(self, fileid):
        start_idx = self.file_counter
        seeking = True
        seek_idx = 0
        words = [0, 0]
        while seeking:
            # We are trying to find a piece of header with runtask and number of channels
            # set correctly.
            words[seek_idx % 2] = st.unpack('<H', fileid.read(2))[0]
            self.file_counter += 2
            if words[seek_idx % 2 - 1] < 4 and hex(words[seek_idx % 2]) == self.RunTask:
                self.file_counter -= 6
                fileid.seek(self.file_counter)
                seeking = False
                print('Found new buffer at', self.file_counter,
                      'losing', self.file_counter - start_idx, 'bytes')
            seek_idx += 1

    @staticmethod
    def _compr1(buffer):
        # For uncompressed and mode 1 data with traces
        (CHAN_NDATA, CHAN_TRIGTIME, CHAN_ENERGY, CHAN_XIAPSA, CHAN_USERPSA,
         T1, T2, T3, CHAN_REALTIMEHI) = st.unpack('<' + 'H' * 9, buffer.read(18))
        return CHAN_TRIGTIME, CHAN_REALTIMEHI, CHAN_ENERGY

    @staticmethod
    def _compr3(inbuffer, size=1):
        # For mode 3 data
        (CHAN_TRIGTIME, CHAN_ENERGY) = st.unpack('<' + 'H' * 2 * size, inbuffer)
        return CHAN_TRIGTIME, CHAN_ENERGY
'''

'''
class DSPECData(Data):
    def __init__(self, config, **kwargs):
        super().__init__(config, **kwargs)
        self.data_ext = 'Lis'
        self.name = 'DSPEC data'

    def _read_raw_data(self, data_path, save_metadata):
        """
        :param data_path:
        :return:
        """
        data_file = self.base_name + '.{}'.format(self.data_ext)
        chunk_size = 10000
        streamer = StreamData(data_path, self.base_name, 1, max_data_sz=chunk_size)
        livetime = 0
        rt_word = 0
        input_counts = 0
        self.data_mat = np.zeros((0, self.num_ch), dtype='uint16')
        self.data_dict['time'] = np.zeros((0,), dtype='uint64')
        E_chunk = np.zeros((chunk_size, self.num_ch), dtype='uint16')
        t_chunk = np.zeros((chunk_size,), dtype='uint64')
        input_counts = np.zeros((1,), dtype='uint64')
        counts = np.zeros((1,), dtype='uint64')
        with (data_path / data_file).open('rb') as df:
            header = df.read(256)
            total_time, live_time, start, stop = self._parse_header(header)
            evnt = True
            while evnt:
                eventcounter = 0
                while eventcounter < chunk_size:
                    try:
                        data_word = st.unpack('<I', df.read(4))[0]
                    except st.error:
                        print('{} events'.format(eventcounter))
                        evnt = False
                        break
                    except:
                        raise
                    if data_word & 0xc0000000 == 0xc0000000:
                        # data word
                        E_chunk[eventcounter, 0] = (data_word & 0x3fff0000) >> 16
                        t_chunk[eventcounter] = ((data_word & 0xffff) | rt_word) * 200  # to ns
                        eventcounter += 1
                    elif data_word & 0xc0000000 == 0x80000000:
                        # counter for 50000 ticks rollover
                        rt_word = (data_word & 0x3fffffff) * 50000
                    elif data_word & 0xc0000000 == 0x40000000:
                        # live time in ns, 10 ms resolution
                        livetime = (data_word & 0x3fffffff) * 1e7
                    elif data_word & 0xffff0000 == 0x4000000:
                        # ADC counts per 10 ms
                        input_counts[0] += data_word & 0xffff
                self.data_mat = np.concatenate((self.data_mat, E_chunk[:eventcounter]))
                self.data_dict['time'] = np.concatenate((self.data_dict['time'], t_chunk[:eventcounter]))
                # stream to .dat file to speed up
                streamer.write([eventcounter], [(t_chunk[:eventcounter], E_chunk[:eventcounter])])
        self.events = self.data_mat.shape[0]
        counts[0] = self.events

    def _parse_header(self, header):
        headerinfo = st.unpack('<iid', header[:16])
        if headerinfo[0] != -13:
            print('Invalid header for .Lis file')
            sys.exit()
        if headerinfo[1] != 2:
            print('List mode format {} not supported!'.format(headerinfo[1]))
        total_time = np.zeros((self.num_ch,), dtype='int64')
        live_time = np.zeros((self.num_ch,), dtype='int64')
        start_time = []
        stop_time = []
        stringinfo = st.unpack('<80c9c16c80cc4c', header[16:206])
        # ecal = st.unpack('<3f', header[206:218])
        # is_shapecal = st.unpack('<?', header[218:219])[0]
        # shapecal = st.unpack('<fff', header[219:231])
        gain, det_id, r_t, l_t = st.unpack('<iiff', header[231:247])
        total_time[0] = int(r_t * 1e9)
        live_time[0] = int(l_t * 1e9)
        # Stupid OLE Automation date format starts from 0 at 30.12.1899
        start_time.append(dt.datetime(1899, 12, 30, 0, 0, 0) + dt.timedelta(days=headerinfo[2]))
        stop_time.append(start_time[0] + dt.timedelta(seconds=r_t))
        return total_time, live_time, start_time, stop_time
'''