#!/usr/bin/env python
# encoding: utf-8
"""
Utility to Handle Apex files
"""
from __future__ import print_function, division
__author__ = "Marc André Delsuc, Marie-Aude Coutouly <mac@nmrtec.com>"
__date__ = "July 2011"
import sys
import os
import unittest
import glob
import os.path as op
import math
import numpy as np
import tables
from ..FTICR import FTICRData
from . import HDF5File as hf
# tables.parameters.NODE_CACHE_SLOTS = 0
#tables.parameters.CHUNK_CACHE_SIZE = 0*1024*1024
# tables.parameters.METADATA_CACHE_SIZE = 10*1024*1024
if sys.version_info[0] < 3:
pass
else:
xrange = range
[docs]def read_param(filename):
"""
Open the given file and retrieve all parameters written initially for apexAcquisition.method
NC is written when no value for value is found
structure : <param><name>C_MsmsE</name><value>0.0</value></param>
read_param returns values in a dictionnary
"""
from xml.dom import minidom
xmldoc = minidom.parse(filename)
x = xmldoc.documentElement
pp = {}
children = x.childNodes
for child in children:
if (child.nodeName == 'paramlist'):
params = child.childNodes
for param in params:
if (param.nodeName == 'param'):
if 'name' in param.attributes.keys(): # some versions
k = param.attributes['name'].value
for element in param.childNodes:
if element.nodeName == "name": # other versions
k = element.firstChild.toxml()
elif element.nodeName == "value":
try:
v = element.firstChild.toxml()
except:
v = "NC"
pp[k] = v
return pp
#-----------------------------------------
[docs]def read_scan(filename):
"""
Function that returns the number of scan that have been recorded
It is used to see wether the number of recorded points correspond to the L_20 parameter
"""
from xml.dom import minidom
xmldoc = minidom.parse(filename)
x = xmldoc.documentElement
children = x.childNodes
count_scan = 0
for child in children:
if (child.nodeName == 'scan'):
count_scan += 1
return count_scan
#-----------------------------------------
[docs]def get_param(param,names, values):
"""
From params, this function returns the value of the given param
"""
for i in xrange(len(names)):
if names[i] == param:
return values[i]
#-----------------------------------------
[docs]def locate_acquisition(folder):
"""
From the given folder this function return the absolute path to the apexAcquisition.method file
It should always be in a subfolder
"""
L = glob.glob(op.join(folder,"*","apexAcquisition.method"))
if len(L)>1:
raise Exception( "You have more than 1 apexAcquisition.method file in the %s folder, using the first one"%folder )
elif len(L) == 0:
raise Exception( "You don't have any apexAcquisition.method file in the %s folder, please double check the path"%folder )
return L[0]
#-----------------------------------------
[docs]def Import_1D(inifolder,outfile=""):
"""
Entry point to import 1D spectra
It returns a FTICRData
It writes a HDF5 file if an outfile is mentionned
"""
import array
if sys.maxsize == 2**31-1: # the flag used by array depends on architecture - here on 32biy
flag = 'l' # Apex files are in int32
else: # here in 64bit
flag = 'i' # strange, but works here.
if op.isfile(inifolder):
folder = op.dirname(inifolder)
elif op.isdir(inifolder):
folder = inifolder
elif not op.exists(inifolder):
raise Exception("File does not exist: "+inifolder)
else:
raise Exception("File is undecipherable: "+inifolder)
try:
parfilename = locate_acquisition(folder)
except:
raise Exception('%s does not seem to be a valid Apex spectrum'%(inifolder,))
parfilename = locate_acquisition(folder)
params = read_param(parfilename)
# Import parameters : size in F1
sizeF1 = int(params["TD"])
if os.path.isfile(os.path.join(folder, "fid")):
fname = os.path.join(folder, "fid")
elif os.path.isfile(os.path.join(folder, "ser")):
raise Exception("You are dealing with 2D data, you should use Import_2D")
else:
raise Exception("No file found")
data = FTICRData( dim=1 ) # create dummy 1D
data.axis1.size = sizeF1 # then set parameters
data.axis1.calibA = float(params["ML1"])
data.axis1.calibB = float(params["ML2"])
data.axis1.calibC = float(params["ML3"])
if not math.isclose(data.axis1.calibC,0.0):
print('Using 3 parameters calibration, Warning calibB is -ML2')
data.axis1.calibB *= -1
data.axis1.specwidth = float(params["SW_h"])
# Test for NarrowBand
if params["AQ_mod"] == '1':
NarrowMode = True
else:
NarrowMode = False
data.axis1.highfreq = data.axis1.calibA/float(params["EXC_low"]) # these two are in m/z !
data.axis1.lowfreq = data.axis1.calibA/float(params["EXC_hi"])
if NarrowMode:
if params["SW_h"] != params["SW_h_Narrowband"]:
print("""Warning - there is some inconsistencies in spectral width definition - dataset assumed in NarrowBand mode""")
data.axis1.specwidth = float(params["SW_h_Narrowband"])
data.axis1.offsetfreq = float(params['O1'])- data.axis1.specwidth
data.axis1.lowfreq = data.axis1.offsetfreq
data.axis1.highmass = data.axis1.htomz(data.axis1.offsetfreq)
data.axis1.left_point = 0.0
else:
data.axis1.lowfreq = float(params["FR_low"])
data.axis1.highmass = float(params["MW_high"])
data.axis1.left_point = 0
data.axis1.offsetfreq = 0.0
data.params = params # add the parameters to the data-set
if (outfile): # Creates the fticrdata with no array, just infos for the table
HF = hf.HDF5File(outfile,"w")
HF.create_from_template(data)
HF.store_internal_object(params, h5name='params') # store params in the file
# then store files xx.methods and scan.xml
HF.store_internal_file(parfilename)
HF.store_internal_file( os.path.join(folder,"scan.xml") )
# HF.store_internal_file( os.path.join(folder,"ExciteSweep") )
data.hdf5file = HF
# I need a link back to the file in order to close it, however this creates a loop - experimental !
else:
data.buffer = np.zeros((sizeF1))
if NarrowMode:
data.axis1.itype = 1 # then assume data is complex
data.adapt_size()
with open(fname,"rb") as f:
tbuf = f.read(4*sizeF1)
abuf = np.array(array.array(flag,tbuf))
data.buffer[:] = abuf[:]
if (outfile):
HF.flush()
return data
#-----------------------------------------
[docs]def Import_2D(folder,outfile = "",F1specwidth = None):
"""
Entry point to import 2D spectra
It returns a FTICRData
It writes a HDF5 file if an outfile is mentionned
"""
import array
if sys.maxsize == 2**31-1: # the flag used by array depends on architecture - here on 32bit
flag = 'l' # Apex files are in int32
else: # here in 64bit
flag = 'i' # strange, but works here.
parfilename = locate_acquisition(folder)
params = read_param(parfilename)
#count_scan = read_scan(os.path.join(folder,"scan.xml"))
# Import parameters : size in F1 and F2
sizeF1 = int(params["L_20"])
# if (count_scan != sizeF1):
# print "Mismatch between Apex file and scan.xml size F1 set to %i"%count_scan
# sizeF1 = count_scan
sizeF2 = int(params["TD"])
if os.path.isfile(os.path.join(folder,"ser")):
fname = os.path.join(folder, "ser")
else:
raise Exception("You are dealing with 1D data, you should use Import_1D")
#size, specwidth, offset, left_point, highmass, calibA, calibB, calibC, lowfreq, highfreq
data = FTICRData( dim=2 ) # create dummy 2D
data.axis2.size = sizeF2 # then set parameters along F2 - classical axis -
data.axis2.calibA = float(params["ML1"])
data.axis2.calibB = float(params["ML2"])
data.axis2.calibC = float(params["ML3"])
data.axis2.specwidth = float(params["SW_h"])
#data.axis2.highfreq = data.axis2.calibA/float(params["EXC_low"]) # these two are in m/z !
#data.axis2.lowfreq = data.axis2.calibA/float(params["EXC_hi"])
data.axis2.highfreq = float(params["SW_h_Broadband"]) # these two are in m/z !
data.axis2.lowfreq = float(params["FR_low"])
data.axis2.highmass = float(params["MW_high"])
data.axis2.left_point = 0
data.axis2.offset = 0.0
if not math.isclose(data.axis2.calibC,0.0):
print('Using 3 parameters calibration, Warning calibB is -ML2')
data.axis2.calibB *= -1
data.axis1.size = sizeF1 # then set parameters along F1- non classical axis -
# assumes most parameter are equivalent - except specwidth
if F1specwidth is not None: # if given in arguments
data.axis1.specwidth = F1specwidth
else:
f1 = float(params["IN_26"]) # IN_26 is used in 2D sequence as incremental time
if f1 < 1E-4: # seems legit
data.axis1.specwidth = 1.0/(2*f1)
else:
data.axis1.specwidth = data.axis2.specwidth # else assume square...
data.axis1.calibA = data.axis2.calibA
data.axis1.calibB = data.axis2.calibB
data.axis1.calibC = data.axis2.calibC
#data.axis1.highfreq = data.axis1.calibA/float(params["EXC_low"]) # these two are in m/z !
#data.axis1.lowfreq = data.axis1.calibA/float(params["EXC_hi"])
data.axis1.highfreq = data.axis2.highfreq # these two are in m/z !
data.axis1.lowfreq = data.axis2.lowfreq
data.axis1.highmass = float(params["MW_high"])
data.axis1.left_point = 0
data.axis1.offset = 0.0
data.params = params # add the parameters to the data-set
c1 = int(sizeF1//8. +1)
c2 = int(sizeF2//8. +1)
#print "chunkshape",c1,c2
#tables.parameters.CHUNK_CACHE_PREEMPT = 1
#tables.parameters.CHUNK_CACHE_SIZE = c1*c2*8
if (outfile): # Creates the fticrdata with no array, just infos for the table
HF = hf.HDF5File(outfile,"w")
HF.create_from_template(data)
HF.store_internal_object(params, h5name='params') # store params in the file
# then store files xx.methods and scan.xml
HF.store_internal_file(parfilename)
HF.store_internal_file( os.path.join(folder,"scan.xml") )
# HF.store_internal_file( os.path.join(folder,"ExciteSweep") )
data.hdf5file = HF
# I need a link back to the file in order to close it, however this creates a loop - experimental !
else:
data.buffer = np.zeros((sizeF1, sizeF2))
# data.adapt_size()
with open(fname,"rb") as f:
for i1 in xrange(sizeF1-1):
tbuf = f.read(4*sizeF2)
abuf = np.array(array.array(flag,tbuf))
data.buffer[i1,:] = abuf[:]
# if (outfile):
# HF.close()
if (outfile):
HF.flush()
return data
#-----------------------------------------
[docs]def read_2D(sizeF1, sizeF2, filename="ser"):
"""
Reads in a Apex 2D fid
sizeF1 is the number of fid
sizeF2 is the number of data-points in the fid
uses array
"""
import array
# import platform # platform seems to be buggy on MacOs, see http://stackoverflow.com/questions/1842544
if sys.maxsize == 2**31-1: # the flag used by array depends on architecture - here on 32bit
flag = 'l' # Apex files are in int32
else: # here in 64bit
flag = 'i' # strange, but works here.
sz1, sz2 = int(sizeF1), int(sizeF2)
# read binary
fbuf = np.empty( (sz1, sz2) )
with open(filename, "rb") as f:
for i1 in xrange(sz1):
tbuf = f.read(4*sz2)
abuf = array.array(flag, tbuf)
fbuf[i1,0:sz2] = abuf[0:sz2]
return FTICRData(buffer = fbuf)
#-----------------------------------------
[docs]def read_3D(sizeF1, sizeF2, sizeF3, filename="ser"):
"""
Ebauche de fonction
Reads in a Apex 3D fid
uses array
"""
import array
# import platform # platform seems to be buggy on MacOs, see http://stackoverflow.com/questions/1842544
if sys.maxsize == 2**31-1: # the flag used by array depends on architecture - here on 32biy
flag = 'l' # Apex files are in int32
else: # here in 64bit
flag = 'i' # strange, but works here.
sz1, sz2, sz3 = int(sizeF1), int(sizeF2), int(sizeF3)
# read binary
fbuf = np.empty( (sz1,sz2,sz3) )
with open(filename,"rb") as f:
for i1 in xrange(sz1):
tbuf = f.read(4*sz2)
abuf = array.array(flag, tbuf)
fbuf[i1,0:sz2] = abuf[0:sz2]
return FTICRData(buffer=fbuf)
#-----------------------------------------
[docs]def write_ser(bufferdata,filename="ser"):
"""
Write a ser file from FTICRData
"""
with open(filename, "wb") as f:
for i in range (len(bufferdata)):
for j in range (len(bufferdata[0])):
f.write(bufferdata[i][j].astype("int32").tostring() )
#----------------------------------------------
[docs]class Apex_Tests(unittest.TestCase):
[docs] def setUp(self):
from ..Tests import filename, directory
rootfiles = os.getcwd()
self.TestFolder = directory()
self.DataFolder = filename('ubiquitine_2D_000002.d')#'cytoC_2D_000001.d')
# self.serfile = filename('cytoC_2D_000001.d/ser')
# self.outHDF = filename('cytoC_2D_000001_direct.msh5')
# self.name_write = filename("file_write")
# self.name_fticr = filename("file_fticr")
# self.name_npar = filename("file_npar")
# self.npar_fticr = filename("npar_fticr")
# self.name_chunk = filename("Chunk.hf")
self.name_get = filename("file_fticr_2D.msh5")
self.verbose = 1 # verbose > 0 switches messages on
[docs] def announce(self):
if self.verbose >0:
print("\n========",self.shortDescription(),'===============')
#-------------------------------------------------
[docs] def test_Import_2D(self):
"Test and time routine that import 2D from the MS-FTICR folder to the file given as second argument "
from time import time
self.announce()
t0 = time()
d = Import_2D(self.DataFolder, self.name_get)
#d = Import_2D("/Volumes/XeonData/Developement/MS-FTICR/cytoC_2D_000006.d")
print("import",time()-t0,"secondes")
"""
sur mon ordi
d = Import_2D("/DATA/FT-ICR-Cyto/cytoC_2D_000006.d") prend 1080Mo de mémoire en 18 secondes
d = Import_2D("/DATA/FT-ICR-Cyto/cytoC_2D_000006.d","test.hdf5") prend 80Mo de mémoire en 21.6 secondes et créé un fichier de 1Go
"""
#-------------------------------------------------
[docs] def test_Import_2D_keep_mem(self):
"Test and time routine that import 2D from the MS-FTICR folder in memory"
from time import time
self.announce()
t0 = time()
d = Import_2D( self.DataFolder )
#d = Import_2D("/Volumes/XeonData/Developement/MS-FTICR/cytoC_2D_000006.d")
print("import",time()-t0,"secondes")
"""
sur mon ordi
d = Import_2D("/DATA/FT-ICR-Cyto/cytoC_2D_000006.d") prend 1080Mo de mémoire en 18 secondes
d = Import_2D("/DATA/FT-ICR-Cyto/cytoC_2D_000006.d","test.hdf5") prend 80Mo de mémoire en 21.6 secondes et créé un fichier de 1Go
"""
#-------------------------------------------------
def _test_2(self):
" Test and time direct processing"
self.announce()
from time import time
d = Import_2D(self.DataFolder,self.name2)
t0 = time()
t00 = t0
d.rfft(axis=2)
print("rfft2",time()-t0,"secondes")
t0 = time()
d.rfft(axis=1)
print("rfft1",time()-t0,"secondes")
t0 = time()
d.modulus()
print("modulus",time()-t0,"secondes")
t0 = time()
print("modulus",time()-t0,"secondes")
print("calcul",time()-t00,"secondes")
d.display(scale=5, show=True)
d.hdf5file.close()
"""
sur mon ordi - en mémoire
rfft2 2.57822608948 secondes
rfft1 9.06142997742 secondes
modulus 0.851052999496 secondes
modulus 2.92809915543 secondes
calcul 15.4189431667 secondes
- sur fichier -
rfft2 7.82480788231 secondes
rfft1 est incroyablement long > 5 minutes - et fait des I/O de folie
Sur Xeon - en mémoire
rfft2 4.27431201935 secondes
rfft1 19.972933054 secondes
modulus 1.25446605682 secondes
modulus 5.96046447754e-06 secondes
calcul 25.5018758774 secondes
- sur fichier -
"""
#-------------------------------------------------
def _test_3(self):
"Another strategy close the file after fft on F2 and reopen everything for F1 fft"
self.announce()
from time import time
d = Import_2D(self.DataFolder,self.name3)
t0 = time()
t00 = t0
d.rfft(axis=2)
d.hdf5file.close() # je ferme
print("rfft2",time()-t0,"secondes")
t0 = time()
# je réouvre
H = hf.HDF5File(self.name3,"rw")
H.load()
d2 = H.data # B is a FTICRdata
d2.rfft(axis=1)
print("rfft1",time()-t0,"secondes") # toujours aussi lent !
t0 = time()
d2.modulus()
print("modulus",time()-t0,"secondes")
t0 = time()
print("modulus",time()-t0,"secondes")
print("calcul",time()-t00,"secondes")
print(type(d))
d2.display(scale=5, show=True)
H.close()
H = hf.HDF5File(self.name3,"r")
H.load()
B = H.data # B is a FTICRdata
H.close()
"""
Sur Xeon
rfft2 19.9385159016 secondes
rfft1 3348.65607595 secondes
modulus 5.01055693626 secondes
modulus 5.96046447754e-06 secondes
calcul 3373.60540509 secondes
en rfft(axis=2) 0.004 s par ligne --> 8.192 s
en rfft(axis=1) 0.05 s par ligne --> 3276.8 s
"""
# def test_1D(self):
# "Test the import_1D routine"
# self.announce()
# d = Import_1D("/Volumes/XeonData/Developement/MS-FTICR/glyco_ms_000002.d","/Users/mac/Desktop/test1D_comp.hdf5")
if __name__ == '__main__':
unittest.main()