Source code for altimetry.tools.spectrum

# -*- coding: utf-8 -*-
import numpy as np
import scipy.fftpack as ft
from scipy import stats
from altimetry.tools import detrend as detrend_fun, grid_track, message
if __debug__ : import matplotlib.pyplot as plt

def get_kx(N, dx):
    """
    GET_KX

    :summary: Returns the frequencies to be used with FFT analysis

    :parameter N: number of samples in data
    :parameter dx: sampling step

    :return: Returns
       * k: frequency
       * L: length of the series
       * imx: index of the maximum frequency (for separating positive and negative frequencies)

    :author: Renaud DUSSURGET (RD) - LER/PAC, Ifremer
    :change: Created by RD, July 2012
    """

    #Work with dimensional axes
    L = N * dx

    #Odd or even number of samples
    odd = bool(N & 1)

    #Get frequency & wavenumber
    k = ft.fftfreq(N, d=dx)

    imx = (N - 1) / 2 if odd else N / 2  #Index of the maximum frequency

    return k, L, imx
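# Minimal usage sketch (illustrative addition, not part of the original
# module): shows the fftfreq layout returned by get_kx for even and odd
# sample counts, and where imx splits positive from negative frequencies.
def _example_get_kx():
    for N in (8, 9):  #even and odd sample counts
        k, L, imx = get_kx(N, dx=1.0)
        print 'N={0} : L={1}, imx={2}'.format(N, L, imx)
        print '  k =', k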
def get_spec(dx, Vin, verbose=False, gain=1.0, integration=True):
    """
    GET_SPEC

    :summary: Returns the spectrum of a regularly sampled dataset

    :parameter dx: sampling interval (1D)
    :parameter Vin: data to analyse (1D).
    :note: NaN can not be used.

    :return:
       * psd: Power Spectral Density
       * esd: Energy Spectral Density
       * fq: frequency
       * p: wavelength (period)

    :author: Renaud DUSSURGET (RD) - LER/PAC, Ifremer
    :change: Created by RD, July 2012. Changes

       * 29/08/2012 : Changed the computation of frequencies and the spectral integration (spectrum is averaged at mid-width frequencies)
       * 30/11/2012 : Outstanding changes : corrected the spectral integration for computing psd and corrected the normalisation
    """

    V = Vin.copy()
    N = V.size

    # part 1 - estimate of the wavenumbers
    ######################################
    [k, L, imx] = get_kx(N, dx)

    # part 2 - estimate of the spectrum
    # fast fourier transform
    ##########################
    fft = ft.fft(V) / gain

    if verbose:
        print 'Check parseval theorem 1: SUM|Y(f)|²={0}, SUM|y(t)|²={1}'.format(((np.abs(fft)**2)/N).sum(), (V**2).sum())

    #Compute magnitude and phase
    #CF. Wilks, 2006. Statistical Methods in the Atmospheric Sciences. 2nd Edition. International Geophysics Series vol.96. Elsevier.
    # => Ck = [Ak^2 + Bk^2]^(1/2), where e^(iwt) = cos(wt) + i.sin(wt)
    a = fft.real
    b = fft.imag
    c = np.sqrt(a**2.0 + b**2.0)  #This is the magnitude
    d = np.arctan2(b, a)          #Phase (arctan2 keeps the quadrant)

    if verbose:
        print 'Check parseval theorem 2: SUM|Y(f)|²={0}, SUM|y(t)|²={1}'.format(((c**2)/N).sum(), (V**2).sum())

    #Normalise data
    c /= np.float32(N)

    if verbose:
        print 'Check parseval theorem 3: SUM|Y(f)|²={0}, SUM|y(t)|²={1}'.format(((c**2)*N).sum(), (V**2).sum())

    #Remove negative frequencies
    c = 2 * c[1:imx - 1]  #Multiply by 2 (because of negative frequencies removal) - loses a bit of energy
    d = d[1:imx - 1]      #The phase is NOT doubled: only the magnitude gains a factor 2 from the folding

    if verbose:
        print 'Check parseval theorem 4: SUM|Y(f)|²={0}, SUM|y(t)|²={1}'.format(((c**2)*(N/2.0)).sum(), ((V - V.mean())**2).sum())

    # integration of the spectrum
    # shift to have values centered on the intervals
    dk = k[1] - k[0]
    dk_half = dk / 2  # half of the interval
    k = k[1:imx - 1]
    k_ = k[:-1] + dk_half  #Shift wavelengths by half the unit

    csquared = c ** 2  #Energy (ESD)

    if verbose:
        print 'Check parseval theorem 5: SUM|Y(f)|²={0}, SUM|y(t)|²={1}'.format((csquared*(N/2)).sum(), ((V - V.mean())**2).sum())

    #Spectral integration
    if integration:
        esd = k_ * 0.0
        psd = k_ * 0.0
        for i in np.arange(len(k_)):
            esd[i] = np.sum((csquared * (N/2.0))[(k > (k_[i] - dk)) & (k < (k_[i] + dk))]) / 2.0
            psd[i] = np.sum(csquared[(k > (k_[i] - dk)) & (k < (k_[i] + dk))]) / (2.0**2)  #This is variance units integration
        fq = k_
    else:
        esd = csquared
        psd = esd.copy() / 2.
        fq = k.copy()

    psd = psd / dk  # Normalisation (Danioux 2011)

    if verbose:
        print 'Check parseval theorem 6: SUM|Y(f)|²={0}, SUM|y(t)|²={1}'.format(esd.sum(), ((V - V.mean())**2).sum())

    #Get frequencies and period
    p = 1 / fq

    return {'psd': psd, 'esd': esd, 'fq': fq, 'p': p, 'gain': gain, 'phase': d}
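# Illustrative sketch (not part of the original module): recover the
# wavelength of a pure sine from the get_spec output; the 50-unit wavelength
# should dominate the energy spectral density.
def _example_get_spec():
    dx = 1.0                                  #sampling step
    x = np.arange(512) * dx
    signal = np.sin(2 * np.pi * x / 50.0)     #wavelength of 50 length units
    spec = get_spec(dx, signal)
    peak = spec['p'][np.argmax(spec['esd'])]  #wavelength at the ESD maximum
    print 'dominant wavelength : {0:.1f} (expected ~50)'.format(peak)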
def spectral_analysis(dx, Ain, tapering=None, overlap=None, wsize=None, alpha=3.0, detrend=False, normalise=False, integration=True, average=True, ARspec=None):
    """
    SPECTRAL_ANALYSIS

    :summary: This function performs a spatial spectral analysis with different options on a time series of SLA profiles.

    :parameter dx: sampling distance
    :parameter Ain: 2D table of sla data with time along 2nd axis (NXxNT with NX the spatial length and NT the time length)

    :keyword tapering: apply tapering to the data

       * If this keyword is of type bool : apply a hamming window.
       * If this keyword is a string : apply a hamming ('hamming'), hanning ('hanning'), kaiser-bessel ('kaiser'), blackman ('blackman') or no ('none') tapering function.
       * If this keyword is a :class:`numpy.array` object : apply this array as taper.

    :keyword overlap: overlap coefficient of the windows (0.75 means 75% overlap).
    :keyword wsize: size of the sub-segments.
    :keyword normalise: If True, normalise the spectrum by its overall energy content.
    :keyword detrend: If True, removes a linear trend from the segmented signal (if tapered) or from the whole signal (if not tapered).
    :keyword integration: If True, integrate the spectrum between 2 frequencies.
    :keyword alpha: used to compute the input (beta) of the kaiser-bessel taper.
    :keyword ARspec: Applies an Auto-Regressive model of the order provided as value of this parameter.

    :return: a spectrum structure

    .. code-block:: python

       {'esd':esd,        #Energy Spectral Density
        'psd':psd,        #Power Spectral Density
        'fq':fq,          #frequency
        'p':p,            #wavelength
        'params':params}  #tapering parameters

    :author: Renaud DUSSURGET (RD) - LER/PAC, Ifremer
    :change: Created by RD, December 2012
    """

    A = Ain.copy()

    #Check dimensions
    sh = A.shape
    ndims = len(sh)
    N = sh[0]  #Time series are found along the last dimension

    #If vector, add one dimension
    if ndims == 1:
        A = A.reshape((N, 1))
        sh = A.shape
        ndims = len(sh)

    nr = sh[1]  #Number of repeats
    nt = nr

    gain = 1.0  #Scaling gain (used for tapering issues)

    #Apply tapering if asked
    ########################
    if tapering is not None:

        #Set tapering defaults
        overlap = 0 if overlap is None else overlap
        wsize = N if wsize is None else wsize

        #Get time splitting (tapering) parameters
        #########################################
        a = np.float32(wsize)
        b = np.float32(overlap)
        c = np.float32(N)
        nn = int(np.floor((c - (a * b)) / (a - (a * b))))  #This is the number of segments
        print 'Number of windows :{0}\nTotal windowed points : {1} ({2} missing)\nTotal points : {3}'.format(nn, nn*wsize, N - nn*wsize*overlap, N)
        ix = (np.arange(nn) * ((1.0 - b) * a)).astype(int)  #These are the starting points of each segment

        #Moving window
        ##############
        dum = np.zeros((wsize, nn, nr), dtype=np.float64)
        for j in np.arange(nr):
            for i in np.arange(nn):  #looping through time to get split time series
                dum[:, i, j] = detrend_fun(np.arange(wsize), A[ix[i]:ix[i] + wsize, j]) if detrend else A[ix[i]:ix[i] + wsize, j]

        #Set up tapering window
        #######################
        beta = np.pi * alpha
        hamm = np.hamming(wsize)
        hann = np.hanning(wsize)
        kbess = np.kaiser(wsize, beta)
        blackman = np.blackman(wsize)
        notaper = np.ones(wsize)  #overpass tapering option

        gain = 1.0
        if isinstance(tapering, bool):
            which = 'hamm'
            gain = np.sum(hamm) / wsize  #kept consistent with the 'hamming' string option
        elif isinstance(tapering, str):
            if tapering.upper() == 'HAMMING':
                which = 'hamm'
                gain = np.sum(hamm) / wsize      #0.530416666667
            elif tapering.upper() == 'HANNING':
                which = 'hann'
                gain = np.sum(hann) / wsize      #0.489583333333
            elif tapering.upper() == 'KAISER':
                which = 'kbess'
                gain = np.sum(kbess) / wsize     #0.394170357504
            elif tapering.upper() == 'NONE':
                which = 'notaper'
                gain = 1.0
            elif tapering.upper() == 'BLACKMAN':
                which = 'blackman'
                gain = np.sum(blackman) / wsize
            else:
                raise Exception('Unknown taper {0}'.format(tapering))
        elif isinstance(tapering, np.ndarray):
            which = 'tapering'
            gain = np.sum(tapering) / wsize
        else:
            raise Exception('Bad value for tapering keyword')

        #Look the window up by name instead of using exec()
        windows = {'hamm': hamm, 'hann': hann, 'kbess': kbess, 'blackman': blackman, 'notaper': notaper, 'tapering': tapering}
        window = windows[which]
        window = np.repeat(window, nn * nr).reshape((wsize, nn, nr))

        #Apply tapering on segmented data
        A = dum.copy() * window
        A = A.reshape(wsize, nr * nn)  #Reshape matrix
        nr = nn * nr
    else:
        if detrend:
            for i in np.arange(nr):
                A[:, i] = detrend_fun(np.arange(N), A[:, i])
        gain = 1.0

    #Run transform
    ##############
    for i in np.arange(nr):
        if ARspec is not None:
            spec = yule_walker_regression(dx, A[:, i], ARspec)
        else:
            spec = get_spec(dx, A[:, i], integration=integration, gain=gain)
        if i == 0:
            esd = spec['esd']
            psd = spec['psd']
            fq = spec['fq']
            phase = spec.get('phase')  #AR spectra do not carry a phase
            if ARspec: model = [spec['psd'].model]
        else:
            esd = np.append(esd, spec['esd'])
            psd = np.append(psd, spec['psd'])
            phase = spec.get('phase')
            if ARspec: model.append(spec['psd'].model)

    #Average spectrum
    #################
    nf = len(fq)
    p = 1. / fq
    esd = esd.reshape(nr, nf)
    psd = psd.reshape(nr, nf)
    if average:
        esd = np.sum(esd, axis=0) / nr
        psd = np.sum(psd, axis=0) / nr

    psd = psd * (gain ** 0.5)

    if ARspec:
        mparam, msig = zip(*tuple([(el['parameters'], el['sig']) for el in model]))
        deg = ARspec
        mparam = np.concatenate(mparam).reshape((len(model), deg))
        msig = np.array(msig)
        if average:
            mparam = mparam.mean(axis=0)
            msig = np.nansum(msig) / msig.size
        mstr = {'parameters': mparam, 'sig': msig, 'n': N, 'deg': ARspec}
        setattr(esd, 'model', mstr)
        setattr(psd, 'model', mstr)

    #Normalise by energy content
    Scaling_Factor = len(fq) / esd.sum()
    if normalise:
        esd *= Scaling_Factor
        psd *= Scaling_Factor

    if tapering is not None:
        return {'params': {'tapering': tapering is not None, 'which': which, 'wsize': int(wsize), 'nwind': int(nn), 'overlap': int(100. * overlap), 'gain': gain}, 'psd': psd, 'esd': esd, 'fq': fq, 'p': p, 'phase': phase}
    else:
        return {'params': {'tapering': tapering is not None}, 'psd': psd, 'esd': esd, 'fq': fq, 'p': p, 'phase': phase}
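# Illustrative sketch (not part of the original module): run the windowed
# analysis on a noisy sine with 50%-overlapping, hanning-tapered 128-point
# segments, and compare with the plain (untapered) spectrum.
def _example_spectral_analysis():
    dx = 7.0                                       #along-track sampling (e.g. km)
    x = np.arange(512) * dx
    sla = np.sin(2 * np.pi * x / 300.0) + 0.1 * np.random.randn(512)
    tapered = spectral_analysis(dx, sla, tapering='hanning', wsize=128, overlap=0.5)
    raw = spectral_analysis(dx, sla)
    print 'tapered PSD length :', len(tapered['psd'])
    print 'raw PSD length     :', len(raw['psd'])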
def preprocess(lat, lon, sla,
               N_min=None,
               per_min=15.0,
               max_gap=None,
               leave_gaps=False,
               remove_edges=True,
               interp_over_continents=False,
               truncate_if_continents=True,
               discard_continental_gaps=True,  #do not consider gaps on continents to discard time steps using max_gap
               flag_interp=False,
               return_lonlat=False,
               return_interpolated=False,
               last=True, mid=None, first=None,  #for get_segment
               verbose=1):
    '''
    Preprocessing of the SLA data ::

       * process positions :
          * interpolate over gaps
          * find continents (extend the positions over continents to get the discontinuity)
          * find track edges
          * find gap lengths
       * clean SLA data ::
          * Remove gaps greater than the maximum length over which interpolation is allowed.
          * Remove time steps with not enough coverage.
          * Get sub-segments of valid data of a given length.

    :parameter lat: latitude
    :parameter lon: longitude
    :parameter sla: data

    :keyword N_min: Length of sub-segments (cf :func:`altimetry.tools.spectrum.get_segment`)
    :keyword per_min: Minimum percentage of valid data to allow.
    :keyword max_gap: Maximum gap length to interpolate over (interpolation is done 1st, THEN long gaps are eliminated)
    :keyword leave_gaps: Leave gaps (equivalent to setting max_gap to the number of points in the track).
    :keyword remove_edges: discard data at track edges.
    :keyword truncate_if_continents: Force truncating data if a continent is found within a segment of data.
    :keyword last: Get segments of data anchored to the last point in track (cf :func:`altimetry.tools.spectrum.get_segment`)
    :keyword first: Get segments of data anchored to the first point in track (cf :func:`altimetry.tools.spectrum.get_segment`)
    :keyword mid: Get segments of data anchored to the middle point in track (cf :func:`altimetry.tools.spectrum.get_segment`)
    '''

    sh = sla.shape
    nt = sh[0]
    nx = sh[1]

    dumsla = sla.copy()

    #Remove profiles with less than 3 points
    ok = np.where(sla.mask.sum(axis=1) < (nx - 3))[0]
    if (nt != len(ok)):
        message(2, '%i time steps on %i removed: contain less than 3 valid data points' % (nt - len(ok), nt), verbose=verbose)
    dumsla = dumsla[ok, :]
    ntinit = nt
    nt = len(ok)

    # 1 - regrid tracks regularly
    #     get gap lengths
    #############################
    fg_dumlon = nx
    for i in np.arange(nt):

        #grid track regularly
        fg = ~dumsla.mask[i, :]
        dst, dumlon, dumlat, dsla, lgaps, n, edges, inter = grid_track(lat[fg], lon[fg], dumsla[i, :][fg], remove_edges=False, backbone=[lon, lat], interp_over_continents=interp_over_continents)

        if isinstance(dumlon, np.ma.masked_array):
            fg_dumlon_new = dumlon.mask.sum()
        else:
            fg_dumlon_new = np.isfinite(dumlon).sum()
        if fg_dumlon_new < fg_dumlon:
            fg_dumlon = fg_dumlon_new
            lonout = dumlon
            latout = dumlat

        #extend matrix width if track has gone over any land (ie. any points not found in the backbone)
        if (len(dumlon) > len(lon)) & (i == 0):
            lendiff = len(dumlon) - len(lon)
            print '[WARNING] : Pass goes over a land mass, changing the track size from {0} to {1}'.format(nx, nx + lendiff)
            nx += lendiff

        dumint = inter.reshape((1, len(dsla))) if i == 0 else np.ma.concatenate([dumint, inter.reshape((1, len(dsla)))], axis=0)
        dumslaout = dsla.reshape((1, len(dsla))) if i == 0 else np.ma.concatenate([dumslaout, dsla.reshape((1, len(dsla)))], axis=0)

        if i == 0:
            gaplen = [lgaps]
            gapedges = [edges]
        else:
            gaplen.append(lgaps)
            gapedges.append(edges)
        ngaps = n if i == 0 else np.append(ngaps, n)

    dumsla = dumslaout.copy()

    #These points are the continental ones.
    dumint = dumint.astype(bool)
    continent = dumsla.mask & dumint
    flagged = dumint.astype(bool) & ~continent

    #check for continental gaps
    iscont = np.sum(continent, axis=0) == nt
    indcont = np.arange(nx)[iscont]

    #here we get the position of the gap from its edges and intersect it with the continental points
    # -> if any point is over a continent, cont_gap is set to True
    cont_gap = [[len(set(indcont).intersection(range(gapedges[j][0][jj], gapedges[j][1][jj]))) > 0 for jj in xrange(ngaps[j])] for j in xrange(nt)]

    if discard_continental_gaps:
        gaplen = [np.array([g[jj] for jj in xrange(len(g)) if not cont_gap[j][jj]]) for j, g in enumerate(gaplen)]

    # 2 - remove/subsample using stats from previous loop and keyword arguments
    ###########################################################################
    if max_gap is not None:

        #Remove profiles with long gaps
        gapmax = np.array([np.max(g) if len(g) > 0 else 0 for g in gaplen])
        id1 = np.where(gapmax <= max_gap)[0] if not leave_gaps else ok
        if len(id1) == 0:
            raise Exception('[ERROR] : All gaps in current track are greater than the maximum specified gap')
        if (len(id1) != nt):
            message(2, '%i time steps on %i removed: gaps > %i point' % (nt - len(id1), ntinit, int(max_gap)), verbose=verbose)
        dumsla = dumsla[id1, :]

        #Remove profiles with not enough coverage :
        # -> based on points with interpolation flag from grid_track and not on continents
        per = 100. * flagged[id1, :].sum(axis=1) / np.float(nx)
        if N_min is None:
            N_min = nx
        id2 = np.where(per <= per_min)[0]
        if len(id2) == 0:
            raise Exception('[ERROR] : All time steps in current track have a percentage of invalid data > than the maximum allowed (%i)' % int(per_min))
        if (len(id2) != len(id1)):
            message(2, '%i time steps on %i removed: exceed maximum allowed percentage of invalid data (%i)' % (len(id1) - len(id2), ntinit, int(per_min)), verbose=verbose)
        dumsla = dumsla[id2, :]

        #At this point track edges are removed
        dumsla, id3 = get_segment(dumsla, N_min, remove_edges=remove_edges, truncate_if_continents=truncate_if_continents, last=last, mid=mid, first=first)
        if len(id3) == 0:
            raise Exception('[ERROR] : Remaining time steps do not reach the minimum length of %i points' % int(N_min))
        if (len(id3) != len(id2)):
            message(2, '%i time steps not reaching the minimum length of %i points have been removed' % (len(id2) - len(id3), int(N_min)), verbose=verbose)

        res = (dumsla, ok[id1[id2[id3]]])
    else:
        res = (dumsla, ngaps, gaplen)

    nt = res[0].shape[0]
    if (nt != ntinit):
        message(1, '%i time steps on %i removed by data pre-processing' % (ntinit - nt, ntinit), verbose=verbose)

    if return_lonlat:
        res += (lonout, latout)
    if return_interpolated:
        res += (dumint,)

    return res
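# Illustrative sketch (not part of the original module, and dependent on the
# behaviour of the imported grid_track): typical call on a (time x points)
# masked SLA array; gaps no longer than max_gap are interpolated over,
# longer ones discard the whole time step.
def _example_preprocess():
    lon = np.linspace(3.0, 6.0, 100)
    lat = np.linspace(42.0, 40.0, 100)
    sla = np.ma.array(np.random.randn(20, 100), mask=np.zeros((20, 100), dtype=bool))
    sla[:, 40:42] = np.ma.masked              #a short gap on every pass
    dumsla, kept = preprocess(lat, lon, sla, max_gap=3, N_min=64)
    print 'kept {0} of {1} time steps'.format(len(kept), sla.shape[0])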
def get_segment(sla, N, last=True, mid=None, first=None, remove_edges=True, truncate_if_continents=True):
    '''
    Intelligent segmentation of data.

    :keyword remove_edges: discard data at track edges.
    :keyword truncate_if_continents: Force truncating data if a continent is found within a segment of data.
    :keyword last: Get segments of data anchored to the last point in track
    :keyword first: Get segments of data anchored to the first point in track
    :keyword mid: Get segments of data anchored to the middle point in track
    '''

    #Set defaults
    if first is not None:
        last = None
        mid = None
    elif mid is not None:
        last = None
        first = None

    dumsla = sla.copy()
    nx = sla.shape[1]
    nt = sla.shape[0]

    #Save input mask
    dumsla.data[dumsla.mask] = dumsla.fill_value
    if len(dumsla.mask.shape) > 0:
        mask = np.ma.array(dumsla.mask.copy(), mask=np.zeros(sla.shape, dtype=bool))
    else:
        mask = np.array([dumsla.mask] * sla.size).reshape(sla.shape)
        mask = np.ma.array(mask, mask=np.zeros(sla.shape, dtype=bool))
    dumsla.mask = mask

    #Get edges
    if remove_edges:
        xid = np.ma.array(np.repeat(np.arange(nx), nt).reshape(nx, nt).transpose(), mask=mask.data)
    else:
        xid = np.ma.array(np.repeat(np.arange(nx), nt).reshape(nx, nt).transpose(), mask=np.zeros(sla.shape, dtype=bool))
    left = xid.min(axis=1)
    right = xid.max(axis=1)

    #Anchor the segment at the end, the middle or the start of the track
    if last:
        st = (right - N).astype(int) + 1
        en = (right).astype(int) + 1
    elif mid:
        midpt = nx / 2
        odd = bool(int(N) & 1)
        if not odd:
            nr = nl = int(N) / 2
        else:
            nr = int(N) / 2 + 1
            nl = int(N) / 2
        st = np.repeat(midpt - nl, nt)
        en = np.repeat(midpt + nr, nt)
    elif first:
        st = (left).astype(int)
        en = (left + N).astype(int)

    if not remove_edges:
        st[st < 0] = 0
        en[en > nx] = nx

    for i in np.arange(nt):
        dumsla.mask[i, :st[i]] = True
        dumsla.mask[i, en[i]:] = True
        mask.mask[i, :st[i]] = True
        mask.mask[i, en[i]:] = True

    #Update nt
    cycempty = dumsla.mask.sum(axis=1) == N
    ind = np.arange(nt)[~cycempty]
    nt = (~cycempty).sum()

    #Reform stuff
    dumsla = dumsla.compressed().reshape(nt, N)
    mask = mask.compressed().reshape(nt, N)

    if truncate_if_continents:
        empty = mask.sum(axis=0) == nt
        if empty.sum() > 0:
            dumsla = dumsla[:, ~empty]
            mask = mask[:, ~empty]
            print '[WARNING] Points over land mass - removed {} pts'.format(empty.sum())

    return np.ma.array(dumsla, mask=mask), ind
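# Illustrative sketch (not part of the original module): cut 50-point
# segments anchored at the end of each pass; everything else is masked out.
def _example_get_segment():
    data = np.ma.array(np.random.randn(10, 80), mask=np.zeros((10, 80), dtype=bool))
    data.mask[:, :5] = True                   #masked leading edge
    segments, kept = get_segment(data, 50, last=True)
    print 'segment shape :', segments.shape   #-> (10, 50)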
def get_slope(fq, spec, degree=1, frange=None, threshold=0.):
    """
    GET_SLOPE

    :summary: This function returns the spectral slope of a spectrum using a least-square regression

    :parameter fq: frequency
    :parameter spec: spectrum data

    :keyword degree: Degree of the least-square regression model
    :keyword frange: frequency range over which to fit (required for 2D spectra)
    :keyword threshold: maximum fraction of masked data allowed for a fit to be flagged as valid (2D spectra)

    :return:
       * slope : spectral slope (or model coefficients for a higher order model)
       * intercept : Energy at unit frequency (1 cpkm)

    :author: Renaud DUSSURGET (RD) - LER/PAC, Ifremer
    :change: Created by RD, August 2012
    """

    sh = spec.shape
    ndims = len(sh)

    if ndims == 1:
        x = np.log10(fq).flatten()
        y = np.log10(spec).flatten()

        #1) Linear least-square regression
        if degree == 1:
            (slope, intercept, rval, pval, err) = stats.linregress(x, y)

        #2) Least-square regression using a higher-order spectral model
        #cf. http://pingswept.org/2009/01/24/least-squares-polynomial-fitting-in-python/
        else:
            A = np.vander(x, degree + 1)  # form the Vandermonde matrix
            (coeffs, residuals, rank, sing_vals) = np.linalg.lstsq(A, y)  # find the x that minimizes the norm of Ax-y
            slope = coeffs[:-1]           # polynomial coefficients, highest order first
            intercept = coeffs[-1]

        return slope, intercept

    else:
        x = np.log10(fq[(fq > np.min(frange)) & (fq <= np.max(frange))]).flatten()
        y = np.log10(spec[(fq > np.min(frange)) & (fq <= np.max(frange)), :])
        nx = len(x)
        nt = sh[1]

        out = []
        for i in np.arange(nt):
            (slope, intercept, rval, pval, err) = stats.linregress(x, y[:, i])
            flag = (y.mask[:, i].sum(dtype=float) / nx) <= threshold
            out.append((slope, intercept, flag))
        return out
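# Illustrative sketch (not part of the original module): recover the slope of
# a synthetic k^-2 power-law spectrum with the default linear regression.
def _example_get_slope():
    fq = np.linspace(0.01, 0.5, 100)
    spec = fq ** -2.0
    slope, intercept = get_slope(fq, spec)
    print 'fitted slope : {0:.2f} (expected -2.00)'.format(slope)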
def yule_walker(acf, orden):
    """
    Program to solve the Yule-Walker equations for AutoRegressive Models

    :author: XAVI LLORT (llort(at)grahi.upc.edu)
    :created: MAY 2007
    :changes: adapted to python by R. Dussurget

    :parameter acf: AutoCorrelation Function
    :parameter orden: Order of the AutoRegressive Model

    :return:
       * parameters : AR parameters
       * sigma_e : Standard deviation of the noise term
    """

    if len(acf) < orden + 1:
        raise Exception('ACF too short for the requested order!')

    bb = acf[1:orden + 1]

    aa = np.zeros((orden, orden))
    for ii in np.arange(0, orden):
        for jj in np.arange(0, orden):
            aa[ii, jj] = acf[np.int(np.abs(ii - jj))]
    aa_1 = np.linalg.inv(aa)

    parameters = np.ma.dot(bb, aa_1)  #Compliant with IDL aa_1#bb

    s = acf[0] - np.sum(parameters * bb)
    if s < 0:
        print "Warning : bad parameters - spectrum not computed"
        sigma_e = np.nan
    else:
        sigma_e = np.sqrt(s)

    return parameters, sigma_e
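# Illustrative sketch (not part of the original module): for an AR(1) process
# the ACF is phi**lag, so Yule-Walker should return phi itself.
def _example_yule_walker():
    phi = 0.8
    acf = phi ** np.arange(10)                #theoretical AR(1) autocorrelation
    parameters, sigma_e = yule_walker(acf, 1)
    print 'estimated phi : {0:.2f} (expected 0.80)'.format(parameters[0])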
def ARspec(F, deg, a, sig):
    """
    Evaluate the spectral density of an AR model from its parameters.

    :parameter F: relative frequencies (NF x NT, or 1D for a single spectrum)
    :parameter deg: order of the AR model
    :parameter a: AR parameters (NT x deg)
    :parameter sig: standard deviation of the noise term
    """

    sh = F.shape
    if len(sh) == 1:
        NF, nt = sh[0], 1
        F = F.reshape(NF, nt)
        a = np.atleast_2d(a)    #parameters as a (1, deg) array
        sig = np.atleast_1d(sig)
    else:
        NF, nt = sh

    p = np.arange(1, deg + 1)

    arspec = np.ma.masked_array(np.ones((NF, nt)), mask=False)
    sig = np.ma.array(sig, mask=np.isnan(sig))

    for ii in xrange(nt):
        for f in np.arange(NF, dtype=int):
            arspec[f, ii] = sig[ii]**2 / (np.abs(1. - np.sum(a[ii, p - 1] * np.exp(-2.0 * np.pi * 1j * p * F[f, ii])))**2)

    if len(sh) == 1:
        arspec = np.squeeze(arspec)

    return arspec
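# Illustrative sketch (not part of the original module): evaluate an AR(1)
# model spectrum on a small frequency grid.
def _example_ARspec():
    F = np.linspace(0.01, 0.5, 50).reshape(50, 1)  #one spectrum, 50 frequencies
    a = np.array([[0.8]])                          #AR(1) parameter
    sig = np.array([0.6])                          #noise standard deviation
    spec = ARspec(F, 1, a, sig)
    print 'spectrum shape :', spec.shape           #-> (50, 1)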
def yule_walker_regression(dx, Y, deg, res=None):
    """
    YULE_WALKER_REGRESSION : Estimation of an AR (autoregressive) spectral model from data

    :parameter dx: sampling interval
    :parameter Y: stationary time series
    :parameter deg: AR model degree

    :return: a dictionary with
       * fq : frequency
       * ar : fitted AR time series
       * esd : fitted spectral model (Energy Spectral Density)
       * psd : fitted spectral model (Power Spectral Density)

       The AR parameters (a, sig), the model order and the information criteria (AICc, AIC, BIC) are attached to esd and psd as their ``model`` attribute.

    .. note:: To know more about Yule-Walker and autoregressive methods, see

       * `Example of AR(p) model auto-regression using yule-walker equations <http://www-ssc.igpp.ucla.edu/personnel/russell/ESS265/Ch9/autoreg/node7.html>`_
       * `Other notes on the autoregressive method <http://www.ee.lamar.edu/gleb/adsp/Lecture%2009%20-%20Parametric%20SE.pdf>`_

    :example: IDL example :

    .. code-block:: IDL

       ;Define an n-element vector of time-series samples
       X = [6.63, 6.59, 6.46, 6.49, 6.45, 6.41, 6.38, 6.26, 6.09, 5.99, $
            5.92, 5.93, 5.83, 5.82, 5.95, 5.91, 5.81, 5.64, 5.51, 5.31, $
            5.36, 5.17, 5.07, 4.97, 5.00, 5.01, 4.85, 4.79, 4.73, 4.76]

       ;Compute auto-correlation function
       acorr=A_CORRELATE(X,INDGEN(30))

       ;Solve YW equation to get auto-regression coefficients for AR(2) model
       YULE_WALKER, acorr, 2, a, sig

       ;Process auto-regression model
       ar=DBLARR(28)
       FOR i = 2, 29 DO ar[i-2] = SQRT(a[0]*X[i-1]*X[i] + a[1]*x[i-2]*x[i]+sig*x[i])

       ;Compute spectrum
       spec=spectrogram(TRANSPOSE(X), INDGEN(N), WSIZE=N, OVERLAY=1.0, DISPLAY_IMAGE=0)

       ;Compute AR(2) model spectrum
       ar2=spectrogram(TRANSPOSE(ar), INDGEN(28), WSIZE=28, OVERLAY=1.0, DISPLAY_IMAGE=0)
    """

    #Defaults
    a = 0
    sig = 0

    #If RES (frequency resolution) is set,
    #then process the spectrum on a regular wavenumber array
    if res is None:
        res = 1.0

    #Demean first (on a copy, so the caller's array is left untouched)
    Y = Y - np.mean(Y)
    N = len(Y)

    #Get the autocorrelation function (circular estimate)
    gamma = np.zeros(N)
    lag = np.arange(N)
    for i, l in enumerate(lag):
        gamma[i] = np.corrcoef(Y, np.roll(Y, l))[0][1]

    #Get frequency & wavenumber
    [F, L, imx] = get_kx(N, 1)      #relative frequencies
    [Fout, L, imx] = get_kx(N, dx)  #dimensional frequencies
    F = F[1:imx - 1]
    Fout = Fout[1:imx - 1]
    NF = len(F)
    df = F[1] - F[0]
    dfout = Fout[1] - Fout[0]

    #Solve the Yule-Walker equations
    a, sig = yule_walker(gamma, deg)

    #Process the auto-regression model
    ar = np.ma.masked_array(np.zeros(N), mask=np.zeros(N, dtype=bool))
    argamma = np.ma.masked_array(np.zeros(N), mask=np.zeros(N, dtype=bool))
    arspec = np.ma.masked_array(np.ones(NF), mask=np.zeros(NF, dtype=bool))
    ar[0:deg] = np.ma.masked
    argamma[0:deg] = np.ma.masked

    p = np.arange(1, deg + 1)

    #Compute the modeled time series
    #cf. http://www-ssc.igpp.ucla.edu/personnel/russell/ESS265/Ch9/autoreg/node9.html
    for t in np.arange(deg, N, dtype=int):
        ar[t] = np.sum(a * Y[t - p])
        argamma[t] = np.sum(a * gamma[t - p])

    #Compute the model spectrum
    for t in np.arange(NF, dtype=int):
        try:
            arspec[t] = sig**2 / (np.abs(1. - np.sum(a[p - 1] * np.exp(-2.0 * np.pi * 1j * p * F[t])))**2)
        except ValueError:
            pass

    #Normalize to conserve appropriate levels of energy
    fac = np.ma.array(len(F)) / arspec.sum()
    arspec /= fac

    esd = arspec
    psd = arspec / dfout

    #We use the AICc (http://pages.stern.nyu.edu/~churvich/TimeSeries/Handouts/AICC.pdf) to define the optimal model
    logf = np.ma.log if isinstance(arspec, np.ma.masked_array) else np.log
    aicc = N * (logf(sig**2) + 1) + 2 * (deg + 1) * (np.float64(N) / (N - deg - 2))
    aic = N * (logf(2 * sig**2) + 1) + 2 * deg  #from http://www-ssc.igpp.ucla.edu/personnel/russell/ESS265/Ch9/autoreg/node15.html
    bic = N * logf(sig**2) + deg * logf(N)

    params = {'description': 'AR model parameters', 'parameters': a, 'sig': sig, 'deg': deg, 'n': N, 'aicc': aicc, 'aic': aic, 'bic': bic}
    setattr(psd, 'model', params)
    setattr(esd, 'model', params)
    setattr(ar, 'model', params)

    outStr = {'_dimensions': {'_ndims': 3, 'N': N, 'NF': NF, 'P': deg},
              'fq': Fout,
              'ar': ar,
              'esd': esd,
              'psd': psd}

    return outStr
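# Illustrative sketch (not part of the original module): fit an AR(2)
# spectral model to a synthetic red-noise series and inspect the model terms.
def _example_yule_walker_regression():
    np.random.seed(0)
    y = np.cumsum(np.random.randn(128))   #strongly autocorrelated series
    out = yule_walker_regression(1.0, y, 2)
    print 'AR parameters :', out['psd'].model['parameters']
    print 'AICc          :', out['psd'].model['aicc']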
def optimal_AR_spectrum(dx, Y, ndegrees=None, return_min=True):
    '''
    Get the optimal order AR spectrum by minimising the BIC.
    '''

    if ndegrees is None:
        ndegrees = len(Y) - 3  #Highest order keeping the AICc denominator (N-deg-2) positive

    aicc = np.zeros(ndegrees)  #float storage (integer arrays would truncate the criteria)
    aic = aicc.copy()
    bic = aicc.copy()
    tmpStr = []
    for i in np.arange(1, ndegrees + 1):
        dum = yule_walker_regression(dx, Y, i)
        tmpStr.append(dum)
        aicc[i - 1] = (tmpStr[i - 1])['esd'].model['aicc']
        aic[i - 1] = (tmpStr[i - 1])['esd'].model['aic']
        bic[i - 1] = (tmpStr[i - 1])['esd'].model['bic']

    if return_min:
        return np.argmin(bic) + 1
    else:
        return {'aicc': aicc, 'aic': aic, 'bic': bic}
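# Illustrative sketch (not part of the original module): pick the AR order
# minimising the BIC for a short series.
def _example_optimal_AR_spectrum():
    np.random.seed(1)
    y = np.cumsum(np.random.randn(100))
    best = optimal_AR_spectrum(1.0, y, ndegrees=6)
    print 'optimal AR order :', best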