function [MeasureE,MeasureS] = NaroRummelSchindlerAndrzejak(Datavec,Setting,verbose)
% Matlab script to calculate the amplitude-based nonlinear prediction error E
% and rank-based nonlinear prediction score S as defined in Naro D, Rummel C,
% Schindler K, Andrzejak RG (2014): Detecting determinism with improved
% sensitivity in time series: Rank-based nonlinear predictability score.
% Phys. Rev. E. 90:032913
% If you use this source code, please make sure that you give a reference
% to the manuscript.
%
% INPUT
% => Datavec: the signal. It is expected in an (Nx1) column vector.
% => Setting: defines the parameter setting. Setting = 1 corresponds to the
%    one used in the manuscript. You can use ranges of parameters as in
%    Setting = 2 or Setting = 3 below. Using a range of k and h comes at
%    almost no additional computational cost.
% => verbose: activate (1) or deactivate (0) text output to screen.
%    Optional; defaults to 0 (silent) when omitted.
%
% OUTPUT
% => MeasureE: Amplitude-based nonlinear prediction error E, returned as a
%    length(mr) x length(tr) x length(kr) x length(hr) array.
% => MeasureS: Rank-based nonlinear prediction score S, same shape.
%    Both outputs are returned empty ([]) if Datavec is not a column vector.

% Default for the optional verbose flag: without this, omitting the third
% argument would only error deep inside the main loops.
if nargin < 3
    verbose = 0;
end

switch Setting
    case 1
        mr = 8;             % range of embedding dimension
        tr = 4;             % range of time delay
        kr = 5;             % range of nearest neighbors
        hr = 4;             % range of prediction horizons
        W  = 19;            % Theiler window
    case 2
        mr = 8;
        tr = 4;
        kr = [1, 5, 10];
        hr = [1, 2, 4, 8, 16, 32];
        W  = 19;
    case 3
        mr = 2:2:12;
        tr = [1,2,4,8];
        kr = [1,5,10];
        hr = [1,2,4,8,16,32];
        W  = 19;
    otherwise
        % Fail early with a clear message instead of a later
        % "Undefined variable mr" error.
        error('Setting must be 1, 2 or 3.');
end

if size(Datavec,1) == 1
    % Assign the outputs before returning so callers that request them do
    % not hit an "Output argument not assigned" error.
    MeasureE = [];
    MeasureS = [];
    disp('Data is expected in an (Nx1) vector. Stopping');
    return;
end

% Normalization to zero mean and unit standard deviation
Datavec = (Datavec-mean(Datavec))/std(Datavec);
% As in the paper, N represents the size of the signal
N = size(Datavec,1);
% Allocation of the memory: we will obtain results for
% two methods, and in dependence on four different parameters
MeasureE = zeros(length(mr),length(tr),length(kr),length(hr));
MeasureS = zeros(length(mr),length(tr),length(kr),length(hr));
% mc, tc (t for tau), hc, kc are vector indices used at the end to write
% the results.
mc = 0;
for m = mr
    mc = mc+1;
    tc = 0;
    for tau = tr
        tc = tc+1;
        % embwin: eta in the paper (length of the embedding window)
        embwin = (m-1)*tau;
        % N_NoEmb: number of indexes available, when avoiding
        % the first positions due to the embedding
        N_NoEmb = N - embwin;
        % As stated in section II.C.2, for the S method, we want
        % to know how many differences we have for each data position
        NCandidates = zeros(1,N_NoEmb);
        % We set first to the usual value, ...
        NCandidates(1:N_NoEmb) = N_NoEmb-(W*2+1);
        % ... and now apply the linear increases in the count of
        % differences for the first and last positions
        for counter = 1:W
            NCandidates(counter) = N_NoEmb-W-(counter);
            NCandidates(N_NoEmb-counter+1) = N_NoEmb-W-(counter);
        end
        % Determine distance matrices, WITH EMBEDDING
        % -embwin in writing, +embwin in reading; used for MeasureE
        % classic, and basis for ranks (predicted and predicting)
        DistmatEMB = zeros(N_NoEmb,N_NoEmb,'single');
        % We cannot construct an embedding vector for the tau first points,
        % therefore we avoid them.
        % STEP ONE
        % We first compute the differences between the first tau+1 points
        % for which we have an embedding vector...
        for counter1 = embwin+1:embwin+1+tau
            % ..., and all next points (respecting the Theiler window W)...
            for counter2 = counter1+1+W:N
                distA = 0;
                % ... iterating over all the elements of the embedding
                % window
                for index = -embwin:tau:0
                    distA = distA+(Datavec(counter1+index)-Datavec(counter2+index))^2;
                end
                DistmatEMB(counter1-embwin,counter2-embwin) = distA;
                DistmatEMB(counter2-embwin,counter1-embwin) = distA;
            end
        end
        % STEP TWO
        % We compute the other differences, by taking the ones computed in
        % step one, subtracting the differences between the embedding
        % elements which are not in that difference anymore, and adding
        % again those introduced by the new positions (counter1 and
        % counter2).
        for counter1 = embwin+2+tau:N
            for counter2 = counter1+1+W:N
                DistmatEMB(counter1-embwin,counter2-embwin) = DistmatEMB(counter1-embwin-tau,counter2-embwin-tau)-(Datavec(counter1-embwin-tau)-Datavec(counter2-embwin-tau))^2+(Datavec(counter1)-Datavec(counter2))^2;
                DistmatEMB(counter2-embwin,counter1-embwin) = DistmatEMB(counter1-embwin,counter2-embwin);
            end
        end
        % All those cells which still have the initial value are those for
        % which a difference was not computable, therefore we set them to
        % infinity: we do not want to take them into account in the next
        % operations. (NOTE(review): a genuinely zero distance between two
        % identical embedding vectors would also be excluded here; this
        % matches the original implementation.)
        DistmatEMB(DistmatEMB==0) = Inf;
        % Determine distance matrices, WITH EMBEDDING DONE
        % Determine distance matrices, WITHOUT EMBEDDING
        % This corresponds to equation (4) of the paper.
        % -embwin in writing, +embwin in reading; used for ranks of
        % predicted samples.
        DistmatNOEMB = zeros(N_NoEmb,N_NoEmb,'single');
        % To compute the differences without embedding, we just use a
        % double for-loop over all combinations.
        for counter1 = embwin+1:N
            for counter2 = counter1+1+W:N
                distA = (Datavec(counter1)-Datavec(counter2))^2;
                DistmatNOEMB(counter1-embwin,counter2-embwin) = distA;
                DistmatNOEMB(counter2-embwin,counter1-embwin) = distA;
            end
        end
        % Again we set the non-used values to infinity.
        DistmatNOEMB(DistmatNOEMB==0) = Inf;
        % Determine distance matrices, WITHOUT EMBEDDING DONE
        hc = 0;
        for H = hr
            hc = hc+1;
            % We will not take into account the last points, where we have
            % no data to look into the future: we reduce accordingly the
            % number of data points we will use.
            N_NoEmb_NoHor = N_NoEmb-H;
            % We sort, and store the order of the original indices, in
            % order to know:
            % - from the available points to be used as a prediction
            %   (i.e. cannot be in the last h indices), which are the most
            %   similar. This corresponds to matrix j of the paper.
            [~, IndexNearNeigh] = sort(DistmatEMB(1:N_NoEmb_NoHor,1:N_NoEmb_NoHor));
            % - without using embedding, which are the closest states
            [~, IndexPredScalar] = sort(DistmatNOEMB);
            MeasureEs = zeros(N_NoEmb_NoHor,1);
            MeasureSs = zeros(N_NoEmb_NoHor,1);
            ranksrs = zeros(N_NoEmb,N_NoEmb,'uint16');
            % For each position, we store the rank list for the given state
            N1 = 1:N_NoEmb;
            for s = 1:N_NoEmb
                % ranksrs corresponds to the matrix g of the paper
                ranksrs(IndexPredScalar(:,s),s) = N1;
            end
            kc = 0;
            for k = kr
                kc = kc+1;
                for j = embwin+1:N-H
                    % In order to read properly we have to use an index
                    % with an offset of -embwin: this is why we read at
                    % position C
                    C = j-embwin;
                    % We compute the value of the upper bound, equation (8)
                    % of the paper
                    A = (NCandidates(C)+1)/2;
                    % The previous value, minus the lower bound, is the
                    % denominator for the normalization of the results
                    B = A-(k+1)/2;
                    % We take the k closest neighbors, for time index j
                    % (offset into C), add to these the horizon H, and we
                    % correct again the reading position with an embwin
                    % offset.
                    predins = IndexNearNeigh(1:k,C)+H+embwin;
                    % First we compute for the error method: we take the
                    % average error for the given time step.
                    MeasureEs(C) = (Datavec(j+H)-sum(Datavec(predins))/k);
                    % For the Score measure, we compare the observed
                    % selection as in (5) with the upper bound A, and we
                    % then normalize it with the denominator B
                    MeasureSs(C) = (A-sum(ranksrs(predins-embwin,C+H))/k)/(B);
                end
                if verbose
                    disp([m,tau,k,H]);
                end
                % To complete the Error measure we take the root mean
                % square value as in (3)...
                MeasureE(mc,tc,kc,hc) = sqrt(mean(MeasureEs.^2));
                % ... and for the Score method, the mean as in (9).
                MeasureS(mc,tc,kc,hc) = mean(MeasureSs);
            end
        end
    end
end
end