-
Notifications
You must be signed in to change notification settings - Fork 313
/
MF_CompareAR.m
152 lines (135 loc) · 5.53 KB
/
MF_CompareAR.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
function out = MF_CompareAR(y,orders,testHow)
% MF_CompareAR Compares model fits of various orders to a time series.
%
% Uses functions from Matlab's System Identification Toolbox: iddata, arxstruc,
% and selstruc
%
%---INPUTS:
% y, vector of time-series data
% orders, a vector of possible model orders
% testHow, specify a fraction, or provide a string 'all' to train and test on
% all the data
%
%---OUTPUTS: statistics on the loss at each model order, which are obtained by
% applying the model trained on the training data to the testing data.
% ------------------------------------------------------------------------------
% Copyright (C) 2020, Ben D. Fulcher <ben.d.fulcher@gmail.com>,
% <http://www.benfulcher.com>
%
% If you use this code for your research, please cite the following two papers:
%
% (1) B.D. Fulcher and N.S. Jones, "hctsa: A Computational Framework for Automated
% Time-Series Phenotyping Using Massive Feature Extraction, Cell Systems 5: 527 (2017).
% DOI: 10.1016/j.cels.2017.10.001
%
% (2) B.D. Fulcher, M.A. Little, N.S. Jones, "Highly comparative time-series
% analysis: the empirical structure of time series and their methods",
% J. Roy. Soc. Interface 10(83) 20130048 (2013).
% DOI: 10.1098/rsif.2013.0048
%
% This function is free software: you can redistribute it and/or modify it under
% the terms of the GNU General Public License as published by the Free Software
% Foundation, either version 3 of the License, or (at your option) any later
% version.
%
% This program is distributed in the hope that it will be useful, but WITHOUT
% ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
% FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
% details.
%
% You should have received a copy of the GNU General Public License along with
% this program. If not, see <http://www.gnu.org/licenses/>.
% ------------------------------------------------------------------------------
% ------------------------------------------------------------------------------
%% Check that a System Identification Toolbox license is available:
% ------------------------------------------------------------------------------
BF_CheckToolbox('identification_toolbox')
% Preliminaries
doPlot = false; % set to true to plot outputs
N = length(y); % length of time series, N
%% Check Inputs
% (1) Time series, y
% Convert y to time series object
y = iddata(y,[],1);
% (2) Model orders:
if nargin < 2 || isempty(orders)
orders = (1:10)';
end
if size(orders,1) == 1
orders = orders'; % make sure a column vector
end
% (3) testHow -- either all (trains and tests on the whole time series);
% or a proportion of the time series to train on; will test on the
% remaining portion.
if nargin < 3 || isempty(testHow)
testHow = 'all';
end
% ------------------------------------------------------------------------------
%% Run
% ------------------------------------------------------------------------------
% Get normalized prediction errors, V, from training to test set for each
% model order
% This could be done for model residuals using code in, say,
% MF_StateSpaceCompOrder, or MF_StateSpace_n4sid...
if ischar(testHow)
if strcmp(testHow,'all')
yTrain = y;
yTest = y;
else
error('Unknown testing set specifier ''%s''',testHow);
end
else
% use first <proportion> to train, rest to test
co = floor(N*testHow); % cutoff
yTrain = y(1:co);
yTest = y(co+1:end);
end
V = arxstruc(yTrain,yTest,orders);
% ------------------------------------------------------------------------------
%% Output
% ------------------------------------------------------------------------------
% Statistics on V, which contains loss functions at each order (normalized sum of
% squared prediction errors)
v = V(1,1:end-1); % the loss function vector over the range of orders
out.maxv = max(v);
out.minv = min(v);
out.meanv = mean(v);
out.medianv = median(v);
out.firstonmin = v(1)/min(v);
out.maxonmed = max(v)/median(v);
out.meandiff = mean(diff(v));
out.stddiff = std(diff(v));
out.maxdiff = max(abs(diff(v)));
out.meddiff = median(diff(v));
% where does it steady off?
stdfromi = zeros(length(v),1);
for i = 1:length(stdfromi)
stdfromi(i) = std(v(i:end))/sqrt(length(v)-i+1);
end
out.minstdfromi = min(stdfromi(stdfromi > 0));
if isempty(out.minstdfromi), out.minstdfromi = NaN; end
out.where01max = find(stdfromi<max(stdfromi)*0.1,1,'first');
if isempty(out.where01max), out.where01max = NaN; end
out.whereen4 = find(stdfromi < 1e-4,1,'first');
if isempty(out.whereen4), out.whereen4 = NaN; end
% ------------------------------------------------------------------------------
%% Plotting
% ------------------------------------------------------------------------------
if doPlot
plot(v);
plot(stdfromi,'r');
end
% ------------------------------------------------------------------------------
%% Use selstruc function to obtain 'best' order measures
% ------------------------------------------------------------------------------
% Get specific 'best' measures
[nn, vmod0] = selstruc(V,0); % minimizes squared prediction errors
out.best_n = nn;
[nn, vmodaic] = selstruc(V,'aic'); % minimize Akaike's Information Criterion (AIC)
out.aic_n = nn; % optimum model order minimizing AIC in the range given
out.bestaic = vmodaic(nn == min(nn));
% Using minimum description length is basically the same as using AIC:
% [nn, vmodmdl] = selstruc(V,'mdl'); % minimize Rissanen's Minimum Description Length (MDL)
% out.mdl_n = nn; % optimal model order minimizing MDL in the range given
% out.bestmdl = vmodmdl(nn == min(nn));
end