1 ## Copyright (C) 2008 Michel D. Schmid <michaelschmid@users.sourceforge.net>
4 ## This program is free software; you can redistribute it and/or modify it
5 ## under the terms of the GNU General Public License as published by
6 ## the Free Software Foundation; either version 2, or (at your option)
9 ## This program is distributed in the hope that it will be useful, but
10 ## WITHOUT ANY WARRANTY; without even the implied warranty of
11 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 ## General Public License for more details.
14 ## You should have received a copy of the GNU General Public License
15 ## along with this program; see the file COPYING. If not, see
16 ## <http://www.gnu.org/licenses/>.
19 ## @deftypefn {Function File} {}[@var{mTrain}, @var{mTest}, @var{mVali}] = subset (@var{mData},@var{nTargets},@var{iOpti},@var{fTest},@var{fVali})
20 ## @code{subset} splits the main data matrix which contains inputs and targets into 2 or 3 subsets
21 ## depending on the parameters.
23 ## The first parameter @var{mData} must be in row order. This means if the network
24 ## contains three inputs, the matrix must have 3 rows and x columns to define the
25 ## data for the inputs, plus some more rows for the outputs (targets). E.g. a neural network
26 ## with three inputs and two outputs must have 5 rows with x columns!
27 ## The second parameter @var{nTargets} defines the number of rows which contain the target values!
28 ## The third argument @code{iOpti} is optional and can take one of three values:
30 ## 1: will randomise the column order and order the columns containing min and max values to be in the train set
31 ## 2: will NOT randomise the column order, but order the columns containing min and max values to be in the train set
32 ## default value is @code{1}
33 ## The fourth argument @code{fTest} is also optional and defines the fraction
34 ## of data sets that will be in the test set. Default value is @code{1/3}
35 ## The fifth parameter @code{fVali} is also optional and defines the fraction
36 ## of data sets that will be in the validation set. Default value is @code{1/6}
37 ## So with the default values, 50% of all data sets are used for training.
40 ## [mTrain, mTest, mVali] = subset(mData,1)
41 ## returns three subsets of the complete matrix
42 ## with randomized and optimized columns!
45 ## [mTrain, mTest] = subset(mData,1,1,1/3)
46 ## returns two subsets (fTest is given, fVali is not, so no validation set is used)
51 ## Author: Michel D. Schmid
54 function [mTrain, mTest, mVali] = subset(mData,nTargets,iOpti,fTest,fVali)
## Split the columns of mData into a train set, a test set and a
## validation set (mTrain, mTest, mVali).
## NOTE(review): several statements of this function (argument defaults,
## the conditions guarding the branches below, the loop headers) are not
## visible in this excerpt; comments that depend on them are marked as
## assumptions to be confirmed.
56 ## check range of input arguments: between 2 and 5 arguments are accepted
## NOTE(review): nargchk is obsolete in recent Octave releases (narginchk
## is the documented replacement) - confirm against the supported version.
57 error(nargchk(2,5,nargin))
59 ## check the input arguments: at least one row of targets is required
## (the guarding condition is not visible here; presumably it tests
## nTargets - TODO confirm)
61 error("No TARGETS defined! This doesn't make any sense for feed-forward neural networks! Please define at least one row of targets")
73 ## if fTest is set and nothing is set
74 ## for fVali, assume that fVali is not used!
78 ## calculate the number of train, test and validation sets
## fTrain is whatever fraction remains after the test and validation parts
79 fTrain = 1-fTest-fVali;
## the train and test set sizes are rounded down to whole columns
80 nTrainSets = floor(size(mData,2)*fTrain);
## number of columns not in the train set (not referenced again in the
## lines visible here)
81 diffRestSets = size(mData,2)-nTrainSets;
82 nTestSets = floor(size(mData,2)*fTest);
## the validation set takes all remaining columns, so every column of
## mData is assigned to exactly one of the three counts
83 nValiSets = size(mData,2)-nTrainSets-nTestSets;
86 ## now let's see if the matrix must be optimized!
90 # check whether a single optimizing run is enough!!
91 # maybe it's necessary to do it twice ..!
92 # check that all min and max values are in the train set ...!
## reorder the data with the helper, then take the first nTrainSets
## columns as the candidate train set
93 mData = __optimizedatasets(mData,nTrainSets,nTargets,iOpti);
94 mTrain = mData(:,1:nTrainSets);
## number of rows to inspect
95 iRuns = size(mTrain,1);
## compare the maximum of row i in the train set with the maximum of the
## same row over all data (the loop over i is not visible here)
99 if ( max(mTrain(i,:)) == max(mData(i,:)) )
108 # check whether a single optimizing run is enough!!
109 # maybe it's necessary to do it twice ..!
110 # check that all min and max values are in the train set ...!
## same steps as in the block above: reorder, take the train columns,
## then compare the row maxima
111 mData = __optimizedatasets(mData,nTrainSets,nTargets,iOpti);
112 mTrain = mData(:,1:nTrainSets);
113 iRuns = size(mTrain,1);
117 if (max(mTrain(i,:))==max(mData(i,:)))
126 ## in this case, iOpti must be 0 ==> nothing to do
129 endwhile #END OF while(bOptiAgain)
## two-subset result: train columns first, followed by the test columns
## (presumably used when no validation set is requested - the selecting
## condition is not visible here, TODO confirm)
135 mTrain = mData(:,1:nTrainSets);
136 mTest = mData(:,nTrainSets+1:nTrainSets+nTestSets);
## three-subset result: train columns, test columns, and every remaining
## column as the validation set
138 mTrain = mData(:,1:nTrainSets);
139 mTest = mData(:,nTrainSets+1:nTrainSets+nTestSets);
140 mVali = mData(:,nTrainSets+nTestSets+1:end);
145 %!shared matrix, nTargets, mTrain, mTest, mVali
146 %! disp("testing subset")
147 %! matrix = [1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 18 20; \
148 %! 0 2 4 1 3 5 3 4 1 -1 -2 -9 -1 10 12 20 11 11 11 11; \
149 %! -2 2 2 2 2 0 0 0 0 0 10 12 13 12 13 44 33 32 98 11; \
150 %! 0 0 0 0 1 1 1 1 0 0 1 1 1 0 0 1 1 1 0 0; \
151 %! 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4; \
152 %! 1 2 3 4 5 6 7 8 9 10 11 12 13 33 44 55 66 77 88 99];
153 %! nTargets = 1; # the last row is equivalent to the target values.
154 %! [mTrain, mTest, mVali] = subset(matrix,nTargets); # only the two required arguments are passed
155 %!assert(size(mTrain,2)==10);# 50% of 20
156 %!assert(size(mTest,2)==6);# 1/3 of 20 = 6 (floor)
157 %!assert(size(mVali,2)==4);# remaining columns: 20 - 10 - 6 = 4
158 %! # The column order cannot be tested with this call because
159 %! # randomizing is used! But all max and min values should be
160 %! # in the training set
161 %!assert(max(mTrain(1,:))==max(matrix(1,:)));
162 %!assert(min(mTrain(1,:))==min(matrix(1,:)));
163 %!assert(max(mTrain(2,:))==max(matrix(2,:)));
164 %!assert(min(mTrain(2,:))==min(matrix(2,:)));
165 %!assert(max(mTrain(3,:))==max(matrix(3,:)));
166 %!assert(min(mTrain(3,:))==min(matrix(3,:)));
167 %!assert(max(mTrain(4,:))==max(matrix(4,:)));
168 %!assert(min(mTrain(4,:))==min(matrix(4,:)));
171 %! [mTrain, mTest, mVali] = subset(matrix,nTargets,0); # iOpti = 0: the asserts below expect the original column order
172 %!assert(size(mTrain,2)==10);# 50% of 20
173 %!assert(size(mTest,2)==6);# 1/3 of 20 = 6 (floor)
174 %!assert(size(mVali,2)==4);# remaining columns: 20 - 10 - 6 = 4
175 %!assert(mTrain==matrix(:,1:10));
176 %!assert(mTest==matrix(:,11:16));
177 %!assert(mVali==matrix(:,17:20));
180 %! [mTrain, mTest, mVali] = subset(matrix,nTargets,2); # iOpti = 2: min and max values must end up in the train set
181 %!assert(size(mTrain,2)==10);# 50% of 20
182 %!assert(size(mTest,2)==6);# 1/3 of 20 = 6 (floor)
183 %!assert(size(mVali,2)==4);# remaining columns: 20 - 10 - 6 = 4
184 %!assert(max(mTrain(1,:))==max(matrix(1,:)));
185 %!assert(min(mTrain(1,:))==min(matrix(1,:)));
186 %!assert(max(mTrain(2,:))==max(matrix(2,:)));
187 %!assert(min(mTrain(2,:))==min(matrix(2,:)));
188 %!assert(max(mTrain(3,:))==max(matrix(3,:)));
189 %!assert(min(mTrain(3,:))==min(matrix(3,:)));
190 %!assert(max(mTrain(4,:))==max(matrix(4,:)));
191 %!assert(min(mTrain(4,:))==min(matrix(4,:)));
194 %! ## next test: data intended to need a second optimizing run
195 %! matrix = [1 2 3 4 5 6 7 20 8 10 11 12 13 14 15 16 17 18 18 9; \
196 %! 0 2 4 1 3 5 3 4 1 -1 -2 -9 -1 10 12 20 11 11 11 11; \
197 %! -2 2 2 2 2 0 0 0 0 0 10 12 13 12 13 44 33 32 98 11; \
198 %! 0 0 0 0 1 1 1 1 0 0 1 1 1 0 0 1 1 1 0 0; \
199 %! 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4; \
200 %! 1 2 3 4 5 6 7 8 9 10 11 12 13 33 44 55 66 77 88 99];
201 %! [mTrain, mTest, mVali] = subset(matrix,nTargets,2); # iOpti = 2: min and max values must end up in the train set
202 %!assert(max(mTrain(1,:))==max(matrix(1,:)));
203 %!assert(min(mTrain(1,:))==min(matrix(1,:)));
204 %!assert(max(mTrain(2,:))==max(matrix(2,:)));
205 %!assert(min(mTrain(2,:))==min(matrix(2,:)));
206 %!assert(max(mTrain(3,:))==max(matrix(3,:)));
207 %!assert(min(mTrain(3,:))==min(matrix(3,:)));
208 %!assert(max(mTrain(4,:))==max(matrix(4,:)));
209 %!assert(min(mTrain(4,:))==min(matrix(4,:)));
211 ## \todo: a lot more tests are needed to be sure that everything is working OK!!
212 ## all combinations of arguments must be tested!