1 ## Copyright (C) 2006 Michel D. Schmid <michaelschmid@users.sourceforge.net>
4 ## This program is free software; you can redistribute it and/or modify it
5 ## under the terms of the GNU General Public License as published by
6 ## the Free Software Foundation; either version 2, or (at your option)
9 ## This program is distributed in the hope that it will be useful, but
10 ## WITHOUT ANY WARRANTY; without even the implied warranty of
11 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 ## General Public License for more details.
14 ## You should have received a copy of the GNU General Public License
15 ## along with this program; see the file COPYING. If not, see
16 ## <http://www.gnu.org/licenses/>.
19 ## @deftypefn {Function File} {}@var{Jj} = __calcjacobian (@var{net},@var{Im},@var{Nn},@var{Aa},@var{vE})
20 ## This function calculates the jacobian matrix. It's used inside the
21 ## Levenberg-Marquardt algorithm of the neural network toolbox.
22 ## PLEASE DO NOT USE IT ELSEWHERE, it probably will not work!
25 ## Author: Michel D. Schmid
28 function [Jj] = __calcjacobian(net,Im,Nn,Aa,vE)
## Compute the Jacobian Jj of the network errors with respect to every
## weight and bias, for use by the Levenberg-Marquardt training loop.
## Inputs:
##   net - neural network structure (fields IW, LW, numLayers, layers, ...)
##   Im  - input matrix, one column per training sample
##   Nn  - cell array: per-layer net input (values BEFORE the transfer fcn)
##   Aa  - cell array: per-layer output (values AFTER the transfer fcn)
##   vE  - error matrix of the current forward pass
31 ## - return value Jj is jacobi matrix
32 ## for this calculation, see "Neural Network Design; Hagan, Demuth & Beale page 12-45"
35 ## check range of input arguments
36 error(nargchk(5,5,nargin))
38 ## get signals from inside the network
41 ## calculate some help matrices
## weighted input of the first layer: input weights times input samples
42 mInputWeight = net.IW{1} * Im;
43 nLayers = net.numLayers;
## weighted input of each later layer: layer weights times the previous
## layer's output (the enclosing loop over i is elided from this view)
45 mLayerWeight{i,1} = net.LW{i,i-1} * Aa{i-1,1};
48 ## calculate number of columns and rows in jacobi matrix
49 ## firstly, number of columns
50 a = ones(nLayers+1,1); # +1 is for the input
51 a(1) = net.inputs{1}.size;
52 for iLayers = 1:nLayers
53 a(iLayers+1) = net.layers{iLayers}.size;
55 nColumnsJacobiMatrix = 0;
## each layer contributes (fan-in + 1 bias) * (neurons in layer) columns
56 for iLayers = 1:nLayers
57 nColumnsJacobiMatrix = (a(iLayers)+1)*a(iLayers+1) + nColumnsJacobiMatrix;
59 ## secondly, number of rows
## NOTE(review): the parameter is "vE" but "ve" is used here and below;
## Octave identifiers are case-sensitive, so unless "ve" is assigned in a
## line elided from this view, this is an undefined-variable bug -- confirm.
61 nRowsJacobiMatrix = length(ve(:));
64 ## FIRST STEP -----------------------------------------------------
65 ## calculate the neuron outputs without the transfer function
66 ## - n1_1 = W^1*a_1^0+b^1: the ^x factor defined the xth train data set
67 ## the _x factor defines the layer
68 ## ********** this data should be held in Nn
69 ## ********** should be calculated in "__calcperf"
70 ## ********** so Nn{1} means hidden layer
71 ## ********** so Nn{2} means second hidden layer or output layer
72 ## ********** and so on ...
73 ## END FIRST STEP -------------------------------------------------
75 ## now we can rearrange the signals ... this will be done only for
76 ## matrix calculation ...
77 [nRowsError nColumnsError] = size(ve);
78 errorSize = size(ve(:),1); # this will calculate, if only one row
79 # of errors exist... in other words... two rows will be rearranged to
80 # one row with the same number of elements.
## index vector that replicates each sample column once per target row
81 rerangeIndex = floor([0:(errorSize-1)]/nRowsError)+1;
82 nLayers = net.numLayers;
## expand every per-layer signal to the rearranged column layout
85 Nn{i,1} = Nn{i,1}(:,rerangeIndex);
86 Aa{i,1} = Aa{i,1}(:,rerangeIndex);
87 [nRows nColumns] = size(Nn{i,1});
## replicate the bias vector across all sample columns; NOTE(review):
## bTemp is presumably net.b{i}, assigned in an elided line -- confirm
89 bias{i,1} = repmat(bTemp,1,nColumns);
90 bias{i,1} = bias{i,1}(:,rerangeIndex);
92 mInputWeight = mInputWeight(:,rerangeIndex);
94 mLayerWeight{i,1} = mLayerWeight{i,1}(:,rerangeIndex);
96 Im = Im(:,rerangeIndex);
98 ## define how the errors are connected
99 ## ATTENTION! this happens in row order...
100 numTargets = net.numTargets;
101 mIdentity = -eye(numTargets);
102 cols = size(mIdentity,2);
## tile the negated identity so there is one block per error column
103 mIdentity = mIdentity(:,rem(0:(cols*nColumnsError-1),cols)+1);
104 errorConnect = cell(net.numLayers,1);
## slice the tiled identity into one row-band per layer (loop header and
## startPos initialisation are elided from this view)
107 targSize = net.layers{i}.size;
108 errorConnect{i} = mIdentity(startPos+[1:targSize],:);
109 startPos = startPos + targSize;
112 ## SECOND STEP ----------------------------------------------
113 ## define and calculate the derivative matrix dF
114 ## - this is "done" by the two first derivative functions
115 ## of the transfer functions
116 ## e.g. __dpureline, __dtansig, __dlogsig and so on ...
118 ## calculate the sensitivity matrix tildeS
119 ## start at the end layer, this means of course the output layer,
120 ## the transfer function is selectable
122 ## for calculating the last layer
123 ## this should happen like following:
124 ## tildeSx = -dFx(n_x^x)
125 ## use mIdentity to calculate the number of targets correctly
126 ## for all other layers, use instead:
127 ## tildeSx(-1) = dF1(n_x^(x-1))(W^x)' * tildeSx;
129 for iLayers = nLayers:-1:1 # this will count from the last
130 # layer to the first layer ...
131 n = Nn{iLayers}; # nLayers holds the value of the last layer...
132 ## which transfer function should be used?
133 if (iLayers==nLayers)
## output layer: sensitivity = transfer derivative .* tiled identity.
## The "case" labels of this switch are elided from this view; each
## branch evaluates the derivative of one supported transfer function.
134 switch(net.layers{iLayers}.transferFcn)
136 tildeSxTemp = __dradbas(n);
138 tildeSxTemp = __dpurelin(n);
141 tildeSxTemp = __dtansig(n);
144 tildeSxTemp = __dlogsig(n);
146 error(["transfer function argument: " net.layers{iLayers}.transferFcn " is not valid!"])
148 tildeSx{iLayers,1} = tildeSxTemp .* mIdentity;
## bias sensitivities of the output layer use the same derivative
150 switch(net.layers{iLayers}.transferFcn)
152 tildeSbxTemp = __dradbas(n);
154 tildeSbxTemp = __dpurelin(n);
157 tildeSbxTemp = __dtansig(n);
160 tildeSbxTemp = __dlogsig(n);
162 error(["transfer function argument: " net.layers{iLayers}.transferFcn " is not valid!"])
164 tildeSbx{iLayers,1} = tildeSbxTemp .* mIdentity;
## hidden layers: back-propagate the sensitivity through the next
## layer's weights (the "tildeSx(-1) = ..." recursion described above)
169 switch(net.layers{iLayers}.transferFcn) ######## new lines ...
175 dFx = __dpurelin(nx);
176 case "tansig" ######## new lines ...
179 case "logsig" ######## new lines ...
180 nx = logsig(n); ######## new lines ...
181 dFx = __dlogsig(nx); ######## new lines ...
182 otherwise ######## new lines ...
183 error(["transfer function argument: " net.layers{iLayers}.transferFcn " is not valid!"])######## new lines ...
184 endswitch ############# new lines ....
185 LWtranspose = net.LW{iLayers+1,iLayers};
186 if iLayers<(nLayers-1)
## deeper hidden layers: mIdentity is re-used here as an all -1 mask
## sized to this layer (overwritten on each pass of the loop)
187 mIdentity = -ones(net.layers{iLayers}.size,size(mIdentity,2));
190 mTest = tildeSx{iLayers+1,1};
191 LWtranspose = LWtranspose' * mTest;
192 tildeSx{iLayers,1} = dFx .* LWtranspose;
193 tildeSxTemp = dFx .* LWtranspose;
194 tildeSbx{iLayers,1} = ones(size(nx)).*tildeSxTemp;
197 endfor # if iLayers = nLayers:-1:1
198 ## END SECOND STEP -------------------------------------------------
200 ## THIRD STEP ------------------------------------------------------
201 ## some problems occur if we have more than only one target... so how
202 ## does the jacobi matrix looks like?
204 ## each target will cause an extra row in the jacobi matrix, for
205 ## each training set.. this means, 2 targets --> double of rows in the
206 ## jacobi matrix ... 3 targets --> three times the number of rows like
207 ## with one target and so on.
209 ## now calculate jacobi matrix
210 ## to do this, define first the transposed of it
211 ## this makes it easier to calculate on the "batch" way, means all inputs
212 ## at the same time...
213 ## and it makes it easier to use the matrix calculation way..
215 JjTrans = zeros(nRowsJacobiMatrix,nColumnsJacobiMatrix)'; # transposed jacobi matrix
## build the weight gradients: gIW w.r.t. input weights, gLW w.r.t.
## layer weights, one cell per layer
218 for i=1:net.numLayers
221 newTemps = tildeSx{i,1};
## element-wise product of replicated sensitivities and replicated
## inputs yields one row per weight element; NOTE(review): newInputs
## is assigned in lines elided from this view -- confirm its source
222 gIW{i,1} = copyRows(newTemps,net.inputs{i}.size) .* copyRowsInt(newInputs,net.layers{i}.size);
225 Ad = cell2mat(Aa(i-1,1)');
227 newTemps = tildeSx{i,1};
228 gLW{i,1} = copyRows(newTemps,net.layers{i-1}.size) .* copyRowsInt(newInputs,net.layers{i}.size);
## assemble JjTrans layer by layer: weight rows, then bias rows
232 for i=1:net.numLayers
233 [nRows, nColumns] = size(Im);
235 nWeightElements = a(i)*a(i+1); # n inputs * n hidden neurons
236 JjTrans(1:nWeightElements,:) = gIW{i}(1:nWeightElements,:);
237 nWeightBias = a(i+1);
238 start = nWeightElements;
239 JjTrans(start+1:start+nWeightBias,:) = tildeSbx{i,1};
240 start = start+nWeightBias;
243 nLayerElements = a(i)*a(i+1); # n hidden neurons * n output neurons
244 JjTrans(start+1:start+nLayerElements,:)=gLW{i}(1:nLayerElements,:);
245 start = start + nLayerElements;
## NOTE(review): nLayerBias is assigned in an elided line, presumably
## a(i+1) -- confirm before relying on it
247 JjTrans(start+1:start+nLayerBias,:) = tildeSbx{i,1};
248 start = start + nLayerBias;
252 ## END THIRD STEP -------------------------------------------------
255 #=======================================================
257 # additional functions
259 #=======================================================
261 function k = copyRows(k,m)
262 # make copies of the ROWS of Aa matrix
# Returns k with its whole row set replicated m times in stacked order:
# [k; k; ...; k] (m copies), via the rem()-based row index vector below.
# NOTE(review): mRows (the original row count of k) is assigned in a line
# elided from this view, presumably mRows = size(k,1) -- confirm.
265 k = k(rem(0:(mRows*m-1),mRows)+1,:);
268 # -------------------------------------------------------
270 function k = copyRowsInt(k,m)
271 # make copies of the ROWS of matrix with elements INTERLEAVED
# Returns k with each individual row repeated m times consecutively
# (row1 x m, row2 x m, ...), i.e. interleaved rather than stacked --
# the complement of copyRows, via the floor()-based index vector below.
# NOTE(review): mRows is assigned in a line elided from this view,
# presumably mRows = size(k,1) -- confirm.
274 k = k(floor([0:(mRows*m-1)]/m)+1,:);
277 # =====================================================================
279 # END additional functions
281 # =====================================================================