octave_packages/econometrics-1.0.8/kernel_density.m

   1 # Copyright (C) 2006 Michael Creel <michael.creel@uab.es>
   2 #
   3 # This program is free software; you can redistribute it and/or modify
   4 # it under the terms of the GNU General Public License as published by
   5 # the Free Software Foundation; either version 2 of the License, or
   6 # (at your option) any later version.
   7 #
   8 # This program is distributed in the hope that it will be useful,
   9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  11 # GNU General Public License for more details.
  12 #
  13 # You should have received a copy of the GNU General Public License
  14 # along with this program; If not, see <http://www.gnu.org/licenses/>.
  15
  16 # kernel_density: multivariate kernel density estimator
  17 #
  18 # usage:
  19 #       dens = kernel_density(eval_points, data, bandwidth)
  20 #
  21 # inputs:
  22 #       eval_points: PxK matrix of points at which to calculate the density
  23 #       data: NxK matrix of data points
  24 #       bandwidth: positive scalar, the smoothing parameter. The fit
  25 #               is more smooth as the bandwidth increases.
  26 #       kernel (optional): string. Name of the kernel function. Default is
  27 #               Gaussian kernel.
  28 #       prewhiten bool (optional): default false. If true, rotate data
  29 #               using Choleski decomposition of inverse of covariance,
  30 #               to approximate independence after the transformation, which
  31 #               makes a product kernel a reasonable choice.
  32 #       do_cv: bool (optional). default false. If true, calculate leave-1-out
  33 #                density for cross validation
  34 #       computenodes: int (optional, default 0).
  35 #               Number of compute nodes for parallel evaluation
  36 #       debug: bool (optional, default false). show results on compute nodes if doing
  37 #               a parallel run
  38 # outputs:
  39 #       dens: Px1 vector: the fitted density value at each of the P evaluation points.
  40 #
  41 # References:
  42 # Wand, M.P. and Jones, M.C. (1995), 'Kernel smoothing'.
  43 # http://www.xplore-stat.de/ebooks/scripts/spm/html/spmhtmlframe73.html
  44
  45 function z = kernel_density(eval_points, data, bandwidth, kernel, prewhiten, do_cv, computenodes, debug)
  46
  47         if nargin < 2; error("kernel_density: at least 2 arguments are required"); endif
  48
  49         n = rows(data);
  50         k = columns(data);
  51
  52
  53         # set defaults for optional args
  54         if (nargin < 3) bandwidth = (n ^ (-1/(4+k))); endif     # bandwidth - see Li and Racine pg. 26
  55         if (nargin < 4) kernel = "__kernel_normal"; endif # what kernel?
  56         if (nargin < 5) prewhiten = false; endif        # automatic prewhitening?
  57         if (nargin < 6) do_cv = false; endif            # ordinary or leave-1-out
  58         if (nargin < 7) computenodes = 0; endif         # parallel?
  59         if (nargin < 8) debug = false; endif;           # debug?
  60
  61         nn = rows(eval_points);
  62         n = rows(data);
  63         if prewhiten
  64                 H = bandwidth*chol(cov(data));
  65         else
  66                 H = bandwidth;
  67         endif
  68
  69         # Inverse bandwidth matrix H_inv
  70         H_inv = inv(H);
  71
  72         # weight by inverse bandwidth matrix
  73         eval_points = eval_points*H_inv;
  74         data = data*H_inv;
  75
  76         # check if doing this parallel or serial
  77         global PARALLEL NSLAVES NEWORLD NSLAVES TAG
  78         PARALLEL = 0;
  79
  80         if computenodes > 0
  81                 PARALLEL = 1;
  82                 NSLAVES = computenodes;
  83                 LAM_Init(computenodes, debug);
  84         endif
  85
  86         if !PARALLEL # ordinary serial version
  87                 points_per_node = nn; # do the all on this node
  88                 z = kernel_density_nodes(eval_points, data, do_cv, kernel, points_per_node, computenodes, debug);
  89         else # parallel version
  90                 z = zeros(nn,1);
  91                 points_per_node = floor(nn/(NSLAVES + 1)); # number of obsns per slave
  92                 # The command that the slave nodes will execute
  93                 cmd=['z_on_node = kernel_density_nodes(eval_points, data, do_cv, kernel, points_per_node, computenodes, debug); ',...
  94                 'MPI_Send(z_on_node, 0, TAG, NEWORLD);'];
  95
  96                 # send items to slaves
  97
  98                 NumCmds_Send({"eval_points", "data", "do_cv", "kernel", "points_per_node", "computenodes", "debug","cmd"}, {eval_points, data, do_cv, kernel, points_per_node, computenodes, debug, cmd});
  99
 100                 # evaluate last block on master while slaves are busy
 101                 z_on_node = kernel_density_nodes(eval_points, data, do_cv, kernel, points_per_node, computenodes, debug);
 102                 startblock = NSLAVES*points_per_node + 1;
 103                 endblock = nn;
 104                 z(startblock:endblock,:) = z(startblock:endblock,:) + z_on_node;
 105
 106                 # collect slaves' results
 107                 z_on_node = zeros(points_per_node,1); # size may differ between master and compute nodes - reset here
 108                 for i = 1:NSLAVES
 109                         MPI_Recv(z_on_node,i,TAG,NEWORLD);
 110                         startblock = i*points_per_node - points_per_node + 1;
 111                         endblock = i*points_per_node;
 112                         z(startblock:endblock,:) = z(startblock:endblock,:) + z_on_node;
 113                 endfor
 114
 115                 # clean up after parallel
 116                 LAM_Finalize;
 117         endif
 118         z = z*det(H_inv);
 119 endfunction