1 # Copyright (C) 2006 Michael Creel <michael.creel@uab.es>
3 # This program is free software; you can redistribute it and/or modify
4 # it under the terms of the GNU General Public License as published by
5 # the Free Software Foundation; either version 2 of the License, or
6 # (at your option) any later version.
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU General Public License for more details.
13 # You should have received a copy of the GNU General Public License
14 # along with this program; If not, see <http://www.gnu.org/licenses/>.
16 # kernel_regression: kernel regression estimator
19 # fit = kernel_regression(eval_points, depvar, condvars, bandwidth)
22 # eval_points: PxK matrix of points at which to calculate the density
23 # depvar: Nx1 vector of observations of the dependent variable
24 # condvars: NxK matrix of data points
25 # bandwidth (optional): positive scalar, the smoothing parameter.
26 # Default is N ^ (-1/(4+K))
27 # kernel (optional): string. Name of the kernel function. Default is
29 # prewhiten bool (optional): default true. If true, rotate data
30 # using Choleski decomposition of inverse of covariance,
31 # to approximate independence after the transformation, which
32 # makes a product kernel a reasonable choice.
33 # do_cv: bool (optional). default false. If true, calculate leave-1-out
34 # fit to calculate the cross validation score
35 # computenodes: int (optional, default 0).
36 # Number of compute nodes for parallel evaluation
37 # debug: bool (optional, default false). show results on compute nodes if doing
40 # fit: Px1 vector: the fitted value at each of the P evaluation points.
43 function z = kernel_regression(eval_points, depvar, condvars, bandwidth, kernel, prewhiten, do_cv, computenodes, debug)
45 if nargin < 3; error("kernel_regression: at least 3 arguments are required"); endif
48 k = columns(condvars);
50 # set defaults for optional args
51 if (nargin < 4) bandwidth = (n ^ (-1/(4+k))); endif # bandwidth - see Li and Racine pg. 66
52 if (nargin < 5) kernel = "__kernel_normal"; endif # what kernel?
53 if (nargin < 6) prewhiten = true; endif # automatic prewhitening?
54 if (nargin < 7) do_cv = false; endif # ordinary or leave-1-out
55 if (nargin < 8) computenodes = 0; endif # parallel?
56 if (nargin < 9) debug = false; endif; # debug?
59 nn = rows(eval_points);
63 H = bandwidth*chol(cov(condvars));
69 # weight by inverse bandwidth matrix
70 eval_points = eval_points*H_inv;
71 condvars = condvars*H_inv;
73 data = [depvar condvars]; # put it all together for sending to nodes
75 # check if doing this parallel or serial
76 global PARALLEL NSLAVES NEWORLD NSLAVES TAG
81 NSLAVES = computenodes;
82 LAM_Init(computenodes, debug);
85 if !PARALLEL # ordinary serial version
86 points_per_node = nn; # do the all on this node
87 z = kernel_regression_nodes(eval_points, data, do_cv, kernel, points_per_node, computenodes, debug);
88 else # parallel version
90 points_per_node = floor(nn/(NSLAVES + 1)); # number of obsns per slave
91 # The command that the slave nodes will execute
92 cmd=['z_on_node = kernel_regression_nodes(eval_points, data, do_cv, kernel, points_per_node, computenodes, debug); ',...
93 'MPI_Send(z_on_node, 0, TAG, NEWORLD);'];
95 # send items to slaves
97 NumCmds_Send({"eval_points", "data", "do_cv", "kernel", "points_per_node", "computenodes", "debug","cmd"}, {eval_points, data, do_cv, kernel, points_per_node, computenodes, debug, cmd});
99 # evaluate last block on master while slaves are busy
100 z_on_node = kernel_regression_nodes(eval_points, data, do_cv, kernel, points_per_node, computenodes, debug);
101 startblock = NSLAVES*points_per_node + 1;
103 z(startblock:endblock,:) = z(startblock:endblock,:) + z_on_node;
105 # collect slaves' results
106 z_on_node = zeros(points_per_node,1); # size may differ between master and compute nodes - reset here
108 MPI_Recv(z_on_node,i,TAG,NEWORLD);
109 startblock = i*points_per_node - points_per_node + 1;
110 endblock = i*points_per_node;
111 z(startblock:endblock,:) = z(startblock:endblock,:) + z_on_node;
114 # clean up after parallel