1 # Copyright (C) 2007 Michael Creel <michael.creel@uab.es>
3 # This program is free software; you can redistribute it and/or modify
4 # it under the terms of the GNU General Public License as published by
5 # the Free Software Foundation; either version 2 of the License, or
6 # (at your option) any later version.
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU General Public License for more details.
13 # You should have received a copy of the GNU General Public License
14 # along with this program; If not, see <http://www.gnu.org/licenses/>.
16 # kernel_example: examples of how to use kernel density and regression functions
17 # requires the optim and plot packages from Octave Forge
19 # usage: kernel_example;
21 # sample size (default n = 500) - should get better fit (on average)
22 # as this increases, supposing that you allow the optimal window width
23 # to be found, by uncommenting the relevant lines
26 # set this to greater than 0 to try parallel computations (requires MPITB)
28 nodes = compute_nodes + 1; # count master node
33 ############################################################
34 # kernel regression example
35 # uniformly spaced data points on [0,2]
39 # generate dependent variable
40 trueline = x + (x.^2)/2 - 3.1*(x.^3)/3 + 1.2*(x.^4)/4;
42 y = trueline + sig*randn(n,1);
44 fit = kernel_regression(x, y, x); # use the default bandwidth
47 printf("########################################################################\n");
48 printf("time for kernel regression example using %d data points and %d compute nodes: %f\n", n, nodes, t1);
49 plot(x, fit, ";fit;", x, trueline,";true;");
51 title("Example 1: Kernel regression fit");
53 ############################################################
54 # kernel density example: univariate - fit to Chi^2(3) data
55 data = sumsq(randn(n,3),2);
56 # evaluation point are on a grid for plotting
58 grid_x = (-1:stepsize:11)';
60 # get optimal bandwidth (time consuming, uncomment if you want to try it)
61 # bandwidth = kernel_optimal_bandwidth(data);
62 # get the fitted density and do a plot
64 dens = kernel_density(grid_x, data, bandwidth, "__kernel_normal", false, false, compute_nodes);
67 printf("########################################################################\n");
68 printf("time for univariate kernel density example using %d data points and %d compute nodes: %f\n", n, nodes, t1);
69 printf("A rough integration under the fitted univariate density is %f\n", sum(dens)*stepsize);
71 plot(grid_x, dens, ";fitted density;", grid_x, chi2pdf(grid_x,3), ";true density;");
72 title("Example 2: Kernel density fit: Univariate Chi^2(3) data");
74 ############################################################
75 # kernel density example: bivariate
80 data = [d(:,1) sumsq(d,2)];
81 # evaluation points are on a grid for plotting
83 a = (-5:stepsize:5)'; # for the N(0,1)
84 b = (-1:stepsize:9)'; # for the Chi squared(3)
86 [grid_x, grid_y] = meshgrid(a, b);
87 eval_points = [vec(grid_x) vec(grid_y)];
89 # get optimal bandwidth (time consuming, uncomment if you want to try it)
90 # bandwidth = kernel_optimal_bandwidth(data);
91 # get the fitted density and do a plot
93 dens = kernel_density(eval_points, data, bandwidth, "__kernel_normal", false, false, compute_nodes);
96 printf("########################################################################\n");
97 printf("time for multivariate kernel density example using %d data points and %d compute nodes: %f\n", n, nodes, t1);
98 dens = reshape(dens, gridsize, gridsize);
99 printf("A rough integration under the fitted bivariate density is %f\n", sum(sum(dens))*stepsize^2);
101 surf(grid_x, grid_y, dens);
102 title("Example 3: Kernel density fit: dependent bivariate data");
103 xlabel("true marginal density is N(0,1)");
104 ylabel("true marginal density is Chi^2(3)");
105 # more extensive test of parallel
106 if compute_nodes > 0 # only try this if parallel is available
107 ns =[4000; 8000; 10000; 12000; 16000; 20000];
109 printf("########################################################################\n");
110 printf("kernel regression example with several sample sizes serial/parallel timings\n");
113 title("Compute time versus nodes, kernel regression with different sample sizes");
115 ylabel("time (sec)");
117 ts = zeros(rows(ns),4);
119 for compute_nodes = 0:3
120 nodes = compute_nodes + 1;
125 # generate dependent variable
126 trueline = x + (x.^2)/2 - 3.1*(x.^3)/3 + 1.2*(x.^4)/4;
128 y = trueline + sig*randn(n,1);
131 fit = kernel_regression(x, y, x, bandwidth, "__kernel_normal", false, false, compute_nodes);
134 plot(nodes, t1, "*");
135 printf(" %d data points and %d compute nodes: %f\n", n, nodes, t1);