1 ## Copyright (C) 1995-2012 Kurt Hornik
3 ## This file is part of Octave.
5 ## Octave is free software; you can redistribute it and/or modify it
6 ## under the terms of the GNU General Public License as published by
7 ## the Free Software Foundation; either version 3 of the License, or (at
8 ## your option) any later version.
10 ## Octave is distributed in the hope that it will be useful, but
11 ## WITHOUT ANY WARRANTY; without even the implied warranty of
12 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 ## General Public License for more details.
15 ## You should have received a copy of the GNU General Public License
16 ## along with Octave; see the file COPYING. If not, see
17 ## <http://www.gnu.org/licenses/>.
20 ## @deftypefn {Function File} {[@var{pval}, @var{k}, @var{df}] =} kruskal_wallis_test (@var{x1}, @dots{})
21 ## Perform a Kruskal-Wallis one-factor "analysis of variance".
23 ## Suppose a variable is observed for @var{k} > 1 different groups, and
24 ## let @var{x1}, @dots{}, @var{xk} be the corresponding data vectors.
26 ## Under the null hypothesis that the ranks in the pooled sample are not
27 ## affected by the group memberships, the test statistic @var{k} is
28 ## approximately chi-square with @var{df} = @var{k} - 1 degrees of
31 ## If the data contains ties (some value appears more than once)
32 ## @var{k} is divided by
34 ## 1 - @var{sum_ties} / (@var{n}^3 - @var{n})
36 ## where @var{sum_ties} is the sum of @var{t}^3 - @var{t} over each group
37 ## of ties where @var{t} is the number of ties in the group and @var{n}
38 ## is the total number of values in the input data. For more info on
39 ## this adjustment see "Use of Ranks in One-Criterion Variance Analysis"
40 ## in Journal of the American Statistical Association, Vol. 47,
41 ## No. 260 (Dec 1952) by William H. Kruskal and W. Allen Wallis.
43 ## The p-value (1 minus the CDF of this distribution at @var{k}) is
44 ## returned in @var{pval}.
46 ## If no output argument is given, the p-value is displayed.
49 ## Author: KH <Kurt.Hornik@wu-wien.ac.at>
50 ## Description: Kruskal-Wallis test
52 function [pval, k, df] = kruskal_wallis_test (varargin)
65 error ("kruskal_wallis_test: all arguments must be vectors");
69 p = [p, (reshape (x, 1, l))];
77 k = k + (sum (r ((j + 1) : (j + n(i))))) ^ 2 / n(i);
82 k = 12 * k / (n * (n + 1)) - 3 * (n + 1);
84 ## Adjust the result to take ties into account.
85 sum_ties = sum (polyval ([1, 0, -1, 0], runlength (sort (p))));
86 k = k / (1 - sum_ties / (n^3 - n));
89 pval = 1 - chi2cdf (k, df);
92 printf ("pval: %g\n", pval);
98 %!assert (abs(kruskal_wallis_test([86 86], [74]) - 0.157299207050285) < 0.0000000000001)