1 function r = ranks(X,DIM,Mode)
2 % RANKS gives the rank of each element in a vector.
3 % This program uses an advanced algorithm with averge effort O(m.n.log(n))
4 % NaN in the input yields NaN in the output.
7 % if X is a vector, return the vector of ranks of X adjusted for ties.
8 % if X is matrix, the rank is calculated along dimension DIM.
9 % if DIM is zero or empty, the lowest dimension with more then 1 element is used.
10 % r = ranks(X,DIM,'traditional')
11 % implements the traditional algorithm with O(n^2) computational
12 % and O(n^2) memory effort
13 % r = ranks(X,DIM,'mtraditional')
14 % implements the traditional algorithm with O(n^2) computational
15 % and O(n) memory effort
16 % r = ranks(X,DIM,'advanced ')
17 % implements an advanced algorithm with O(n*log(n)) computational
18 % and O(n.log(n)) memory effort
19 % r = ranks(X,DIM,'advanced-ties')
20 % implements an advanced algorithm with O(n*log(n)) computational
21 % and O(n.log(n)) memory effort
22 % but without correction for ties
23 % This is the fastest algorithm
25 % see also: CORRCOEF, SPEARMAN, RANKCORR
31 % $Id: ranks.m 8456 2011-08-10 13:20:17Z schloegl $
32 % Copyright (C) 2000-2002,2005,2010 by Alois Schloegl <alois.schloegl@gmail.com>
33 % This script is part of the NaN-toolbox
34 % http://pub.ist.ac.at/~schloegl/matlab/NaN/
36 % This program is free software; you can redistribute it and/or modify
37 % it under the terms of the GNU General Public License as published by
38 % the Free Software Foundation; either version 3 of the License, or
39 % (at your option) any later version.
41 % This program is distributed in the hope that it will be useful,
42 % but WITHOUT ANY WARRANTY; without even the implied warranty of
43 % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
44 % GNU General Public License for more details.
46 % You should have received a copy of the GNU General Public License
47 % along with this program; If not, see <http://www.gnu.org/licenses/>.
50 % + is fast, uses an efficient algorithm for the rank correlation
51 % + computational effort is O(n.log(n)) instead of O(n^2)
52 % + memory effort is O(n.log(n)), instead of O(n^2).
53 % Now, the ranks of 8000 elements can be easily calculated
54 % + NaN's in the input yield NaN in the output
55 % + compatible with this software and Matlab
56 % + traditional method is also implemented for comparison.
59 if nargin<2, DIM = 0; end;
80 if strcmp(Mode(1:min(11,length(Mode))),'traditional'), % traditional, needs O(m.n^2)
81 % this method was originally implemented by: KH <Kurt.Hornik@ci.tuwien.ac.at>
82 % Comment of KH: This code is rather ugly, but is there an easy way to get the ranks adjusted for ties from sort?
86 p = X(:, i(ones(1,N)));
87 r(:,i) = (sum (p < p') + (sum (p == p') + 1) / 2)';
91 elseif strcmp(Mode(1:min(12,length(Mode))),'mtraditional'),
92 % + memory effort is lower
97 r(k,i) = (sum (X(:,i) < X(k,i)) + (sum (X(:,i) == X(k,i)) + 1) / 2);
102 elseif strcmp(Mode(1:min(13,length(Mode))),'advanced-ties'), % advanced
103 % + uses sorting, hence needs only O(m.n.log(n)) computations
104 % - does not fix ties
107 [sX, ix] = sort(X,1);
109 [tmp,r(:,k)] = sort(ix(:,k),1); % r yields the rank of each element
114 elseif strcmp(Mode(1:min(8,length(Mode))),'advanced'), % advanced
115 % + uses sorting, hence needs only O(m.n.log(n)) computations
117 % [tmp,ix] = sort([X,Y]);
118 % [tmp,r] = sort(ix); % r yields rank.
119 % but because sort does not work accordingly for cell arrays,
120 % and DIM argument not supported by Octave
121 % and DIM argument does not work for cell-arrays in Matlab
122 % we sort each column separately:
127 [sX,ix] = sort(X(:,k));
128 [tmp,r(:,k)] = sort(ix); % r yields the rank of each element
130 % identify multiple occurences (not sure if this important, but implemented to be compatible with traditional version)
132 n=sum(~isnan(X(:,k)));
134 x = [0;find(sX~=[sX(2:N);n])]; % for this reason, cells are not implemented yet.
137 % correct rank of multiple occurring elements
139 t = (x(d(l))+1:x(d(l)+1))';
140 r(ix(t),k) = mean(t);
145 elseif strcmp(Mode,'=='),
146 % the results of both algorithms are compared for testing.
148 % if the Mode-argument is omitted, both methods are applied and
149 % the results are compared. Once the advanced algorithm is confirmed,
150 % it will become the default Mode.
152 r = ranks(X,'advanced ');
156 r1 = ranks(X,'mtraditional'); % Memory effort is lower
158 r1 = ranks(X,'traditional');
161 fprintf(2,'WARNING RANKS: advanced algorithm does not agree with traditional one\n Please report to <alois.schloegl@gmail.com>\n');