accumarray.m

   1 ## Copyright (C) 2007-2012 David Bateman
   2 ## Copyright (C) 2009-2010 VZLU Prague
   3 ##
   4 ## This file is part of Octave.
   5 ##
   6 ## Octave is free software; you can redistribute it and/or modify it
   7 ## under the terms of the GNU General Public License as published by
   8 ## the Free Software Foundation; either version 3 of the License, or (at
   9 ## your option) any later version.
  10 ##
  11 ## Octave is distributed in the hope that it will be useful, but
  12 ## WITHOUT ANY WARRANTY; without even the implied warranty of
  13 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 ## General Public License for more details.
  15 ##
  16 ## You should have received a copy of the GNU General Public License
  17 ## along with Octave; see the file COPYING.  If not, see
  18 ## <http://www.gnu.org/licenses/>.
  19
  20 ## -*- texinfo -*-
  21 ## @deftypefn  {Function File} {} accumarray (@var{subs}, @var{vals}, @var{sz}, @var{func}, @var{fillval}, @var{issparse})
  22 ## @deftypefnx {Function File} {} accumarray (@var{subs}, @var{vals}, @dots{})
  23 ##
  24 ## Create an array by accumulating the elements of a vector into the
  25 ## positions defined by their subscripts.  The subscripts are defined by
  26 ## the rows of the matrix @var{subs} and the values by @var{vals}.  Each
  27 ## row of @var{subs} corresponds to one of the values in @var{vals}.  If
  28 ## @var{vals} is a scalar, it will be used for each of the row of
  29 ## @var{subs}.  If @var{subs} is a cell array of vectors, all vectors
  30 ## must be of the same length, and the subscripts in the @var{k}th
  31 ## vector must correspond to the @var{k}th dimension of the result.
  32 ##
  33 ## The size of the matrix will be determined by the subscripts
  34 ## themselves.  However, if @var{sz} is defined it determines the matrix
  35 ## size.  The length of @var{sz} must correspond to the number of columns
  36 ## in @var{subs}.  An exception is if @var{subs} has only one column, in
  37 ## which case @var{sz} may be the dimensions of a vector and the
  38 ## subscripts of @var{subs} are taken as the indices into it.
  39 ##
  40 ## The default action of @code{accumarray} is to sum the elements with
  41 ## the same subscripts.  This behavior can be modified by defining the
  42 ## @var{func} function.  This should be a function or function handle
  43 ## that accepts a column vector and returns a scalar.  The result of the
  44 ## function should not depend on the order of the subscripts.
  45 ##
  46 ## The elements of the returned array that have no subscripts associated
  47 ## with them are set to zero.  Defining @var{fillval} to some other value
  48 ## allows these values to be defined.  This behavior changes, however,
  49 ## for certain values of @var{func}.  If @var{func} is @code{min}
  50 ## (respectively, @code{max}) then the result will be filled with the
  51 ## minimum (respectively, maximum) integer if @var{vals} is of integral
  52 ## type, logical false (respectively, logical true) if @var{vals} is of
  53 ## logical type, zero if @var{fillval} is zero and all values are
  54 ## non-positive (respectively, non-negative), and NaN otherwise.
  55 ##
  56 ## By default @code{accumarray} returns a full matrix.  If
  57 ## @var{issparse} is logically true, then a sparse matrix is returned
  58 ## instead.
  59 ##
  60 ## The following @code{accumarray} example constructs a frequency table
  61 ## that in the first column counts how many occurrences each number in
  62 ## the second column has, taken from the vector @var{x}.  Note the usage
  63 ## of @code{unique}  for assigning to all repeated elements of @var{x}
  64 ## the same index (@pxref{doc-unique}).
  65 ##
  66 ## @example
  67 ## @group
  68 ## @var{x} = [91, 92, 90, 92, 90, 89, 91, 89, 90, 100, 100, 100];
  69 ## [@var{u}, ~, @var{j}] = unique (@var{x});
  70 ## [accumarray(@var{j}', 1), @var{u}']
  71 ##   @result{}  2    89
  72 ##       3    90
  73 ##       2    91
  74 ##       2    92
  75 ##       3   100
  76 ## @end group
  77 ## @end example
  78 ##
  79 ## Another example, where the result is a multi-dimensional 3-D array and
  80 ## the default value (zero) appears in the output:
  81 ##
  82 ## @example
  83 ## @group
  84 ## accumarray ([1, 1, 1;
  85 ##              2, 1, 2;
  86 ##              2, 3, 2;
  87 ##              2, 1, 2;
  88 ##              2, 3, 2], 101:105)
  89 ## @result{} ans(:,:,1) = [101, 0, 0; 0, 0, 0]
  90 ## @result{} ans(:,:,2) = [0, 0, 0; 206, 0, 208]
  91 ## @end group
  92 ## @end example
  93 ##
  94 ## The sparse option can be used as an alternative to the @code{sparse}
  95 ## constructor (@pxref{doc-sparse}). Thus
  96 ##
  97 ## @example
  98 ## sparse (@var{i}, @var{j}, @var{sv})
  99 ## @end example
 100 ##
 101 ## @noindent
 102 ## can be written with @code{accumarray} as
 103 ##
 104 ## @example
 105 ## accumarray ([@var{i}, @var{j}], @var{sv}', [], [], 0, true)
 106 ## @end example
 107 ##
 108 ## @noindent
 109 ## For repeated indices, @code{sparse} adds the corresponding value. To
 110 ## take the minimum instead, use @code{min} as an accumulator function:
 111 ##
 112 ## @example
 113 ## accumarray ([@var{i}, @var{j}], @var{sv}', [], @@min, 0, true)
 114 ## @end example
 115 ##
 116 ## The complexity of accumarray in general for the non-sparse case is
 117 ## generally O(M+N), where N is the number of subscripts and M is the
 118 ## maximum subscript (linearized in multi-dimensional case).  If
 119 ## @var{func} is one of @code{@@sum} (default), @code{@@max},
 120 ## @code{@@min} or @code{@@(x) @{x@}}, an optimized code path is used.
 121 ## Note that for general reduction function the interpreter overhead can
 122 ## play a major part and it may be more efficient to do multiple
 123 ## accumarray calls and compute the results in a vectorized manner.
 124 ##
 125 ## @seealso{accumdim, unique, sparse}
 126 ## @end deftypefn
 127
 128 function A = accumarray (subs, vals, sz = [], func = [], fillval = [], issparse = [])
 129
 130   if (nargin < 2 || nargin > 6)
 131     print_usage ();
 132   endif
 133
 134   lenvals = length (vals);
 135
 136   if (iscell (subs))
 137     subs = cellfun (@vec, subs, "uniformoutput", false);
 138     ndims = numel (subs);
 139     if (ndims == 1)
 140       subs = subs{1};
 141     endif
 142
 143     lensubs = cellfun (@length, subs);
 144
 145     if (any (lensubs != lensubs(1)) ||
 146         (lenvals > 1 && lenvals != lensubs(1)))
 147       error ("accumarray: dimension mismatch");
 148     endif
 149
 150   else
 151     ndims = columns (subs);
 152     if (lenvals > 1 && lenvals != rows (subs))
 153       error ("accumarray: dimension mismatch")
 154     endif
 155   endif
 156
 157   if (isempty (fillval))
 158     fillval = 0;
 159   endif
 160
 161   if (isempty (issparse))
 162     issparse = false;
 163   endif
 164
 165   if (issparse)
 166
 167     ## Sparse case. Avoid linearizing the subscripts, because it could
 168     ## overflow.
 169
 170     if (fillval != 0)
 171       error ("accumarray: FILLVAL must be zero in the sparse case");
 172     endif
 173
 174     ## Ensure subscripts are a two-column matrix.
 175     if (iscell (subs))
 176       subs = [subs{:}];
 177     endif
 178
 179     ## Validate dimensions.
 180     if (ndims == 1)
 181       subs(:,2) = 1;
 182     elseif (ndims != 2)
 183       error ("accumarray: in the sparse case, needs 1 or 2 subscripts");
 184     endif
 185
 186     if (isnumeric (vals) || islogical (vals))
 187       vals = double (vals);
 188     else
 189       error ("accumarray: in the sparse case, values must be numeric or logical");
 190     endif
 191
 192     if (! (isempty (func) || func == @sum))
 193
 194       ## Reduce values. This is not needed if we're about to sum them,
 195       ## because "sparse" can do that.
 196
 197       ## Sort indices.
 198       [subs, idx] = sortrows (subs);
 199       n = rows (subs);
 200       ## Identify runs.
 201       jdx = find (any (diff (subs, 1, 1), 2));
 202       jdx = [jdx; n];
 203
 204       vals = cellfun (func, mat2cell (vals(:)(idx), diff ([0; jdx])));
 205       subs = subs(jdx, :);
 206       mode = "unique";
 207     else
 208       mode = "sum";
 209     endif
 210
 211     ## Form the sparse matrix.
 212     if (isempty (sz))
 213       A = sparse (subs(:,1), subs(:,2), vals, mode);
 214     elseif (length (sz) == 2)
 215
 216       ## Row vector case
 217       if (sz(1) == 1)
 218         [i, j] = deal (subs(:,2), subs(:,1));
 219       else
 220         [i, j] = deal (subs(:,1), subs(:,2));
 221       endif
 222       A = sparse (i, j, vals, sz(1), sz(2), mode);
 223     else
 224       error ("accumarray: dimensions mismatch");
 225     endif
 226
 227   else
 228
 229     ## Linearize subscripts.
 230     if (ndims > 1)
 231       if (isempty (sz))
 232         if (iscell (subs))
 233           sz = cellfun ("max", subs);
 234         else
 235           sz = max (subs, [], 1);
 236         endif
 237       elseif (ndims != length (sz))
 238         error ("accumarray: dimensions mismatch");
 239       endif
 240
 241       ## Convert multidimensional subscripts.
 242       if (ismatrix (subs))
 243         subs = num2cell (subs, 1);
 244       endif
 245       subs = sub2ind (sz, subs{:}); # creates index cache
 246     elseif (! isempty (sz) && length (sz) < 2)
 247       error ("accumarray: needs at least 2 dimensions");
 248     elseif (! isindex (subs)) # creates index cache
 249       error ("accumarray: indices must be positive integers");
 250     endif
 251
 252
 253     ## Some built-in reductions handled efficiently.
 254
 255     if (isempty (func) || func == @sum)
 256       ## Fast summation.
 257       if (isempty (sz))
 258         A = __accumarray_sum__ (subs, vals);
 259       else
 260         A = __accumarray_sum__ (subs, vals, prod (sz));
 261         ## set proper shape.
 262         A = reshape (A, sz);
 263       endif
 264
 265       ## we fill in nonzero fill value.
 266       if (fillval != 0)
 267         mask = true (size (A));
 268         mask(subs) = false;
 269         A(mask) = fillval;
 270       endif
 271     elseif (func == @max)
 272       ## Fast maximization.
 273
 274       if (isinteger (vals))
 275         zero = intmin (class (vals));
 276       elseif (islogical (vals))
 277         zero = false;
 278       elseif (fillval == 0 && all (vals(:) >= 0))
 279         ## This is a common case - fillval is zero, all numbers
 280         ## nonegative.
 281         zero = 0;
 282       else
 283         zero = NaN; # Neutral value.
 284       endif
 285
 286       if (isempty (sz))
 287         A = __accumarray_max__ (subs, vals, zero);
 288       else
 289         A = __accumarray_max__ (subs, vals, zero, prod (sz));
 290         A = reshape (A, sz);
 291       endif
 292
 293       if (fillval != zero && ! (isnan (fillval) || isnan (zero)))
 294         mask = true (size (A));
 295         mask(subs) = false;
 296         A(mask) = fillval;
 297       endif
 298     elseif (func == @min)
 299       ## Fast minimization.
 300
 301       if (isinteger (vals))
 302         zero = intmax (class (vals));
 303       elseif (islogical (vals))
 304         zero = true;
 305       elseif (fillval == 0 && all (vals(:) <= 0))
 306         ## This is a common case - fillval is zero, all numbers
 307         ## non-positive.
 308         zero = 0;
 309       else
 310         zero = NaN; # Neutral value.
 311       endif
 312
 313       if (isempty (sz))
 314         A = __accumarray_min__ (subs, vals, zero);
 315       else
 316         A = __accumarray_min__ (subs, vals, zero, prod (sz));
 317         A = reshape (A, sz);
 318       endif
 319
 320       if (fillval != zero && ! (isnan (fillval) || isnan (zero)))
 321         mask = true (size (A));
 322         mask(subs) = false;
 323         A(mask) = fillval;
 324       endif
 325     else
 326
 327       ## The general case. Reduce values.
 328       n = rows (subs);
 329       if (numel (vals) == 1)
 330         vals = vals(ones (1, n), 1);
 331       else
 332         vals = vals(:);
 333       endif
 334
 335       ## Sort indices.
 336       [subs, idx] = sort (subs);
 337       ## Identify runs.
 338       jdx = find (subs(1:n-1) != subs(2:n));
 339       jdx = [jdx; n];
 340       vals = mat2cell (vals(idx), diff ([0; jdx]));
 341       ## Optimize the case when function is @(x) {x}, i.e. we just want
 342       ## to collect the values to cells.
 343       persistent simple_cell_str = func2str (@(x) {x});
 344       if (! strcmp (func2str (func), simple_cell_str))
 345         vals = cellfun (func, vals);
 346       endif
 347       subs = subs(jdx);
 348
 349       if (isempty (sz))
 350         sz = max (subs);
 351         if (length (sz) == 1)
 352           sz(2) = 1;
 353         endif
 354       endif
 355
 356       ## Construct matrix of fillvals.
 357       if (iscell (vals))
 358         A = cell (sz);
 359       elseif (fillval == 0)
 360         A = zeros (sz, class (vals));
 361       else
 362         A = repmat (fillval, sz);
 363       endif
 364
 365       ## Set the reduced values.
 366       A(subs) = vals;
 367     endif
 368   endif
 369 endfunction
 370
 371 %!error (accumarray (1:5))
 372 %!error (accumarray ([1,2,3],1:2))
 373 %!assert (accumarray ([1;2;4;2;4],101:105), [101;206;0;208])
 374 %!assert (accumarray ([1,1,1;2,1,2;2,3,2;2,1,2;2,3,2],101:105),cat(3, [101,0,0;0,0,0],[0,0,0;206,0,208]))
 375 %!assert (accumarray ([1,1,1;2,1,2;2,3,2;2,1,2;2,3,2],101:105,[],@(x)sin(sum(x))),sin(cat(3, [101,0,0;0,0,0],[0,0,0;206,0,208])))
 376 %!assert (accumarray ({[1 3 3 2 3 1 2 2 3 3 1 2],[3 4 2 1 4 3 4 2 2 4 3 4],[1 1 2 2 1 1 2 1 1 1 2 2]},101:112),cat(3,[0,0,207,0;0,108,0,0;0,109,0,317],[0,0,111,0;104,0,0,219;0,103,0,0]))
 377 %!assert (accumarray ([1,1;2,1;2,3;2,1;2,3],101:105,[2,4],@max,NaN),[101,NaN,NaN,NaN;104,NaN,105,NaN])
 378 %!assert (accumarray ([1 1; 2 1; 2 3; 2 1; 2 3],101:105, [], @prod), [101, 0, 0; 10608, 0, 10815])
 379 %!assert (accumarray ([1 1; 2 1; 2 3; 2 1; 2 3],101:105,[2 4],@prod,0,true),sparse([1,2,2],[1,1,3],[101,10608,10815],2,4))
 380 %!assert (accumarray ([1 1; 2 1; 2 3; 2 1; 2 3],1,[2,4]), [1,0,0,0;2,0,2,0])
 381 %!assert (accumarray ([1 1; 2 1; 2 3; 2 1; 2 3],101:105,[2,4],@(x)length(x)>1),[false,false,false,false;true,false,true,false])
 382 %!assert (accumarray ([1; 2], [3; 4], [2, 1], @min, [], 0), [3; 4])
 383 %!assert (accumarray ([1; 2], [3; 4], [2, 1], @min, [], 1), sparse ([3; 4]))
 384 %!assert (accumarray ([1; 2], [3; 4], [1, 2], @min, [], 0), [3, 4])
 385 %!assert (accumarray ([1; 2], [3; 4], [1, 2], @min, [], 1), sparse ([3, 4]))
 386 %!test
 387 %! A = accumarray ([1 1; 2 1; 2 3; 2 1; 2 3],101:105,[2,4],@(x){x});
 388 %! assert (A{2},[102;104])
 389 %!test
 390 %! subs = ceil (rand (2000, 3)*10);
 391 %! vals = rand (2000, 1);
 392 %! assert (accumarray (subs, vals, [], @max), accumarray (subs, vals, [], @(x) max (x)));
 393 %!test
 394 %! subs = ceil (rand (2000, 1)*100);
 395 %! vals = rand (2000, 1);
 396 %! assert (accumarray (subs, vals, [100, 1], @min, NaN), accumarray (subs, vals, [100, 1], @(x) min (x), NaN));
 397 %!test
 398 %! subs = ceil (rand (2000, 2)*30);
 399 %! subsc = num2cell (subs, 1);
 400 %! vals = rand (2000, 1);
 401 %! assert (accumarray (subsc, vals, [], [], 0, true), accumarray (subs, vals, [], [], 0, true));
 402 %!test
 403 %! subs = ceil (rand (2000, 3)*10);
 404 %! subsc = num2cell (subs, 1);
 405 %! vals = rand (2000, 1);
 406 %! assert (accumarray (subsc, vals, [], @max), accumarray (subs, vals, [], @max));
 407
 408