1 function [R, tix] = histo4(Y, W)
2 % HISTO4 calculates histogram of multidimensional data samples
3 % and supports data compression
7 % Y data: on sample per row, each sample has with size(Y,2) elements
8 % W weights of each sample (default: [])
9 % W = [] indicates that each sample has equal weight
10 % R is a struct with these fields:
11 % R.X are the bin-values
12 % R.H is the frequency of occurence of value X (weighted with W)
13 % R.N are the total number of samples (or sum of W)
15 % HISTO4 might be useful for data compression, because
17 % is the compression step
19 % is the decompression step
21 % The effort (in memory and speed) for compression is O(n*log(n))
22 % The effort (in memory and speed) for decompression is only O(n)
24 % see also: HISTO, HISTO2, HISTO3, HISTO4
27 % C.E. Shannon and W. Weaver 'The mathematical theory of communication' University of Illinois Press, Urbana 1949 (reprint 1963).
30 % $Id: histo4.m 8383 2011-07-16 20:06:59Z schloegl $
31 % Copyright (C) 1996-2005,2008,2009,2011 by Alois Schloegl <alois.schloegl@gmail.com>
32 % This is part of the TSA-toolbox
33 % http://pub.ist.ac.at/~schloegl/matlab/tsa/
35 % This program is free software: you can redistribute it and/or modify
36 % it under the terms of the GNU General Public License as published by
37 % the Free Software Foundation, either version 3 of the License, or
38 % (at your option) any later version.
40 % This program is distributed in the hope that it will be useful,
41 % but WITHOUT ANY WARRANTY; without even the implied warranty of
42 % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
43 % GNU General Public License for more details.
45 % You should have received a copy of the GNU General Public License
46 % along with this program. If not, see <http://www.gnu.org/licenses/>.
49 %%%%% check input arguments %%%%%
54 if ~isempty(W) && (yr ~= numel(W)),
55 error('number of rows of Y does not match number of elements in W');
58 %%%%% identify all possible X's and generate overall Histogram %%%%%
59 [Y, idx] = sortrows(Y);
62 ix = any( (~isnan(d) & (d~=0) ) | diff(isnan(Y),[],1), 2);
66 R.datatype = 'HISTOGRAM';
68 R.H = [tmp(1); diff(tmp)];
72 R.H = [W(tmp(1)); diff(W(tmp))];
76 %%%%% generate inverse index %%%%%
78 tix = cumsum([1;ix]); % rank
91 [tmp, idx] = sort(idx); % inverse index
92 tix = tix(idx); % inverse sort rank
94 R.compressionratio = (prod(size(R.X)) + yr/cc) / (yr*yc);