1 ## Copyright (C) 2003 David Bateman
3 ## This program is free software; you can redistribute it and/or modify it under
4 ## the terms of the GNU General Public License as published by the Free Software
5 ## Foundation; either version 3 of the License, or (at your option) any later
8 ## This program is distributed in the hope that it will be useful, but WITHOUT
9 ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 ## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
13 ## You should have received a copy of the GNU General Public License along with
14 ## this program; if not, see <http://www.gnu.org/licenses/>.
17 ## @deftypefn {Function File} {[@var{table}, @var{codes}] = } lloyds (@var{sig},@var{init_codes})
18 ## @deftypefnx {Function File} {[@var{table}, @var{codes}] = } lloyds (@var{sig},@var{len})
19 ## @deftypefnx {Function File} {[@var{table}, @var{codes}] = } lloyds (@var{sig},@var{...},@var{tol})
20 ## @deftypefnx {Function File} {[@var{table}, @var{codes}] = } lloyds (@var{sig},@var{...},@var{tol},@var{type})
21 ## @deftypefnx {Function File} {[@var{table}, @var{codes}, @var{dist}] = } lloyds (@var{...})
22 ## @deftypefnx {Function File} {[@var{table}, @var{codes}, @var{dist}, @var{reldist}] = } lloyds (@var{...})
24 ## Optimize the quantization table and codes to reduce distortion. This is
25 ## based on the article by Lloyd
27 ## S. Lloyd @emph{Least squared quantization in PCM}, IEEE Trans Inform
28 ## Thoery, Mar 1982, no 2, p129-137
30 ## which describes an iterative technique to reduce the quantization error
31 ## by making the intervals of the table such that each interval has the same
32 ## area under the PDF of the training signal @var{sig}. The initial codes to
33 ## try can either be given in the vector @var{init_codes} or as scalar
34 ## @var{len}. In the case of a scalar the initial codes will be an equi-spaced
35 ## vector of length @var{len} between the minimum and maximum value of the
38 ## The stopping criteria of the iterative algorithm is given by
41 ## abs(@var{dist}(n) - @var{dist}(n-1)) < max(@var{tol}, abs(@var{eps}*max(@var{sig}))
44 ## By default @var{tol} is 1.e-7. The final input argument determines how the
45 ## updated table is created. By default the centroid of the values of the
46 ## training signal that fall within the interval described by @var{codes}
47 ## are used to update @var{table}. If @var{type} is any other string than
48 ## "centroid", this behaviour is overriden and @var{table} is updated as
52 ## @var{table} = (@var{code}(2:length(@var{code})) + @var{code}(1:length(@var{code}-1))) / 2
55 ## The optimized values are returned as @var{table} and @var{code}. In
56 ## addition the distortion of the the optimized codes representing the
57 ## training signal is returned as @var{dist}. The relative distortion in
58 ## the final iteration is also returned as @var{reldist}.
63 function [table, code, dist, reldist] = lloyds(sig, init, tol, type)
65 if ((nargin < 2) || (nargin > 4))
66 usage (" [table, codes, dist, reldist] = lloyds(sig, init [, tol [,type]])");
69 if (min(size(sig)) != 1)
70 error ("lloyds: training signal must be a vector");
77 if (length(init) == 1)
79 init = [0:len-1]'/(len-1) * (sigmax - sigmin) + sigmin;
80 elseif (min(size(init)))
83 error ("lloyds: unrecognized initial codebook");
87 if (any(init != sort(init)))
88 ## Must be monotonically increasing
89 error ("lloyds: Initial codebook must be monotonically increasing");
97 stop_criteria = max(eps * abs(sigmax), abs(tol));
101 elseif (!ischar(type))
102 error ("lloyds: expecting string argument for type");
105 ## Setup initial codebook, table and distortion
107 table = (code(2:lcode) + code(1:lcode-1))/2;
108 [indx, ignore, dist] = quantiz(sig, table, code);
111 while (reldist > stop_criteria)
112 ## The formula of the code at the new iteration is
114 ## code = Int_{table_{i-1}}^{table_i} x PSD(sig(x)) dx / ..
115 ## Int_{table_{i-1}}^{table_i} PSD(sig(x)) dx
117 ## As sig is a discrete signal, this comes down to counting the number
118 ## of times "sig" has values in each interval of the table, and taking
119 ## the mean of these values. If no value of the signals in interval, take
120 ## the middle of the interval. That is calculate the centroid of the data
121 ## of the training signal falling in the interval. We can reuse the index
122 ## from the previous call to quantiz to define the values in the interval.
124 psd_in_interval = find(indx == i-1);
125 if (!isempty(psd_in_interval))
126 code(i) = mean(sig(psd_in_interval));
128 code(i) = (table(i) + sigmin) / 2;
130 code(i) = (sigmax + table(i-1)) / 2;
132 code(i) = (table(i) + table(i-1)) / 2;
136 ## Now update the table. There is a problem here, in that I believe
137 ## the elements of the new table should be given by b(i)=(c(i+1)+c(i))/2,
138 ## but Matlab doesn't seem to do this. Matlab seems to also take the
139 ## centroid of the code for the table (this was a real pain to find
140 ## why my results and Matlab's disagreed). For this reason, I have a
141 ## default behaviour the same as Matlab, and allow a flag to overide
142 ## it to be the behaviour I expect. If any one wants to tell me what
143 ## the CORRECT behaviour is, then I'll get rid of of the irrelevant
145 if (strcmp(type,"centroid"))
147 sig_in_code = sig(find(sig >= code(i)));
148 sig_in_code = sig_in_code(find(sig_in_code < code(i+1)));
149 if (!isempty(sig_in_code))
150 table(i) = mean(sig_in_code);
152 table(i) = (code(i+1) + code(i)) / 2;
156 table = (code(2:lcode) + code(1:lcode-1))/2;
159 ## Update the distortion levels
161 [indx, ignore, dist] = quantiz(sig, table, code);
162 reldist = abs(reldist - dist);
165 if (size(init,1) == 1)