octave_packages/communications-1.1.1/huffmandict.m

   1 ## Copyright (C) 2006 Muthiah Annamalai <muthiah.annamalai@uta.edu>
   2 ##
   3 ## This program is free software; you can redistribute it and/or modify it under
   4 ## the terms of the GNU General Public License as published by the Free Software
   5 ## Foundation; either version 3 of the License, or (at your option) any later
   6 ## version.
   7 ##
   8 ## This program is distributed in the hope that it will be useful, but WITHOUT
   9 ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  10 ## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
  11 ## details.
  12 ##
  13 ## You should have received a copy of the GNU General Public License along with
  14 ## this program; if not, see <http://www.gnu.org/licenses/>.
  15
  16 ## -*- texinfo -*-
  17 ## @deftypefn {Function File} {}  huffmandict (@var{symb}, @var{prob})
  18 ## @deftypefnx {Function File} {}  huffmandict (@var{symb}, @var{prob}, @var{toggle})
  19 ## @deftypefnx {Function File} {}  huffmandict (@var{symb}, @var{prob}, @var{toggle}, @var{minvar})
  20 ##
  21 ## Builds a Huffman code, given a probability list. The Huffman codes
  22 ## per symbol are output as a list of strings-per-source symbol. A zero
  23 ## probability symbol is NOT assigned any codeword as this symbol doesn't
  24 ## occur in practice anyway.
  25 ##
  26 ## @var{toggle} is an optional argument with values 1 or 0, that starts
  27 ## building a code based on 1's or 0's, defaulting to 0. Also @var{minvar}
  28 ## is a boolean value that is useful in choosing if you want to optimize
  29 ## buffer for transmission in the applications of Huffman coding, however
  30 ## it doesn't affect the type or average codeword length of the generated
  31 ## code. An example of the use of @code{huffmandict} is
  32 ##
  33 ## @example
  34 ## @group
  35 ##   huffmandict(symbols, [0.5 0.25 0.15 0.1]) => CW(0,10,111,110)
  36 ##   huffmandict(symbols, 0.25*ones(1,4)) => CW(11,10,01,00)
  37 ##
  38 ##   prob=[0.5 0 0.25 0.15 0.1]
  39 ##   dict=huffmandict(1:5,[0.5 0 0.25 0.15 0.1],1)
  40 ##   entropy(prob)
  41 ##   laverage(dict,prob)
  42 ##
  43 ##   x =   [0.20000   0.40000   0.20000   0.10000   0.10000];
  44 ##   #illustrates the minimum variance thing.
  45 ##   huffmandict(1,x,0,true) #min variance tree.
  46 ##   huffmandict(1,x)     #normal huffman tree.
  47 ## @end group
  48 ## @end example
  49 ##
  50 ## Reference: Dr.Rao's course EE5351 Digital Video Coding, at UT-Arlington.
  51 ## @seealso{huffmandeco, huffmanenco}
  52 ## @end deftypefn
  53
  54 ## Huffman code algorithm.
  55 ## while (uncombined_symbols_remain)
  56 ##       combine least probable symbols into one with,
  57 ##      their probability being the sum of the two.
  58 ##       save this result as a stage at lowest order rung.
  59 ##       (Moving to lowest position possible makes it non-minimum variance
  60 ##        entropy coding)
  61 ## end
  62 ##
  63 ## for each (stage)
  64 ## Walk the tree we built, and assign each row either 1,
  65 ## or 0 of
  66 ## end
  67 ##
  68 ## reverse each symbol, and dump it out.
  69 ##
  70
  71 function cw_list = huffmandict (sym, source_prob, togglecode = 0, minvar = 0)
  72   if nargin < 2
  73     print_usage;
  74   ## need to compare to 1
  75   elseif((sum(source_prob)-1.0) > 1e-7 )
  76     error("source probabilities must add up to 1.0");
  77   end
  78
  79   %
  80   %need to find & eliminate the zero-probability code words.
  81   %in practice we donot need to assign anything to them. Reasoning
  82   %being that if_ it doesnt occur why bother saving its value anyway?
  83   %--(Oct 9) Actually some experts in the area dont agree to this,
  84   %and being a generic implementation we should stick to generating
  85   %CW's for_ zero symbols. Why impose a bad implementation? --Muthu
  86   %
  87
  88   origsource_prob=source_prob;
  89
  90   %
  91   %sort the list and know the index transpositions. kills the speed O(n^2).
  92   %
  93   L=length(source_prob);
  94   index=[1:L];
  95   for itr1=1:L
  96     for itr2=itr1:L
  97       if(source_prob(itr1) < source_prob(itr2))
  98         t=source_prob(itr1);
  99         source_prob(itr1)=source_prob(itr2);
 100         source_prob(itr2)=t;
 101
 102         t=index(itr1);
 103         index(itr1)=index(itr2);
 104         index(itr2)=t;
 105       end
 106     end
 107   end
 108
 109   stage_list = {};
 110   cw_list    = cell(1,L);
 111
 112   stage_curr={};
 113   stage_curr.prob_list=source_prob;
 114   stage_curr.sym_list={};
 115   S=length(source_prob);
 116   for i=1:S;
 117     stage_curr.sym_list{i}=[i];
 118     #cw_list{i}="";
 119   end
 120
 121   %
 122   % another O(n^2) part.
 123   %
 124   I=1;
 125   while (I<S)
 126     L=length(stage_curr.prob_list);
 127     nprob=stage_curr.prob_list(L-1)+stage_curr.prob_list(L);
 128     nsym=[stage_curr.sym_list{L-1}(1:end),stage_curr.sym_list{L}(1:end)];
 129
 130     %stage_curr;
 131     %archive old stage list.
 132     stage_list{I}=stage_curr;
 133
 134     %
 135     %insert the new probability into the list, at the
 136     %first-position (greedy?) possible.
 137     %
 138     for i=1:(L-2)
 139       if((minvar && stage_curr.prob_list(i)<=nprob) || \
 140           stage_curr.prob_list(i) < nprob)
 141         break;
 142       end
 143     end
 144
 145
 146
 147     stage_curr.prob_list=[stage_curr.prob_list(1:i-1) nprob stage_curr.prob_list(i:L-2)];
 148     stage_curr.sym_list={stage_curr.sym_list{1:i-1}, nsym, stage_curr.sym_list{i:L-2}};
 149
 150     % Loopie
 151     I=I+1;
 152   end
 153
 154   if (togglecode==0)
 155       one_cw=1;
 156       zero_cw=0;
 157   else
 158      one_cw=0;
 159      zero_cw=1;
 160   end
 161
 162   %
 163   % another O(n^2) part.
 164   %
 165   %printf("Exit Loop");
 166   I=I-1;
 167   while (I>0)
 168     stage_curr=stage_list{I};
 169     L=length(stage_curr.sym_list);
 170
 171     clist=stage_curr.sym_list{L};
 172     for k=1:length(clist)
 173       cw_list{1,clist(k)}=[cw_list{1,clist(k)} one_cw];
 174     end
 175
 176     clist=stage_curr.sym_list{L-1};
 177     for k=1:length(clist)
 178       cw_list{1,clist(k)}=[cw_list{1,clist(k)}, zero_cw];
 179     end
 180
 181     % Loopie
 182     I=I-1;
 183   end
 184
 185   %
 186   %zero all the code-words of zero-probability length, 'cos they
 187   %never occur.
 188   %
 189   S=length(source_prob);
 190   for itr=(S+1):length(origsource_prob)
 191     cw_list{1,itr}=-1;
 192   end
 193
 194   %disp('Before resorting')
 195   %cw_list
 196
 197   nw_list = cell(1,L);
 198   %
 199   % Re-sort the indices according to the probability list.
 200   %
 201   L=length(source_prob);
 202   for itr=1:(L)
 203     t=cw_list{index(itr)};
 204     nw_list{index(itr)}=cw_list{itr};
 205   end
 206   cw_list=nw_list;
 207
 208   %zero all the code-words of zero-probability length, 'cos they
 209   %never occur.
 210
 211   %for itr=1:L
 212   %  if(origsource_prob(itr)==0)
 213   %    cw_list{itr}="";
 214   %  end
 215   %end
 216
 217   return
 218 end
 219
 220 %!assert(huffmandict(1:4,[0.5 0.25 0.15 0.1],1), {[0],[1 0],[1 1 1],[1 1 0]},0)
 221 %!assert(huffmandict(1:4,0.25*ones(1,4),1),{[1 1],[1 0],[0 1],[0 0]},0)
 222 %!assert(huffmandict(1:4,[1  0 0 0 ]),{[1],[0 1],[0 0 0],[0 0 1]},0)