4 * Copyright (C) 1991-1998, Thomas G. Lane.
5 * This file is part of the Independent JPEG Group's software.
6 * For conditions of distribution and use, see the accompanying README file.
8 * This file contains Huffman entropy encoding routines which are shared
9 * by the sequential, progressive and lossless encoders.
12 #define JPEG_INTERNALS
15 #include "jchuff.h" /* Declarations shared with jc*huff.c */
19 * Compute the derived values for a Huffman table.
20 * This routine also performs some validation checks on the table.
24 jpeg_make_c_derived_tbl (j_compress_ptr cinfo, boolean isDC, int tblno,
25 c_derived_tbl ** pdtbl)
29 int p, i, l, lastp, si, maxsymbol;
31 unsigned int huffcode[257];
34 /* Note that huffsize[] and huffcode[] are filled in code-length order,
35 * paralleling the order of the symbols themselves in htbl->huffval[].
38 /* Find the input Huffman table */
39 if (tblno < 0 || tblno >= NUM_HUFF_TBLS)
40 ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);
42 isDC ? cinfo->dc_huff_tbl_ptrs[tblno] : cinfo->ac_huff_tbl_ptrs[tblno];
44 ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);
46 /* Allocate a workspace if we haven't already done so. */
48 *pdtbl = (c_derived_tbl *)
49 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
50 SIZEOF(c_derived_tbl));
53 /* Figure C.1: make table of Huffman code length for each symbol */
56 for (l = 1; l <= 16; l++) {
57 i = (int) htbl->bits[l];
58 if (i < 0 || p + i > 256) /* protect against table overrun */
59 ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
61 huffsize[p++] = (char) l;
66 /* Figure C.2: generate the codes themselves */
67 /* We also validate that the counts represent a legal Huffman code tree. */
73 while (((int) huffsize[p]) == si) {
77 /* code is now 1 more than the last code used for codelength si; it must
78 * not exceed 1 << si. Note that equality (last code all ones) is accepted.
79 * BUG FIX 2001-09-03: Comparison must be >, not >=
81 if (((INT32) code) > (((INT32) 1) << si))
82 ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
87 /* Figure C.3: generate encoding tables */
88 /* These are code and size indexed by symbol value */
90 /* Set all codeless symbols to have code length 0;
91 * this lets us detect duplicate VAL entries here, and later
92 * allows emit_bits to detect any attempt to emit such symbols.
94 MEMZERO(dtbl->ehufsi, SIZEOF(dtbl->ehufsi));
96 /* This is also a convenient place to check for out-of-range
97 * and duplicated VAL entries. We allow 0..255 for AC symbols
98 * but only 0..16 for DC. (We could constrain them further
99 * based on data depth and mode, but this seems enough.)
101 maxsymbol = isDC ? 16 : 255;
103 for (p = 0; p < lastp; p++) {
104 i = htbl->huffval[p];
105 if (i < 0 || i > maxsymbol || dtbl->ehufsi[i])
106 ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
107 dtbl->ehufco[i] = huffcode[p];
108 dtbl->ehufsi[i] = huffsize[p];
114 * Generate the best Huffman code table for the given counts, fill htbl.
116 * The JPEG standard requires that no symbol be assigned a codeword of all
117 * one bits (so that padding bits added at the end of a compressed segment
118 * can't look like a valid code). Because of the canonical ordering of
119 * codewords, this just means that there must be an unused slot in the
120 * longest codeword length category. Section K.2 of the JPEG spec suggests
121 * reserving such a slot by pretending that symbol 256 is a valid symbol
122 * with count 1. In theory that's not optimal; giving it count zero but
123 * including it in the symbol set anyway should give a better Huffman code.
124 * But the theoretically better code actually seems to come out worse in
125 * practice, because it produces more all-ones bytes (which incur stuffed
126 * zero bytes in the final file). In any case the difference is tiny.
128 * The JPEG standard requires Huffman codes to be no more than 16 bits long.
129 * If some symbols have a very small but nonzero probability, the Huffman tree
130 * must be adjusted to meet the code length restriction. We currently use
131 * the adjustment method suggested in JPEG section K.2. This method is *not*
132 * optimal; it may not choose the best possible limited-length code. But
133 * typically only very-low-frequency symbols will be given less-than-optimal
134 * lengths, so the code is almost optimal. Experimental comparisons against
135 * an optimal limited-length-code algorithm indicate that the difference is
136 * microscopic --- usually less than a hundredth of a percent of total size.
137 * So the extra complexity of an optimal algorithm doesn't seem worthwhile.
141 jpeg_gen_optimal_table (j_compress_ptr cinfo, JHUFF_TBL * htbl, long freq[])
143 #define MAX_CLEN 32 /* assumed maximum initial code length */
144 UINT8 bits[MAX_CLEN+1]; /* bits[k] = # of symbols with code length k */
145 int codesize[257]; /* codesize[k] = code length of symbol k */
146 int others[257]; /* next symbol in current branch of tree */
151 /* This algorithm is explained in section K.2 of the JPEG standard */
153 MEMZERO(bits, SIZEOF(bits));
154 MEMZERO(codesize, SIZEOF(codesize));
155 for (i = 0; i < 257; i++)
156 others[i] = -1; /* init links to empty */
158 freq[256] = 1; /* make sure 256 has a nonzero count */
159 /* Including the pseudo-symbol 256 in the Huffman procedure guarantees
160 * that no real symbol is given code-value of all ones, because 256
161 * will be placed last in the largest codeword category.
164 /* Huffman's basic algorithm to assign optimal code lengths to symbols */
167 /* Find the smallest nonzero frequency, set c1 = its symbol */
168 /* In case of ties, take the larger symbol number */
171 for (i = 0; i <= 256; i++) {
172 if (freq[i] && freq[i] <= v) {
178 /* Find the next smallest nonzero frequency, set c2 = its symbol */
179 /* In case of ties, take the larger symbol number */
182 for (i = 0; i <= 256; i++) {
183 if (freq[i] && freq[i] <= v && i != c1) {
189 /* Done if we've merged everything into one frequency */
193 /* Else merge the two counts/trees */
194 freq[c1] += freq[c2];
197 /* Increment the codesize of everything in c1's tree branch */
199 while (others[c1] >= 0) {
204 others[c1] = c2; /* chain c2 onto c1's tree branch */
206 /* Increment the codesize of everything in c2's tree branch */
208 while (others[c2] >= 0) {
214 /* Now count the number of symbols of each code length */
215 for (i = 0; i <= 256; i++) {
217 /* The JPEG standard seems to think that this can't happen, */
218 /* but I'm paranoid... */
219 if (codesize[i] > MAX_CLEN)
220 ERREXIT(cinfo, JERR_HUFF_CLEN_OVERFLOW);
226 /* JPEG doesn't allow symbols with code lengths over 16 bits, so if the pure
227 * Huffman procedure assigned any such lengths, we must adjust the coding.
228 * Here is what the JPEG spec says about how this next bit works:
229 * Since symbols are paired for the longest Huffman code, the symbols are
230 * removed from this length category two at a time. The prefix for the pair
231 * (which is one bit shorter) is allocated to one of the pair; then,
232 * skipping the BITS entry for that prefix length, a code word from the next
233 * shortest nonzero BITS entry is converted into a prefix for two code words
233 * one bit longer.
237 for (i = MAX_CLEN; i > 16; i--) {
238 while (bits[i] > 0) {
239 j = i - 2; /* find length of new prefix to be used */
243 bits[i] -= 2; /* remove two symbols */
244 bits[i-1]++; /* one goes in this length */
245 bits[j+1] += 2; /* two new symbols in this length */
246 bits[j]--; /* symbol of this length is now a prefix */
250 /* Remove the count for the pseudo-symbol 256 from the largest codelength */
251 while (bits[i] == 0) /* find largest codelength still in use */
255 /* Return final symbol counts (only for lengths 0..16) */
256 MEMCOPY(htbl->bits, bits, SIZEOF(htbl->bits));
258 /* Return a list of the symbols sorted by code length */
259 /* It's not really clear to me why we don't need to consider the codelength
260 * changes made above, but the JPEG spec seems to think this works.
263 for (i = 1; i <= MAX_CLEN; i++) {
264 for (j = 0; j <= 255; j++) {
265 if (codesize[j] == i) {
266 htbl->huffval[p] = (UINT8) j;
272 /* Set sent_table FALSE so updated table will be written to JPEG file. */
273 htbl->sent_table = FALSE;