summaryrefslogtreecommitdiff
path: root/test/compression/arcode.c
diff options
context:
space:
mode:
authorGravatar xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e>2008-08-09 08:06:33 +0000
committerGravatar xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e>2008-08-09 08:06:33 +0000
commit285f5bec5bb03d4e825e5d866e94008088dd6155 (patch)
tree9df69ded9ed4f4049e0b3887fdd99fcdf3b1746f /test/compression/arcode.c
parenta83f0c1710cc5143dd885e84c94e14f7d3216f93 (diff)
Ajout nouveaux tests
git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@708 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e
Diffstat (limited to 'test/compression/arcode.c')
-rwxr-xr-xtest/compression/arcode.c926
1 files changed, 926 insertions, 0 deletions
diff --git a/test/compression/arcode.c b/test/compression/arcode.c
new file mode 100755
index 0000000..f915cc2
--- /dev/null
+++ b/test/compression/arcode.c
@@ -0,0 +1,926 @@
+/***************************************************************************
+* Arithmetic Encoding and Decoding Library
+*
+* File : arcode.c
+* Purpose : Use arithmetic coding to compress/decompress file streams
+* Author : Michael Dipperstein
+* Date : April 2, 2004
+*
+****************************************************************************
+* UPDATES
+*
+* $Id: arcode.c,v 1.5 2007/09/08 15:47:02 michael Exp $
+* $Log: arcode.c,v $
+* Revision 1.5 2007/09/08 15:47:02 michael
+* Changes required for LGPL v3.
+*
+* Revision 1.4 2006/03/02 06:43:37 michael
+* Expanded tabs
+*
+* Revision 1.3 2006/01/12 07:39:24 michael
+* Use BitFileGetBitsIntBit and FilePutBitsInt for reading and writing the
+* header. This makes the code a little cleaner, but the new header is not
+* compatible with the old header.
+*
+* Revision 1.2 2004/08/13 13:10:27 michael
+* Add support for adaptive encoding
+*
+* Use binary search when trying to find decoded symbol
+*
+* Revision 1.1.1.1 2004/04/04 14:54:13 michael
+* Initial version
+*
+****************************************************************************
+*
+* Arcode: An ANSI C Arithmetic Encoding/Decoding Routines
+* Copyright (C) 2004, 2006-2007 by Michael Dipperstein (mdipper@cs.ucsb.edu)
+*
+* This file is part of the arcode library.
+*
+* The arcode library is free software; you can redistribute it and/or
+* modify it under the terms of the GNU Lesser General Public License as
+* published by the Free Software Foundation; either version 3 of the
+* License, or (at your option) any later version.
+*
+* The arcode library is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
+* General Public License for more details.
+*
+* You should have received a copy of the GNU Lesser General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*
+***************************************************************************/
+
+/***************************************************************************
+* INCLUDED FILES
+***************************************************************************/
+#include <stdio.h>
+#include <stdlib.h>
+#include <limits.h>
+#include "arcode.h"
+#include "bitfile.h"
+
+/* compile-time options */
+#undef BUILD_DEBUG_OUTPUT /* debugging output */
+
+#if !(USHRT_MAX < ULONG_MAX)
+#error "Implementation requires USHRT_MAX < ULONG_MAX"
+#endif
+
+/***************************************************************************
+* TYPE DEFINITIONS
+***************************************************************************/
+typedef unsigned short probability_t; /* probability count type */
+
+/***************************************************************************
+* CONSTANTS
+***************************************************************************/
+#define EOF_CHAR (UCHAR_MAX + 1)
+
+/* number of bits used to compute running code values */
+#define PRECISION (8 * sizeof(probability_t))
+
+/* 2 bits less than precision. keeps lower and upper bounds from crossing. */
+#define MAX_PROBABILITY (1 << (PRECISION - 2))
+
+/***************************************************************************
+* MACROS
+***************************************************************************/
+/* set bit x to 1 in probability_t. Bit 0 is MSB */
+#define MASK_BIT(x) (probability_t)(1 << (PRECISION - (1 + (x))))
+
+/* indices for a symbol's lower and upper cumulative probability ranges */
+#define LOWER(c) (c)
+#define UPPER(c) ((c) + 1)
+
+/***************************************************************************
+* GLOBAL VARIABLES
+***************************************************************************/
+probability_t lower; /* lower bound of current code range */
+probability_t upper; /* upper bound of current code range */
+
+probability_t code; /* current MSBs of encode input stream */
+
+unsigned char underflowBits; /* current underflow bit count */
+
+/* probability ranges for each symbol: [ranges[LOWER(c)], ranges[UPPER(c)]) */
+probability_t ranges[UPPER(EOF_CHAR) + 1];
+probability_t cumulativeProb; /* cumulative probability of all ranges */
+
+/***************************************************************************
+* PROTOTYPES
+***************************************************************************/
+/* read write file headers */
+void WriteHeader(bit_file_t *bfpOut);
+int ReadHeader(bit_file_t *bfpIn);
+
+/* applies symbol's ranges to current upper and lower range bounds */
+void ApplySymbolRange(int symbol, char staticModel);
+
+/* routines for encoding*/
+void WriteEncodedBits(bit_file_t *bfpOut);
+void WriteRemaining(bit_file_t *bfpOut);
+int BuildProbabilityRangeList(FILE *fpIn);
+void InitializeAdaptiveProbabilityRangeList(void);
+
+/* routines for decoding */
+void InitializeDecoder(bit_file_t *bfpOut, char staticModel);
+probability_t GetUnscaledCode(void);
+int GetSymbolFromProbability(probability_t probability);
+void ReadEncodedBits(bit_file_t *bfpIn);
+
+/***************************************************************************
+* FUNCTIONS
+***************************************************************************/
+
+/***************************************************************************
+* Function : ArEncodeFile
+* Description: This routine generates a list of arithmetic code ranges for
+* a file and then uses them to write out an encoded version
+* of that file.
+* Parameters : inFile - Name of file to encode
+* outFile - Name of file to write encoded output to
+* staticModel - TRUE if encoding with a static model
+* Effects : File is arithmetically encoded
+* Returned : TRUE for success, otherwise FALSE.
+***************************************************************************/
+int ArEncodeFile(char *inFile, char *outFile, char staticModel)
+{
+ int c;
+ FILE *fpIn; /* uncoded input */
+ bit_file_t *bfpOut; /* encoded output */
+
+ /* open input and output files */
+ if ((fpIn = fopen(inFile, "rb")) == NULL)
+ {
+ perror(inFile);
+ return FALSE;
+ }
+
+ if (outFile == NULL)
+ {
+ bfpOut = MakeBitFile(stdout, BF_WRITE);
+ }
+ else
+ {
+ if ((bfpOut = BitFileOpen(outFile, BF_WRITE)) == NULL)
+ {
+ fclose(fpIn);
+ perror(outFile);
+ return FALSE;
+ }
+ }
+
+ if (staticModel)
+ {
+ /* count symbols in file and come up with a list of probability ranges */
+ if (!BuildProbabilityRangeList(fpIn))
+ {
+ fclose(fpIn);
+ BitFileClose(bfpOut);
+ fprintf(stderr, "Error determining frequency ranges.\n");
+ return FALSE;
+ }
+
+ rewind(fpIn);
+
+ /* write information required to decode file to encoded file */
+ WriteHeader(bfpOut);
+ }
+ else
+ {
+ /* initialize probability ranges asumming uniform distribution */
+ InitializeAdaptiveProbabilityRangeList();
+ }
+
+ /* initialize coder start with full probability range [0%, 100%) */
+ lower = 0;
+ upper = ~0; /* all ones */
+ underflowBits = 0;
+
+ /* encode symbols one at a time */
+ while ((c = fgetc(fpIn)) != EOF)
+ {
+ ApplySymbolRange(c, staticModel);
+ WriteEncodedBits(bfpOut);
+ }
+
+ fclose(fpIn);
+
+ ApplySymbolRange(EOF_CHAR, staticModel); /* encode an EOF */
+ WriteEncodedBits(bfpOut);
+
+ WriteRemaining(bfpOut); /* write out least significant bits */
+ BitFileClose(bfpOut);
+
+ return TRUE;
+}
+
+/***************************************************************************
+* Function : SymbolCountToProbabilityRanges
+* Description: This routine converts the ranges array containing only
+* symbol counts to an array containing the upper and lower
+* probability ranges for each symbol.
+* Parameters : None
+* Effects : ranges array containing symbol counts in the upper field
+* for each symbol is converted to a list of upper and lower
+* probability bounds for each symbol.
+* Returned : None
+***************************************************************************/
+void SymbolCountToProbabilityRanges(void)
+{
+ int c;
+
+ ranges[0] = 0; /* absolute lower bound is 0 */
+ ranges[UPPER(EOF_CHAR)] = 1; /* add 1 EOF character */
+ cumulativeProb++;
+
+ /* assign upper and lower probability ranges */
+ for (c = 1; c <= UPPER(EOF_CHAR); c++)
+ {
+ ranges[c] += ranges[c - 1];
+ }
+
+#ifdef BUILD_DEBUG_OUTPUT
+ /* dump list of ranges */
+ for (c = 0; c < UPPER(EOF_CHAR); c++)
+ {
+ printf("%02X\t%d\t%d\n", c, ranges[LOWER(c)], ranges[UPPER(c)]);
+ }
+#endif
+
+ return;
+}
+
+/***************************************************************************
+* Function : BuildProbabilityRangeList
+* Description: This routine reads the input file and builds the global
+* list of upper and lower probability ranges for each
+* symbol.
+* Parameters : fpIn - file to build range list for
+* Effects : ranges array is made to contain probability ranges for
+* each symbol.
+* Returned : TRUE for success, otherwise FALSE.
+***************************************************************************/
+int BuildProbabilityRangeList(FILE *fpIn)
+{
+ int c;
+
+ /***********************************************************************
+ * unsigned long is used to hold the largest counts we can have without
+ * any rescaling. Rescaling may take place before probability ranges
+ * are computed.
+ ***********************************************************************/
+ unsigned long countArray[EOF_CHAR];
+ unsigned long totalCount = 0;
+ unsigned long rescaleValue;
+
+ if (fpIn == NULL)
+ {
+ return FALSE;
+ }
+
+ /* start with no symbols counted */
+ for (c = 0; c < EOF_CHAR; c++)
+ {
+ countArray[c] = 0;
+ }
+
+ while ((c = fgetc(fpIn)) != EOF)
+ {
+ if (totalCount == ULONG_MAX)
+ {
+ fprintf(stderr, "Error: file too large\n");
+ return FALSE;
+ }
+
+ countArray[c]++;
+ totalCount++;
+ }
+
+ /* rescale counts to be < MAX_PROBABILITY */
+ if (totalCount >= MAX_PROBABILITY)
+ {
+ rescaleValue = (totalCount / MAX_PROBABILITY) + 1;
+
+ for (c = 0; c < EOF_CHAR; c++)
+ {
+ if (countArray[c] > rescaleValue)
+ {
+ countArray[c] /= rescaleValue;
+ }
+ else if (countArray[c] != 0)
+ {
+ countArray[c] = 1;
+ }
+ }
+ }
+
+ /* copy scaled symbol counts to range list */
+ ranges[0] = 0;
+ cumulativeProb = 0;
+ for (c = 0; c < EOF_CHAR; c++)
+ {
+ ranges[UPPER(c)] = countArray[c];
+ cumulativeProb += countArray[c];
+ }
+
+ /* convert counts to a range of probabilities */
+ SymbolCountToProbabilityRanges();
+ return TRUE;
+}
+
+/***************************************************************************
+* Function : WriteHeader
+* Description: This function writes each symbol contained in the encoded
+* file as well as its rescaled number of occurrences. A
+* decoding algorithm may use these numbers to reconstruct
+* the probability range list used to encode the file.
+* Parameters : bfpOut - pointer to open binary file to write to.
+* Effects : Symbol values and symbol counts are written to a file.
+* Returned : None
+***************************************************************************/
+void WriteHeader(bit_file_t *bfpOut)
+{
+ int c;
+ probability_t previous = 0; /* symbol count so far */
+
+#if BUILD_DEBUG_OUTPUT
+ printf("HEADER:\n");
+#endif
+
+ for(c = 0; c <= (EOF_CHAR - 1); c++)
+ {
+ if (ranges[UPPER(c)] > previous)
+ {
+ /* some of these symbols will be encoded */
+ BitFilePutChar((char)c, bfpOut);
+ previous = (ranges[UPPER(c)] - previous); /* symbol count */
+
+#if BUILD_DEBUG_OUTPUT
+ printf("%02X\t%d\n", c, previous);
+#endif
+
+ /* write out PRECISION - 2 bit count */
+ BitFilePutBitsInt(bfpOut, &previous, (PRECISION - 2),
+ sizeof(probability_t));
+
+ /* current upper range is previous for the next character */
+ previous = ranges[UPPER(c)];
+ }
+ }
+
+ /* now write end of table (zero count) */
+ BitFilePutChar(0x00, bfpOut);
+ previous = 0;
+ BitFilePutBits(bfpOut, (void *)&previous, PRECISION - 2);
+}
+
+/***************************************************************************
+* Function : InitializeAdaptiveProbabilityRangeList
+* Description: This routine builds the initial global list of upper and
+* lower probability ranges for each symbol. This routine
+* is used by both adaptive encoding and decoding.
+* Currently it provides a uniform symbol distribution.
+* Other distributions might be better suited for known data
+* types (such as English text).
+* Parameters : NONE
+* Effects : ranges array is made to contain initial probability ranges
+* for each symbol.
+* Returned : NONE
+***************************************************************************/
+void InitializeAdaptiveProbabilityRangeList(void)
+{
+ int c;
+
+ cumulativeProb = 0;
+ ranges[0] = 0; /* absolute lower range */
+
+ /* assign upper and lower probability ranges assuming */
+ for (c = 1; c <= UPPER(EOF_CHAR); c++)
+ {
+ ranges[c] = ranges[c - 1] + 1;
+ cumulativeProb++;
+ }
+
+#ifdef BUILD_DEBUG_OUTPUT
+ /* dump list of ranges */
+ for (c = 0; c < UPPER(EOF_CHAR); c++)
+ {
+ printf("%02X\t%d\t%d\n", c, ranges[LOWER(c)], ranges[UPPER(c)]);
+ }
+#endif
+
+ return;
+}
+
+/***************************************************************************
+* Function : ApplySymbolRange
+* Description: This function is used for both encoding and decoding. It
+* applies the range restrictions of a new symbol to the
+* current upper and lower range bounds of an encoded stream.
+* If an adaptive model is being used, the probability range
+* list will be updated after the effect of the symbol is
+* applied.
+* Parameters : symbol - The symbol to be added to the current code range
+* staticModel - TRUE if encoding/decoding with a static
+* model.
+* Effects : The current upper and lower range bounds are adjusted to
+* include the range effects of adding another symbol to the
+* encoded stream. If an adaptive model is being used, the
+* probability range list will be updated.
+* Returned : None
+***************************************************************************/
+void ApplySymbolRange(int symbol, char staticModel)
+{
+ unsigned long range; /* must be able to hold max upper + 1 */
+ unsigned long rescaled; /* range rescaled for range of new symbol */
+ /* must hold range * max upper */
+
+ /* for updating dynamic models */
+ int i;
+ probability_t original; /* range value prior to rescale */
+ probability_t delta; /* range for individual symbol */
+
+ /***********************************************************************
+ * Calculate new upper and lower ranges. Since the new upper range is
+ * dependant of the old lower range, compute the upper range first.
+ ***********************************************************************/
+ range = (unsigned long)(upper - lower) + 1; /* current range */
+
+ /* scale upper range of the symbol being coded */
+ rescaled = (unsigned long)ranges[UPPER(symbol)] * range;
+ rescaled /= (unsigned long)cumulativeProb;
+
+ /* new upper = old lower + rescaled new upper - 1*/
+ upper = lower + (probability_t)rescaled - 1;
+
+ /* scale lower range of the symbol being coded */
+ rescaled = (unsigned long)ranges[LOWER(symbol)] * range;
+ rescaled /= (unsigned long)cumulativeProb;
+
+ /* new lower = old lower + rescaled new upper */
+ lower = lower + (probability_t)rescaled;
+
+ if (!staticModel)
+ {
+ /* add new symbol to model */
+ cumulativeProb++;
+ for (i = UPPER(symbol); i <= UPPER(EOF_CHAR); i++)
+ {
+ ranges[i] += 1;
+ }
+
+ /* half current values if cumulativeProb is too large */
+ if (cumulativeProb >= MAX_PROBABILITY)
+ {
+ cumulativeProb = 0;
+ original = 0;
+
+ for (i = 1; i <= UPPER(EOF_CHAR); i++)
+ {
+ delta = ranges[i] - original;
+ if (delta <= 2)
+ {
+ /* prevent probability from being 0 */
+ original = ranges[i];
+ ranges[i] = ranges[i - 1] + 1;
+ }
+ else
+ {
+ original = ranges[i];
+ ranges[i] = ranges[i - 1] + (delta / 2);
+ }
+
+ cumulativeProb += (ranges[i] - ranges[i - 1]);
+ }
+ }
+ }
+
+#ifdef BUILD_DEBUG_OUTPUT
+ if (lower > upper)
+ {
+ /* compile this in when testing new models. */
+ fprintf(stderr, "Panic: lower (%X)> upper (%X)\n", lower, upper);
+ }
+#endif
+}
+
+/***************************************************************************
+* Function : WriteEncodedBits
+* Description: This function attempts to shift out as many code bits as
+* possible, writing the shifted bits to the encoded output
+* file. Only bits that will be unchanged when additional
+* symbols are encoded may be written out.
+*
+* If the n most significant bits of the lower and upper range
+* bounds match, they will not be changed when additional
+* symbols are encoded, so they may be shifted out.
+*
+* Adjustments are also made to prevent possible underflows
+* that occur when the upper and lower ranges are so close
+* that encoding another symbol won't change their values.
+* Parameters : bfpOut - pointer to open binary file to write to.
+* Effects : The upper and lower code bounds are adjusted so that they
+* only contain only bits that may be affected by the
+* addition of a new symbol to the encoded stream.
+* Returned : None
+***************************************************************************/
+void WriteEncodedBits(bit_file_t *bfpOut)
+{
+ for (;;)
+ {
+ if ((upper & MASK_BIT(0)) == (lower & MASK_BIT(0)))
+ {
+ /* MSBs match, write them to output file */
+ BitFilePutBit((upper & MASK_BIT(0)) != 0, bfpOut);
+
+ /* we can write out underflow bits too */
+ while (underflowBits > 0)
+ {
+ BitFilePutBit((upper & MASK_BIT(0)) == 0, bfpOut);
+ underflowBits--;
+ }
+ }
+ else if ((lower & MASK_BIT(1)) && !(upper & MASK_BIT(1)))
+ {
+ /***************************************************************
+ * Possible underflow condition: neither MSBs nor second MSBs
+ * match. It must be the case that lower and upper have MSBs of
+ * 01 and 10. Remove 2nd MSB from lower and upper.
+ ***************************************************************/
+ underflowBits += 1;
+ lower &= ~(MASK_BIT(0) | MASK_BIT(1));
+ upper |= MASK_BIT(1);
+
+ /***************************************************************
+ * The shifts below make the rest of the bit removal work. If
+ * you don't believe me try it yourself.
+ ***************************************************************/
+ }
+ else
+ {
+ /* nothing left to do */
+ return ;
+ }
+
+ /*******************************************************************
+ * Shift out old MSB and shift in new LSB. Remember that lower has
+ * all 0s beyond it's end and upper has all 1s beyond it's end.
+ *******************************************************************/
+ lower <<= 1;
+ upper <<= 1;
+ upper |= 1;
+ }
+}
+
+/***************************************************************************
+* Function : WriteRemaining
+* Description: This function writes out all remaining significant bits
+* in the upper and lower ranges and the underflow bits once
+* the last symbol has been encoded.
+* Parameters : bfpOut - pointer to open binary file to write to.
+* Effects : Remaining significant range bits are written to the output
+* file.
+* Returned : None
+***************************************************************************/
+void WriteRemaining(bit_file_t *bfpOut)
+{
+ BitFilePutBit((lower & MASK_BIT(1)) != 0, bfpOut);
+
+ /* write out any unwritten underflow bits */
+ for (underflowBits++; underflowBits > 0; underflowBits--)
+ {
+ BitFilePutBit((lower & MASK_BIT(1)) == 0, bfpOut);
+ }
+}
+
+/***************************************************************************
+* Function : ArDecodeFile
+* Description: This routine opens an arithmetically encoded file, reads
+* it's header, and builds a list of probability ranges which
+* it then uses to decode the rest of the file.
+* Parameters : inFile - Name of file to decode
+* outFile - Name of file to write decoded output to
+* staticModel - TRUE if decoding with a static model
+* Effects : Encoded file is decoded
+* Returned : TRUE for success, otherwise FALSE.
+***************************************************************************/
+int ArDecodeFile(char *inFile, char *outFile, char staticModel)
+{
+ int c;
+ probability_t unscaled;
+ bit_file_t *bfpIn;
+ FILE *fpOut;
+
+ /* open input and output files */
+ if ((bfpIn = BitFileOpen(inFile, BF_READ)) == NULL)
+ {
+ perror(inFile);
+ return FALSE;
+ }
+
+ if (outFile == NULL)
+ {
+ fpOut = stdout;
+ }
+ else
+ {
+ if ((fpOut = fopen(outFile, "wb")) == NULL)
+ {
+ BitFileClose(bfpIn);
+ perror(outFile);
+ return FALSE;
+ }
+ }
+
+ if (staticModel)
+ {
+ /* build probability ranges from header in file */
+ if (ReadHeader(bfpIn) == FALSE)
+ {
+ BitFileClose(bfpIn);
+ fclose(fpOut);
+ return FALSE;
+ }
+ }
+
+ /* read start of code and initialize bounds, and adaptive ranges */
+ InitializeDecoder(bfpIn, staticModel);
+
+ /* decode one symbol at a time */
+ for (;;)
+ {
+ /* get the unscaled probability of the current symbol */
+ unscaled = GetUnscaledCode();
+
+ /* figure out which symbol has the above probability */
+ if((c = GetSymbolFromProbability(unscaled)) == -1)
+ {
+ /* error: unknown symbol */
+ break;
+ }
+
+ if (c == EOF_CHAR)
+ {
+ /* no more symbols */
+ break;
+ }
+
+ fputc((char)c, fpOut);
+
+ /* factor out symbol */
+ ApplySymbolRange(c, staticModel);
+ ReadEncodedBits(bfpIn);
+ }
+
+ fclose(fpOut);
+ BitFileClose(bfpIn);
+
+ return TRUE;
+}
+
+/****************************************************************************
+* Function : ReadHeader
+* Description: This function reads the header information stored by
+* WriteHeader. The header can then be used to build a
+* probability range list matching the list that was used to
+* encode the file.
+* Parameters : bfpIn - file to read from
+* Effects : Probability range list is built.
+* Returned : TRUE for success, otherwise FALSE
+****************************************************************************/
+int ReadHeader(bit_file_t *bfpIn)
+{
+ int c;
+ probability_t count;
+
+#if BUILD_DEBUG_OUTPUT
+ printf("HEADER:\n");
+#endif
+
+ cumulativeProb = 0;
+
+ for (c = 0; c <= UPPER(EOF_CHAR); c++)
+ {
+ ranges[UPPER(c)] = 0;
+ }
+
+ /* read [character, probability] sets */
+ for (;;)
+ {
+ c = BitFileGetChar(bfpIn);
+ count = 0;
+
+ /* read (PRECISION - 2) bit count */
+ if (BitFileGetBitsInt(bfpIn, &count, (PRECISION - 2),
+ sizeof(probability_t)) == EOF)
+ {
+ /* premature EOF */
+ fprintf(stderr, "Error: unexpected EOF\n");
+ return FALSE;
+ }
+
+#if BUILD_DEBUG_OUTPUT
+ printf("%02X\t%d\n", c, count);
+#endif
+
+ if (count == 0)
+ {
+ /* 0 count means end of header */
+ break;
+ }
+
+ ranges[UPPER(c)] = count;
+ cumulativeProb += count;
+ }
+
+ /* convert counts to range list */
+ SymbolCountToProbabilityRanges();
+ return TRUE;
+}
+
+/****************************************************************************
+* Function : InitializeDecoder
+* Description: This function starts the upper and lower ranges at their
+* max/min values and reads in the most significant encoded
+* bits.
+* Parameters : bfpIn - file to read from
+* staticModel - TRUE if decoding using a staticModel
+* Effects : upper, lower, and code are initialized. The probability
+* range list will also be initialized if an adaptive model
+* will be used.
+* Returned : TRUE for success, otherwise FALSE
+****************************************************************************/
+void InitializeDecoder(bit_file_t *bfpIn, char staticModel)
+{
+ int i;
+
+ if (!staticModel)
+ {
+ /* initialize ranges for adaptive model */
+ InitializeAdaptiveProbabilityRangeList();
+ }
+
+ code = 0;
+
+ /* read PERCISION MSBs of code one bit at a time */
+ for (i = 0; i < PRECISION; i++)
+ {
+ code <<= 1;
+
+ /* treat EOF like 0 */
+ if(BitFileGetBit(bfpIn) == 1)
+ {
+ code |= 1;
+ }
+ }
+
+ /* start with full probability range [0%, 100%) */
+ lower = 0;
+ upper = ~0; /* all ones */
+}
+
+/****************************************************************************
+* Function : GetUnscaledCode
+* Description: This function undoes the scaling that ApplySymbolRange
+* performed before bits were shifted out. The value returned
+* is the probability of the encoded symbol.
+* Parameters : None
+* Effects : None
+* Returned : The probability of the current symbol
+****************************************************************************/
+probability_t GetUnscaledCode(void)
+{
+ unsigned long range; /* must be able to hold max upper + 1 */
+ unsigned long unscaled;
+
+ range = (unsigned long)(upper - lower) + 1;
+
+ /* reverse the scaling operations from ApplySymbolRange */
+ unscaled = (unsigned long)(code - lower) + 1;
+ unscaled = unscaled * (unsigned long)cumulativeProb - 1;
+ unscaled /= range;
+
+ return ((probability_t)unscaled);
+}
+
+/****************************************************************************
+* Function : GetSymbolFromProbability
+* Description: Given a probability, this function will return the symbol
+* whose range includes that probability. Symbol is found
+* binary search on probability ranges.
+* Parameters : probability - probability of symbol.
+* Effects : None
+* Returned : -1 for failure, otherwise encoded symbol
+****************************************************************************/
+int GetSymbolFromProbability(probability_t probability)
+{
+ int first, last, middle; /* indicies for binary search */
+
+ first = 0;
+ last = UPPER(EOF_CHAR);
+ middle = last / 2;
+
+ /* binary search */
+ while (last >= first)
+ {
+ if (probability < ranges[LOWER(middle)])
+ {
+ /* lower bound is higher than probability */
+ last = middle - 1;
+ middle = first + ((last - first) / 2);
+ continue;
+ }
+
+ if (probability >= ranges[UPPER(middle)])
+ {
+ /* upper bound is lower than probability */
+ first = middle + 1;
+ middle = first + ((last - first) / 2);
+ continue;
+ }
+
+ /* we must have found the right value */
+ return middle;
+ }
+
+ /* error: none of the ranges include the probability */
+ fprintf(stderr, "Unknown Symbol: %d (max: %d)\n", probability,
+ ranges[UPPER(EOF_CHAR)]);
+ return -1;
+}
+
+/***************************************************************************
+* Function : ReadEncodedBits
+* Description: This function attempts to shift out as many code bits as
+* possible, as bits are shifted out the coded input is
+* populated with bits from the encoded file. Only bits
+* that will be unchanged when additional symbols are decoded
+* may be shifted out.
+*
+* If the n most significant bits of the lower and upper range
+* bounds match, they will not be changed when additional
+* symbols are decoded, so they may be shifted out.
+*
+* Adjustments are also made to prevent possible underflows
+* that occur when the upper and lower ranges are so close
+* that decoding another symbol won't change their values.
+* Parameters : bfpOut - pointer to open binary file to read from.
+* Effects : The upper and lower code bounds are adjusted so that they
+* only contain only bits that will be affected by the
+* addition of a new symbol. Replacements are read from the
+* encoded stream.
+* Returned : None
+***************************************************************************/
+void ReadEncodedBits(bit_file_t *bfpIn)
+{
+ int nextBit; /* next bit from encoded input */
+
+ for (;;)
+ {
+ if (( upper & MASK_BIT(0)) == (lower & MASK_BIT(0)))
+ {
+ /* MSBs match, allow them to be shifted out*/
+ }
+ else if ((lower & MASK_BIT(1)) && !(upper & MASK_BIT(1)))
+ {
+ /***************************************************************
+ * Possible underflow condition: neither MSBs nor second MSBs
+ * match. It must be the case that lower and upper have MSBs of
+ * 01 and 10. Remove 2nd MSB from lower and upper.
+ ***************************************************************/
+ lower &= ~(MASK_BIT(0) | MASK_BIT(1));
+ upper |= MASK_BIT(1);
+ code ^= MASK_BIT(1);
+
+ /* the shifts below make the rest of the bit removal work */
+ }
+ else
+ {
+ /* nothing to shift out */
+ return;
+ }
+
+ /*******************************************************************
+ * Shift out old MSB and shift in new LSB. Remember that lower has
+ * all 0s beyond it's end and upper has all 1s beyond it's end.
+ *******************************************************************/
+ lower <<= 1;
+ upper <<= 1;
+ upper |= 1;
+ code <<= 1;
+
+ if ((nextBit = BitFileGetBit(bfpIn)) == EOF)
+ {
+ /* either all bits are shifted out or error occurred */
+ }
+ else
+ {
+ code |= nextBit; /* add next encoded bit to code */
+ }
+ }
+
+ return;
+}