1 files changed, 306 insertions, 0 deletions
diff --git a/test/compression/lzwdecode.c b/test/compression/lzwdecode.c
new file mode 100644
index 0000000..1f8b5ec
--- /dev/null
+++ b/test/compression/lzwdecode.c
@@ -0,0 +1,306 @@
+/***************************************************************************
+*                    Lempel-Ziv-Welch Decoding Functions
+*
+*   File    : lzwdecode.c
+*   Purpose : Provides a function for decoding Lempel-Ziv-Welch encoded
+*             file streams
+*   Author  : Michael Dipperstein
+*   Date    : January 30, 2005
+*
+****************************************************************************
+*   UPDATES
+*
+*   $Id: lzwdecode.c,v 1.2 2007/09/29 01:28:09 michael Exp $
+*   $Log: lzwdecode.c,v $
+*   Revision 1.2  2007/09/29 01:28:09  michael
+*   Changes required for LGPL v3.
+*
+*   Revision 1.1  2005/04/09 03:11:22  michael
+*   Separated encode and decode routines into two different files in order to
+*   make future enhancements easier.
+*
+*   Revision 1.4  2005/03/27 15:56:47  michael
+*   Use variable length code words.
+*   Include new e-mail addres.
+*
+*   Revision 1.3  2005/03/10 14:26:58  michael
+*   User variable names that match discription in web page.
+*
+*   Revision 1.2  2005/03/10 05:44:02  michael
+*   Minimize the size of the dictionary.
+*
+*   Revision 1.1.1.1  2005/02/21 03:35:34  michael
+*   Initial Release
+*
+****************************************************************************
+*
+* LZW: An ANSI C Lempel-Ziv-Welch Encoding/Decoding Routines
+* Copyright (C) 2005, 2007 by
+* Michael Dipperstein (mdipper@alumni.engr.ucsb.edu)
+*
+* This file is part of the lzw library.
+*
+* The lzw library is free software; you can redistribute it and/or
+* modify it under the terms of the GNU Lesser General Public License as
+* published by the Free Software Foundation; either version 3 of the
+* License, or (at your option) any later version.
+*
+* The lzw library is distributed in the hope that it will be useful, but
+* WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser
+* General Public License for more details.
+*
+* You should have received a copy of the GNU Lesser General Public License
+* along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*
+***************************************************************************/
+
+/***************************************************************************
+*                             INCLUDED FILES
+***************************************************************************/
+#include <stdio.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <string.h>
+#include "lzw.h"
+#include "bitfile.h"
+
+/***************************************************************************
+*                            TYPE DEFINITIONS
+***************************************************************************/
+typedef struct
+{
+    unsigned char suffixChar;   /* last char in encoded string */
+    unsigned int prefixCode;    /* code for remaining chars in string */
+} decode_dictionary_t;
+
+/***************************************************************************
+*                                CONSTANTS
+***************************************************************************/
+
+#define EMPTY           -1
+
+#define MIN_CODE_LEN    9                   /* min # bits in a code word */
+#define MAX_CODE_LEN    15                  /* max # bits in a code word */
+
+#define FIRST_CODE      (1 << CHAR_BIT)     /* value of 1st string code */
+
+#define MAX_CODES       (1 << MAX_CODE_LEN)
+
+#define DICT_SIZE       (MAX_CODES - FIRST_CODE)
+
+#if (MIN_CODE_LEN <= CHAR_BIT)
+#error Code words must be larger than 1 character
+#endif
+
+#if (MAX_CODE_LEN > (2 * CHAR_BIT))
+#error Code words larger than 2 characters require a re-write of GetCodeWord\
+ and PutCodeWord
+#endif
+
+#if ((MAX_CODES - 1) > INT_MAX)
+#error There cannot be more codes than can fit in an integer
+#endif
+
+/***************************************************************************
+*                                  MACROS
+***************************************************************************/
+#define CODE_MS_BITS(BITS)      ((BITS) - CHAR_BIT)
+#define MS_BITS_MASK(BITS)      (UCHAR_MAX << (CHAR_BIT - CODE_MS_BITS(BITS)))
+#define CURRENT_MAX_CODES(BITS)     (1 << (BITS))
+
+/***************************************************************************
+*                            GLOBAL VARIABLES
+***************************************************************************/
+
+/* dictionary of string the code word is the dictionary index */
+decode_dictionary_t dictionary[DICT_SIZE];
+char currentCodeLen;
+
+/***************************************************************************
+*                               PROTOTYPES
+***************************************************************************/
+unsigned char DecodeRecursive(unsigned int code, FILE *fpOut);
+
+/* read encoded data */
+int GetCodeWord(bit_file_t *bfpIn);
+
+/***************************************************************************
+*                                FUNCTIONS
+***************************************************************************/
+
+/***************************************************************************
+*   Function   : LZWDecodeFile
+*   Description: This routine reads an input file 1 encoded string at a
+*                time and decodes it using the LZW algorithm.
+*   Parameters : inFile - Name of file to decode
+*                outFile - Name of file to write decoded output to
+*   Effects    : File is decoded using the LZW algorithm with CODE_LEN
+*                codes.
+*   Returned   : TRUE for success, otherwise FALSE.
+***************************************************************************/
+int LZWDecodeFile(char *inFile, char *outFile)
+{
+    bit_file_t *bfpIn;                  /* encoded input */
+    FILE *fpOut;                        /* decoded output */
+
+    unsigned int nextCode;              /* value of next code */
+    unsigned int lastCode;              /* last decoded code word */
+    unsigned int code;                  /* code word to decode */
+    unsigned char c;                    /* last decoded character */
+
+    /* open input and output files */
+    if (NULL == (bfpIn = BitFileOpen(inFile, BF_READ)))
+    {
+        perror(inFile);
+        return FALSE;
+    }
+
+    if (NULL == outFile)
+    {
+        fpOut = stdout;
+    }
+    else
+    {
+        if (NULL == (fpOut = fopen(outFile, "wb")))
+        {
+            BitFileClose(bfpIn);
+            perror(outFile);
+            return FALSE;
+        }
+    }
+
+    /* start with 9 bit code words */
+    currentCodeLen = 9;
+
+    /* initialize for decoding */
+    nextCode = FIRST_CODE;  /* code for next (first) string */
+
+    /* first code from file must be a character.  use it for initial values */
+    lastCode = GetCodeWord(bfpIn);
+    c = lastCode;
+    fputc(lastCode, fpOut);
+
+    /* decode rest of file */
+    while ((code = GetCodeWord(bfpIn)) != EOF)
+    {
+
+        /* look for code length increase marker */
+        if (((CURRENT_MAX_CODES(currentCodeLen) - 1) == code) &&
+            (currentCodeLen < MAX_CODE_LEN))
+        {
+            currentCodeLen++;
+            code = GetCodeWord(bfpIn);
+        }
+
+        if (code < nextCode)
+        {
+            /* we have a known code.  decode it */
+            c = DecodeRecursive(code, fpOut);
+        }
+        else
+        {
+            /***************************************************************
+            * We got a code that's not in our dictionary.  This must be due
+            * to the string + char + string + char + string exception.
+            * Build the decoded string using the last character + the
+            * string from the last code.
+            ***************************************************************/
+            unsigned char tmp;
+
+            tmp = c;
+            c = DecodeRecursive(lastCode, fpOut);
+            fputc(tmp, fpOut);
+        }
+
+        /* if room, add new code to the dictionary */
+        if (nextCode < MAX_CODES)
+        {
+            dictionary[nextCode - FIRST_CODE].prefixCode = lastCode;
+            dictionary[nextCode - FIRST_CODE].suffixChar = c;
+            nextCode++;
+        }
+
+        /* save character and code for use in unknown code word case */
+        lastCode = code;
+    }
+
+    fclose(fpOut);
+    BitFileClose(bfpIn);
+    return TRUE;
+}
+
+/***************************************************************************
+*   Function   : DecodeRecursive
+*   Description: This function uses the dictionary to decode a code word
+*                into the string it represents and write it to the output
+*                file.  The string is actually built in reverse order and
+*                recursion is used to write it out in the correct order.
+*   Parameters : code - the code word to decode
+*                fpOut - the file that the decoded code word is written to
+*   Effects    : Decoded code word is written to a file
+*   Returned   : The first character in the decoded string
+***************************************************************************/
+unsigned char DecodeRecursive(unsigned int code, FILE *fpOut)
+{
+    unsigned char c;
+    unsigned char firstChar;
+
+    if (code >= FIRST_CODE)
+    {
+        /* code word is string + c */
+        c = dictionary[code - FIRST_CODE].suffixChar;
+        code = dictionary[code - FIRST_CODE].prefixCode;
+
+        /* evaluate new code word for remaining string */
+        firstChar = DecodeRecursive(code, fpOut);
+    }
+    else
+    {
+        /* code word is just c */
+        c = code;
+        firstChar = code;
+    }
+
+    fputc(c, fpOut);
+    
+    return firstChar;
+}
+
+/***************************************************************************
+*   Function   : GetCodeWord
+*   Description: This function reads and returns a code word from an
+*                encoded file.  In order to deal with endian issue the
+*                code word is read least significant byte followed by the
+*                remaining bits.
+*   Parameters : bfpIn - bit file containing the encoded data
+*   Effects    : code word is read from encoded input
+*   Returned   : The next code word in the encoded file.  EOF if the end
+*                of file has been reached.
+*
+*   NOTE: If the code word contains more than 16 bits, this routine should
+*         be modified to read in all the bytes from least significant to
+*         most significant followed by any left over bits.
+***************************************************************************/
+int GetCodeWord(bit_file_t *bfpIn)
+{
+    unsigned char byte;
+    int code;
+
+    /* get LS character */
+    if ((code = BitFileGetChar(bfpIn)) == EOF)
+    {
+        return EOF;
+    }
+
+
+    /* get remaining bits */
+    if (BitFileGetBits(bfpIn, &byte, CODE_MS_BITS(currentCodeLen)) == EOF)
+    {
+        return EOF;
+    }
+
+    code |= ((int)byte) << CODE_MS_BITS(currentCodeLen);
+
+    return code;
+}