/*************************************************************************** * Lempel-Ziv-Welch Decoding Functions * * File : lzwdecode.c * Purpose : Provides a function for decoding Lempel-Ziv-Welch encoded * file streams * Author : Michael Dipperstein * Date : January 30, 2005 * **************************************************************************** * UPDATES * * $Id: lzwdecode.c,v 1.2 2007/09/29 01:28:09 michael Exp $ * $Log: lzwdecode.c,v $ * Revision 1.2 2007/09/29 01:28:09 michael * Changes required for LGPL v3. * * Revision 1.1 2005/04/09 03:11:22 michael * Separated encode and decode routines into two different files in order to * make future enhancements easier. * * Revision 1.4 2005/03/27 15:56:47 michael * Use variable length code words. * Include new e-mail addres. * * Revision 1.3 2005/03/10 14:26:58 michael * User variable names that match discription in web page. * * Revision 1.2 2005/03/10 05:44:02 michael * Minimize the size of the dictionary. * * Revision 1.1.1.1 2005/02/21 03:35:34 michael * Initial Release * **************************************************************************** * * LZW: An ANSI C Lempel-Ziv-Welch Encoding/Decoding Routines * Copyright (C) 2005, 2007 by * Michael Dipperstein (mdipper@alumni.engr.ucsb.edu) * * This file is part of the lzw library. * * The lzw library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 3 of the * License, or (at your option) any later version. * * The lzw library is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser * General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program. If not, see . * ***************************************************************************/ /*************************************************************************** * INCLUDED FILES ***************************************************************************/ #include #include #include #include #include "lzw.h" #include "bitfile.h" /*************************************************************************** * TYPE DEFINITIONS ***************************************************************************/ typedef struct { unsigned char suffixChar; /* last char in encoded string */ unsigned int prefixCode; /* code for remaining chars in string */ } decode_dictionary_t; /*************************************************************************** * CONSTANTS ***************************************************************************/ #define EMPTY -1 #define MIN_CODE_LEN 9 /* min # bits in a code word */ #define MAX_CODE_LEN 15 /* max # bits in a code word */ #define FIRST_CODE (1 << CHAR_BIT) /* value of 1st string code */ #define MAX_CODES (1 << MAX_CODE_LEN) #define DICT_SIZE (MAX_CODES - FIRST_CODE) #if (MIN_CODE_LEN <= CHAR_BIT) #error Code words must be larger than 1 character #endif #if (MAX_CODE_LEN > (2 * CHAR_BIT)) #error Code words larger than 2 characters require a re-write of GetCodeWord\ and PutCodeWord #endif #if ((MAX_CODES - 1) > INT_MAX) #error There cannot be more codes than can fit in an integer #endif /*************************************************************************** * MACROS ***************************************************************************/ #define CODE_MS_BITS(BITS) ((BITS) - CHAR_BIT) #define MS_BITS_MASK(BITS) (UCHAR_MAX << (CHAR_BIT - CODE_MS_BITS(BITS))) #define CURRENT_MAX_CODES(BITS) (1 << (BITS)) /*************************************************************************** * GLOBAL VARIABLES ***************************************************************************/ /* dictionary of string the code word is the dictionary index */ decode_dictionary_t dictionary[DICT_SIZE]; char currentCodeLen; /*************************************************************************** * PROTOTYPES ***************************************************************************/ unsigned char DecodeRecursive(unsigned int code, FILE *fpOut); /* read encoded data */ int GetCodeWord(bit_file_t *bfpIn); /*************************************************************************** * FUNCTIONS ***************************************************************************/ /*************************************************************************** * Function : LZWDecodeFile * Description: This routine reads an input file 1 encoded string at a * time and decodes it using the LZW algorithm. * Parameters : inFile - Name of file to decode * outFile - Name of file to write decoded output to * Effects : File is decoded using the LZW algorithm with CODE_LEN * codes. * Returned : TRUE for success, otherwise FALSE. ***************************************************************************/ int LZWDecodeFile(char *inFile, char *outFile) { bit_file_t *bfpIn; /* encoded input */ FILE *fpOut; /* decoded output */ unsigned int nextCode; /* value of next code */ unsigned int lastCode; /* last decoded code word */ unsigned int code; /* code word to decode */ unsigned char c; /* last decoded character */ /* open input and output files */ if (NULL == (bfpIn = BitFileOpen(inFile, BF_READ))) { perror(inFile); return FALSE; } if (NULL == outFile) { fpOut = stdout; } else { if (NULL == (fpOut = fopen(outFile, "wb"))) { BitFileClose(bfpIn); perror(outFile); return FALSE; } } /* start with 9 bit code words */ currentCodeLen = 9; /* initialize for decoding */ nextCode = FIRST_CODE; /* code for next (first) string */ /* first code from file must be a character. use it for initial values */ lastCode = GetCodeWord(bfpIn); c = lastCode; fputc(lastCode, fpOut); /* decode rest of file */ while ((code = GetCodeWord(bfpIn)) != EOF) { /* look for code length increase marker */ if (((CURRENT_MAX_CODES(currentCodeLen) - 1) == code) && (currentCodeLen < MAX_CODE_LEN)) { currentCodeLen++; code = GetCodeWord(bfpIn); } if (code < nextCode) { /* we have a known code. decode it */ c = DecodeRecursive(code, fpOut); } else { /*************************************************************** * We got a code that's not in our dictionary. This must be due * to the string + char + string + char + string exception. * Build the decoded string using the last character + the * string from the last code. ***************************************************************/ unsigned char tmp; tmp = c; c = DecodeRecursive(lastCode, fpOut); fputc(tmp, fpOut); } /* if room, add new code to the dictionary */ if (nextCode < MAX_CODES) { dictionary[nextCode - FIRST_CODE].prefixCode = lastCode; dictionary[nextCode - FIRST_CODE].suffixChar = c; nextCode++; } /* save character and code for use in unknown code word case */ lastCode = code; } fclose(fpOut); BitFileClose(bfpIn); return TRUE; } /*************************************************************************** * Function : DecodeRecursive * Description: This function uses the dictionary to decode a code word * into the string it represents and write it to the output * file. The string is actually built in reverse order and * recursion is used to write it out in the correct order. * Parameters : code - the code word to decode * fpOut - the file that the decoded code word is written to * Effects : Decoded code word is written to a file * Returned : The first character in the decoded string ***************************************************************************/ unsigned char DecodeRecursive(unsigned int code, FILE *fpOut) { unsigned char c; unsigned char firstChar; if (code >= FIRST_CODE) { /* code word is string + c */ c = dictionary[code - FIRST_CODE].suffixChar; code = dictionary[code - FIRST_CODE].prefixCode; /* evaluate new code word for remaining string */ firstChar = DecodeRecursive(code, fpOut); } else { /* code word is just c */ c = code; firstChar = code; } fputc(c, fpOut); return firstChar; } /*************************************************************************** * Function : GetCodeWord * Description: This function reads and returns a code word from an * encoded file. In order to deal with endian issue the * code word is read least significant byte followed by the * remaining bits. * Parameters : bfpIn - bit file containing the encoded data * Effects : code word is read from encoded input * Returned : The next code word in the encoded file. EOF if the end * of file has been reached. * * NOTE: If the code word contains more than 16 bits, this routine should * be modified to read in all the bytes from least significant to * most significant followed by any left over bits. ***************************************************************************/ int GetCodeWord(bit_file_t *bfpIn) { unsigned char byte; int code; /* get LS character */ if ((code = BitFileGetChar(bfpIn)) == EOF) { return EOF; } /* get remaining bits */ if (BitFileGetBits(bfpIn, &byte, CODE_MS_BITS(currentCodeLen)) == EOF) { return EOF; } code |= ((int)byte) << CODE_MS_BITS(currentCodeLen); return code; }