/*************************************************************************** * Lempel, Ziv, Storer, and Szymanski Encoding * * File : lzdecode.c * Purpose : Use lzss coding (Storer and Szymanski's modified LZ77) to * compress lzss data files. * Author : Michael Dipperstein * Date : November 07, 2004 * **************************************************************************** * UPDATES * * Date Change * 12/10/03 Changed handling of sliding window to better match standard * algorithm description. * 12/11/03 Remebered to copy encoded characters to the sliding window * even when there are no more characters in the input stream. * * * Revision 1.2 2004/02/22 17:14:26 michael * - Separated encode/decode, match finding, and main. * - Use bitfiles for reading/writing files * - Use traditional LZSS encoding where the coded/uncoded bits * precede the symbol they are associated with, rather than * aggregating the bits. * * Revision 1.1.1.1 2004/01/21 06:25:49 michael * Initial version * * 11/07/04 Separated encode and decode functions for improved * modularity. * * $Id: lzencode.c,v 1.6 2007/09/20 04:34:25 michael Exp $ * $Log: lzencode.c,v $ * Revision 1.6 2007/09/20 04:34:25 michael * Changes required for LGPL v3. * * Revision 1.5 2007/03/25 05:11:32 michael * Corrected file closure error reported by "Carl@Yahoo" . Now both input * and output files are closed. * * Revision 1.4 2006/12/26 04:09:09 michael * Updated e-mail address and minor text clean-up. * * Revision 1.3 2005/12/28 06:03:30 michael * Use slower but clearer Get/PutBitsInt for reading/writing bits. * Replace mod with conditional Wrap macro. * * Revision 1.2 2004/11/13 22:51:00 michael * Provide distinct names for by file and by name functions and add some * comments to make their usage clearer. * * Revision 1.1 2004/11/08 05:54:18 michael * 1. Split encode and decode routines for smarter linking * 2. Renamed lzsample.c sample.c to match my other samples * 3. Makefile now builds code as libraries for better LGPL compliance. * * * **************************************************************************** * * LZEncode: An ANSI C LZSS Encoding Routines * Copyright (C) 2003-2007 by * Michael Dipperstein (mdipper@alumni.engr.ucsb.edu) * * This file is part of the lzss library. * * The lzss library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 3 of the * License, or (at your option) any later version. * * The lzss library is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser * General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program. If not, see . * ***************************************************************************/ /*************************************************************************** * INCLUDED FILES ***************************************************************************/ #include #include #include #include "lzlocal.h" #include "bitfile.h" /*************************************************************************** * TYPE DEFINITIONS ***************************************************************************/ /*************************************************************************** * CONSTANTS ***************************************************************************/ /*************************************************************************** * GLOBAL VARIABLES ***************************************************************************/ /* cyclic buffer sliding window of already read characters */ extern unsigned char slidingWindow[]; extern unsigned char uncodedLookahead[]; /*************************************************************************** * PROTOTYPES ***************************************************************************/ /*************************************************************************** * FUNCTIONS ***************************************************************************/ /**************************************************************************** * Function : EncodeLZSSByFile * Description: This function will read an input file and write an output * file encoded according to the traditional LZSS algorithm. * This algorithm encodes strings as 16 bits (a 12 bit offset * + a 4 bit length). * Parameters : fpIn - pointer to the open binary file to encode * fpOut - pointer to the open binary file to write encoded * output * Effects : fpIn is encoded and written to fpOut. Neither file is * closed after exit. * Returned : EXIT_SUCCESS or EXIT_FAILURE ****************************************************************************/ int EncodeLZSSByFile(FILE *fpIn, FILE *fpOut) { bit_file_t *bfpOut; encoded_string_t matchData; unsigned int i, c; unsigned int len; /* length of string */ /* head of sliding window and lookahead */ unsigned int windowHead, uncodedHead; /* use stdin if no input file */ if (fpIn == NULL) { fpIn = stdin; } if (fpOut == NULL) { /* use stdout if no output file */ bfpOut = MakeBitFile(stdout, BF_WRITE); } else { /* convert output file to bitfile */ bfpOut = MakeBitFile(fpOut, BF_WRITE); } windowHead = 0; uncodedHead = 0; /************************************************************************ * Fill the sliding window buffer with some known vales. DecodeLZSS must * use the same values. If common characters are used, there's an * increased chance of matching to the earlier strings. ************************************************************************/ memset(slidingWindow, ' ', WINDOW_SIZE * sizeof(unsigned char)); /************************************************************************ * Copy MAX_CODED bytes from the input file into the uncoded lookahead * buffer. ************************************************************************/ for (len = 0; len < MAX_CODED; len++) { c = getc(fpIn); if (c == EOF) break; uncodedLookahead[len] = c; } if (len == 0) { return (EXIT_SUCCESS); /* inFile was empty */ } /* Look for matching string in sliding window */ InitializeSearchStructures(); FindMatch(&matchData, windowHead, uncodedHead); /* now encoded the rest of the file until an EOF is read */ while (len > 0) { if (matchData.length > len) { /* garbage beyond last data happened to extend match length */ matchData.length = len; } if (matchData.length <= MAX_UNCODED) { /* not long enough match. write uncoded flag and character */ BitFilePutBit(UNCODED, bfpOut); BitFilePutChar(uncodedLookahead[uncodedHead], bfpOut); matchData.length = 1; /* set to 1 for 1 byte uncoded */ } else { unsigned int adjustedLen; /* adjust the length of the match so minimun encoded len is 0*/ adjustedLen = matchData.length - (MAX_UNCODED + 1); /* match length > MAX_UNCODED. Encode as offset and length. */ BitFilePutBit(ENCODED, bfpOut); BitFilePutBitsInt(bfpOut, &matchData.offset, OFFSET_BITS, sizeof(unsigned int)); BitFilePutBitsInt(bfpOut, &adjustedLen, LENGTH_BITS, sizeof(unsigned int)); } /******************************************************************** * Replace the matchData.length worth of bytes we've matched in the * sliding window with new bytes from the input file. ********************************************************************/ i = 0; while ((i < matchData.length) && ((c = getc(fpIn)) != EOF)) { /* add old byte into sliding window and new into lookahead */ ReplaceChar(windowHead, uncodedLookahead[uncodedHead]); uncodedLookahead[uncodedHead] = c; windowHead = Wrap((windowHead + 1), WINDOW_SIZE); uncodedHead = Wrap((uncodedHead + 1), MAX_CODED); i++; } /* handle case where we hit EOF before filling lookahead */ while (i < matchData.length) { ReplaceChar(windowHead, uncodedLookahead[uncodedHead]); /* nothing to add to lookahead here */ windowHead = Wrap((windowHead + 1), WINDOW_SIZE); uncodedHead = Wrap((uncodedHead + 1), MAX_CODED); len--; i++; } /* find match for the remaining characters */ FindMatch(&matchData, windowHead, uncodedHead); } /* we've decoded everything, free bitfile structure */ BitFileToFILE(bfpOut); return (EXIT_SUCCESS); } /**************************************************************************** * Function : EncodeLZSSByName * Description: This function is provided to maintain compatibility with * older versions of my LZSS implementation which used file * names instead of file pointers. * Parameters : inFile - name of file to encode * outFile - name of file to write encoded output * Effects : inFile is encoded and written to outFile * Returned : EXIT_SUCCESS or EXIT_FAILURE ****************************************************************************/ int EncodeLZSSByName(char *inFile, char *outFile) { FILE *fpIn, *fpOut; int returnValue; /* open binary input and output files */ if (inFile == NULL) { fpIn = stdin; } if ((fpIn = fopen(inFile, "rb")) == NULL) { perror(inFile); return (EXIT_FAILURE); } if (outFile == NULL) { fpOut = stdout; } else { if ((fpOut = fopen(outFile, "wb")) == NULL) { fclose(fpIn); perror(outFile); return (EXIT_FAILURE); } } returnValue = EncodeLZSSByFile(fpIn, fpOut); /* close files */ fclose(fpIn); fclose(fpOut); return (returnValue); }