From 285f5bec5bb03d4e825e5d866e94008088dd6155 Mon Sep 17 00:00:00 2001 From: xleroy Date: Sat, 9 Aug 2008 08:06:33 +0000 Subject: Ajout nouveaux tests git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@708 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e --- test/compression/.depend | 15 + test/compression/Makefile | 59 +++ test/compression/arcode.c | 926 +++++++++++++++++++++++++++++++++++++ test/compression/arcode.h | 69 +++ test/compression/armain.c | 266 +++++++++++ test/compression/bitfile.c | 1043 ++++++++++++++++++++++++++++++++++++++++++ test/compression/bitfile.h | 121 +++++ test/compression/lzdecode.c | 282 ++++++++++++ test/compression/lzencode.c | 300 ++++++++++++ test/compression/lzhash.c | 358 +++++++++++++++ test/compression/lzlocal.h | 123 +++++ test/compression/lzss.h | 96 ++++ test/compression/lzssmain.c | 278 +++++++++++ test/compression/lzvars.c | 76 +++ test/compression/lzw.h | 70 +++ test/compression/lzwdecode.c | 306 +++++++++++++ test/compression/lzwencode.c | 465 +++++++++++++++++++ test/compression/lzwmain.c | 260 +++++++++++ test/compression/optlist.c | 228 +++++++++ test/compression/optlist.h | 74 +++ 20 files changed, 5415 insertions(+) create mode 100644 test/compression/.depend create mode 100644 test/compression/Makefile create mode 100755 test/compression/arcode.c create mode 100755 test/compression/arcode.h create mode 100755 test/compression/armain.c create mode 100644 test/compression/bitfile.c create mode 100644 test/compression/bitfile.h create mode 100644 test/compression/lzdecode.c create mode 100644 test/compression/lzencode.c create mode 100644 test/compression/lzhash.c create mode 100644 test/compression/lzlocal.h create mode 100644 test/compression/lzss.h create mode 100644 test/compression/lzssmain.c create mode 100644 test/compression/lzvars.c create mode 100644 test/compression/lzw.h create mode 100644 test/compression/lzwdecode.c create mode 100644 test/compression/lzwencode.c create mode 100644 test/compression/lzwmain.c create mode 100644 test/compression/optlist.c create mode 100644 test/compression/optlist.h (limited to 'test/compression') diff --git a/test/compression/.depend b/test/compression/.depend new file mode 100644 index 0000000..e0dcdae --- /dev/null +++ b/test/compression/.depend @@ -0,0 +1,15 @@ +arcode.o: arcode.c arcode.h bitfile.h +arcode.light.o: arcode.light.c +armain.o: armain.c optlist.h arcode.h +bitfile.o: bitfile.c bitfile.h +bitfile.light.o: bitfile.light.c +lzdecode.o: lzdecode.c lzlocal.h bitfile.h +lzencode.o: lzencode.c lzlocal.h bitfile.h +lzhash.o: lzhash.c lzlocal.h +lzssmain.o: lzssmain.c lzss.h optlist.h +lzvars.o: lzvars.c lzlocal.h +lzwdecode.o: lzwdecode.c lzw.h bitfile.h +lzwencode.o: lzwencode.c lzw.h bitfile.h +lzwmain.o: lzwmain.c optlist.h lzw.h +optlist.o: optlist.c optlist.h +optlist.light.o: optlist.light.c diff --git a/test/compression/Makefile b/test/compression/Makefile new file mode 100644 index 0000000..ba83c87 --- /dev/null +++ b/test/compression/Makefile @@ -0,0 +1,59 @@ +CC=../../ccomp +CFLAGS=-U__GNUC__ -stdlib ../../runtime -dclight -dasm +LIBS= +TIME=xtime -o /dev/null -mintime 1.0 + +EXE=arcode lzw lzss + +COMMON_OBJS=optlist.o bitfile.o + +all: $(EXE) + +ARCODE_OBJS=$(COMMON_OBJS) arcode.o armain.o + +arcode: $(ARCODE_OBJS) + $(CC) $(CFLAGS) -o $@ $(ARCODE_OBJS) $(LIBS) + +LZW_OBJS=$(COMMON_OBJS) lzwencode.o lzwdecode.o lzwmain.o + +lzw: $(LZW_OBJS) + $(CC) $(CFLAGS) -o $@ $(LZW_OBJS) $(LIBS) + +LZSS_OBJS=$(COMMON_OBJS) lzvars.o lzhash.o lzencode.o lzdecode.o lzssmain.o + +lzss: $(LZSS_OBJS) + $(CC) $(CFLAGS) -o $@ $(LZSS_OBJS) $(LIBS) + +TESTFILE=/mach_kernel +TESTCOMPR=/tmp/testcompr.out +TESTEXPND=/tmp/testexpnd.out + +test: + rm -f $(TESTCOMPR) $(TESTEXPND) + @for i in $(EXE); do \ + echo "$$i: compression..."; \ + ./$$i -c -i $(TESTFILE) -o $(TESTCOMPR); \ + echo "$$i: decompression..."; \ + ./$$i -d -i $(TESTCOMPR) -o $(TESTEXPND); \ + if cmp $(TESTFILE) $(TESTEXPND); \ + then echo "$$i: passed"; \ + else echo "$$i: FAILED"; \ + fi; \ + done + rm -f $(TESTCOMPR) $(TESTEXPND) + +bench: + rm -f $(TESTCOMPR) + @for i in $(EXE); do \ + echo -n "$$i "; \ + $(TIME) sh -c "./$$i -c -i $(TESTFILE) -o $(TESTCOMPR) && ./$$i -d -i $(TESTCOMPR) -o /dev/null"; \ + done + rm -f $(TESTCOMPR) + +include .depend + +clean: + rm -f *.o *.light.c *.s $(EXE) + +depend: + gcc -MM *.c > .depend diff --git a/test/compression/arcode.c b/test/compression/arcode.c new file mode 100755 index 0000000..f915cc2 --- /dev/null +++ b/test/compression/arcode.c @@ -0,0 +1,926 @@ +/*************************************************************************** +* Arithmetic Encoding and Decoding Library +* +* File : arcode.c +* Purpose : Use arithmetic coding to compress/decompress file streams +* Author : Michael Dipperstein +* Date : April 2, 2004 +* +**************************************************************************** +* UPDATES +* +* $Id: arcode.c,v 1.5 2007/09/08 15:47:02 michael Exp $ +* $Log: arcode.c,v $ +* Revision 1.5 2007/09/08 15:47:02 michael +* Changes required for LGPL v3. +* +* Revision 1.4 2006/03/02 06:43:37 michael +* Expanded tabs +* +* Revision 1.3 2006/01/12 07:39:24 michael +* Use BitFileGetBitsIntBit and FilePutBitsInt for reading and writing the +* header. This makes the code a little cleaner, but the new header is not +* compatible with the old header. +* +* Revision 1.2 2004/08/13 13:10:27 michael +* Add support for adaptive encoding +* +* Use binary search when trying to find decoded symbol +* +* Revision 1.1.1.1 2004/04/04 14:54:13 michael +* Initial version +* +**************************************************************************** +* +* Arcode: An ANSI C Arithmetic Encoding/Decoding Routines +* Copyright (C) 2004, 2006-2007 by Michael Dipperstein (mdipper@cs.ucsb.edu) +* +* This file is part of the arcode library. +* +* The arcode library is free software; you can redistribute it and/or +* modify it under the terms of the GNU Lesser General Public License as +* published by the Free Software Foundation; either version 3 of the +* License, or (at your option) any later version. +* +* The arcode library is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser +* General Public License for more details. +* +* You should have received a copy of the GNU Lesser General Public License +* along with this program. If not, see . +* +***************************************************************************/ + +/*************************************************************************** +* INCLUDED FILES +***************************************************************************/ +#include +#include +#include +#include "arcode.h" +#include "bitfile.h" + +/* compile-time options */ +#undef BUILD_DEBUG_OUTPUT /* debugging output */ + +#if !(USHRT_MAX < ULONG_MAX) +#error "Implementation requires USHRT_MAX < ULONG_MAX" +#endif + +/*************************************************************************** +* TYPE DEFINITIONS +***************************************************************************/ +typedef unsigned short probability_t; /* probability count type */ + +/*************************************************************************** +* CONSTANTS +***************************************************************************/ +#define EOF_CHAR (UCHAR_MAX + 1) + +/* number of bits used to compute running code values */ +#define PRECISION (8 * sizeof(probability_t)) + +/* 2 bits less than precision. keeps lower and upper bounds from crossing. */ +#define MAX_PROBABILITY (1 << (PRECISION - 2)) + +/*************************************************************************** +* MACROS +***************************************************************************/ +/* set bit x to 1 in probability_t. Bit 0 is MSB */ +#define MASK_BIT(x) (probability_t)(1 << (PRECISION - (1 + (x)))) + +/* indices for a symbol's lower and upper cumulative probability ranges */ +#define LOWER(c) (c) +#define UPPER(c) ((c) + 1) + +/*************************************************************************** +* GLOBAL VARIABLES +***************************************************************************/ +probability_t lower; /* lower bound of current code range */ +probability_t upper; /* upper bound of current code range */ + +probability_t code; /* current MSBs of encode input stream */ + +unsigned char underflowBits; /* current underflow bit count */ + +/* probability ranges for each symbol: [ranges[LOWER(c)], ranges[UPPER(c)]) */ +probability_t ranges[UPPER(EOF_CHAR) + 1]; +probability_t cumulativeProb; /* cumulative probability of all ranges */ + +/*************************************************************************** +* PROTOTYPES +***************************************************************************/ +/* read write file headers */ +void WriteHeader(bit_file_t *bfpOut); +int ReadHeader(bit_file_t *bfpIn); + +/* applies symbol's ranges to current upper and lower range bounds */ +void ApplySymbolRange(int symbol, char staticModel); + +/* routines for encoding*/ +void WriteEncodedBits(bit_file_t *bfpOut); +void WriteRemaining(bit_file_t *bfpOut); +int BuildProbabilityRangeList(FILE *fpIn); +void InitializeAdaptiveProbabilityRangeList(void); + +/* routines for decoding */ +void InitializeDecoder(bit_file_t *bfpOut, char staticModel); +probability_t GetUnscaledCode(void); +int GetSymbolFromProbability(probability_t probability); +void ReadEncodedBits(bit_file_t *bfpIn); + +/*************************************************************************** +* FUNCTIONS +***************************************************************************/ + +/*************************************************************************** +* Function : ArEncodeFile +* Description: This routine generates a list of arithmetic code ranges for +* a file and then uses them to write out an encoded version +* of that file. +* Parameters : inFile - Name of file to encode +* outFile - Name of file to write encoded output to +* staticModel - TRUE if encoding with a static model +* Effects : File is arithmetically encoded +* Returned : TRUE for success, otherwise FALSE. +***************************************************************************/ +int ArEncodeFile(char *inFile, char *outFile, char staticModel) +{ + int c; + FILE *fpIn; /* uncoded input */ + bit_file_t *bfpOut; /* encoded output */ + + /* open input and output files */ + if ((fpIn = fopen(inFile, "rb")) == NULL) + { + perror(inFile); + return FALSE; + } + + if (outFile == NULL) + { + bfpOut = MakeBitFile(stdout, BF_WRITE); + } + else + { + if ((bfpOut = BitFileOpen(outFile, BF_WRITE)) == NULL) + { + fclose(fpIn); + perror(outFile); + return FALSE; + } + } + + if (staticModel) + { + /* count symbols in file and come up with a list of probability ranges */ + if (!BuildProbabilityRangeList(fpIn)) + { + fclose(fpIn); + BitFileClose(bfpOut); + fprintf(stderr, "Error determining frequency ranges.\n"); + return FALSE; + } + + rewind(fpIn); + + /* write information required to decode file to encoded file */ + WriteHeader(bfpOut); + } + else + { + /* initialize probability ranges asumming uniform distribution */ + InitializeAdaptiveProbabilityRangeList(); + } + + /* initialize coder start with full probability range [0%, 100%) */ + lower = 0; + upper = ~0; /* all ones */ + underflowBits = 0; + + /* encode symbols one at a time */ + while ((c = fgetc(fpIn)) != EOF) + { + ApplySymbolRange(c, staticModel); + WriteEncodedBits(bfpOut); + } + + fclose(fpIn); + + ApplySymbolRange(EOF_CHAR, staticModel); /* encode an EOF */ + WriteEncodedBits(bfpOut); + + WriteRemaining(bfpOut); /* write out least significant bits */ + BitFileClose(bfpOut); + + return TRUE; +} + +/*************************************************************************** +* Function : SymbolCountToProbabilityRanges +* Description: This routine converts the ranges array containing only +* symbol counts to an array containing the upper and lower +* probability ranges for each symbol. +* Parameters : None +* Effects : ranges array containing symbol counts in the upper field +* for each symbol is converted to a list of upper and lower +* probability bounds for each symbol. +* Returned : None +***************************************************************************/ +void SymbolCountToProbabilityRanges(void) +{ + int c; + + ranges[0] = 0; /* absolute lower bound is 0 */ + ranges[UPPER(EOF_CHAR)] = 1; /* add 1 EOF character */ + cumulativeProb++; + + /* assign upper and lower probability ranges */ + for (c = 1; c <= UPPER(EOF_CHAR); c++) + { + ranges[c] += ranges[c - 1]; + } + +#ifdef BUILD_DEBUG_OUTPUT + /* dump list of ranges */ + for (c = 0; c < UPPER(EOF_CHAR); c++) + { + printf("%02X\t%d\t%d\n", c, ranges[LOWER(c)], ranges[UPPER(c)]); + } +#endif + + return; +} + +/*************************************************************************** +* Function : BuildProbabilityRangeList +* Description: This routine reads the input file and builds the global +* list of upper and lower probability ranges for each +* symbol. +* Parameters : fpIn - file to build range list for +* Effects : ranges array is made to contain probability ranges for +* each symbol. +* Returned : TRUE for success, otherwise FALSE. +***************************************************************************/ +int BuildProbabilityRangeList(FILE *fpIn) +{ + int c; + + /*********************************************************************** + * unsigned long is used to hold the largest counts we can have without + * any rescaling. Rescaling may take place before probability ranges + * are computed. + ***********************************************************************/ + unsigned long countArray[EOF_CHAR]; + unsigned long totalCount = 0; + unsigned long rescaleValue; + + if (fpIn == NULL) + { + return FALSE; + } + + /* start with no symbols counted */ + for (c = 0; c < EOF_CHAR; c++) + { + countArray[c] = 0; + } + + while ((c = fgetc(fpIn)) != EOF) + { + if (totalCount == ULONG_MAX) + { + fprintf(stderr, "Error: file too large\n"); + return FALSE; + } + + countArray[c]++; + totalCount++; + } + + /* rescale counts to be < MAX_PROBABILITY */ + if (totalCount >= MAX_PROBABILITY) + { + rescaleValue = (totalCount / MAX_PROBABILITY) + 1; + + for (c = 0; c < EOF_CHAR; c++) + { + if (countArray[c] > rescaleValue) + { + countArray[c] /= rescaleValue; + } + else if (countArray[c] != 0) + { + countArray[c] = 1; + } + } + } + + /* copy scaled symbol counts to range list */ + ranges[0] = 0; + cumulativeProb = 0; + for (c = 0; c < EOF_CHAR; c++) + { + ranges[UPPER(c)] = countArray[c]; + cumulativeProb += countArray[c]; + } + + /* convert counts to a range of probabilities */ + SymbolCountToProbabilityRanges(); + return TRUE; +} + +/*************************************************************************** +* Function : WriteHeader +* Description: This function writes each symbol contained in the encoded +* file as well as its rescaled number of occurrences. A +* decoding algorithm may use these numbers to reconstruct +* the probability range list used to encode the file. +* Parameters : bfpOut - pointer to open binary file to write to. +* Effects : Symbol values and symbol counts are written to a file. +* Returned : None +***************************************************************************/ +void WriteHeader(bit_file_t *bfpOut) +{ + int c; + probability_t previous = 0; /* symbol count so far */ + +#if BUILD_DEBUG_OUTPUT + printf("HEADER:\n"); +#endif + + for(c = 0; c <= (EOF_CHAR - 1); c++) + { + if (ranges[UPPER(c)] > previous) + { + /* some of these symbols will be encoded */ + BitFilePutChar((char)c, bfpOut); + previous = (ranges[UPPER(c)] - previous); /* symbol count */ + +#if BUILD_DEBUG_OUTPUT + printf("%02X\t%d\n", c, previous); +#endif + + /* write out PRECISION - 2 bit count */ + BitFilePutBitsInt(bfpOut, &previous, (PRECISION - 2), + sizeof(probability_t)); + + /* current upper range is previous for the next character */ + previous = ranges[UPPER(c)]; + } + } + + /* now write end of table (zero count) */ + BitFilePutChar(0x00, bfpOut); + previous = 0; + BitFilePutBits(bfpOut, (void *)&previous, PRECISION - 2); +} + +/*************************************************************************** +* Function : InitializeAdaptiveProbabilityRangeList +* Description: This routine builds the initial global list of upper and +* lower probability ranges for each symbol. This routine +* is used by both adaptive encoding and decoding. +* Currently it provides a uniform symbol distribution. +* Other distributions might be better suited for known data +* types (such as English text). +* Parameters : NONE +* Effects : ranges array is made to contain initial probability ranges +* for each symbol. +* Returned : NONE +***************************************************************************/ +void InitializeAdaptiveProbabilityRangeList(void) +{ + int c; + + cumulativeProb = 0; + ranges[0] = 0; /* absolute lower range */ + + /* assign upper and lower probability ranges assuming */ + for (c = 1; c <= UPPER(EOF_CHAR); c++) + { + ranges[c] = ranges[c - 1] + 1; + cumulativeProb++; + } + +#ifdef BUILD_DEBUG_OUTPUT + /* dump list of ranges */ + for (c = 0; c < UPPER(EOF_CHAR); c++) + { + printf("%02X\t%d\t%d\n", c, ranges[LOWER(c)], ranges[UPPER(c)]); + } +#endif + + return; +} + +/*************************************************************************** +* Function : ApplySymbolRange +* Description: This function is used for both encoding and decoding. It +* applies the range restrictions of a new symbol to the +* current upper and lower range bounds of an encoded stream. +* If an adaptive model is being used, the probability range +* list will be updated after the effect of the symbol is +* applied. +* Parameters : symbol - The symbol to be added to the current code range +* staticModel - TRUE if encoding/decoding with a static +* model. +* Effects : The current upper and lower range bounds are adjusted to +* include the range effects of adding another symbol to the +* encoded stream. If an adaptive model is being used, the +* probability range list will be updated. +* Returned : None +***************************************************************************/ +void ApplySymbolRange(int symbol, char staticModel) +{ + unsigned long range; /* must be able to hold max upper + 1 */ + unsigned long rescaled; /* range rescaled for range of new symbol */ + /* must hold range * max upper */ + + /* for updating dynamic models */ + int i; + probability_t original; /* range value prior to rescale */ + probability_t delta; /* range for individual symbol */ + + /*********************************************************************** + * Calculate new upper and lower ranges. Since the new upper range is + * dependant of the old lower range, compute the upper range first. + ***********************************************************************/ + range = (unsigned long)(upper - lower) + 1; /* current range */ + + /* scale upper range of the symbol being coded */ + rescaled = (unsigned long)ranges[UPPER(symbol)] * range; + rescaled /= (unsigned long)cumulativeProb; + + /* new upper = old lower + rescaled new upper - 1*/ + upper = lower + (probability_t)rescaled - 1; + + /* scale lower range of the symbol being coded */ + rescaled = (unsigned long)ranges[LOWER(symbol)] * range; + rescaled /= (unsigned long)cumulativeProb; + + /* new lower = old lower + rescaled new upper */ + lower = lower + (probability_t)rescaled; + + if (!staticModel) + { + /* add new symbol to model */ + cumulativeProb++; + for (i = UPPER(symbol); i <= UPPER(EOF_CHAR); i++) + { + ranges[i] += 1; + } + + /* half current values if cumulativeProb is too large */ + if (cumulativeProb >= MAX_PROBABILITY) + { + cumulativeProb = 0; + original = 0; + + for (i = 1; i <= UPPER(EOF_CHAR); i++) + { + delta = ranges[i] - original; + if (delta <= 2) + { + /* prevent probability from being 0 */ + original = ranges[i]; + ranges[i] = ranges[i - 1] + 1; + } + else + { + original = ranges[i]; + ranges[i] = ranges[i - 1] + (delta / 2); + } + + cumulativeProb += (ranges[i] - ranges[i - 1]); + } + } + } + +#ifdef BUILD_DEBUG_OUTPUT + if (lower > upper) + { + /* compile this in when testing new models. */ + fprintf(stderr, "Panic: lower (%X)> upper (%X)\n", lower, upper); + } +#endif +} + +/*************************************************************************** +* Function : WriteEncodedBits +* Description: This function attempts to shift out as many code bits as +* possible, writing the shifted bits to the encoded output +* file. Only bits that will be unchanged when additional +* symbols are encoded may be written out. +* +* If the n most significant bits of the lower and upper range +* bounds match, they will not be changed when additional +* symbols are encoded, so they may be shifted out. +* +* Adjustments are also made to prevent possible underflows +* that occur when the upper and lower ranges are so close +* that encoding another symbol won't change their values. +* Parameters : bfpOut - pointer to open binary file to write to. +* Effects : The upper and lower code bounds are adjusted so that they +* only contain only bits that may be affected by the +* addition of a new symbol to the encoded stream. +* Returned : None +***************************************************************************/ +void WriteEncodedBits(bit_file_t *bfpOut) +{ + for (;;) + { + if ((upper & MASK_BIT(0)) == (lower & MASK_BIT(0))) + { + /* MSBs match, write them to output file */ + BitFilePutBit((upper & MASK_BIT(0)) != 0, bfpOut); + + /* we can write out underflow bits too */ + while (underflowBits > 0) + { + BitFilePutBit((upper & MASK_BIT(0)) == 0, bfpOut); + underflowBits--; + } + } + else if ((lower & MASK_BIT(1)) && !(upper & MASK_BIT(1))) + { + /*************************************************************** + * Possible underflow condition: neither MSBs nor second MSBs + * match. It must be the case that lower and upper have MSBs of + * 01 and 10. Remove 2nd MSB from lower and upper. + ***************************************************************/ + underflowBits += 1; + lower &= ~(MASK_BIT(0) | MASK_BIT(1)); + upper |= MASK_BIT(1); + + /*************************************************************** + * The shifts below make the rest of the bit removal work. If + * you don't believe me try it yourself. + ***************************************************************/ + } + else + { + /* nothing left to do */ + return ; + } + + /******************************************************************* + * Shift out old MSB and shift in new LSB. Remember that lower has + * all 0s beyond it's end and upper has all 1s beyond it's end. + *******************************************************************/ + lower <<= 1; + upper <<= 1; + upper |= 1; + } +} + +/*************************************************************************** +* Function : WriteRemaining +* Description: This function writes out all remaining significant bits +* in the upper and lower ranges and the underflow bits once +* the last symbol has been encoded. +* Parameters : bfpOut - pointer to open binary file to write to. +* Effects : Remaining significant range bits are written to the output +* file. +* Returned : None +***************************************************************************/ +void WriteRemaining(bit_file_t *bfpOut) +{ + BitFilePutBit((lower & MASK_BIT(1)) != 0, bfpOut); + + /* write out any unwritten underflow bits */ + for (underflowBits++; underflowBits > 0; underflowBits--) + { + BitFilePutBit((lower & MASK_BIT(1)) == 0, bfpOut); + } +} + +/*************************************************************************** +* Function : ArDecodeFile +* Description: This routine opens an arithmetically encoded file, reads +* it's header, and builds a list of probability ranges which +* it then uses to decode the rest of the file. +* Parameters : inFile - Name of file to decode +* outFile - Name of file to write decoded output to +* staticModel - TRUE if decoding with a static model +* Effects : Encoded file is decoded +* Returned : TRUE for success, otherwise FALSE. +***************************************************************************/ +int ArDecodeFile(char *inFile, char *outFile, char staticModel) +{ + int c; + probability_t unscaled; + bit_file_t *bfpIn; + FILE *fpOut; + + /* open input and output files */ + if ((bfpIn = BitFileOpen(inFile, BF_READ)) == NULL) + { + perror(inFile); + return FALSE; + } + + if (outFile == NULL) + { + fpOut = stdout; + } + else + { + if ((fpOut = fopen(outFile, "wb")) == NULL) + { + BitFileClose(bfpIn); + perror(outFile); + return FALSE; + } + } + + if (staticModel) + { + /* build probability ranges from header in file */ + if (ReadHeader(bfpIn) == FALSE) + { + BitFileClose(bfpIn); + fclose(fpOut); + return FALSE; + } + } + + /* read start of code and initialize bounds, and adaptive ranges */ + InitializeDecoder(bfpIn, staticModel); + + /* decode one symbol at a time */ + for (;;) + { + /* get the unscaled probability of the current symbol */ + unscaled = GetUnscaledCode(); + + /* figure out which symbol has the above probability */ + if((c = GetSymbolFromProbability(unscaled)) == -1) + { + /* error: unknown symbol */ + break; + } + + if (c == EOF_CHAR) + { + /* no more symbols */ + break; + } + + fputc((char)c, fpOut); + + /* factor out symbol */ + ApplySymbolRange(c, staticModel); + ReadEncodedBits(bfpIn); + } + + fclose(fpOut); + BitFileClose(bfpIn); + + return TRUE; +} + +/**************************************************************************** +* Function : ReadHeader +* Description: This function reads the header information stored by +* WriteHeader. The header can then be used to build a +* probability range list matching the list that was used to +* encode the file. +* Parameters : bfpIn - file to read from +* Effects : Probability range list is built. +* Returned : TRUE for success, otherwise FALSE +****************************************************************************/ +int ReadHeader(bit_file_t *bfpIn) +{ + int c; + probability_t count; + +#if BUILD_DEBUG_OUTPUT + printf("HEADER:\n"); +#endif + + cumulativeProb = 0; + + for (c = 0; c <= UPPER(EOF_CHAR); c++) + { + ranges[UPPER(c)] = 0; + } + + /* read [character, probability] sets */ + for (;;) + { + c = BitFileGetChar(bfpIn); + count = 0; + + /* read (PRECISION - 2) bit count */ + if (BitFileGetBitsInt(bfpIn, &count, (PRECISION - 2), + sizeof(probability_t)) == EOF) + { + /* premature EOF */ + fprintf(stderr, "Error: unexpected EOF\n"); + return FALSE; + } + +#if BUILD_DEBUG_OUTPUT + printf("%02X\t%d\n", c, count); +#endif + + if (count == 0) + { + /* 0 count means end of header */ + break; + } + + ranges[UPPER(c)] = count; + cumulativeProb += count; + } + + /* convert counts to range list */ + SymbolCountToProbabilityRanges(); + return TRUE; +} + +/**************************************************************************** +* Function : InitializeDecoder +* Description: This function starts the upper and lower ranges at their +* max/min values and reads in the most significant encoded +* bits. +* Parameters : bfpIn - file to read from +* staticModel - TRUE if decoding using a staticModel +* Effects : upper, lower, and code are initialized. The probability +* range list will also be initialized if an adaptive model +* will be used. +* Returned : TRUE for success, otherwise FALSE +****************************************************************************/ +void InitializeDecoder(bit_file_t *bfpIn, char staticModel) +{ + int i; + + if (!staticModel) + { + /* initialize ranges for adaptive model */ + InitializeAdaptiveProbabilityRangeList(); + } + + code = 0; + + /* read PERCISION MSBs of code one bit at a time */ + for (i = 0; i < PRECISION; i++) + { + code <<= 1; + + /* treat EOF like 0 */ + if(BitFileGetBit(bfpIn) == 1) + { + code |= 1; + } + } + + /* start with full probability range [0%, 100%) */ + lower = 0; + upper = ~0; /* all ones */ +} + +/**************************************************************************** +* Function : GetUnscaledCode +* Description: This function undoes the scaling that ApplySymbolRange +* performed before bits were shifted out. The value returned +* is the probability of the encoded symbol. +* Parameters : None +* Effects : None +* Returned : The probability of the current symbol +****************************************************************************/ +probability_t GetUnscaledCode(void) +{ + unsigned long range; /* must be able to hold max upper + 1 */ + unsigned long unscaled; + + range = (unsigned long)(upper - lower) + 1; + + /* reverse the scaling operations from ApplySymbolRange */ + unscaled = (unsigned long)(code - lower) + 1; + unscaled = unscaled * (unsigned long)cumulativeProb - 1; + unscaled /= range; + + return ((probability_t)unscaled); +} + +/**************************************************************************** +* Function : GetSymbolFromProbability +* Description: Given a probability, this function will return the symbol +* whose range includes that probability. Symbol is found +* binary search on probability ranges. +* Parameters : probability - probability of symbol. +* Effects : None +* Returned : -1 for failure, otherwise encoded symbol +****************************************************************************/ +int GetSymbolFromProbability(probability_t probability) +{ + int first, last, middle; /* indicies for binary search */ + + first = 0; + last = UPPER(EOF_CHAR); + middle = last / 2; + + /* binary search */ + while (last >= first) + { + if (probability < ranges[LOWER(middle)]) + { + /* lower bound is higher than probability */ + last = middle - 1; + middle = first + ((last - first) / 2); + continue; + } + + if (probability >= ranges[UPPER(middle)]) + { + /* upper bound is lower than probability */ + first = middle + 1; + middle = first + ((last - first) / 2); + continue; + } + + /* we must have found the right value */ + return middle; + } + + /* error: none of the ranges include the probability */ + fprintf(stderr, "Unknown Symbol: %d (max: %d)\n", probability, + ranges[UPPER(EOF_CHAR)]); + return -1; +} + +/*************************************************************************** +* Function : ReadEncodedBits +* Description: This function attempts to shift out as many code bits as +* possible, as bits are shifted out the coded input is +* populated with bits from the encoded file. Only bits +* that will be unchanged when additional symbols are decoded +* may be shifted out. +* +* If the n most significant bits of the lower and upper range +* bounds match, they will not be changed when additional +* symbols are decoded, so they may be shifted out. +* +* Adjustments are also made to prevent possible underflows +* that occur when the upper and lower ranges are so close +* that decoding another symbol won't change their values. +* Parameters : bfpOut - pointer to open binary file to read from. +* Effects : The upper and lower code bounds are adjusted so that they +* only contain only bits that will be affected by the +* addition of a new symbol. Replacements are read from the +* encoded stream. +* Returned : None +***************************************************************************/ +void ReadEncodedBits(bit_file_t *bfpIn) +{ + int nextBit; /* next bit from encoded input */ + + for (;;) + { + if (( upper & MASK_BIT(0)) == (lower & MASK_BIT(0))) + { + /* MSBs match, allow them to be shifted out*/ + } + else if ((lower & MASK_BIT(1)) && !(upper & MASK_BIT(1))) + { + /*************************************************************** + * Possible underflow condition: neither MSBs nor second MSBs + * match. It must be the case that lower and upper have MSBs of + * 01 and 10. Remove 2nd MSB from lower and upper. + ***************************************************************/ + lower &= ~(MASK_BIT(0) | MASK_BIT(1)); + upper |= MASK_BIT(1); + code ^= MASK_BIT(1); + + /* the shifts below make the rest of the bit removal work */ + } + else + { + /* nothing to shift out */ + return; + } + + /******************************************************************* + * Shift out old MSB and shift in new LSB. Remember that lower has + * all 0s beyond it's end and upper has all 1s beyond it's end. + *******************************************************************/ + lower <<= 1; + upper <<= 1; + upper |= 1; + code <<= 1; + + if ((nextBit = BitFileGetBit(bfpIn)) == EOF) + { + /* either all bits are shifted out or error occurred */ + } + else + { + code |= nextBit; /* add next encoded bit to code */ + } + } + + return; +} diff --git a/test/compression/arcode.h b/test/compression/arcode.h new file mode 100755 index 0000000..aac3208 --- /dev/null +++ b/test/compression/arcode.h @@ -0,0 +1,69 @@ +/*************************************************************************** +* Header for Arithmetic Encoding and Decoding Library +* +* File : arcode.h +* Purpose : Provides prototypes for functions that use arithmetic coding +* to encode/decode files. +* Author : Michael Dipperstein +* Date : April 2, 2004 +* +**************************************************************************** +* UPDATES +* +* $Id: arcode.h,v 1.3 2007/09/08 15:47:02 michael Exp $ +* $Log: arcode.h,v $ +* Revision 1.3 2007/09/08 15:47:02 michael +* Changes required for LGPL v3. +* +* Revision 1.2 2004/08/13 13:09:46 michael +* Add support for adaptive encoding +* +* Revision 1.1.1.1 2004/04/04 14:54:13 michael +* Initial version +* +**************************************************************************** +* +* Arcode: An ANSI C Arithmetic Encoding/Decoding Routines +* Copyright (C) 2004, 2006-2007 by Michael Dipperstein (mdipper@cs.ucsb.edu) +* +* This file is part of the arcode library. +* +* The arcode library is free software; you can redistribute it and/or +* modify it under the terms of the GNU Lesser General Public License as +* published by the Free Software Foundation; either version 3 of the +* License, or (at your option) any later version. +* +* The arcode library is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser +* General Public License for more details. +* +* You should have received a copy of the GNU Lesser General Public License +* along with this program. If not, see . +* +***************************************************************************/ + +#ifndef _ARCODE_H_ +#define _ARCODE_H_ + +/*************************************************************************** +* CONSTANTS +***************************************************************************/ +#ifndef FALSE +#define FALSE 0 +#endif + +#ifndef TRUE +#define TRUE 1 +#endif + +/*************************************************************************** +* PROTOTYPES +***************************************************************************/ + /* encode inFile */ +int ArEncodeFile(char *inFile, char *outFile, char staticModel); + +/* decode inFile*/ +int ArDecodeFile(char *inFile, char *outFile, char staticModel); + +#endif /* ndef _ARCODE_H_ */ diff --git a/test/compression/armain.c b/test/compression/armain.c new file mode 100755 index 0000000..8f37c4f --- /dev/null +++ b/test/compression/armain.c @@ -0,0 +1,266 @@ +/*************************************************************************** +* Sample Program Using Arithmetic Encoding Library +* +* File : sample.c +* Purpose : Demonstrate usage of arithmetic encoding library +* Author : Michael Dipperstein +* Date : March 10, 2004 +* +**************************************************************************** +* UPDATES +* +* $Id: sample.c,v 1.3 2007/09/08 15:48:14 michael Exp $ +* $Log: sample.c,v $ +* Revision 1.3 2007/09/08 15:48:14 michael +* Replace getopt with optlist. +* Changes required for LGPL v3. +* +* Revision 1.2 2004/08/13 13:08:43 michael +* Add support for adaptive encoding +* +* Use executable name in help messages +* +* Revision 1.1.1.1 2004/04/04 14:54:13 michael +* Initial version +* +* +**************************************************************************** +* +* SAMPLE: Sample usage of the arcode Arithmetic Encoding Library +* Copyright (C) 2004, 2007 by Michael Dipperstein (mdipper@cs.ucsb.edu) +* +* This file is part of the arcode library. +* +* The arcode library is free software; you can redistribute it and/or +* modify it under the terms of the GNU Lesser General Public License as +* published by the Free Software Foundation; either version 3 of the +* License, or (at your option) any later version. +* +* The arcode library is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser +* General Public License for more details. +* +* You should have received a copy of the GNU Lesser General Public License +* along with this program. If not, see . +* +***************************************************************************/ + +/*************************************************************************** +* INCLUDED FILES +***************************************************************************/ +#include +#include +#include +#include "optlist.h" +#include "arcode.h" + +/*************************************************************************** +* PROTOTYPES +***************************************************************************/ +char *RemovePath(char *fullPath); + +/*************************************************************************** +* FUNCTIONS +***************************************************************************/ + +/**************************************************************************** +* Function : main +* Description: This is the main function for this program, it validates +* the command line input and, if valid, it will call +* functions to encode or decode a file using the arithmetic +* coding algorithm. +* Parameters : argc - number of parameters +* argv - parameter list +* Effects : Encodes/Decodes input file +* Returned : EXIT_SUCCESS for success, otherwise EXIT_FAILURE. +****************************************************************************/ +int main(int argc, char *argv[]) +{ + option_t *optList, *thisOpt; + char *inFile, *outFile; /* name of input & output files */ + char encode; /* encode/decode */ + char staticModel; /* static/adaptive model*/ + + /* initialize data */ + inFile = NULL; + outFile = NULL; + encode = TRUE; + staticModel = TRUE; + + /* parse command line */ + optList = GetOptList(argc, argv, "acdi:o:h?"); + thisOpt = optList; + + while (thisOpt != NULL) + { + switch(thisOpt->option) + { + case 'a': /* adaptive model vs. static */ + staticModel = FALSE; + break; + + case 'c': /* compression mode */ + encode = TRUE; + break; + + case 'd': /* decompression mode */ + encode = FALSE; + break; + + case 'i': /* input file name */ + if (inFile != NULL) + { + fprintf(stderr, "Multiple input files not allowed.\n"); + free(inFile); + + if (outFile != NULL) + { + free(outFile); + } + + FreeOptList(optList); + exit(EXIT_FAILURE); + } + else if ((inFile = + (char *)malloc(strlen(thisOpt->argument) + 1)) == NULL) + { + perror("Memory allocation"); + + if (outFile != NULL) + { + free(outFile); + } + + FreeOptList(optList); + exit(EXIT_FAILURE); + } + + strcpy(inFile, thisOpt->argument); + break; + + case 'o': /* output file name */ + if (outFile != NULL) + { + fprintf(stderr, "Multiple output files not allowed.\n"); + free(outFile); + + if (inFile != NULL) + { + free(inFile); + } + + FreeOptList(optList); + exit(EXIT_FAILURE); + } + else if ((outFile = + (char *)malloc(strlen(thisOpt->argument) + 1)) == NULL) + { + perror("Memory allocation"); + + if (inFile != NULL) + { + free(inFile); + } + + FreeOptList(optList); + exit(EXIT_FAILURE); + } + + strcpy(outFile, thisOpt->argument); + break; + + case 'h': + case '?': + printf("Usage: %s \n\n", RemovePath(argv[0])); + printf("options:\n"); + printf(" -c : Encode input file to output file.\n"); + printf(" -d : Decode input file to output file.\n"); + printf(" -i : Name of input file.\n"); + printf(" -o : Name of output file.\n"); + printf(" -a : Use adaptive model instead of static.\n"); + printf(" -h | ? : Print out command line options.\n\n"); + printf("Default: %s -c\n", RemovePath(argv[0])); + + FreeOptList(optList); + return(EXIT_SUCCESS); + } + + optList = thisOpt->next; + free(thisOpt); + thisOpt = optList; + } + + /* validate command line */ + if (inFile == NULL) + { + fprintf(stderr, "Input file must be provided\n"); + fprintf(stderr, "Enter \"%s -?\" for help.\n", RemovePath(argv[0])); + + if (outFile != NULL) + { + free(outFile); + } + + exit (EXIT_FAILURE); + } + else if (outFile == NULL) + { + fprintf(stderr, "Output file must be provided\n"); + fprintf(stderr, "Enter \"%s -?\" for help.\n", RemovePath(argv[0])); + + if (inFile != NULL) + { + free(inFile); + } + + exit (EXIT_FAILURE); + } + + /* we have valid parameters encode or decode */ + if (encode) + { + ArEncodeFile(inFile, outFile, staticModel); + } + else + { + ArDecodeFile(inFile, outFile, staticModel); + } + + free(inFile); + free(outFile); + return EXIT_SUCCESS; +} + +/**************************************************************************** +* Function : RemovePath +* Description: This is function accepts a pointer to the name of a file +* along with path information and returns a pointer to the +* character that is not part of the path. +* Parameters : fullPath - pointer to an array of characters containing +* a file name and possible path modifiers. +* Effects : None +* Returned : Returns a pointer to the first character after any path +* information. +****************************************************************************/ +char *RemovePath(char *fullPath) +{ + int i; + char *start, *tmp; /* start of file name */ + const char delim[3] = {'\\', '/', ':'}; /* path deliminators */ + + start = fullPath; + + /* find the first character after all file path delimiters */ + for (i = 0; i < 3; i++) + { + tmp = strrchr(start, delim[i]); + + if (tmp != NULL) + { + start = tmp + 1; + } + } + + return start; +} diff --git a/test/compression/bitfile.c b/test/compression/bitfile.c new file mode 100644 index 0000000..7480ce9 --- /dev/null +++ b/test/compression/bitfile.c @@ -0,0 +1,1043 @@ +/*************************************************************************** +* Bit Stream File Implementation +* +* File : bitfile.c +* Purpose : This file implements a simple library of I/O functions for +* files that contain data in sizes that aren't integral bytes. +* An attempt was made to make the functions in this library +* analogous to functions provided to manipulate byte streams. +* The functions contained in this library were created with +* compression algorithms in mind, but may be suited to other +* applications. +* Author : Michael Dipperstein +* Date : January 9, 2004 +* +**************************************************************************** +* UPDATES +* +* $Id: bitfile.c,v 1.10 2007/08/26 21:53:48 michael Exp $ +* $Log: bitfile.c,v $ +* Revision 1.10 2007/08/26 21:53:48 michael +* Changes required for LGPL v3. +* +* Revision 1.9 2007/07/10 05:34:07 michael +* Remove ',' after last element in the enum endian_t. +* +* Revision 1.8 2007/02/06 06:22:07 michael +* Used trim program to remove trailing spaces. +* +* Revision 1.7 2006/06/03 19:32:38 michael +* Corrected error reporetd anonymous. The allocation of constants used to +* open underlying read/write/append files did not account for a terminating +* null. +* +* Used spell checker to correct spelling. +* +* Revision 1.6 2005/12/08 06:56:55 michael +* Minor text corrections. +* +* Revision 1.5 2005/12/06 15:06:37 michael +* Added BitFileGetBitsInt and BitFilePutBitsInt for integer types. +* +* Revision 1.4 2005/06/23 04:34:18 michael +* Prevent BitfileGetBits/PutBits from accessing an extra byte when given +* an integral number of bytes. +* +* Revision 1.3 2004/11/09 14:16:58 michael +* Added functions to convert open bit_file_t to FILE and to +* align open bit_file_t to the next byte. +* +* Revision 1.2 2004/06/15 13:15:58 michael +* Use incomplete type to hide definition of bitfile structure +* +* Revision 1.1.1.1 2004/02/09 05:31:42 michael +* Initial release +* +* +**************************************************************************** +* +* Bitfile: Bit stream File I/O Routines +* Copyright (C) 2004-2007 by Michael Dipperstein (mdipper@cs.ucsb.edu) +* +* This file is part of the bit file library. +* +* The bit file library is free software; you can redistribute it and/or +* modify it under the terms of the GNU Lesser General Public License as +* published by the Free Software Foundation; either version 3 of the +* License, or (at your option) any later version. +* +* The bit file library is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser +* General Public License for more details. +* +* You should have received a copy of the GNU Lesser General Public License +* along with this program. If not, see . +* +***************************************************************************/ + +/*************************************************************************** +* INCLUDED FILES +***************************************************************************/ +#include +#include +#include "bitfile.h" + +/*************************************************************************** +* TYPE DEFINITIONS +***************************************************************************/ + +typedef enum +{ + BF_UNKNOWN_ENDIAN, + BF_LITTLE_ENDIAN, + BF_BIG_ENDIAN +} endian_t; + +struct bit_file_t +{ + FILE *fp; /* file pointer used by stdio functions */ + endian_t endian; /* endianess of architecture */ + unsigned char bitBuffer; /* bits waiting to be read/written */ + unsigned char bitCount; /* number of bits in bitBuffer */ + BF_MODES mode; /* open for read, write, or append */ +}; + +/* union used to test for endianess */ +typedef union +{ + unsigned long word; + unsigned char bytes[sizeof(unsigned long)]; +} endian_test_t; + +/*************************************************************************** +* PROTOTYPES +***************************************************************************/ +endian_t DetermineEndianess(void); + +int BitFilePutBitsLE(bit_file_t *stream, void *bits, const unsigned int count); +int BitFilePutBitsBE(bit_file_t *stream, void *bits, const unsigned int count, + const size_t size); + +int BitFileGetBitsLE(bit_file_t *stream, void *bits, const unsigned int count); +int BitFileGetBitsBE(bit_file_t *stream, void *bits, const unsigned int count, + const size_t size); + +/*************************************************************************** +* FUNCTIONS +***************************************************************************/ + +/*************************************************************************** +* Function : BitFileOpen +* Description: This function opens a bit file for reading, writing, +* or appending. If successful, a bit_file_t data +* structure will be allocated and a pointer to the +* structure will be returned. +* Parameters : fileName - NULL terminated string containing the name of +* the file to be opened. +* mode - The mode of the file to be opened +* Effects : The specified file will be opened and file structure will +* be allocated. +* Returned : Pointer to the bit_file_t structure for the bit file +* opened, or NULL on failure. errno will be set for all +* failure cases. +***************************************************************************/ +bit_file_t *BitFileOpen(const char *fileName, const BF_MODES mode) +{ + char modes[3][3] = {"rb", "wb", "ab"}; /* binary modes for fopen */ + bit_file_t *bf; + + bf = (bit_file_t *)malloc(sizeof(bit_file_t)); + + if (bf == NULL) + { + /* malloc failed */ + errno = ENOMEM; + } + else + { + bf->fp = fopen(fileName, modes[mode]); + + if (bf->fp == NULL) + { + /* fopen failed */ + free(bf); + bf = NULL; + } + else + { + /* fopen succeeded fill in remaining bf data */ + bf->bitBuffer = 0; + bf->bitCount = 0; + bf->mode = mode; + + /*************************************************************** + * TO DO: Consider using the last byte in a file to indicate + * the number of bits in the previous byte that actually have + * data. If I do that, I'll need special handling of files + * opened with a mode of BF_APPEND. + ***************************************************************/ + } + } + + bf->endian = DetermineEndianess(); + return (bf); +} + +/*************************************************************************** +* Function : MakeBitFile +* Description: This function naively wraps a standard file in a +* bit_file_t structure. ANSI-C doesn't support file status +* functions commonly found in other C variants, so the +* caller must be passed as a parameter. +* Parameters : stream - pointer to the standard file being wrapped. +* mode - The mode of the file being wrapped. +* Effects : A bit_file_t structure will be created for the stream +* passed as a parameter. +* Returned : Pointer to the bit_file_t structure for the bit file +* or NULL on failure. errno will be set for all failure +* cases. +***************************************************************************/ +bit_file_t *MakeBitFile(FILE *stream, const BF_MODES mode) +{ + bit_file_t *bf; + + if (stream == NULL) + { + /* can't wrapper empty steam */ + errno = EBADF; + bf = NULL; + } + else + { + bf = (bit_file_t *)malloc(sizeof(bit_file_t)); + + if (bf == NULL) + { + /* malloc failed */ + errno = ENOMEM; + } + else + { + /* set structure data */ + bf->fp = stream; + bf->bitBuffer = 0; + bf->bitCount = 0; + bf->mode = mode; + } + } + + bf->endian = DetermineEndianess(); + + return (bf); +} + +/*************************************************************************** +* Function : DetermineEndianess +* Description: This function determines the endianess of the current +* hardware architecture. An unsigned long is set to 1. If +* the 1st byte of the unsigned long gets the 1, this is a +* little endian machine. If the last byte gets the 1, this +* is a big endian machine. +* Parameters : None +* Effects : None +* Returned : endian_t for current machine architecture +***************************************************************************/ +endian_t DetermineEndianess(void) +{ + endian_t endian; + endian_test_t endianTest; + + endianTest.word = 1; + + if (endianTest.bytes[0] == 1) + { + /* LSB is 1st byte (little endian)*/ + endian = BF_LITTLE_ENDIAN; + } + else if (endianTest.bytes[sizeof(unsigned long) - 1] == 1) + { + /* LSB is last byte (big endian)*/ + endian = BF_BIG_ENDIAN; + } + else + { + endian = BF_UNKNOWN_ENDIAN; + } + + return endian; +} + +/*************************************************************************** +* Function : BitFileClose +* Description: This function closes a bit file and frees all associated +* data. +* Parameters : stream - pointer to bit file stream being closed +* Effects : The specified file will be closed and the file structure +* will be freed. +* Returned : 0 for success or EOF for failure. +***************************************************************************/ +int BitFileClose(bit_file_t *stream) +{ + int returnValue = 0; + + if (stream == NULL) + { + return(EOF); + } + + if ((stream->mode == BF_WRITE) || (stream->mode == BF_APPEND)) + { + /* write out any unwritten bits */ + if (stream->bitCount != 0) + { + (stream->bitBuffer) <<= 8 - (stream->bitCount); + fputc(stream->bitBuffer, stream->fp); /* handle error? */ + } + } + + /*********************************************************************** + * TO DO: Consider writing an additional byte indicating the number of + * valid bits (bitCount) in the previous byte. + ***********************************************************************/ + + /* close file */ + returnValue = fclose(stream->fp); + + /* free memory allocated for bit file */ + free(stream); + + return(returnValue); +} + +/*************************************************************************** +* Function : BitFileToFILE +* Description: This function flushes and frees the bitfile structure, +* returning a pointer to a stdio file. +* Parameters : stream - pointer to bit file stream being closed +* Effects : The specified bitfile will be made usable as a stdio +* FILE. +* Returned : Pointer to FILE. NULL for failure. +***************************************************************************/ +FILE *BitFileToFILE(bit_file_t *stream) +{ + FILE *fp = NULL; + + if (stream == NULL) + { + return(NULL); + } + + if ((stream->mode == BF_WRITE) || (stream->mode == BF_APPEND)) + { + /* write out any unwritten bits */ + if (stream->bitCount != 0) + { + (stream->bitBuffer) <<= 8 - (stream->bitCount); + fputc(stream->bitBuffer, stream->fp); /* handle error? */ + } + } + + /*********************************************************************** + * TO DO: Consider writing an additional byte indicating the number of + * valid bits (bitCount) in the previous byte. + ***********************************************************************/ + + /* close file */ + fp = stream->fp; + + /* free memory allocated for bit file */ + free(stream); + + return(fp); +} + +/*************************************************************************** +* Function : BitFileByteAlign +* Description: This function aligns the bitfile to the nearest byte. For +* output files, this means writing out the bit buffer with +* extra bits set to 0. For input files, this means flushing +* the bit buffer. +* Parameters : stream - pointer to bit file stream to align +* Effects : Flushes out the bit buffer. +* Returned : EOF if stream is NULL. Otherwise, the contents of the +* bit buffer. +***************************************************************************/ +int BitFileByteAlign(bit_file_t *stream) +{ + int returnValue; + + if (stream == NULL) + { + return(EOF); + } + + returnValue = stream->bitBuffer; + + if ((stream->mode == BF_WRITE) || (stream->mode == BF_APPEND)) + { + /* write out any unwritten bits */ + if (stream->bitCount != 0) + { + (stream->bitBuffer) <<= 8 - (stream->bitCount); + fputc(stream->bitBuffer, stream->fp); /* handle error? */ + } + } + + stream->bitBuffer = 0; + stream->bitCount = 0; + + return (returnValue); +} + +/*************************************************************************** +* Function : BitFileGetChar +* Description: This function returns the next byte from the file passed as +* a parameter. +* Parameters : stream - pointer to bit file stream to read from +* Effects : Reads next byte from file and updates buffer accordingly. +* Returned : EOF if a whole byte cannot be obtained. Otherwise, +* the character read. +***************************************************************************/ +int BitFileGetChar(bit_file_t *stream) +{ + int returnValue; + unsigned char tmp; + + if (stream == NULL) + { + return(EOF); + } + + returnValue = fgetc(stream->fp); + + if (stream->bitCount == 0) + { + /* we can just get byte from file */ + return returnValue; + } + + /* we have some buffered bits to return too */ + if (returnValue != EOF) + { + /* figure out what to return */ + tmp = ((unsigned char)returnValue) >> (stream->bitCount); + tmp |= ((stream->bitBuffer) << (8 - (stream->bitCount))); + + /* put remaining in buffer. count shouldn't change. */ + stream->bitBuffer = returnValue; + + returnValue = tmp; + } + + return returnValue; +} + +/*************************************************************************** +* Function : BitFilePutChar +* Description: This function writes the byte passed as a parameter to the +* file passed a parameter. +* Parameters : c - the character to be written +* stream - pointer to bit file stream to write to +* Effects : Writes a byte to the file and updates buffer accordingly. +* Returned : On success, the character written, otherwise EOF. +***************************************************************************/ +int BitFilePutChar(const int c, bit_file_t *stream) +{ + unsigned char tmp; + + if (stream == NULL) + { + return(EOF); + } + + if (stream->bitCount == 0) + { + /* we can just put byte from file */ + return fputc(c, stream->fp); + } + + /* figure out what to write */ + tmp = ((unsigned char)c) >> (stream->bitCount); + tmp = tmp | ((stream->bitBuffer) << (8 - stream->bitCount)); + + if (fputc(tmp, stream->fp) != EOF) + { + /* put remaining in buffer. count shouldn't change. */ + stream->bitBuffer = c; + } + else + { + return EOF; + } + + return tmp; +} + +/*************************************************************************** +* Function : BitFileGetBit +* Description: This function returns the next bit from the file passed as +* a parameter. The bit value returned is the msb in the +* bit buffer. +* Parameters : stream - pointer to bit file stream to read from +* Effects : Reads next bit from bit buffer. If the buffer is empty, +* a new byte will be read from the file. +* Returned : 0 if bit == 0, 1 if bit == 1, and EOF if operation fails. +***************************************************************************/ +int BitFileGetBit(bit_file_t *stream) +{ + int returnValue; + + if (stream == NULL) + { + return(EOF); + } + + if (stream->bitCount == 0) + { + /* buffer is empty, read another character */ + if ((returnValue = fgetc(stream->fp)) == EOF) + { + return EOF; + } + else + { + stream->bitCount = 8; + stream->bitBuffer = returnValue; + } + } + + /* bit to return is msb in buffer */ + stream->bitCount--; + returnValue = (stream->bitBuffer) >> (stream->bitCount); + + return (returnValue & 0x01); +} + +/*************************************************************************** +* Function : BitFilePutBit +* Description: This function writes the bit passed as a parameter to the +* file passed a parameter. +* Parameters : c - the bit value to be written +* stream - pointer to bit file stream to write to +* Effects : Writes a bit to the bit buffer. If the buffer has a byte, +* the buffer is written to the file and cleared. +* Returned : On success, the bit value written, otherwise EOF. +***************************************************************************/ +int BitFilePutBit(const int c, bit_file_t *stream) +{ + int returnValue = c; + + if (stream == NULL) + { + return(EOF); + } + + stream->bitCount++; + stream->bitBuffer <<= 1; + + if (c != 0) + { + stream->bitBuffer |= 1; + } + + /* write bit buffer if we have 8 bits */ + if (stream->bitCount == 8) + { + if (fputc(stream->bitBuffer, stream->fp) == EOF) + { + returnValue = EOF; + } + + /* reset buffer */ + stream->bitCount = 0; + stream->bitBuffer = 0; + } + + return returnValue; +} + +/*************************************************************************** +* Function : BitFileGetBits +* Description: This function reads the specified number of bits from the +* file passed as a parameter and writes them to the +* requested memory location (msb to lsb). +* Parameters : stream - pointer to bit file stream to read from +* bits - address to store bits read +* count - number of bits to read +* Effects : Reads bits from the bit buffer and file stream. The bit +* buffer will be modified as necessary. +* Returned : EOF for failure, otherwise the number of bits read. If +* an EOF is reached before all the bits are read, bits +* will contain every bit through the last complete byte. +***************************************************************************/ +int BitFileGetBits(bit_file_t *stream, void *bits, const unsigned int count) +{ + unsigned char *bytes, shifts; + int offset, remaining, returnValue; + + bytes = (unsigned char *)bits; + + if ((stream == NULL) || (bits == NULL)) + { + return(EOF); + } + + offset = 0; + remaining = count; + + /* read whole bytes */ + while (remaining >= 8) + { + returnValue = BitFileGetChar(stream); + + if (returnValue == EOF) + { + return EOF; + } + + bytes[offset] = (unsigned char)returnValue; + remaining -= 8; + offset++; + } + + if (remaining != 0) + { + /* read remaining bits */ + shifts = 8 - remaining; + + while (remaining > 0) + { + returnValue = BitFileGetBit(stream); + + if (returnValue == EOF) + { + return EOF; + } + + bytes[offset] <<= 1; + bytes[offset] |= (returnValue & 0x01); + remaining--; + } + + /* shift last bits into position */ + bytes[offset] <<= shifts; + } + + return count; +} + +/*************************************************************************** +* Function : BitFilePutBits +* Description: This function writes the specified number of bits from the +* memory location passed as a parameter to the file passed +* as a parameter. Bits are written msb to lsb. +* Parameters : stream - pointer to bit file stream to write to +* bits - pointer to bits to write +* count - number of bits to write +* Effects : Writes bits to the bit buffer and file stream. The bit +* buffer will be modified as necessary. +* Returned : EOF for failure, otherwise the number of bits written. If +* an error occurs after a partial write, the partially +* written bits will not be unwritten. +***************************************************************************/ +int BitFilePutBits(bit_file_t *stream, void *bits, const unsigned int count) +{ + unsigned char *bytes, tmp; + int offset, remaining, returnValue; + + bytes = (unsigned char *)bits; + + if ((stream == NULL) || (bits == NULL)) + { + return(EOF); + } + + offset = 0; + remaining = count; + + /* write whole bytes */ + while (remaining >= 8) + { + returnValue = BitFilePutChar(bytes[offset], stream); + + if (returnValue == EOF) + { + return EOF; + } + + remaining -= 8; + offset++; + } + + if (remaining != 0) + { + /* write remaining bits */ + tmp = bytes[offset]; + + while (remaining > 0) + { + returnValue = BitFilePutBit((tmp & 0x80), stream); + + if (returnValue == EOF) + { + return EOF; + } + + tmp <<= 1; + remaining--; + } + } + + return count; +} + +/*************************************************************************** +* Function : BitFileGetBitsInt +* Description: This function provides a machine independent layer that +* allows a single function call to stuff an arbitrary number +* of bits into an integer type variable. +* Parameters : stream - pointer to bit file stream to read from +* bits - address to store bits read +* count - number of bits to read +* size - sizeof type containing "bits" +* Effects : Calls a function that reads bits from the bit buffer and +* file stream. The bit buffer will be modified as necessary. +* the bits will be written to "bits" from least significant +* byte to most significant byte. +* Returned : EOF for failure, otherwise the number of bits read by the +* called function. +***************************************************************************/ +int BitFileGetBitsInt(bit_file_t *stream, void *bits, const unsigned int count, + const size_t size) +{ + int returnValue; + + if ((stream == NULL) || (bits == NULL)) + { + return(EOF); + } + + if (stream->endian == BF_LITTLE_ENDIAN) + { + returnValue = BitFileGetBitsLE(stream, bits, count); + } + else if (stream->endian == BF_BIG_ENDIAN) + { + returnValue = BitFileGetBitsBE(stream, bits, count, size); + } + else + { + returnValue = EOF; + } + + return returnValue; +} + +/*************************************************************************** +* Function : BitFileGetBitsLE (Little Endian) +* Description: This function reads the specified number of bits from the +* file passed as a parameter and writes them to the +* requested memory location (LSB to MSB). +* Parameters : stream - pointer to bit file stream to read from +* bits - address to store bits read +* count - number of bits to read +* Effects : Reads bits from the bit buffer and file stream. The bit +* buffer will be modified as necessary. bits is treated as +* a little endian integer of length >= (count/8) + 1. +* Returned : EOF for failure, otherwise the number of bits read. If +* an EOF is reached before all the bits are read, bits +* will contain every bit through the last successful read. +***************************************************************************/ +int BitFileGetBitsLE(bit_file_t *stream, void *bits, const unsigned int count) +{ + unsigned char *bytes, shifts; + int offset, remaining, returnValue; + + bytes = (unsigned char *)bits; + + offset = 0; + remaining = count; + + /* read whole bytes */ + while (remaining >= 8) + { + returnValue = BitFileGetChar(stream); + + if (returnValue == EOF) + { + return EOF; + } + + bytes[offset] = (unsigned char)returnValue; + remaining -= 8; + offset++; + } + + if (remaining != 0) + { + /* read remaining bits */ + shifts = 8 - remaining; + + while (remaining > 0) + { + returnValue = BitFileGetBit(stream); + + if (returnValue == EOF) + { + return EOF; + } + + bytes[offset] <<= 1; + bytes[offset] |= (returnValue & 0x01); + remaining--; + } + + } + + return count; +} + +/*************************************************************************** +* Function : BitFileGetBitsBE (Big Endian) +* Description: This function reads the specified number of bits from the +* file passed as a parameter and writes them to the +* requested memory location (LSB to MSB). +* Parameters : stream - pointer to bit file stream to read from +* bits - address to store bits read +* count - number of bits to read +* size - sizeof type containing "bits" +* Effects : Reads bits from the bit buffer and file stream. The bit +* buffer will be modified as necessary. bits is treated as +* a big endian integer of length size. +* Returned : EOF for failure, otherwise the number of bits read. If +* an EOF is reached before all the bits are read, bits +* will contain every bit through the last successful read. +***************************************************************************/ +int BitFileGetBitsBE(bit_file_t *stream, void *bits, const unsigned int count, + const size_t size) +{ + unsigned char *bytes, shifts; + int offset, remaining, returnValue; + + if (count > (size * 8)) + { + /* too many bits to read */ + return EOF; + } + + bytes = (unsigned char *)bits; + + offset = size - 1; + remaining = count; + + /* read whole bytes */ + while (remaining >= 8) + { + returnValue = BitFileGetChar(stream); + + if (returnValue == EOF) + { + return EOF; + } + + bytes[offset] = (unsigned char)returnValue; + remaining -= 8; + offset--; + } + + if (remaining != 0) + { + /* read remaining bits */ + shifts = 8 - remaining; + + while (remaining > 0) + { + returnValue = BitFileGetBit(stream); + + if (returnValue == EOF) + { + return EOF; + } + + bytes[offset] <<= 1; + bytes[offset] |= (returnValue & 0x01); + remaining--; + } + + } + + return count; +} + +/*************************************************************************** +* Function : BitFilePutBitsInt +* Description: This function provides a machine independent layer that +* allows a single function call to write an arbitrary number +* of bits from an integer type variable into a file. +* Parameters : stream - pointer to bit file stream to write to +* bits - pointer to bits to write +* count - number of bits to write +* size - sizeof type containing "bits" +* Effects : Calls a function that writes bits to the bit buffer and +* file stream. The bit buffer will be modified as necessary. +* the bits will be written to the file stream from least +* significant byte to most significant byte. +* Returned : EOF for failure, otherwise the number of bits written. If +* an error occurs after a partial write, the partially +* written bits will not be unwritten. +***************************************************************************/ +int BitFilePutBitsInt(bit_file_t *stream, void *bits, const unsigned int count, + const size_t size) +{ + int returnValue; + + if ((stream == NULL) || (bits == NULL)) + { + return(EOF); + } + + if (stream->endian == BF_LITTLE_ENDIAN) + { + returnValue = BitFilePutBitsLE(stream, bits, count); + } + else if (stream->endian == BF_BIG_ENDIAN) + { + returnValue = BitFilePutBitsBE(stream, bits, count, size); + } + else + { + returnValue = EOF; + } + + return returnValue; +} + +/*************************************************************************** +* Function : BitFilePutBitsLE (Little Endian) +* Description: This function writes the specified number of bits from the +* memory location passed as a parameter to the file passed +* as a parameter. Bits are written LSB to MSB. +* Parameters : stream - pointer to bit file stream to write to +* bits - pointer to bits to write +* count - number of bits to write +* Effects : Writes bits to the bit buffer and file stream. The bit +* buffer will be modified as necessary. bits is treated as +* a little endian integer of length >= (count/8) + 1. +* Returned : EOF for failure, otherwise the number of bits written. If +* an error occurs after a partial write, the partially +* written bits will not be unwritten. +***************************************************************************/ +int BitFilePutBitsLE(bit_file_t *stream, void *bits, const unsigned int count) +{ + unsigned char *bytes, tmp; + int offset, remaining, returnValue; + + bytes = (unsigned char *)bits; + offset = 0; + remaining = count; + + /* write whole bytes */ + while (remaining >= 8) + { + returnValue = BitFilePutChar(bytes[offset], stream); + + if (returnValue == EOF) + { + return EOF; + } + + remaining -= 8; + offset++; + } + + if (remaining != 0) + { + /* write remaining bits */ + tmp = bytes[offset]; + tmp <<= (8 - remaining); + + while (remaining > 0) + { + returnValue = BitFilePutBit((tmp & 0x80), stream); + + if (returnValue == EOF) + { + return EOF; + } + + tmp <<= 1; + remaining--; + } + } + + return count; +} + +/*************************************************************************** +* Function : BitFilePutBitsBE (Big Endian) +* Description: This function writes the specified number of bits from the +* memory location passed as a parameter to the file passed +* as a parameter. Bits are written LSB to MSB. +* Parameters : stream - pointer to bit file stream to write to +* bits - pointer to bits to write +* count - number of bits to write +* Effects : Writes bits to the bit buffer and file stream. The bit +* buffer will be modified as necessary. bits is treated as +* a big endian integer of length size. +* Returned : EOF for failure, otherwise the number of bits written. If +* an error occurs after a partial write, the partially +* written bits will not be unwritten. +***************************************************************************/ +int BitFilePutBitsBE(bit_file_t *stream, void *bits, const unsigned int count, + const size_t size) +{ + unsigned char *bytes, tmp; + int offset, remaining, returnValue; + + if (count > (size * 8)) + { + /* too many bits to write */ + return EOF; + } + + bytes = (unsigned char *)bits; + offset = size - 1; + remaining = count; + + /* write whole bytes */ + while (remaining >= 8) + { + returnValue = BitFilePutChar(bytes[offset], stream); + + if (returnValue == EOF) + { + return EOF; + } + + remaining -= 8; + offset--; + } + + if (remaining != 0) + { + /* write remaining bits */ + tmp = bytes[offset]; + tmp <<= (8 - remaining); + + while (remaining > 0) + { + returnValue = BitFilePutBit((tmp & 0x80), stream); + + if (returnValue == EOF) + { + return EOF; + } + + tmp <<= 1; + remaining--; + } + } + + return count; +} diff --git a/test/compression/bitfile.h b/test/compression/bitfile.h new file mode 100644 index 0000000..20d102c --- /dev/null +++ b/test/compression/bitfile.h @@ -0,0 +1,121 @@ +/*************************************************************************** +* Bit Stream File Header +* +* File : bitfile.h +* Purpose : Provides definitions and prototypes for a simple library of +* I/O functions for files that contain data in sizes that aren't +* integral bytes. +* An attempt was made to make the functions in this library +* analogous to functions provided to manipulate byte streams. +* The functions contained in this library were created with +* compression algorithms in mind, but may be suited to other +* applications. +* Author : Michael Dipperstein +* Date : January 9, 2004 +* +**************************************************************************** +* UPDATES +* +* $Id: bitfile.h,v 1.6 2007/08/26 21:53:48 michael Exp $ +* $Log: bitfile.h,v $ +* Revision 1.6 2007/08/26 21:53:48 michael +* Changes required for LGPL v3. +* +* Revision 1.5 2006/06/03 19:33:11 michael +* Used spell checker to correct spelling. +* +* Revision 1.4 2005/12/06 15:06:37 michael +* Added BitFileGetBitsInt and BitFilePutBitsInt for integer types. +* +* Revision 1.3 2004/11/09 14:16:58 michael +* Added functions to convert open bit_file_t to FILE and to +* align open bit_file_t to the next byte. +* +* Revision 1.2 2004/06/15 13:16:10 michael +* Use incomplete type to hide definition of bitfile structure +* +* Revision 1.1.1.1 2004/02/09 05:31:42 michael +* Initial release +* +* +**************************************************************************** +* +* Bitfile: Bit stream File I/O Routines +* Copyright (C) 2004-2007 by Michael Dipperstein (mdipper@cs.ucsb.edu) +* +* This file is part of the bit file library. +* +* The bit file library is free software; you can redistribute it and/or +* modify it under the terms of the GNU Lesser General Public License as +* published by the Free Software Foundation; either version 3 of the +* License, or (at your option) any later version. +* +* The bit file library is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser +* General Public License for more details. +* +* You should have received a copy of the GNU Lesser General Public License +* along with this program. If not, see . +* +***************************************************************************/ + +#ifndef _BITFILE_H_ +#define _BITFILE_H_ + +/*************************************************************************** +* INCLUDED FILES +***************************************************************************/ +#include + +/*************************************************************************** +* TYPE DEFINITIONS +***************************************************************************/ +typedef enum +{ + BF_READ = 0, + BF_WRITE = 1, + BF_APPEND= 2, + BF_NO_MODE +} BF_MODES; + +/* incomplete type to hide implementation */ +struct bit_file_t; +typedef struct bit_file_t bit_file_t; + +/*************************************************************************** +* PROTOTYPES +***************************************************************************/ + +/* open/close file */ +bit_file_t *BitFileOpen(const char *fileName, const BF_MODES mode); +bit_file_t *MakeBitFile(FILE *stream, const BF_MODES mode); +int BitFileClose(bit_file_t *stream); +FILE *BitFileToFILE(bit_file_t *stream); + +/* toss spare bits and byte align file */ +int BitFileByteAlign(bit_file_t *stream); + +/* get/put character */ +int BitFileGetChar(bit_file_t *stream); +int BitFilePutChar(const int c, bit_file_t *stream); + +/* get/put single bit */ +int BitFileGetBit(bit_file_t *stream); +int BitFilePutBit(const int c, bit_file_t *stream); + +/* get/put number of bits (most significant bit to least significat bit) */ +int BitFileGetBits(bit_file_t *stream, void *bits, const unsigned int count); +int BitFilePutBits(bit_file_t *stream, void *bits, const unsigned int count); + +/*************************************************************************** +* get/put number of bits from integer types (short, int, long, ...) +* machine endiness is accounted for. +* size is the size of the data structure pointer to by bits. +***************************************************************************/ +int BitFileGetBitsInt(bit_file_t *stream, void *bits, const unsigned int count, + const size_t size); +int BitFilePutBitsInt(bit_file_t *stream, void *bits, const unsigned int count, + const size_t size); + +#endif /* _BITFILE_H_ */ diff --git a/test/compression/lzdecode.c b/test/compression/lzdecode.c new file mode 100644 index 0000000..cb46301 --- /dev/null +++ b/test/compression/lzdecode.c @@ -0,0 +1,282 @@ +/*************************************************************************** +* Lempel, Ziv, Storer, and Szymanski Decoding +* +* File : lzdecode.c +* Purpose : Use lzss coding (Storer and Szymanski's modified LZ77) to +* decompress lzss encoded files. +* Author : Michael Dipperstein +* Date : November 07, 2004 +* +**************************************************************************** +* UPDATES +* +* Date Change +* 12/10/03 Changed handling of sliding window to better match standard +* algorithm description. +* 12/11/03 Remebered to copy encoded characters to the sliding window +* even when there are no more characters in the input stream. +* +* +* Revision 1.2 2004/02/22 17:14:26 michael +* - Separated encode/decode, match finding, and main. +* - Use bitfiles for reading/writing files +* - Use traditional LZSS encoding where the coded/uncoded bits +* precede the symbol they are associated with, rather than +* aggregating the bits. +* +* Revision 1.1.1.1 2004/01/21 06:25:49 michael +* Initial version +* +* 11/07/04 Separated encode and decode functions for improved +* modularity. +* +* $Id: lzdecode.c,v 1.7 2007/09/20 04:34:25 michael Exp $ +* $Log: lzdecode.c,v $ +* Revision 1.7 2007/09/20 04:34:25 michael +* Changes required for LGPL v3. +* +* Revision 1.6 2007/03/25 05:11:32 michael +* Corrected file closure error reported by "Carl@Yahoo" . Now both input +* and output files are closed. +* +* Revision 1.5 2006/12/26 04:09:09 michael +* Updated e-mail address and minor text clean-up. +* +* Revision 1.4 2006/12/26 02:05:00 michael +* Corrected bug identified by Andrej Sinicyn which resulted in stdin being +* used as the default output for decoded data. +* +* Revision 1.3 2005/12/28 06:03:30 michael +* Use slower but clearer Get/PutBitsInt for reading/writing bits. +* Replace mod with conditional Wrap macro. +* +* Revision 1.2 2004/11/13 22:51:00 michael +* Provide distinct names for by file and by name functions and add some +* comments to make their usage clearer. +* +* Revision 1.1 2004/11/08 05:54:18 michael +* 1. Split encode and decode routines for smarter linking +* 2. Renamed lzsample.c sample.c to match my other samples +* 3. Makefile now builds code as libraries for better LGPL compliance. +* +* +* +**************************************************************************** +* +* LZDecode: An ANSI C LZSS Decoding Routines +* Copyright (C) 2003-2007 by +* Michael Dipperstein (mdipper@alumni.engr.ucsb.edu) +* +* This file is part of the lzss library. +* +* The lzss library is free software; you can redistribute it and/or +* modify it under the terms of the GNU Lesser General Public License as +* published by the Free Software Foundation; either version 3 of the +* License, or (at your option) any later version. +* +* The lzss library is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser +* General Public License for more details. +* +* You should have received a copy of the GNU Lesser General Public License +* along with this program. If not, see . +* +***************************************************************************/ + +/*************************************************************************** +* INCLUDED FILES +***************************************************************************/ +#include +#include +#include +#include "lzlocal.h" +#include "bitfile.h" + +/*************************************************************************** +* TYPE DEFINITIONS +***************************************************************************/ + +/*************************************************************************** +* CONSTANTS +***************************************************************************/ + +/*************************************************************************** +* GLOBAL VARIABLES +***************************************************************************/ +/* cyclic buffer sliding window of already read characters */ +extern unsigned char slidingWindow[]; +extern unsigned char uncodedLookahead[]; + +/*************************************************************************** +* PROTOTYPES +***************************************************************************/ + +/*************************************************************************** +* FUNCTIONS +***************************************************************************/ + +/**************************************************************************** +* Function : DecodeLZSSByFile +* Description: This function will read an LZSS encoded input file and +* write an output file. This algorithm encodes strings as 16 +* bits (a 12 bit offset + a 4 bit length). +* Parameters : fpIn - pointer to the open binary file to decode +* fpOut - pointer to the open binary file to write decoded +* output +* Effects : fpIn is decoded and written to fpOut. Neither file is +* closed after exit. +* Returned : EXIT_SUCCESS or EXIT_FAILURE +****************************************************************************/ +int DecodeLZSSByFile(FILE *fpIn, FILE *fpOut) +{ + bit_file_t *bfpIn; + + int c; + unsigned int i, nextChar; + encoded_string_t code; /* offset/length code for string */ + + if (fpIn == NULL) + { + /* use stdin if no input file */ + bfpIn = MakeBitFile(stdin, BF_READ); + } + else + { + /* convert output file to bitfile */ + bfpIn = MakeBitFile(fpIn, BF_READ); + } + + /* use stdout if no output file */ + if (fpOut == NULL) + { + fpOut = stdout; + } + + /************************************************************************ + * Fill the sliding window buffer with some known vales. EncodeLZSS must + * use the same values. If common characters are used, there's an + * increased chance of matching to the earlier strings. + ************************************************************************/ + memset(slidingWindow, ' ', WINDOW_SIZE * sizeof(unsigned char)); + + nextChar = 0; + + while (TRUE) + { + if ((c = BitFileGetBit(bfpIn)) == EOF) + { + /* we hit the EOF */ + break; + } + + if (c == UNCODED) + { + /* uncoded character */ + if ((c = BitFileGetChar(bfpIn)) == EOF) + { + break; + } + + /* write out byte and put it in sliding window */ + putc(c, fpOut); + slidingWindow[nextChar] = c; + nextChar = Wrap((nextChar + 1), WINDOW_SIZE); + } + else + { + /* offset and length */ + code.offset = 0; + code.length = 0; + + if ((BitFileGetBitsInt(bfpIn, &code.offset, OFFSET_BITS, + sizeof(unsigned int))) == EOF) + { + break; + } + + if ((BitFileGetBitsInt(bfpIn, &code.length, LENGTH_BITS, + sizeof(unsigned int))) == EOF) + { + break; + } + + code.length += MAX_UNCODED + 1; + + /**************************************************************** + * Write out decoded string to file and lookahead. It would be + * nice to write to the sliding window instead of the lookahead, + * but we could end up overwriting the matching string with the + * new string if abs(offset - next char) < match length. + ****************************************************************/ + for (i = 0; i < code.length; i++) + { + c = slidingWindow[Wrap((code.offset + i), WINDOW_SIZE)]; + putc(c, fpOut); + uncodedLookahead[i] = c; + } + + /* write out decoded string to sliding window */ + for (i = 0; i < code.length; i++) + { + slidingWindow[(nextChar + i) % WINDOW_SIZE] = + uncodedLookahead[i]; + } + + nextChar = Wrap((nextChar + code.length), WINDOW_SIZE); + } + } + + /* we've decoded everything, free bitfile structure */ + BitFileToFILE(bfpIn); + + return (EXIT_SUCCESS); +} + +/**************************************************************************** +* Function : DecodeLZSSByName +* Description: This function is provided to maintain compatibility with +* older versions of my LZSS implementation which used file +* names instead of file pointers. +* Parameters : inFile - name of file to decode +* outFile - name of file to write decoded output +* Effects : inFile is decoded and written to outFile +* Returned : EXIT_SUCCESS or EXIT_FAILURE +****************************************************************************/ +int DecodeLZSSByName(char *inFile, char *outFile) +{ + FILE *fpIn, *fpOut; + int returnValue; + + if (inFile == NULL) + { + fpIn = stdin; + } + if ((fpIn = fopen(inFile, "rb")) == NULL) + { + perror(inFile); + return (EXIT_FAILURE); + } + + if (outFile == NULL) + { + fpOut = stdout; + } + else + { + if ((fpOut = fopen(outFile, "wb")) == NULL) + { + fclose(fpIn); + perror(outFile); + return (EXIT_FAILURE); + } + } + + returnValue = DecodeLZSSByFile(fpIn, fpOut); + + /* close files */ + fclose(fpIn); + fclose(fpOut); + + return (returnValue); +} diff --git a/test/compression/lzencode.c b/test/compression/lzencode.c new file mode 100644 index 0000000..35f8b83 --- /dev/null +++ b/test/compression/lzencode.c @@ -0,0 +1,300 @@ +/*************************************************************************** +* Lempel, Ziv, Storer, and Szymanski Encoding +* +* File : lzdecode.c +* Purpose : Use lzss coding (Storer and Szymanski's modified LZ77) to +* compress lzss data files. +* Author : Michael Dipperstein +* Date : November 07, 2004 +* +**************************************************************************** +* UPDATES +* +* Date Change +* 12/10/03 Changed handling of sliding window to better match standard +* algorithm description. +* 12/11/03 Remebered to copy encoded characters to the sliding window +* even when there are no more characters in the input stream. +* +* +* Revision 1.2 2004/02/22 17:14:26 michael +* - Separated encode/decode, match finding, and main. +* - Use bitfiles for reading/writing files +* - Use traditional LZSS encoding where the coded/uncoded bits +* precede the symbol they are associated with, rather than +* aggregating the bits. +* +* Revision 1.1.1.1 2004/01/21 06:25:49 michael +* Initial version +* +* 11/07/04 Separated encode and decode functions for improved +* modularity. +* +* $Id: lzencode.c,v 1.6 2007/09/20 04:34:25 michael Exp $ +* $Log: lzencode.c,v $ +* Revision 1.6 2007/09/20 04:34:25 michael +* Changes required for LGPL v3. +* +* Revision 1.5 2007/03/25 05:11:32 michael +* Corrected file closure error reported by "Carl@Yahoo" . Now both input +* and output files are closed. +* +* Revision 1.4 2006/12/26 04:09:09 michael +* Updated e-mail address and minor text clean-up. +* +* Revision 1.3 2005/12/28 06:03:30 michael +* Use slower but clearer Get/PutBitsInt for reading/writing bits. +* Replace mod with conditional Wrap macro. +* +* Revision 1.2 2004/11/13 22:51:00 michael +* Provide distinct names for by file and by name functions and add some +* comments to make their usage clearer. +* +* Revision 1.1 2004/11/08 05:54:18 michael +* 1. Split encode and decode routines for smarter linking +* 2. Renamed lzsample.c sample.c to match my other samples +* 3. Makefile now builds code as libraries for better LGPL compliance. +* +* +* +**************************************************************************** +* +* LZEncode: An ANSI C LZSS Encoding Routines +* Copyright (C) 2003-2007 by +* Michael Dipperstein (mdipper@alumni.engr.ucsb.edu) +* +* This file is part of the lzss library. +* +* The lzss library is free software; you can redistribute it and/or +* modify it under the terms of the GNU Lesser General Public License as +* published by the Free Software Foundation; either version 3 of the +* License, or (at your option) any later version. +* +* The lzss library is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser +* General Public License for more details. +* +* You should have received a copy of the GNU Lesser General Public License +* along with this program. If not, see . +* +***************************************************************************/ + +/*************************************************************************** +* INCLUDED FILES +***************************************************************************/ +#include +#include +#include +#include "lzlocal.h" +#include "bitfile.h" + +/*************************************************************************** +* TYPE DEFINITIONS +***************************************************************************/ + +/*************************************************************************** +* CONSTANTS +***************************************************************************/ + +/*************************************************************************** +* GLOBAL VARIABLES +***************************************************************************/ +/* cyclic buffer sliding window of already read characters */ +extern unsigned char slidingWindow[]; +extern unsigned char uncodedLookahead[]; + +/*************************************************************************** +* PROTOTYPES +***************************************************************************/ + +/*************************************************************************** +* FUNCTIONS +***************************************************************************/ + +/**************************************************************************** +* Function : EncodeLZSSByFile +* Description: This function will read an input file and write an output +* file encoded according to the traditional LZSS algorithm. +* This algorithm encodes strings as 16 bits (a 12 bit offset +* + a 4 bit length). +* Parameters : fpIn - pointer to the open binary file to encode +* fpOut - pointer to the open binary file to write encoded +* output +* Effects : fpIn is encoded and written to fpOut. Neither file is +* closed after exit. +* Returned : EXIT_SUCCESS or EXIT_FAILURE +****************************************************************************/ +int EncodeLZSSByFile(FILE *fpIn, FILE *fpOut) +{ + bit_file_t *bfpOut; + + encoded_string_t matchData; + unsigned int i, c; + unsigned int len; /* length of string */ + + /* head of sliding window and lookahead */ + unsigned int windowHead, uncodedHead; + + /* use stdin if no input file */ + if (fpIn == NULL) + { + fpIn = stdin; + } + + if (fpOut == NULL) + { + /* use stdout if no output file */ + bfpOut = MakeBitFile(stdout, BF_WRITE); + } + else + { + /* convert output file to bitfile */ + bfpOut = MakeBitFile(fpOut, BF_WRITE); + } + + windowHead = 0; + uncodedHead = 0; + + /************************************************************************ + * Fill the sliding window buffer with some known vales. DecodeLZSS must + * use the same values. If common characters are used, there's an + * increased chance of matching to the earlier strings. + ************************************************************************/ + memset(slidingWindow, ' ', WINDOW_SIZE * sizeof(unsigned char)); + + /************************************************************************ + * Copy MAX_CODED bytes from the input file into the uncoded lookahead + * buffer. + ************************************************************************/ + for (len = 0; len < MAX_CODED; len++) { + c = getc(fpIn); + if (c == EOF) break; + uncodedLookahead[len] = c; + } + + if (len == 0) + { + return (EXIT_SUCCESS); /* inFile was empty */ + } + + /* Look for matching string in sliding window */ + InitializeSearchStructures(); + FindMatch(&matchData, windowHead, uncodedHead); + + /* now encoded the rest of the file until an EOF is read */ + while (len > 0) + { + if (matchData.length > len) + { + /* garbage beyond last data happened to extend match length */ + matchData.length = len; + } + + if (matchData.length <= MAX_UNCODED) + { + /* not long enough match. write uncoded flag and character */ + BitFilePutBit(UNCODED, bfpOut); + BitFilePutChar(uncodedLookahead[uncodedHead], bfpOut); + + matchData.length = 1; /* set to 1 for 1 byte uncoded */ + } + else + { + unsigned int adjustedLen; + + /* adjust the length of the match so minimun encoded len is 0*/ + adjustedLen = matchData.length - (MAX_UNCODED + 1); + + /* match length > MAX_UNCODED. Encode as offset and length. */ + BitFilePutBit(ENCODED, bfpOut); + BitFilePutBitsInt(bfpOut, &matchData.offset, OFFSET_BITS, + sizeof(unsigned int)); + BitFilePutBitsInt(bfpOut, &adjustedLen, LENGTH_BITS, + sizeof(unsigned int)); + } + + /******************************************************************** + * Replace the matchData.length worth of bytes we've matched in the + * sliding window with new bytes from the input file. + ********************************************************************/ + i = 0; + while ((i < matchData.length) && ((c = getc(fpIn)) != EOF)) + { + /* add old byte into sliding window and new into lookahead */ + ReplaceChar(windowHead, uncodedLookahead[uncodedHead]); + uncodedLookahead[uncodedHead] = c; + windowHead = Wrap((windowHead + 1), WINDOW_SIZE); + uncodedHead = Wrap((uncodedHead + 1), MAX_CODED); + i++; + } + + /* handle case where we hit EOF before filling lookahead */ + while (i < matchData.length) + { + ReplaceChar(windowHead, uncodedLookahead[uncodedHead]); + /* nothing to add to lookahead here */ + windowHead = Wrap((windowHead + 1), WINDOW_SIZE); + uncodedHead = Wrap((uncodedHead + 1), MAX_CODED); + len--; + i++; + } + + /* find match for the remaining characters */ + FindMatch(&matchData, windowHead, uncodedHead); + } + + /* we've decoded everything, free bitfile structure */ + BitFileToFILE(bfpOut); + + return (EXIT_SUCCESS); +} + +/**************************************************************************** +* Function : EncodeLZSSByName +* Description: This function is provided to maintain compatibility with +* older versions of my LZSS implementation which used file +* names instead of file pointers. +* Parameters : inFile - name of file to encode +* outFile - name of file to write encoded output +* Effects : inFile is encoded and written to outFile +* Returned : EXIT_SUCCESS or EXIT_FAILURE +****************************************************************************/ +int EncodeLZSSByName(char *inFile, char *outFile) +{ + FILE *fpIn, *fpOut; + int returnValue; + + /* open binary input and output files */ + if (inFile == NULL) + { + fpIn = stdin; + } + if ((fpIn = fopen(inFile, "rb")) == NULL) + { + perror(inFile); + return (EXIT_FAILURE); + } + + if (outFile == NULL) + { + fpOut = stdout; + } + else + { + if ((fpOut = fopen(outFile, "wb")) == NULL) + { + fclose(fpIn); + perror(outFile); + return (EXIT_FAILURE); + } + } + + returnValue = EncodeLZSSByFile(fpIn, fpOut); + + /* close files */ + fclose(fpIn); + fclose(fpOut); + + return (returnValue); +} diff --git a/test/compression/lzhash.c b/test/compression/lzhash.c new file mode 100644 index 0000000..83cbe94 --- /dev/null +++ b/test/compression/lzhash.c @@ -0,0 +1,358 @@ +#include +/*************************************************************************** +* Lempel, Ziv, Storer, and Szymanski Encoding and Decoding +* +* File : hash.c +* Purpose : Implement hash table optimized matching of uncoded strings +* for LZSS algorithm. +* Author : Michael Dipperstein +* Date : February 21, 2004 +* +**************************************************************************** +* UPDATES +* $Id: hash.c,v 1.7 2007/09/20 04:34:25 michael Exp $ +* $Log: hash.c,v $ +* Revision 1.7 2007/09/20 04:34:25 michael +* Changes required for LGPL v3. +* +* Revision 1.6 2006/12/26 04:09:09 michael +* Updated e-mail address and minor text clean-up. +* +* Revision 1.5 2005/12/29 14:37:56 michael +* Remove debug statements. +* +* Revision 1.4 2005/12/29 14:32:56 michael +* Correct algorithm for replacing characters in dictionary. +* +* Revision 1.3 2005/12/29 07:06:24 michael +* Let the hash table size vary with the size of the sliding window. +* +* Revision 1.2 2005/12/28 06:03:30 michael +* Use slower but clearer Get/PutBitsInt for reading/writing bits. +* Replace mod with conditional Wrap macro. +* +* Revision 1.1 2004/02/22 17:23:02 michael +* Initial revision of hash table search. Mostly code from lzhash.c. +* +* +**************************************************************************** +* +* Hash: Hash table optimized matching routines used by LZSS +* Encoding/Decoding Routine +* Copyright (C) 2004-2007 by +* Michael Dipperstein (mdipper@alumni.engr.ucsb.edu) +* +* This file is part of the lzss library. +* +* The lzss library is free software; you can redistribute it and/or +* modify it under the terms of the GNU Lesser General Public License as +* published by the Free Software Foundation; either version 3 of the +* License, or (at your option) any later version. +* +* The lzss library is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser +* General Public License for more details. +* +* You should have received a copy of the GNU Lesser General Public License +* along with this program. If not, see . +* +***************************************************************************/ + +/*************************************************************************** +* INCLUDED FILES +***************************************************************************/ +#include "lzlocal.h" + +/*************************************************************************** +* TYPE DEFINITIONS +***************************************************************************/ + +/*************************************************************************** +* CONSTANTS +***************************************************************************/ +#define NULL_INDEX (WINDOW_SIZE + 1) + +#define HASH_SIZE (WINDOW_SIZE >> 2) /* size of hash table */ + +/*************************************************************************** +* GLOBAL VARIABLES +***************************************************************************/ +/* cyclic buffer sliding window of already read characters */ +extern unsigned char slidingWindow[]; +extern unsigned char uncodedLookahead[]; + +unsigned int hashTable[HASH_SIZE]; /* list head for each hash key */ +unsigned int next[WINDOW_SIZE]; /* indices of next in hash list */ + +/*************************************************************************** +* PROTOTYPES +***************************************************************************/ + +/*************************************************************************** +* FUNCTIONS +***************************************************************************/ + +/**************************************************************************** +* Function : HashKey +* Description: This function generates a hash key for a (MAX_UNCODED + 1) +* long string located either in the sliding window or the +* uncoded lookahead. The key generation algorithm is +* supposed to be based on the algorithm used by gzip. As +* reported in K. Sadakane, H. Imai. "Improving the Speed of +* LZ77 Compression by Hashing and Suffix Sorting". IEICE +* Trans. Fundamentals, Vol. E83-A, No. 12 (December 2000) +* Parameters : offset - offset into either the uncoded lookahead or the +* sliding window. +* lookahead - TRUE iff offset is an offset into the uncoded +* lookahead buffer. +* Effects : NONE +* Returned : The sliding window index where the match starts and the +* length of the match. If there is no match a length of +* zero will be returned. +****************************************************************************/ +unsigned int HashKey(unsigned int offset, unsigned char lookahead) +{ + int i, hashKey; + + hashKey = 0; + + if (lookahead) + { + /* string is in the lookahead buffer */ + for (i = 0; i < (MAX_UNCODED + 1); i++) + { + hashKey = (hashKey << 5) ^ (uncodedLookahead[offset]); + hashKey %= HASH_SIZE; + offset = Wrap((offset + 1), MAX_CODED); + } + } + else + { + /* string is in the sliding window */ + for (i = 0; i < (MAX_UNCODED + 1); i++) + { + hashKey = (hashKey << 5) ^ (slidingWindow[offset]); + hashKey %= HASH_SIZE; + offset = Wrap((offset + 1), WINDOW_SIZE); + } + } + + return hashKey; +} + +/**************************************************************************** +* Function : InitializeSearchStructures +* Description: This function initializes structures used to speed up the +* process of mathcing uncoded strings to strings in the +* sliding window. For hashed searches, this means that a +* hash table pointing to linked lists is initialized. +* Parameters : None +* Effects : The hash table and next array are initialized. +* Returned : None +* +* NOTE: This function assumes that the sliding window is initially filled +* with all identical characters. +****************************************************************************/ +void InitializeSearchStructures() +{ + unsigned int i; + + /************************************************************************ + * Since the encode routine only fills the sliding window with one + * character, there is only one hash key for the entier sliding window. + * That means all positions are in the same linked list. + ************************************************************************/ + for (i = 0; i < (WINDOW_SIZE - 1); i++) + { + next[i] = i + 1; + } + + /* there's no next for the last character */ + next[WINDOW_SIZE - 1] = NULL_INDEX; + + /* the only list right now is the " " list */ + for (i = 0; i < HASH_SIZE; i++) + { + hashTable[i] = NULL_INDEX; + } + + hashTable[HashKey(0, FALSE)] = 0; +} + +/**************************************************************************** +* Function : FindMatch +* Description: This function will search through the slidingWindow +* dictionary for the longest sequence matching the MAX_CODED +* long string stored in uncodedLookahead. +* Parameters : uncodedHead - head of uncoded lookahead buffer +* Effects : NONE +* Returned : The sliding window index where the match starts and the +* length of the match. If there is no match a length of +* zero will be returned. +****************************************************************************/ +/* XL: no struct return */ +void FindMatch(encoded_string_t * matchData, + unsigned int windowHead, unsigned int uncodedHead) +{ + unsigned int i, j; + + matchData->length = 0; + matchData->offset = 0; + + i = hashTable[HashKey(uncodedHead, TRUE)]; /* start of proper list */ + j = 0; + + while (i != NULL_INDEX) + { + if (slidingWindow[i] == uncodedLookahead[uncodedHead]) + { + /* we matched one how many more match? */ + j = 1; + + while(slidingWindow[Wrap((i + j), WINDOW_SIZE)] == + uncodedLookahead[Wrap((uncodedHead + j), MAX_CODED)]) + { + if (j >= MAX_CODED) + { + break; + } + j++; + } + + if (j > matchData->length) + { + matchData->length = j; + matchData->offset = i; + } + } + + if (j >= MAX_CODED) + { + matchData->length = MAX_CODED; + break; + } + + i = next[i]; /* try next in list */ + } +} + +/**************************************************************************** +* Function : AddString +* Description: This function adds the (MAX_UNCODED + 1) long string +* starting at slidingWindow[charIndex] to the hash table's +* linked list associated with its hash key. +* Parameters : charIndex - sliding window index of the string to be +* added to the linked list. +* Effects : The string starting at slidingWindow[charIndex] is appended +* to the end of the appropriate linked list. +* Returned : NONE +****************************************************************************/ +void AddString(unsigned int charIndex) +{ + unsigned int i, hashKey; + + /* inserted character will be at the end of the list */ + next[charIndex] = NULL_INDEX; + + hashKey = HashKey(charIndex, FALSE); + + if (hashTable[hashKey] == NULL_INDEX) + { + /* this is the only character in it's list */ + hashTable[hashKey] = charIndex; + return; + } + + /* find the end of the list */ + i = hashTable[hashKey]; + + while(next[i] != NULL_INDEX) + { + i = next[i]; + } + + /* add new character to the list end */ + next[i] = charIndex; +} + +/**************************************************************************** +* Function : RemoveString +* Description: This function removes the (MAX_UNCODED + 1) long string +* starting at slidingWindow[charIndex] from the hash table's +* linked list associated with its hash key. +* Parameters : charIndex - sliding window index of the string to be +* removed from the linked list. +* Effects : The string starting at slidingWindow[charIndex] is removed +* from its linked list. +* Returned : NONE +****************************************************************************/ +void RemoveString(unsigned int charIndex) +{ + unsigned int i, hashKey; + unsigned int nextIndex; + + nextIndex = next[charIndex]; /* remember where this points to */ + next[charIndex] = NULL_INDEX; + + hashKey = HashKey(charIndex, FALSE); + + if (hashTable[hashKey] == charIndex) + { + /* we're deleting a list head */ + hashTable[hashKey] = nextIndex; + return; + } + + /* find character pointing to ours */ + i = hashTable[hashKey]; + + while(next[i] != charIndex) + { + i = next[i]; + } + + /* point the previous next */ + next[i] = nextIndex; +} + +/**************************************************************************** +* Function : ReplaceChar +* Description: This function replaces the character stored in +* slidingWindow[charIndex] with the one specified by +* replacement. The hash table entries effected by the +* replacement are also corrected. +* Parameters : charIndex - sliding window index of the character to be +* removed from the linked list. +* Effects : slidingWindow[charIndex] is replaced by replacement. Old +* hash entries for strings containing slidingWindow[charIndex] +* are removed and new ones are added. +* Returned : NONE +****************************************************************************/ +void ReplaceChar(unsigned int charIndex, unsigned char replacement) +{ + unsigned int firstIndex, i; + + if (charIndex < MAX_UNCODED) + { + firstIndex = (WINDOW_SIZE + charIndex) - MAX_UNCODED; + } + else + { + firstIndex = charIndex - MAX_UNCODED; + } + + /* remove all hash entries containing character at char index */ + for (i = 0; i < (MAX_UNCODED + 1); i++) + { + RemoveString(Wrap((firstIndex + i), WINDOW_SIZE)); + } + + slidingWindow[charIndex] = replacement; + + /* add all hash entries containing character at char index */ + for (i = 0; i < (MAX_UNCODED + 1); i++) + { + AddString(Wrap((firstIndex + i), WINDOW_SIZE)); + } +} diff --git a/test/compression/lzlocal.h b/test/compression/lzlocal.h new file mode 100644 index 0000000..876de91 --- /dev/null +++ b/test/compression/lzlocal.h @@ -0,0 +1,123 @@ +/*************************************************************************** +* Lempel, Ziv, Storer, and Szymanski Encoding and Decoding +* +* File : lzlocal.h +* Purpose : Internal headers for LZSS encode and decode routines. +* Contains the prototypes to be used by the different match +* finding algorithms. +* Author : Michael Dipperstein +* Date : February 18, 2004 +* +**************************************************************************** +* UPDATES +* +* $Id: lzlocal.h,v 1.5 2007/09/20 04:34:25 michael Exp $ +* $Log: lzlocal.h,v $ +* Revision 1.5 2007/09/20 04:34:25 michael +* Changes required for LGPL v3. +* +* Revision 1.4 2006/12/26 04:09:09 michael +* Updated e-mail address and minor text clean-up. +* +* Revision 1.3 2005/12/28 05:58:52 michael +* Add Wrap macro to replace mod when value is less than twice the limit. +* +* Revision 1.2 2004/11/08 05:54:18 michael +* 1. Split encode and decode routines for smarter linking +* 2. Renamed lzsample.c sample.c to match my other samples +* 3. Makefile now builds code as libraries for better LGPL compliance. +* +* Revision 1.1 2004/02/22 17:32:40 michael +* Initial revision of header files for sliding window search implementations. +* +* +**************************************************************************** +* +* LZSS: An ANSI C LZSS Encoding/Decoding Routine +* Copyright (C) 2004-2007 by +* Michael Dipperstein (mdipper@alumni.engr.ucsb.edu) +* +* This file is part of the lzss library. +* +* The lzss library is free software; you can redistribute it and/or +* modify it under the terms of the GNU Lesser General Public License as +* published by the Free Software Foundation; either version 3 of the +* License, or (at your option) any later version. +* +* The lzss library is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser +* General Public License for more details. +* +* You should have received a copy of the GNU Lesser General Public License +* along with this program. If not, see . +* +***************************************************************************/ +#ifndef _LZSS_LOCAL_H +#define _LZSS_LOCAL_H + +/*************************************************************************** +* INCLUDED FILES +***************************************************************************/ +#include + +/*************************************************************************** +* CONSTANTS +***************************************************************************/ +#define FALSE 0 +#define TRUE 1 + +#define OFFSET_BITS 12 +#define LENGTH_BITS 4 + +#if (((1 << (OFFSET_BITS + LENGTH_BITS)) - 1) > UINT_MAX) +#error "Size of encoded data must not exceed the size of an unsigned int" +#endif + +/* We want a sliding window*/ +#define WINDOW_SIZE (1 << OFFSET_BITS) + +/* maximum match length not encoded and maximum length encoded (4 bits) */ +#define MAX_UNCODED 2 +#define MAX_CODED ((1 << LENGTH_BITS) + MAX_UNCODED) + +#define ENCODED 0 /* encoded string */ +#define UNCODED 1 /* unencoded character */ + +/*************************************************************************** +* TYPE DEFINITIONS +***************************************************************************/ + +/*************************************************************************** +* This data structure stores an encoded string in (offset, length) format. +* The actual encoded string is stored using OFFSET_BITS for the offset and +* LENGTH_BITS for the length. +***************************************************************************/ +typedef struct encoded_string_t +{ + unsigned int offset; /* offset to start of longest match */ + unsigned int length; /* length of longest match */ +} encoded_string_t; + +/*************************************************************************** +* MACROS +***************************************************************************/ +/* wraps array index within array bounds (assumes value < 2 * limit) */ +#if 0 +#define Wrap(value, limit) (((value) < (limit)) ? (value) : ((value) - (limit))) +#else +#define Wrap(value, limit) ((unsigned int)(value) % (unsigned int)(limit)) +#endif + +/*************************************************************************** +* PROTOTYPES +***************************************************************************/ +void InitializeSearchStructures(void); + +/* XL: no struct return */ +void FindMatch(encoded_string_t * /*out*/, + unsigned int windowHead, unsigned int uncodedHead); + +void ReplaceChar(unsigned int charIndex, unsigned char replacement); + +#endif /* ndef _LZSS_LOCAL_H */ diff --git a/test/compression/lzss.h b/test/compression/lzss.h new file mode 100644 index 0000000..990d893 --- /dev/null +++ b/test/compression/lzss.h @@ -0,0 +1,96 @@ +/*************************************************************************** +* Lempel, Ziv, Storer, and Szymanski Encoding and Decoding +* +* File : lzss.h +* Purpose : Header for LZSS encode and decode routines. Contains the +* prototypes to be used by programs linking to the LZSS +* library. +* Author : Michael Dipperstein +* Date : February 21, 2004 +* +**************************************************************************** +* UPDATES +* +* $Id: lzss.h,v 1.5 2007/09/20 04:34:25 michael Exp $ +* $Log: lzss.h,v $ +* Revision 1.5 2007/09/20 04:34:25 michael +* Changes required for LGPL v3. +* +* Revision 1.4 2006/12/26 04:09:09 michael +* Updated e-mail address and minor text clean-up. +* +* Revision 1.3 2004/11/13 22:51:00 michael +* Provide distinct names for by file and by name functions and add some +* comments to make their usage clearer. +* +* Revision 1.2 2004/11/08 05:54:18 michael +* 1. Split encode and decode routines for smarter linking +* 2. Renamed lzsample.c sample.c to match my other samples +* 3. Makefile now builds code as libraries for better LGPL compliance. +* +* Revision 1.1 2004/02/22 17:37:50 michael +* Initial revision of headers for encode and decode routines. +* +* +**************************************************************************** +* +* LZSS: An ANSI C LZSS Encoding/Decoding Routine +* Copyright (C) 2004, 2006, 2007 by +* Michael Dipperstein (mdipper@alumni.engr.ucsb.edu) +* +* This file is part of the lzss library. +* +* The lzss library is free software; you can redistribute it and/or +* modify it under the terms of the GNU Lesser General Public License as +* published by the Free Software Foundation; either version 3 of the +* License, or (at your option) any later version. +* +* The lzss library is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser +* General Public License for more details. +* +* You should have received a copy of the GNU Lesser General Public License +* along with this program. If not, see . +* +***************************************************************************/ +#ifndef _LZSS_H +#define _LZSS_H + +/*************************************************************************** +* INCLUDED FILES +***************************************************************************/ +#include + +/*************************************************************************** +* MACROS +***************************************************************************/ +/* macros for compatibility with older library */ +#define EncodeLZSS(in, out) EncodeLZSSByName((in), (out)) +#define DecodeLZSS(in, out) DecodeLZSSByName((in), (out)) + +/*************************************************************************** +* PROTOTYPES +***************************************************************************/ +/*************************************************************************** +* LZSS encoding and decoding prototypes for functions with file name +* parameters. Provide these functions with name of the file to be +* encoded/decoded (inFile) and the name of the target file (outFile). +* These functions return EXIT_SUCCESS or EXIT_FAILURE. +***************************************************************************/ +int EncodeLZSSByName(char *inFile, char *outFile); +int DecodeLZSSByName(char *inFile, char *outFile); + +/*************************************************************************** +* LZSS encoding and decoding prototypes for functions with file pointer +* parameters. Provide these functions with a pointer to the open binary +* file to be encoded/decoded (fpIn) and pointer to the open binary target +* file (fpOut). It is the job of the function caller to open the files +* prior to callings these functions and to close the file after these +* functions have been called. +* These functions return EXIT_SUCCESS or EXIT_FAILURE. +***************************************************************************/ +int EncodeLZSSByFile(FILE *fpIn, FILE *fpOut); +int DecodeLZSSByFile(FILE *fpIn, FILE *fpOut); + +#endif /* ndef _LZSS_H */ diff --git a/test/compression/lzssmain.c b/test/compression/lzssmain.c new file mode 100644 index 0000000..30fc5ab --- /dev/null +++ b/test/compression/lzssmain.c @@ -0,0 +1,278 @@ +/*************************************************************************** +* Sample Program Using LZSS Library +* +* File : sample.c +* Purpose : Demonstrate usage of LZSS library +* Author : Michael Dipperstein +* Date : February 21, 2004 +* +**************************************************************************** +* UPDATES +* +* Revision 1.1 2004/02/22 17:36:30 michael +* Initial revision. Mostly code form old lzss.c. +* +* 11/07/04 Name changed to sample.c +* +* $Id: sample.c,v 1.5 2007/09/20 04:34:45 michael Exp $ +* $Log: sample.c,v $ +* Revision 1.5 2007/09/20 04:34:45 michael +* Replace getopt with optlist. +* Changes required for LGPL v3. +* +* Revision 1.4 2006/12/26 04:09:09 michael +* Updated e-mail address and minor text clean-up. +* +* Revision 1.3 2004/11/13 22:51:01 michael +* Provide distinct names for by file and by name functions and add some +* comments to make their usage clearer. +* +* Revision 1.2 2004/11/11 14:37:26 michael +* Open input and output files as binary. +* +* Revision 1.1 2004/11/08 05:54:18 michael +* 1. Split encode and decode routines for smarter linking +* 2. Renamed lzsample.c sample.c to match my other samples +* 3. Makefile now builds code as libraries for better LGPL compliance. +* +* +**************************************************************************** +* +* SAMPLE: Sample usage of LZSS Library +* Copyright (C) 2004, 2006, 2007 by +* Michael Dipperstein (mdipper@alumni.engr.ucsb.edu) +* +* This file is part of the lzss library. +* +* The lzss library is free software; you can redistribute it and/or +* modify it under the terms of the GNU Lesser General Public License as +* published by the Free Software Foundation; either version 3 of the +* License, or (at your option) any later version. +* +* The lzss library is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser +* General Public License for more details. +* +* You should have received a copy of the GNU Lesser General Public License +* along with this program. If not, see . +* +***************************************************************************/ + +/*************************************************************************** +* INCLUDED FILES +***************************************************************************/ +#include +#include +#include +#include "lzss.h" +#include "optlist.h" + +/*************************************************************************** +* TYPE DEFINITIONS +***************************************************************************/ +typedef enum +{ + ENCODE, + DECODE +} MODES; + +/*************************************************************************** +* CONSTANTS +***************************************************************************/ +char *RemovePath(char *fullPath); + +/*************************************************************************** +* GLOBAL VARIABLES +***************************************************************************/ + +/*************************************************************************** +* PROTOTYPES +***************************************************************************/ + +/*************************************************************************** +* FUNCTIONS +***************************************************************************/ + +/**************************************************************************** +* Function : main +* Description: This is the main function for this program, it validates +* the command line input and, if valid, it will either +* encode a file using the LZSS algorithm or decode a +* file encoded with the LZSS algorithm. +* Parameters : argc - number of parameters +* argv - parameter list +* Effects : Encodes/Decodes input file +* Returned : EXIT_SUCCESS for success, otherwise EXIT_FAILURE. +****************************************************************************/ +int main(int argc, char *argv[]) +{ + option_t *optList, *thisOpt; + FILE *fpIn, *fpOut; /* pointer to open input & output files */ + MODES mode; + + /* initialize data */ + fpIn = NULL; + fpOut = NULL; + mode = ENCODE; + + /* parse command line */ + optList = GetOptList(argc, argv, "cdi:o:h?"); + thisOpt = optList; + + while (thisOpt != NULL) + { + switch(thisOpt->option) + { + case 'c': /* compression mode */ + mode = ENCODE; + break; + + case 'd': /* decompression mode */ + mode = DECODE; + break; + + case 'i': /* input file name */ + if (fpIn != NULL) + { + fprintf(stderr, "Multiple input files not allowed.\n"); + fclose(fpIn); + + if (fpOut != NULL) + { + fclose(fpOut); + } + + FreeOptList(optList); + exit(EXIT_FAILURE); + } + + /* open input file as binary */ + fpIn = fopen(thisOpt->argument, "rb"); + if (fpIn == NULL) + { + perror("Opening input file"); + + if (fpOut != NULL) + { + fclose(fpOut); + } + + FreeOptList(optList); + exit(EXIT_FAILURE); + } + break; + + case 'o': /* output file name */ + if (fpOut != NULL) + { + fprintf(stderr, "Multiple output files not allowed.\n"); + fclose(fpOut); + + if (fpIn != NULL) + { + fclose(fpIn); + } + + FreeOptList(optList); + exit(EXIT_FAILURE); + } + + /* open output file as binary */ + fpOut = fopen(thisOpt->argument, "wb"); + if (fpOut == NULL) + { + perror("Opening output file"); + + if (fpIn != NULL) + { + fclose(fpIn); + } + + FreeOptList(optList); + exit(EXIT_FAILURE); + } + break; + + case 'h': + case '?': + printf("Usage: %s \n\n", RemovePath(argv[0])); + printf("options:\n"); + printf(" -c : Encode input file to output file.\n"); + printf(" -d : Decode input file to output file.\n"); + printf(" -i : Name of input file.\n"); + printf(" -o : Name of output file.\n"); + printf(" -h | ? : Print out command line options.\n\n"); + printf("Default: %s -c -i stdin -o stdout\n", + RemovePath(argv[0])); + + FreeOptList(optList); + return(EXIT_SUCCESS); + } + + optList = thisOpt->next; + free(thisOpt); + thisOpt = optList; + } + + + /* use stdin/out if no files are provided */ + if (fpIn == NULL) + { + fpIn = stdin; + } + + + if (fpOut == NULL) + { + fpOut = stdout; + } + + /* we have valid parameters encode or decode */ + if (mode == ENCODE) + { + EncodeLZSSByFile(fpIn, fpOut); + } + else + { + DecodeLZSSByFile(fpIn, fpOut); + } + + /* remember to close files */ + fclose(fpIn); + fclose(fpOut); + return EXIT_SUCCESS; +} + +/*************************************************************************** +* Function : RemovePath +* Description: This is function accepts a pointer to the name of a file +* along with path information and returns a pointer to the +* character that is not part of the path. +* Parameters : fullPath - pointer to an array of characters containing +* a file name and possible path modifiers. +* Effects : None +* Returned : Returns a pointer to the first character after any path +* information. +***************************************************************************/ +char *RemovePath(char *fullPath) +{ + int i; + char *start, *tmp; /* start of file name */ + const char delim[3] = {'\\', '/', ':'}; /* path deliminators */ + + start = fullPath; + + /* find the first character after all file path delimiters */ + for (i = 0; i < 3; i++) + { + tmp = strrchr(start, delim[i]); + + if (tmp != NULL) + { + start = tmp + 1; + } + } + + return start; +} diff --git a/test/compression/lzvars.c b/test/compression/lzvars.c new file mode 100644 index 0000000..cc82a6f --- /dev/null +++ b/test/compression/lzvars.c @@ -0,0 +1,76 @@ +/*************************************************************************** +* Lempel, Ziv, Storer, and Szymanski Global Variables +* +* File : lzvars.c +* Purpose : Declare global variables used lzss coding +* compress/decompress files. +* Author : Michael Dipperstein +* Date : November 07, 2003 +* +**************************************************************************** +* UPDATES +* +* $Id: lzvars.c,v 1.3 2007/09/20 04:34:25 michael Exp $ +* $Log: lzvars.c,v $ +* Revision 1.3 2007/09/20 04:34:25 michael +* Changes required for LGPL v3. +* +* Revision 1.2 2006/12/26 04:09:09 michael +* Updated e-mail address and minor text clean-up. +* +* Revision 1.1 2004/11/08 05:54:18 michael +* 1. Split encode and decode routines for smarter linking +* 2. Renamed lzsample.c sample.c to match my other samples +* 3. Makefile now builds code as libraries for better LGPL compliance. +* +* +**************************************************************************** +* +* LZvars: Global variables used in ANSI C LZSS Encoding/Decoding Routines +* Copyright (C) 2003, 2004, 2006, 2007 by +* Michael Dipperstein (mdipper@alumni.engr.ucsb.edu) +* +* This file is part of the lzss library. +* +* The lzss library is free software; you can redistribute it and/or +* modify it under the terms of the GNU Lesser General Public License as +* published by the Free Software Foundation; either version 3 of the +* License, or (at your option) any later version. +* +* The lzss library is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser +* General Public License for more details. +* +* You should have received a copy of the GNU Lesser General Public License +* along with this program. If not, see . +* +***************************************************************************/ + +/*************************************************************************** +* INCLUDED FILES +***************************************************************************/ +#include "lzlocal.h" + +/*************************************************************************** +* TYPE DEFINITIONS +***************************************************************************/ + +/*************************************************************************** +* CONSTANTS +***************************************************************************/ + +/*************************************************************************** +* GLOBAL VARIABLES +***************************************************************************/ +/* cyclic buffer sliding window of already read characters */ +unsigned char slidingWindow[WINDOW_SIZE]; +unsigned char uncodedLookahead[MAX_CODED]; + +/*************************************************************************** +* PROTOTYPES +***************************************************************************/ + +/*************************************************************************** +* FUNCTIONS +***************************************************************************/ diff --git a/test/compression/lzw.h b/test/compression/lzw.h new file mode 100644 index 0000000..89255f0 --- /dev/null +++ b/test/compression/lzw.h @@ -0,0 +1,70 @@ +/*************************************************************************** +* Header for Lempel-Ziv-Welch Encoding and Decoding Library +* +* File : arcode.h +* Purpose : Provides prototypes for functions that use Lempel-Ziv-Welch +* coding to encode/decode files. +* Author : Michael Dipperstein +* Date : January 30, 2004 +* +**************************************************************************** +* UPDATES +* +* $Id: lzw.h,v 1.3 2007/09/29 01:28:09 michael Exp $ +* $Log: lzw.h,v $ +* Revision 1.3 2007/09/29 01:28:09 michael +* Changes required for LGPL v3. +* +* Revision 1.2 2005/03/27 21:12:03 michael +* Update e-mail address in copyright notice. +* +* Revision 1.1.1.1 2005/02/21 03:35:34 michael +* Initial Release +* +**************************************************************************** +* +* LZW: An ANSI C Lempel-Ziv-Welch Encoding/Decoding Routines +* Copyright (C) 2005, 2007 by +* Michael Dipperstein (mdipper@alumni.engr.ucsb.edu) +* +* This file is part of the lzw library. +* +* The lzw library is free software; you can redistribute it and/or +* modify it under the terms of the GNU Lesser General Public License as +* published by the Free Software Foundation; either version 3 of the +* License, or (at your option) any later version. +* +* The lzw library is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser +* General Public License for more details. +* +* You should have received a copy of the GNU Lesser General Public License +* along with this program. If not, see . +* +***************************************************************************/ + +#ifndef _LZW_H_ +#define _LZW_H_ + +/*************************************************************************** +* CONSTANTS +***************************************************************************/ +#ifndef FALSE +#define FALSE 0 +#endif + +#ifndef TRUE +#define TRUE 1 +#endif + +/*************************************************************************** +* PROTOTYPES +***************************************************************************/ + /* encode inFile */ +int LZWEncodeFile(char *inFile, char *outFile); + +/* decode inFile*/ +int LZWDecodeFile(char *inFile, char *outFile); + +#endif /* ndef _ARCODE_H_ */ diff --git a/test/compression/lzwdecode.c b/test/compression/lzwdecode.c new file mode 100644 index 0000000..1f8b5ec --- /dev/null +++ b/test/compression/lzwdecode.c @@ -0,0 +1,306 @@ +/*************************************************************************** +* Lempel-Ziv-Welch Decoding Functions +* +* File : lzwdecode.c +* Purpose : Provides a function for decoding Lempel-Ziv-Welch encoded +* file streams +* Author : Michael Dipperstein +* Date : January 30, 2005 +* +**************************************************************************** +* UPDATES +* +* $Id: lzwdecode.c,v 1.2 2007/09/29 01:28:09 michael Exp $ +* $Log: lzwdecode.c,v $ +* Revision 1.2 2007/09/29 01:28:09 michael +* Changes required for LGPL v3. +* +* Revision 1.1 2005/04/09 03:11:22 michael +* Separated encode and decode routines into two different files in order to +* make future enhancements easier. +* +* Revision 1.4 2005/03/27 15:56:47 michael +* Use variable length code words. +* Include new e-mail addres. +* +* Revision 1.3 2005/03/10 14:26:58 michael +* User variable names that match discription in web page. +* +* Revision 1.2 2005/03/10 05:44:02 michael +* Minimize the size of the dictionary. +* +* Revision 1.1.1.1 2005/02/21 03:35:34 michael +* Initial Release +* +**************************************************************************** +* +* LZW: An ANSI C Lempel-Ziv-Welch Encoding/Decoding Routines +* Copyright (C) 2005, 2007 by +* Michael Dipperstein (mdipper@alumni.engr.ucsb.edu) +* +* This file is part of the lzw library. +* +* The lzw library is free software; you can redistribute it and/or +* modify it under the terms of the GNU Lesser General Public License as +* published by the Free Software Foundation; either version 3 of the +* License, or (at your option) any later version. +* +* The lzw library is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser +* General Public License for more details. +* +* You should have received a copy of the GNU Lesser General Public License +* along with this program. If not, see . +* +***************************************************************************/ + +/*************************************************************************** +* INCLUDED FILES +***************************************************************************/ +#include +#include +#include +#include +#include "lzw.h" +#include "bitfile.h" + +/*************************************************************************** +* TYPE DEFINITIONS +***************************************************************************/ +typedef struct +{ + unsigned char suffixChar; /* last char in encoded string */ + unsigned int prefixCode; /* code for remaining chars in string */ +} decode_dictionary_t; + +/*************************************************************************** +* CONSTANTS +***************************************************************************/ + +#define EMPTY -1 + +#define MIN_CODE_LEN 9 /* min # bits in a code word */ +#define MAX_CODE_LEN 15 /* max # bits in a code word */ + +#define FIRST_CODE (1 << CHAR_BIT) /* value of 1st string code */ + +#define MAX_CODES (1 << MAX_CODE_LEN) + +#define DICT_SIZE (MAX_CODES - FIRST_CODE) + +#if (MIN_CODE_LEN <= CHAR_BIT) +#error Code words must be larger than 1 character +#endif + +#if (MAX_CODE_LEN > (2 * CHAR_BIT)) +#error Code words larger than 2 characters require a re-write of GetCodeWord\ + and PutCodeWord +#endif + +#if ((MAX_CODES - 1) > INT_MAX) +#error There cannot be more codes than can fit in an integer +#endif + +/*************************************************************************** +* MACROS +***************************************************************************/ +#define CODE_MS_BITS(BITS) ((BITS) - CHAR_BIT) +#define MS_BITS_MASK(BITS) (UCHAR_MAX << (CHAR_BIT - CODE_MS_BITS(BITS))) +#define CURRENT_MAX_CODES(BITS) (1 << (BITS)) + +/*************************************************************************** +* GLOBAL VARIABLES +***************************************************************************/ + +/* dictionary of string the code word is the dictionary index */ +decode_dictionary_t dictionary[DICT_SIZE]; +char currentCodeLen; + +/*************************************************************************** +* PROTOTYPES +***************************************************************************/ +unsigned char DecodeRecursive(unsigned int code, FILE *fpOut); + +/* read encoded data */ +int GetCodeWord(bit_file_t *bfpIn); + +/*************************************************************************** +* FUNCTIONS +***************************************************************************/ + +/*************************************************************************** +* Function : LZWDecodeFile +* Description: This routine reads an input file 1 encoded string at a +* time and decodes it using the LZW algorithm. +* Parameters : inFile - Name of file to decode +* outFile - Name of file to write decoded output to +* Effects : File is decoded using the LZW algorithm with CODE_LEN +* codes. +* Returned : TRUE for success, otherwise FALSE. +***************************************************************************/ +int LZWDecodeFile(char *inFile, char *outFile) +{ + bit_file_t *bfpIn; /* encoded input */ + FILE *fpOut; /* decoded output */ + + unsigned int nextCode; /* value of next code */ + unsigned int lastCode; /* last decoded code word */ + unsigned int code; /* code word to decode */ + unsigned char c; /* last decoded character */ + + /* open input and output files */ + if (NULL == (bfpIn = BitFileOpen(inFile, BF_READ))) + { + perror(inFile); + return FALSE; + } + + if (NULL == outFile) + { + fpOut = stdout; + } + else + { + if (NULL == (fpOut = fopen(outFile, "wb"))) + { + BitFileClose(bfpIn); + perror(outFile); + return FALSE; + } + } + + /* start with 9 bit code words */ + currentCodeLen = 9; + + /* initialize for decoding */ + nextCode = FIRST_CODE; /* code for next (first) string */ + + /* first code from file must be a character. use it for initial values */ + lastCode = GetCodeWord(bfpIn); + c = lastCode; + fputc(lastCode, fpOut); + + /* decode rest of file */ + while ((code = GetCodeWord(bfpIn)) != EOF) + { + + /* look for code length increase marker */ + if (((CURRENT_MAX_CODES(currentCodeLen) - 1) == code) && + (currentCodeLen < MAX_CODE_LEN)) + { + currentCodeLen++; + code = GetCodeWord(bfpIn); + } + + if (code < nextCode) + { + /* we have a known code. decode it */ + c = DecodeRecursive(code, fpOut); + } + else + { + /*************************************************************** + * We got a code that's not in our dictionary. This must be due + * to the string + char + string + char + string exception. + * Build the decoded string using the last character + the + * string from the last code. + ***************************************************************/ + unsigned char tmp; + + tmp = c; + c = DecodeRecursive(lastCode, fpOut); + fputc(tmp, fpOut); + } + + /* if room, add new code to the dictionary */ + if (nextCode < MAX_CODES) + { + dictionary[nextCode - FIRST_CODE].prefixCode = lastCode; + dictionary[nextCode - FIRST_CODE].suffixChar = c; + nextCode++; + } + + /* save character and code for use in unknown code word case */ + lastCode = code; + } + + fclose(fpOut); + BitFileClose(bfpIn); + return TRUE; +} + +/*************************************************************************** +* Function : DecodeRecursive +* Description: This function uses the dictionary to decode a code word +* into the string it represents and write it to the output +* file. The string is actually built in reverse order and +* recursion is used to write it out in the correct order. +* Parameters : code - the code word to decode +* fpOut - the file that the decoded code word is written to +* Effects : Decoded code word is written to a file +* Returned : The first character in the decoded string +***************************************************************************/ +unsigned char DecodeRecursive(unsigned int code, FILE *fpOut) +{ + unsigned char c; + unsigned char firstChar; + + if (code >= FIRST_CODE) + { + /* code word is string + c */ + c = dictionary[code - FIRST_CODE].suffixChar; + code = dictionary[code - FIRST_CODE].prefixCode; + + /* evaluate new code word for remaining string */ + firstChar = DecodeRecursive(code, fpOut); + } + else + { + /* code word is just c */ + c = code; + firstChar = code; + } + + fputc(c, fpOut); + + return firstChar; +} + +/*************************************************************************** +* Function : GetCodeWord +* Description: This function reads and returns a code word from an +* encoded file. In order to deal with endian issue the +* code word is read least significant byte followed by the +* remaining bits. +* Parameters : bfpIn - bit file containing the encoded data +* Effects : code word is read from encoded input +* Returned : The next code word in the encoded file. EOF if the end +* of file has been reached. +* +* NOTE: If the code word contains more than 16 bits, this routine should +* be modified to read in all the bytes from least significant to +* most significant followed by any left over bits. +***************************************************************************/ +int GetCodeWord(bit_file_t *bfpIn) +{ + unsigned char byte; + int code; + + /* get LS character */ + if ((code = BitFileGetChar(bfpIn)) == EOF) + { + return EOF; + } + + + /* get remaining bits */ + if (BitFileGetBits(bfpIn, &byte, CODE_MS_BITS(currentCodeLen)) == EOF) + { + return EOF; + } + + code |= ((int)byte) << CODE_MS_BITS(currentCodeLen); + + return code; +} diff --git a/test/compression/lzwencode.c b/test/compression/lzwencode.c new file mode 100644 index 0000000..efd4a12 --- /dev/null +++ b/test/compression/lzwencode.c @@ -0,0 +1,465 @@ +/*************************************************************************** +* Lempel-Ziv-Welch Encoding Functions +* +* File : lzwencode.c +* Purpose : Provides a function for Lempel-Ziv-Welch encoding of file +* streams +* Author : Michael Dipperstein +* Date : January 30, 2005 +* +**************************************************************************** +* UPDATES +* +* $Id: lzwencode.c,v 1.3 2007/09/29 01:28:09 michael Exp $ +* $Log: lzwencode.c,v $ +* Revision 1.3 2007/09/29 01:28:09 michael +* Changes required for LGPL v3. +* +* Revision 1.2 2005/04/21 04:26:57 michael +* Encoding dictionary is now built using a binary tree. +* +* Revision 1.1 2005/04/09 03:11:22 michael +* Separated encode and decode routines into two different files in order to +* make future enhancements easier. +* +* Revision 1.4 2005/03/27 15:56:47 michael +* Use variable length code words. +* Include new e-mail addres. +* +* Revision 1.3 2005/03/10 14:26:58 michael +* User variable names that match discription in web page. +* +* Revision 1.2 2005/03/10 05:44:02 michael +* Minimize the size of the dictionary. +* +* Revision 1.1.1.1 2005/02/21 03:35:34 michael +* Initial Release +* +**************************************************************************** +* +* LZW: An ANSI C Lempel-Ziv-Welch Encoding/Decoding Routines +* Copyright (C) 2005, 2007 by +* Michael Dipperstein (mdipper@alumni.engr.ucsb.edu) +* +* This file is part of the lzw library. +* +* The lzw library is free software; you can redistribute it and/or +* modify it under the terms of the GNU Lesser General Public License as +* published by the Free Software Foundation; either version 3 of the +* License, or (at your option) any later version. +* +* The lzw library is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser +* General Public License for more details. +* +* You should have received a copy of the GNU Lesser General Public License +* along with this program. If not, see . +* +***************************************************************************/ + +/*************************************************************************** +* INCLUDED FILES +***************************************************************************/ +#include +#include +#include +#include +#include "lzw.h" +#include "bitfile.h" + +/*************************************************************************** +* TYPE DEFINITIONS +***************************************************************************/ +/* node in dictionary tree */ +typedef struct dict_node_t +{ + unsigned int codeWord; /* code word for this entry */ + unsigned char suffixChar; /* last char in encoded string */ + unsigned int prefixCode; /* code for remaining chars in string */ + + /* pointer to child nodes */ + struct dict_node_t *left; /* child with < key */ + struct dict_node_t *right; /* child with >= key */ +} dict_node_t; + +/*************************************************************************** +* CONSTANTS +***************************************************************************/ + +#define MIN_CODE_LEN 9 /* min # bits in a code word */ +#define MAX_CODE_LEN 15 /* max # bits in a code word */ + +#define FIRST_CODE (1 << CHAR_BIT) /* value of 1st string code */ + +#define MAX_CODES (1 << MAX_CODE_LEN) + +#if (MIN_CODE_LEN <= CHAR_BIT) +#error Code words must be larger than 1 character +#endif + +#if (MAX_CODE_LEN > (2 * CHAR_BIT)) +#error Code words larger than 2 characters require a re-write of GetCodeWord\ + and PutCodeWord +#endif + +#if ((MAX_CODES - 1) > INT_MAX) +#error There cannot be more codes than can fit in an integer +#endif + +/*************************************************************************** +* MACROS +***************************************************************************/ +#define CODE_MS_BITS(BITS) ((BITS) - CHAR_BIT) +#define MS_BITS_MASK(BITS) (UCHAR_MAX << (CHAR_BIT - CODE_MS_BITS(BITS))) +#define CURRENT_MAX_CODES(BITS) (1 << (BITS)) + +/*************************************************************************** +* GLOBAL VARIABLES +***************************************************************************/ + +/* dictionary of string codes (encode process uses a hash key) */ +/* XL hack: reuse that of lzwdecode.c */ +extern char currentCodeLen; + +/*************************************************************************** +* PROTOTYPES +***************************************************************************/ + +/* dictionary tree node create/free */ +dict_node_t *MakeNode(unsigned int codeWord, + unsigned int prefixCode, unsigned char suffixChar); +void FreeTree(dict_node_t *node); + +/* searches tree for matching dictionary entry */ +dict_node_t *FindDictionaryEntry(dict_node_t *root, int prefixCode, + unsigned char c); + +/* makes key from prefix code and character */ +unsigned int MakeKey(unsigned int prefixCode, unsigned char suffixChar); + +/* write encoded data */ +void PutCodeWord(int code, bit_file_t *bfpOut); + +/*************************************************************************** +* FUNCTIONS +***************************************************************************/ + +/*************************************************************************** +* Function : LZWEncodeFile +* Description: This routine reads an input file 1 character at a time and +* writes out an LZW encoded version of that file. +* Parameters : inFile - Name of file to encode +* outFile - Name of file to write encoded output to +* Effects : File is encoded using the LZW algorithm with CODE_LEN +* codes. +* Returned : TRUE for success, otherwise FALSE. +***************************************************************************/ +int LZWEncodeFile(char *inFile, char *outFile) +{ + FILE *fpIn; /* uncoded input */ + bit_file_t *bfpOut; /* encoded output */ + + int code; /* code for current string */ + unsigned int nextCode; /* next available code index */ + int c; /* character to add to string */ + + dict_node_t *dictRoot; /* root of dictionary tree */ + dict_node_t *node; /* node of dictionary tree */ + + /* open input and output files */ + if (NULL == (fpIn = fopen(inFile, "rb"))) + { + perror(inFile); + return FALSE; + } + + if (NULL == outFile) + { + bfpOut = MakeBitFile(stdout, BF_WRITE); + } + else + { + if (NULL == (bfpOut = BitFileOpen(outFile, BF_WRITE))) + { + fclose(fpIn); + perror(outFile); + return FALSE; + } + } + + /* initialize dictionary as empty */ + dictRoot = NULL; + + /* start with 9 bit code words */ + currentCodeLen = 9; + + nextCode = FIRST_CODE; /* code for next (first) string */ + + /* now start the actual encoding process */ + + code = fgetc(fpIn); /* start with code string = first character */ + + /* create a tree root from 1st 2 character string */ + if ((c = fgetc(fpIn)) != EOF) + { + /* special case for NULL root */ + dictRoot = MakeNode(nextCode, code, c); + nextCode++; + + /* write code for 1st char */ + PutCodeWord(code, bfpOut); + + /* new code is just 2nd char */ + code = c; + } + + /* now encode normally */ + while ((c = fgetc(fpIn)) != EOF) + { + /* look for code + c in the dictionary */ + node = FindDictionaryEntry(dictRoot, code, c); + + if ((node->prefixCode == code) && + (node->suffixChar == c)) + { + /* code + c is in the dictionary, make it's code the new code */ + code = node->codeWord; + } + else + { + /* code + c is not in the dictionary, add it if there's room */ + if (nextCode < MAX_CODES) + { + dict_node_t *tmp; + + tmp = MakeNode(nextCode, code, c); + nextCode++; + + if (MakeKey(code, c) < + MakeKey(node->prefixCode, node->suffixChar)) + { + node->left = tmp; + } + else + { + node->right = tmp; + } + } + + /* are we using enough bits to write out this code word? */ + if ((code >= (CURRENT_MAX_CODES(currentCodeLen) - 1)) && + (currentCodeLen < MAX_CODE_LEN)) + { + /* mark need for bigger code word with all ones */ + PutCodeWord((CURRENT_MAX_CODES(currentCodeLen) - 1), bfpOut); + currentCodeLen++; + } + + /* write out code for the string before c was added */ + PutCodeWord(code, bfpOut); + + /* new code is just c */ + code = c; + } + } + + /* no more input. write out last of the code. */ + PutCodeWord(code, bfpOut); + + fclose(fpIn); + BitFileClose(bfpOut); + + /* free the dictionary */ + if (dictRoot != NULL) + { + FreeTree(dictRoot); + } + + return TRUE; +} + +/*************************************************************************** +* Function : MakeKey +* Description: This routine creates a simple key from a prefix code and +* an appended character. The key may be used to establish +* an order when building/searching a dictionary tree. +* Parameters : prefixCode - code for all but the last character of a +* string. +* suffixChar - the last character of a string +* Effects : None +* Returned : Key built from string represented as a prefix + char. Key +* format is {ms nibble of c} + prefix + {ls nibble of c} +***************************************************************************/ +unsigned int MakeKey(unsigned int prefixCode, unsigned char suffixChar) +{ + unsigned int key; + + /* position ms nibble */ + key = suffixChar & 0xF0; + key <<= MAX_CODE_LEN; + + /* include prefix code */ + key |= (prefixCode << 4); + + /* inclulde ls nibble */ + key |= (suffixChar & 0x0F); + + return key; +} + +/*************************************************************************** +* Function : MakeNode +* Description: This routine creates and initializes a dictionary entry +* for a string and the code word that encodes it. +* Parameters : codeWord - code word used to encode the string prefixCode + +* suffixChar +* prefixCode - code for all but the last character of a +* string. +* suffixChar - the last character of a string +* Effects : Node is allocated for new dictionary entry +* Returned : Pointer to newly allocated node +***************************************************************************/ +dict_node_t *MakeNode(unsigned int codeWord, + unsigned int prefixCode, unsigned char suffixChar) +{ + dict_node_t *node; + + node = malloc(sizeof(dict_node_t)); + + if (NULL != node) + { + node->codeWord = codeWord; + node->prefixCode = prefixCode; + node->suffixChar = suffixChar; + + node->left = NULL; + node->right = NULL; + } + else + { + perror("allocating dictionary node"); + exit(EXIT_FAILURE); + } + + return node; +} + +/*************************************************************************** +* Function : FreeTree +* Description: This routine will free all nodes of a tree rooted at the +* node past as a parameter. +* Parameters : node - root of tree to free +* Effects : frees allocated tree node from initial parameter down. +* Returned : none +***************************************************************************/ +void FreeTree(dict_node_t *node) +{ + /* free left branch */ + if (node->left != NULL) + { + FreeTree(node->left); + } + + /* free right branch */ + if (node->right != NULL) + { + FreeTree(node->right); + } + + /* free root */ + free(node); +} + +/*************************************************************************** +* Function : FindDictionaryEntry +* Description: This routine searches the dictionary tree for an entry +* with a matching string (prefix code + suffix character). +* If one isn't found, the parent node for that string is +* returned. +* Parameters : prefixCode - code for the prefix of string +* c - last character in string +* Effects : None +* Returned : If string is in dictionary, pointer to node containing +* string, otherwise pointer to suitable parent node. NULL +* is returned for an empty tree. +***************************************************************************/ +dict_node_t *FindDictionaryEntry(dict_node_t *root, int prefixCode, + unsigned char c) +{ + unsigned int searchKey, key; + + if (NULL == root) + { + return NULL; + } + + searchKey = MakeKey(prefixCode, c); /* key of string to find */ + + while (1) + { + /* key of current node */ + key = MakeKey(root->prefixCode, root->suffixChar); + + if (key == searchKey) + { + /* current node contains string */ + return root; + } + else if (searchKey < key) + { + if (NULL != root->left) + { + /* check left branch for string */ + root = root->left; + } + else + { + /* string isn't in tree, it can be added as a left child */ + return root; + } + } + else + { + if (NULL != root->right) + { + /* check right branch for string */ + root = root->right; + } + else + { + /* string isn't in tree, it can be added as a right child */ + return root; + } + } + } +} + +/*************************************************************************** +* Function : PutCodeWord +* Description: This function writes a code word from to an encoded file. +* In order to deal with endian issue the code word is +* written least significant byte followed by the remaining +* bits. +* Parameters : bfpOut - bit file containing the encoded data +* code - code word to add to the encoded data +* Effects : code word is written to the encoded output +* Returned : None +* +* NOTE: If the code word contains more than 16 bits, this routine should +* be modified to write out all the bytes from least significant to +* most significant followed by any left over bits. +***************************************************************************/ +void PutCodeWord(int code, bit_file_t *bfpOut) +{ + unsigned char byte; + + /* write LS character */ + byte = code & 0xFF; + BitFilePutChar(byte, bfpOut); + + /* write remaining bits */ + byte = (code >> CODE_MS_BITS(currentCodeLen)) & + MS_BITS_MASK(currentCodeLen); + BitFilePutBits(bfpOut, &byte, CODE_MS_BITS(currentCodeLen)); +} diff --git a/test/compression/lzwmain.c b/test/compression/lzwmain.c new file mode 100644 index 0000000..9f5a8b8 --- /dev/null +++ b/test/compression/lzwmain.c @@ -0,0 +1,260 @@ +/*************************************************************************** +* Sample Program Using LZW Encoding Library +* +* File : sample.c +* Purpose : Demonstrate usage of LZW encoding library +* Author : Michael Dipperstein +* Date : January 30, 2005 +* +**************************************************************************** +* UPDATES +* +* $Id: sample.c,v 1.4 2007/09/29 01:28:27 michael Exp $ +* $Log: sample.c,v $ +* Revision 1.4 2007/09/29 01:28:27 michael +* Replace getopt with optlist. +* Changes required for LGPL v3. +* +* Revision 1.3 2005/03/27 21:10:22 michael +* Update e-mail address in copyright notice. +* +* Revision 1.2 2005/02/21 03:52:06 michael +* Correct comments in headers. +* +* Revision 1.1.1.1 2005/02/21 03:35:34 michael +* Initial Release +* +**************************************************************************** +* +* SAMPLE: Sample usage of Lempel-Ziv-Welch Encoding Library +* Copyright (C) 2005, 2007 by +* Michael Dipperstein (mdipper@alumni.engr.ucsb.edu) +* +* This file is part of the lzw library. +* +* The lzw library is free software; you can redistribute it and/or +* modify it under the terms of the GNU Lesser General Public License as +* published by the Free Software Foundation; either version 3 of the +* License, or (at your option) any later version. +* +* The lzw library is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser +* General Public License for more details. +* +* You should have received a copy of the GNU Lesser General Public License +* along with this program. If not, see . +* +***************************************************************************/ + +/*************************************************************************** +* INCLUDED FILES +***************************************************************************/ +#include +#include +#include +#include "optlist.h" +#include "lzw.h" + +/*************************************************************************** +* PROTOTYPES +***************************************************************************/ +char *RemovePath(char *fullPath); + +/*************************************************************************** +* FUNCTIONS +***************************************************************************/ + +/**************************************************************************** +* Function : main +* Description: This is the main function for this program, it validates +* the command line input and, if valid, it will call +* functions to encode or decode a file using the lzw +* coding algorithm. +* Parameters : argc - number of parameters +* argv - parameter list +* Effects : Encodes/Decodes input file +* Returned : EXIT_SUCCESS for success, otherwise EXIT_FAILURE. +****************************************************************************/ +int main(int argc, char *argv[]) +{ + option_t *optList, *thisOpt; + char *inFile, *outFile; /* name of input & output files */ + char encode; /* encode/decode */ + + /* initialize data */ + inFile = NULL; + outFile = NULL; + encode = TRUE; + + /* parse command line */ + optList = GetOptList(argc, argv, "cdi:o:h?"); + thisOpt = optList; + + while (thisOpt != NULL) + { + switch(thisOpt->option) + { + case 'c': /* compression mode */ + encode = TRUE; + break; + + case 'd': /* decompression mode */ + encode = FALSE; + break; + + case 'i': /* input file name */ + if (inFile != NULL) + { + fprintf(stderr, "Multiple input files not allowed.\n"); + free(inFile); + + if (outFile != NULL) + { + free(outFile); + } + + FreeOptList(optList); + exit(EXIT_FAILURE); + } + else if ((inFile = + (char *)malloc(strlen(thisOpt->argument) + 1)) == NULL) + { + perror("Memory allocation"); + + if (outFile != NULL) + { + free(outFile); + } + + FreeOptList(optList); + exit(EXIT_FAILURE); + } + + strcpy(inFile, thisOpt->argument); + break; + + case 'o': /* output file name */ + if (outFile != NULL) + { + fprintf(stderr, "Multiple output files not allowed.\n"); + free(outFile); + + if (inFile != NULL) + { + free(inFile); + } + + FreeOptList(optList); + exit(EXIT_FAILURE); + } + else if ((outFile = + (char *)malloc(strlen(thisOpt->argument) + 1)) == NULL) + { + perror("Memory allocation"); + + if (inFile != NULL) + { + free(inFile); + } + + FreeOptList(optList); + exit(EXIT_FAILURE); + } + + strcpy(outFile, thisOpt->argument); + break; + + case 'h': + case '?': + printf("Usage: %s \n\n", RemovePath(argv[0])); + printf("options:\n"); + printf(" -c : Encode input file to output file.\n"); + printf(" -d : Decode input file to output file.\n"); + printf(" -i : Name of input file.\n"); + printf(" -o : Name of output file.\n"); + printf(" -h | ? : Print out command line options.\n\n"); + printf("Default: %s -c\n", RemovePath(argv[0])); + + FreeOptList(optList); + return(EXIT_SUCCESS); + } + + optList = thisOpt->next; + free(thisOpt); + thisOpt = optList; + } + + /* validate command line */ + if (inFile == NULL) + { + fprintf(stderr, "Input file must be provided\n"); + fprintf(stderr, "Enter \"%s -?\" for help.\n", RemovePath(argv[0])); + + if (outFile != NULL) + { + free(outFile); + } + + exit (EXIT_FAILURE); + } + else if (outFile == NULL) + { + fprintf(stderr, "Output file must be provided\n"); + fprintf(stderr, "Enter \"%s -?\" for help.\n", RemovePath(argv[0])); + + if (inFile != NULL) + { + free(inFile); + } + + exit (EXIT_FAILURE); + } + + /* we have valid parameters encode or decode */ + if (encode) + { + LZWEncodeFile(inFile, outFile); + } + else + { + LZWDecodeFile(inFile, outFile); + } + + free(inFile); + free(outFile); + return EXIT_SUCCESS; +} + +/**************************************************************************** +* Function : RemovePath +* Description: This is function accepts a pointer to the name of a file +* along with path information and returns a pointer to the +* character that is not part of the path. +* Parameters : fullPath - pointer to an array of characters containing +* a file name and possible path modifiers. +* Effects : None +* Returned : Returns a pointer to the first character after any path +* information. +****************************************************************************/ +char *RemovePath(char *fullPath) +{ + int i; + char *start, *tmp; /* start of file name */ + const char delim[3] = {'\\', '/', ':'}; /* path deliminators */ + + start = fullPath; + + /* find the first character after all file path delimiters */ + for (i = 0; i < 3; i++) + { + tmp = strrchr(start, delim[i]); + + if (tmp != NULL) + { + start = tmp + 1; + } + } + + return start; +} diff --git a/test/compression/optlist.c b/test/compression/optlist.c new file mode 100644 index 0000000..d1fc013 --- /dev/null +++ b/test/compression/optlist.c @@ -0,0 +1,228 @@ +/*************************************************************************** +* Command Line Option Parser +* +* File : optlist.c +* Purpose : Provide getopt style command line option parsing +* Author : Michael Dipperstein +* Date : August 1, 2007 +* +**************************************************************************** +* HISTORY +* +* $Id: optlist.c,v 1.1.1.2 2007/09/04 04:45:42 michael Exp $ +* $Log: optlist.c,v $ +* Revision 1.1.1.2 2007/09/04 04:45:42 michael +* Added FreeOptList. +* +* Revision 1.1.1.1 2007/08/07 05:01:48 michael +* Initial Release +* +**************************************************************************** +* +* OptList: A command line option parsing library +* Copyright (C) 2007 by Michael Dipperstein (mdipper@alumni.engr.ucsb.edu) +* +* This file is part of the OptList library. +* +* OptList is free software; you can redistribute it and/or modify it +* under the terms of the GNU Lesser General Public License as published by +* the Free Software Foundation; either version 3 of the License, or (at +* your option) any later version. +* +* OptList is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser +* General Public License for more details. +* +* You should have received a copy of the GNU Lesser General Public License +* along with this program. If not, see . +* +***************************************************************************/ + +/*************************************************************************** +* INCLUDED FILES +***************************************************************************/ +#include "optlist.h" +#include +#include +#include + +/*************************************************************************** +* TYPE DEFINITIONS +***************************************************************************/ + +/*************************************************************************** +* CONSTANTS +***************************************************************************/ + +/*************************************************************************** +* GLOBAL VARIABLES +***************************************************************************/ + +/*************************************************************************** +* PROTOTYPES +***************************************************************************/ +option_t *MakeOpt(const char option, char *const argument, const int index); + +/*************************************************************************** +* FUNCTIONS +***************************************************************************/ + +/**************************************************************************** +* Function : GetOptList +* Description: This function is similar to the POSIX function getopt. All +* options and their corresponding arguments are returned in a +* linked list. This function should only be called once per +* an option list and it does not modify argv or argc. +* Parameters : argc - the number of command line arguments (including the +* name of the executable) +* argv - pointer to the open binary file to write encoded +* output +* options - getopt style option list. A NULL terminated +* string of single character options. Follow an +* option with a colon to indicate that it requires +* an argument. +* Effects : Creates a link list of command line options and their +* arguments. +* Returned : option_t type value where the option and arguement fields +* contain the next option symbol and its argument (if any). +* The argument field will be set to NULL if the option is +* specified as having no arguments or no arguments are found. +* The option field will be set to PO_NO_OPT if no more +* options are found. +* +* NOTE: The caller is responsible for freeing up the option list when it +* is no longer needed. +****************************************************************************/ +option_t *GetOptList(const int argc, char *const argv[], char *const options) +{ + int nextArg; + option_t *head, *tail; + int optIndex; + + /* start with first argument and nothing found */ + nextArg = 1; + head = NULL; + tail = NULL; + + /* loop through all of the command line arguments */ + while (nextArg < argc) + { + if ((strlen(argv[nextArg]) > 1) && ('-' == argv[nextArg][0])) + { + /* possible option */ + optIndex = 0; + + /* attempt to find a matching option */ + while ((options[optIndex] != '\0') && + (options[optIndex] != argv[nextArg][1])) + { + do + { + optIndex++; + } + while ((options[optIndex] != '\0') && + (':' == options[optIndex])); + } + + if (options[optIndex] == argv[nextArg][1]) + { + /* we found the matching option */ + if (NULL == head) + { + head = MakeOpt(options[optIndex], NULL, OL_NOINDEX); + tail = head; + } + else + { + tail->next = MakeOpt(options[optIndex], NULL, OL_NOINDEX); + tail = tail->next; + } + + if (':' == options[optIndex + 1]) + { + /* the option found should have a text arguement */ + if (strlen(argv[nextArg]) > 2) + { + /* no space between argument and option */ + tail->argument = &(argv[nextArg][2]); + tail->argIndex = nextArg; + } + else if (nextArg < argc) + { + /* there must be space between the argument option */ + nextArg++; + tail->argument = argv[nextArg]; + tail->argIndex = nextArg; + } + } + } + } + + nextArg++; + } + + return head; +} + +/**************************************************************************** +* Function : MakeOpt +* Description: This function uses malloc to allocate space for an option_t +* type structure and initailizes the structure with the +* values passed as a parameter. +* Parameters : option - this option character +* argument - pointer string containg the argument for option. +* Use NULL for no argument +* index - argv[index] contains argument us OL_NOINDEX for +* no argument +* Effects : A new option_t type variable is created on the heap. +* Returned : Pointer to newly created and initialized option_t type +* structure. NULL if space for structure can't be allocated. +****************************************************************************/ +option_t *MakeOpt(const char option, char *const argument, const int index) +{ + option_t *opt; + + opt = malloc(sizeof(option_t)); + + if (opt != NULL) + { + opt->option = option; + opt->argument = argument; + opt->argIndex = index; + opt->next = NULL; + } + else + { + perror("Failed to Allocate option_t"); + } + + return opt; +} + +/**************************************************************************** +* Function : FreeOptList +* Description: This function will free all the elements in an option_t +* type linked list starting from the node passed as a +* parameter. +* Parameters : list - head of linked list to be freed +* Effects : All elements of the linked list pointed to by list will +* be freed and list will be set to NULL. +* Returned : None +****************************************************************************/ +void FreeOptList(option_t *list) +{ + option_t *head, *next; + + head = list; + list = NULL; + + while (head != NULL) + { + next = head->next; + free(head); + head = next; + } + + return; +} diff --git a/test/compression/optlist.h b/test/compression/optlist.h new file mode 100644 index 0000000..0461263 --- /dev/null +++ b/test/compression/optlist.h @@ -0,0 +1,74 @@ +/*************************************************************************** +* Command Line Option Parser +* +* File : optlist.h +* Purpose : Header for getopt style command line option parsing +* Author : Michael Dipperstein +* Date : August 1, 2007 +* +**************************************************************************** +* HISTORY +* +* $Id: optlist.h,v 1.1.1.2 2007/09/04 04:45:42 michael Exp $ +* $Log: optlist.h,v $ +* Revision 1.1.1.2 2007/09/04 04:45:42 michael +* Added FreeOptList. +* +* Revision 1.1.1.1 2007/08/07 05:01:48 michael +* Initial Release +* +**************************************************************************** +* +* OptList: A command line option parsing library +* Copyright (C) 2007 by Michael Dipperstein (mdipper@alumni.engr.ucsb.edu) +* +* This file is part of the OptList library. +* +* OptList is free software; you can redistribute it and/or modify it +* under the terms of the GNU Lesser General Public License as published by +* the Free Software Foundation; either version 3 of the License, or (at +* your option) any later version. +* +* OptList is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser +* General Public License for more details. +* +* You should have received a copy of the GNU Lesser General Public License +* along with this program. If not, see . +* +***************************************************************************/ +#ifndef OPTLIST_H +#define OPTLIST_H + +/*************************************************************************** +* INCLUDED FILES +***************************************************************************/ + +/*************************************************************************** +* MACROS +***************************************************************************/ + +/*************************************************************************** +* CONSTANTS +***************************************************************************/ +#define OL_NOINDEX -1 /* this option has no arguement */ + +/*************************************************************************** +* TYPE DEFINITIONS +***************************************************************************/ +typedef struct option_t +{ + char option; + char *argument; + int argIndex; + struct option_t *next; +} option_t; + +/*************************************************************************** +* PROTOTYPES +***************************************************************************/ +option_t *GetOptList(int argc, char *const argv[], char *const options); +void FreeOptList(option_t *list); + +#endif /* ndef OPTLIST_H */ -- cgit v1.2.3