summaryrefslogtreecommitdiff
path: root/test/compression
diff options
context:
space:
mode:
authorGravatar xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e>2008-08-09 08:06:33 +0000
committerGravatar xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e>2008-08-09 08:06:33 +0000
commit285f5bec5bb03d4e825e5d866e94008088dd6155 (patch)
tree9df69ded9ed4f4049e0b3887fdd99fcdf3b1746f /test/compression
parenta83f0c1710cc5143dd885e84c94e14f7d3216f93 (diff)
Ajout nouveaux tests
git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@708 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e
Diffstat (limited to 'test/compression')
-rw-r--r--test/compression/.depend15
-rw-r--r--test/compression/Makefile59
-rwxr-xr-xtest/compression/arcode.c926
-rwxr-xr-xtest/compression/arcode.h69
-rwxr-xr-xtest/compression/armain.c266
-rw-r--r--test/compression/bitfile.c1043
-rw-r--r--test/compression/bitfile.h121
-rw-r--r--test/compression/lzdecode.c282
-rw-r--r--test/compression/lzencode.c300
-rw-r--r--test/compression/lzhash.c358
-rw-r--r--test/compression/lzlocal.h123
-rw-r--r--test/compression/lzss.h96
-rw-r--r--test/compression/lzssmain.c278
-rw-r--r--test/compression/lzvars.c76
-rw-r--r--test/compression/lzw.h70
-rw-r--r--test/compression/lzwdecode.c306
-rw-r--r--test/compression/lzwencode.c465
-rw-r--r--test/compression/lzwmain.c260
-rw-r--r--test/compression/optlist.c228
-rw-r--r--test/compression/optlist.h74
20 files changed, 5415 insertions, 0 deletions
diff --git a/test/compression/.depend b/test/compression/.depend
new file mode 100644
index 0000000..e0dcdae
--- /dev/null
+++ b/test/compression/.depend
@@ -0,0 +1,15 @@
+arcode.o: arcode.c arcode.h bitfile.h
+arcode.light.o: arcode.light.c
+armain.o: armain.c optlist.h arcode.h
+bitfile.o: bitfile.c bitfile.h
+bitfile.light.o: bitfile.light.c
+lzdecode.o: lzdecode.c lzlocal.h bitfile.h
+lzencode.o: lzencode.c lzlocal.h bitfile.h
+lzhash.o: lzhash.c lzlocal.h
+lzssmain.o: lzssmain.c lzss.h optlist.h
+lzvars.o: lzvars.c lzlocal.h
+lzwdecode.o: lzwdecode.c lzw.h bitfile.h
+lzwencode.o: lzwencode.c lzw.h bitfile.h
+lzwmain.o: lzwmain.c optlist.h lzw.h
+optlist.o: optlist.c optlist.h
+optlist.light.o: optlist.light.c
diff --git a/test/compression/Makefile b/test/compression/Makefile
new file mode 100644
index 0000000..ba83c87
--- /dev/null
+++ b/test/compression/Makefile
@@ -0,0 +1,59 @@
+CC=../../ccomp
+CFLAGS=-U__GNUC__ -stdlib ../../runtime -dclight -dasm
+LIBS=
+TIME=xtime -o /dev/null -mintime 1.0
+
+EXE=arcode lzw lzss
+
+COMMON_OBJS=optlist.o bitfile.o
+
+all: $(EXE)
+
+ARCODE_OBJS=$(COMMON_OBJS) arcode.o armain.o
+
+arcode: $(ARCODE_OBJS)
+ $(CC) $(CFLAGS) -o $@ $(ARCODE_OBJS) $(LIBS)
+
+LZW_OBJS=$(COMMON_OBJS) lzwencode.o lzwdecode.o lzwmain.o
+
+lzw: $(LZW_OBJS)
+ $(CC) $(CFLAGS) -o $@ $(LZW_OBJS) $(LIBS)
+
+LZSS_OBJS=$(COMMON_OBJS) lzvars.o lzhash.o lzencode.o lzdecode.o lzssmain.o
+
+lzss: $(LZSS_OBJS)
+ $(CC) $(CFLAGS) -o $@ $(LZSS_OBJS) $(LIBS)
+
+TESTFILE=/mach_kernel
+TESTCOMPR=/tmp/testcompr.out
+TESTEXPND=/tmp/testexpnd.out
+
+test:
+ rm -f $(TESTCOMPR) $(TESTEXPND)
+ @for i in $(EXE); do \
+ echo "$$i: compression..."; \
+ ./$$i -c -i $(TESTFILE) -o $(TESTCOMPR); \
+ echo "$$i: decompression..."; \
+ ./$$i -d -i $(TESTCOMPR) -o $(TESTEXPND); \
+ if cmp $(TESTFILE) $(TESTEXPND); \
+ then echo "$$i: passed"; \
+ else echo "$$i: FAILED"; \
+ fi; \
+ done
+ rm -f $(TESTCOMPR) $(TESTEXPND)
+
+bench:
+ rm -f $(TESTCOMPR)
+ @for i in $(EXE); do \
+ echo -n "$$i "; \
+ $(TIME) sh -c "./$$i -c -i $(TESTFILE) -o $(TESTCOMPR) && ./$$i -d -i $(TESTCOMPR) -o /dev/null"; \
+ done
+ rm -f $(TESTCOMPR)
+
+include .depend
+
+clean:
+ rm -f *.o *.light.c *.s $(EXE)
+
+depend:
+ gcc -MM *.c > .depend
diff --git a/test/compression/arcode.c b/test/compression/arcode.c
new file mode 100755
index 0000000..f915cc2
--- /dev/null
+++ b/test/compression/arcode.c
@@ -0,0 +1,926 @@
+/***************************************************************************
+* Arithmetic Encoding and Decoding Library
+*
+* File : arcode.c
+* Purpose : Use arithmetic coding to compress/decompress file streams
+* Author : Michael Dipperstein
+* Date : April 2, 2004
+*
+****************************************************************************
+* UPDATES
+*
+* $Id: arcode.c,v 1.5 2007/09/08 15:47:02 michael Exp $
+* $Log: arcode.c,v $
+* Revision 1.5 2007/09/08 15:47:02 michael
+* Changes required for LGPL v3.
+*
+* Revision 1.4 2006/03/02 06:43:37 michael
+* Expanded tabs
+*
+* Revision 1.3 2006/01/12 07:39:24 michael
+* Use BitFileGetBitsIntBit and FilePutBitsInt for reading and writing the
+* header. This makes the code a little cleaner, but the new header is not
+* compatible with the old header.
+*
+* Revision 1.2 2004/08/13 13:10:27 michael
+* Add support for adaptive encoding
+*
+* Use binary search when trying to find decoded symbol
+*
+* Revision 1.1.1.1 2004/04/04 14:54:13 michael
+* Initial version
+*
+****************************************************************************
+*
+* Arcode: An ANSI C Arithmetic Encoding/Decoding Routines
+* Copyright (C) 2004, 2006-2007 by Michael Dipperstein (mdipper@cs.ucsb.edu)
+*
+* This file is part of the arcode library.
+*
+* The arcode library is free software; you can redistribute it and/or
+* modify it under the terms of the GNU Lesser General Public License as
+* published by the Free Software Foundation; either version 3 of the
+* License, or (at your option) any later version.
+*
+* The arcode library is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
+* General Public License for more details.
+*
+* You should have received a copy of the GNU Lesser General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*
+***************************************************************************/
+
+/***************************************************************************
+* INCLUDED FILES
+***************************************************************************/
+#include <stdio.h>
+#include <stdlib.h>
+#include <limits.h>
+#include "arcode.h"
+#include "bitfile.h"
+
+/* compile-time options */
+#undef BUILD_DEBUG_OUTPUT /* debugging output */
+
+#if !(USHRT_MAX < ULONG_MAX)
+#error "Implementation requires USHRT_MAX < ULONG_MAX"
+#endif
+
+/***************************************************************************
+* TYPE DEFINITIONS
+***************************************************************************/
+typedef unsigned short probability_t; /* probability count type */
+
+/***************************************************************************
+* CONSTANTS
+***************************************************************************/
+#define EOF_CHAR (UCHAR_MAX + 1)
+
+/* number of bits used to compute running code values */
+#define PRECISION (8 * sizeof(probability_t))
+
+/* 2 bits less than precision. keeps lower and upper bounds from crossing. */
+#define MAX_PROBABILITY (1 << (PRECISION - 2))
+
+/***************************************************************************
+* MACROS
+***************************************************************************/
+/* set bit x to 1 in probability_t. Bit 0 is MSB */
+#define MASK_BIT(x) (probability_t)(1 << (PRECISION - (1 + (x))))
+
+/* indices for a symbol's lower and upper cumulative probability ranges */
+#define LOWER(c) (c)
+#define UPPER(c) ((c) + 1)
+
+/***************************************************************************
+* GLOBAL VARIABLES
+***************************************************************************/
+probability_t lower; /* lower bound of current code range */
+probability_t upper; /* upper bound of current code range */
+
+probability_t code; /* current MSBs of encode input stream */
+
+unsigned char underflowBits; /* current underflow bit count */
+
+/* probability ranges for each symbol: [ranges[LOWER(c)], ranges[UPPER(c)]) */
+probability_t ranges[UPPER(EOF_CHAR) + 1];
+probability_t cumulativeProb; /* cumulative probability of all ranges */
+
+/***************************************************************************
+* PROTOTYPES
+***************************************************************************/
+/* read write file headers */
+void WriteHeader(bit_file_t *bfpOut);
+int ReadHeader(bit_file_t *bfpIn);
+
+/* applies symbol's ranges to current upper and lower range bounds */
+void ApplySymbolRange(int symbol, char staticModel);
+
+/* routines for encoding*/
+void WriteEncodedBits(bit_file_t *bfpOut);
+void WriteRemaining(bit_file_t *bfpOut);
+int BuildProbabilityRangeList(FILE *fpIn);
+void InitializeAdaptiveProbabilityRangeList(void);
+
+/* routines for decoding */
+void InitializeDecoder(bit_file_t *bfpOut, char staticModel);
+probability_t GetUnscaledCode(void);
+int GetSymbolFromProbability(probability_t probability);
+void ReadEncodedBits(bit_file_t *bfpIn);
+
+/***************************************************************************
+* FUNCTIONS
+***************************************************************************/
+
+/***************************************************************************
+* Function : ArEncodeFile
+* Description: This routine generates a list of arithmetic code ranges for
+* a file and then uses them to write out an encoded version
+* of that file.
+* Parameters : inFile - Name of file to encode
+* outFile - Name of file to write encoded output to
+* staticModel - TRUE if encoding with a static model
+* Effects : File is arithmetically encoded
+* Returned : TRUE for success, otherwise FALSE.
+***************************************************************************/
+int ArEncodeFile(char *inFile, char *outFile, char staticModel)
+{
+ int c;
+ FILE *fpIn; /* uncoded input */
+ bit_file_t *bfpOut; /* encoded output */
+
+ /* open input and output files */
+ if ((fpIn = fopen(inFile, "rb")) == NULL)
+ {
+ perror(inFile);
+ return FALSE;
+ }
+
+ if (outFile == NULL)
+ {
+ bfpOut = MakeBitFile(stdout, BF_WRITE);
+ }
+ else
+ {
+ if ((bfpOut = BitFileOpen(outFile, BF_WRITE)) == NULL)
+ {
+ fclose(fpIn);
+ perror(outFile);
+ return FALSE;
+ }
+ }
+
+ if (staticModel)
+ {
+ /* count symbols in file and come up with a list of probability ranges */
+ if (!BuildProbabilityRangeList(fpIn))
+ {
+ fclose(fpIn);
+ BitFileClose(bfpOut);
+ fprintf(stderr, "Error determining frequency ranges.\n");
+ return FALSE;
+ }
+
+ rewind(fpIn);
+
+ /* write information required to decode file to encoded file */
+ WriteHeader(bfpOut);
+ }
+ else
+ {
+ /* initialize probability ranges asumming uniform distribution */
+ InitializeAdaptiveProbabilityRangeList();
+ }
+
+ /* initialize coder start with full probability range [0%, 100%) */
+ lower = 0;
+ upper = ~0; /* all ones */
+ underflowBits = 0;
+
+ /* encode symbols one at a time */
+ while ((c = fgetc(fpIn)) != EOF)
+ {
+ ApplySymbolRange(c, staticModel);
+ WriteEncodedBits(bfpOut);
+ }
+
+ fclose(fpIn);
+
+ ApplySymbolRange(EOF_CHAR, staticModel); /* encode an EOF */
+ WriteEncodedBits(bfpOut);
+
+ WriteRemaining(bfpOut); /* write out least significant bits */
+ BitFileClose(bfpOut);
+
+ return TRUE;
+}
+
+/***************************************************************************
+* Function : SymbolCountToProbabilityRanges
+* Description: This routine converts the ranges array containing only
+* symbol counts to an array containing the upper and lower
+* probability ranges for each symbol.
+* Parameters : None
+* Effects : ranges array containing symbol counts in the upper field
+* for each symbol is converted to a list of upper and lower
+* probability bounds for each symbol.
+* Returned : None
+***************************************************************************/
+void SymbolCountToProbabilityRanges(void)
+{
+ int c;
+
+ ranges[0] = 0; /* absolute lower bound is 0 */
+ ranges[UPPER(EOF_CHAR)] = 1; /* add 1 EOF character */
+ cumulativeProb++;
+
+ /* assign upper and lower probability ranges */
+ for (c = 1; c <= UPPER(EOF_CHAR); c++)
+ {
+ ranges[c] += ranges[c - 1];
+ }
+
+#ifdef BUILD_DEBUG_OUTPUT
+ /* dump list of ranges */
+ for (c = 0; c < UPPER(EOF_CHAR); c++)
+ {
+ printf("%02X\t%d\t%d\n", c, ranges[LOWER(c)], ranges[UPPER(c)]);
+ }
+#endif
+
+ return;
+}
+
+/***************************************************************************
+* Function : BuildProbabilityRangeList
+* Description: This routine reads the input file and builds the global
+* list of upper and lower probability ranges for each
+* symbol.
+* Parameters : fpIn - file to build range list for
+* Effects : ranges array is made to contain probability ranges for
+* each symbol.
+* Returned : TRUE for success, otherwise FALSE.
+***************************************************************************/
+int BuildProbabilityRangeList(FILE *fpIn)
+{
+ int c;
+
+ /***********************************************************************
+ * unsigned long is used to hold the largest counts we can have without
+ * any rescaling. Rescaling may take place before probability ranges
+ * are computed.
+ ***********************************************************************/
+ unsigned long countArray[EOF_CHAR];
+ unsigned long totalCount = 0;
+ unsigned long rescaleValue;
+
+ if (fpIn == NULL)
+ {
+ return FALSE;
+ }
+
+ /* start with no symbols counted */
+ for (c = 0; c < EOF_CHAR; c++)
+ {
+ countArray[c] = 0;
+ }
+
+ while ((c = fgetc(fpIn)) != EOF)
+ {
+ if (totalCount == ULONG_MAX)
+ {
+ fprintf(stderr, "Error: file too large\n");
+ return FALSE;
+ }
+
+ countArray[c]++;
+ totalCount++;
+ }
+
+ /* rescale counts to be < MAX_PROBABILITY */
+ if (totalCount >= MAX_PROBABILITY)
+ {
+ rescaleValue = (totalCount / MAX_PROBABILITY) + 1;
+
+ for (c = 0; c < EOF_CHAR; c++)
+ {
+ if (countArray[c] > rescaleValue)
+ {
+ countArray[c] /= rescaleValue;
+ }
+ else if (countArray[c] != 0)
+ {
+ countArray[c] = 1;
+ }
+ }
+ }
+
+ /* copy scaled symbol counts to range list */
+ ranges[0] = 0;
+ cumulativeProb = 0;
+ for (c = 0; c < EOF_CHAR; c++)
+ {
+ ranges[UPPER(c)] = countArray[c];
+ cumulativeProb += countArray[c];
+ }
+
+ /* convert counts to a range of probabilities */
+ SymbolCountToProbabilityRanges();
+ return TRUE;
+}
+
+/***************************************************************************
+* Function : WriteHeader
+* Description: This function writes each symbol contained in the encoded
+* file as well as its rescaled number of occurrences. A
+* decoding algorithm may use these numbers to reconstruct
+* the probability range list used to encode the file.
+* Parameters : bfpOut - pointer to open binary file to write to.
+* Effects : Symbol values and symbol counts are written to a file.
+* Returned : None
+***************************************************************************/
+void WriteHeader(bit_file_t *bfpOut)
+{
+ int c;
+ probability_t previous = 0; /* symbol count so far */
+
+#if BUILD_DEBUG_OUTPUT
+ printf("HEADER:\n");
+#endif
+
+ for(c = 0; c <= (EOF_CHAR - 1); c++)
+ {
+ if (ranges[UPPER(c)] > previous)
+ {
+ /* some of these symbols will be encoded */
+ BitFilePutChar((char)c, bfpOut);
+ previous = (ranges[UPPER(c)] - previous); /* symbol count */
+
+#if BUILD_DEBUG_OUTPUT
+ printf("%02X\t%d\n", c, previous);
+#endif
+
+ /* write out PRECISION - 2 bit count */
+ BitFilePutBitsInt(bfpOut, &previous, (PRECISION - 2),
+ sizeof(probability_t));
+
+ /* current upper range is previous for the next character */
+ previous = ranges[UPPER(c)];
+ }
+ }
+
+ /* now write end of table (zero count) */
+ BitFilePutChar(0x00, bfpOut);
+ previous = 0;
+ BitFilePutBits(bfpOut, (void *)&previous, PRECISION - 2);
+}
+
+/***************************************************************************
+* Function : InitializeAdaptiveProbabilityRangeList
+* Description: This routine builds the initial global list of upper and
+* lower probability ranges for each symbol. This routine
+* is used by both adaptive encoding and decoding.
+* Currently it provides a uniform symbol distribution.
+* Other distributions might be better suited for known data
+* types (such as English text).
+* Parameters : NONE
+* Effects : ranges array is made to contain initial probability ranges
+* for each symbol.
+* Returned : NONE
+***************************************************************************/
+void InitializeAdaptiveProbabilityRangeList(void)
+{
+ int c;
+
+ cumulativeProb = 0;
+ ranges[0] = 0; /* absolute lower range */
+
+ /* assign upper and lower probability ranges assuming */
+ for (c = 1; c <= UPPER(EOF_CHAR); c++)
+ {
+ ranges[c] = ranges[c - 1] + 1;
+ cumulativeProb++;
+ }
+
+#ifdef BUILD_DEBUG_OUTPUT
+ /* dump list of ranges */
+ for (c = 0; c < UPPER(EOF_CHAR); c++)
+ {
+ printf("%02X\t%d\t%d\n", c, ranges[LOWER(c)], ranges[UPPER(c)]);
+ }
+#endif
+
+ return;
+}
+
+/***************************************************************************
+* Function : ApplySymbolRange
+* Description: This function is used for both encoding and decoding. It
+* applies the range restrictions of a new symbol to the
+* current upper and lower range bounds of an encoded stream.
+* If an adaptive model is being used, the probability range
+* list will be updated after the effect of the symbol is
+* applied.
+* Parameters : symbol - The symbol to be added to the current code range
+* staticModel - TRUE if encoding/decoding with a static
+* model.
+* Effects : The current upper and lower range bounds are adjusted to
+* include the range effects of adding another symbol to the
+* encoded stream. If an adaptive model is being used, the
+* probability range list will be updated.
+* Returned : None
+***************************************************************************/
+void ApplySymbolRange(int symbol, char staticModel)
+{
+ unsigned long range; /* must be able to hold max upper + 1 */
+ unsigned long rescaled; /* range rescaled for range of new symbol */
+ /* must hold range * max upper */
+
+ /* for updating dynamic models */
+ int i;
+ probability_t original; /* range value prior to rescale */
+ probability_t delta; /* range for individual symbol */
+
+ /***********************************************************************
+ * Calculate new upper and lower ranges. Since the new upper range is
+ * dependant of the old lower range, compute the upper range first.
+ ***********************************************************************/
+ range = (unsigned long)(upper - lower) + 1; /* current range */
+
+ /* scale upper range of the symbol being coded */
+ rescaled = (unsigned long)ranges[UPPER(symbol)] * range;
+ rescaled /= (unsigned long)cumulativeProb;
+
+ /* new upper = old lower + rescaled new upper - 1*/
+ upper = lower + (probability_t)rescaled - 1;
+
+ /* scale lower range of the symbol being coded */
+ rescaled = (unsigned long)ranges[LOWER(symbol)] * range;
+ rescaled /= (unsigned long)cumulativeProb;
+
+ /* new lower = old lower + rescaled new upper */
+ lower = lower + (probability_t)rescaled;
+
+ if (!staticModel)
+ {
+ /* add new symbol to model */
+ cumulativeProb++;
+ for (i = UPPER(symbol); i <= UPPER(EOF_CHAR); i++)
+ {
+ ranges[i] += 1;
+ }
+
+ /* half current values if cumulativeProb is too large */
+ if (cumulativeProb >= MAX_PROBABILITY)
+ {
+ cumulativeProb = 0;
+ original = 0;
+
+ for (i = 1; i <= UPPER(EOF_CHAR); i++)
+ {
+ delta = ranges[i] - original;
+ if (delta <= 2)
+ {
+ /* prevent probability from being 0 */
+ original = ranges[i];
+ ranges[i] = ranges[i - 1] + 1;
+ }
+ else
+ {
+ original = ranges[i];
+ ranges[i] = ranges[i - 1] + (delta / 2);
+ }
+
+ cumulativeProb += (ranges[i] - ranges[i - 1]);
+ }
+ }
+ }
+
+#ifdef BUILD_DEBUG_OUTPUT
+ if (lower > upper)
+ {
+ /* compile this in when testing new models. */
+ fprintf(stderr, "Panic: lower (%X)> upper (%X)\n", lower, upper);
+ }
+#endif
+}
+
+/***************************************************************************
+* Function : WriteEncodedBits
+* Description: This function attempts to shift out as many code bits as
+* possible, writing the shifted bits to the encoded output
+* file. Only bits that will be unchanged when additional
+* symbols are encoded may be written out.
+*
+* If the n most significant bits of the lower and upper range
+* bounds match, they will not be changed when additional
+* symbols are encoded, so they may be shifted out.
+*
+* Adjustments are also made to prevent possible underflows
+* that occur when the upper and lower ranges are so close
+* that encoding another symbol won't change their values.
+* Parameters : bfpOut - pointer to open binary file to write to.
+* Effects : The upper and lower code bounds are adjusted so that they
+* only contain only bits that may be affected by the
+* addition of a new symbol to the encoded stream.
+* Returned : None
+***************************************************************************/
+void WriteEncodedBits(bit_file_t *bfpOut)
+{
+ for (;;)
+ {
+ if ((upper & MASK_BIT(0)) == (lower & MASK_BIT(0)))
+ {
+ /* MSBs match, write them to output file */
+ BitFilePutBit((upper & MASK_BIT(0)) != 0, bfpOut);
+
+ /* we can write out underflow bits too */
+ while (underflowBits > 0)
+ {
+ BitFilePutBit((upper & MASK_BIT(0)) == 0, bfpOut);
+ underflowBits--;
+ }
+ }
+ else if ((lower & MASK_BIT(1)) && !(upper & MASK_BIT(1)))
+ {
+ /***************************************************************
+ * Possible underflow condition: neither MSBs nor second MSBs
+ * match. It must be the case that lower and upper have MSBs of
+ * 01 and 10. Remove 2nd MSB from lower and upper.
+ ***************************************************************/
+ underflowBits += 1;
+ lower &= ~(MASK_BIT(0) | MASK_BIT(1));
+ upper |= MASK_BIT(1);
+
+ /***************************************************************
+ * The shifts below make the rest of the bit removal work. If
+ * you don't believe me try it yourself.
+ ***************************************************************/
+ }
+ else
+ {
+ /* nothing left to do */
+ return ;
+ }
+
+ /*******************************************************************
+ * Shift out old MSB and shift in new LSB. Remember that lower has
+ * all 0s beyond it's end and upper has all 1s beyond it's end.
+ *******************************************************************/
+ lower <<= 1;
+ upper <<= 1;
+ upper |= 1;
+ }
+}
+
+/***************************************************************************
+* Function : WriteRemaining
+* Description: This function writes out all remaining significant bits
+* in the upper and lower ranges and the underflow bits once
+* the last symbol has been encoded.
+* Parameters : bfpOut - pointer to open binary file to write to.
+* Effects : Remaining significant range bits are written to the output
+* file.
+* Returned : None
+***************************************************************************/
+void WriteRemaining(bit_file_t *bfpOut)
+{
+ BitFilePutBit((lower & MASK_BIT(1)) != 0, bfpOut);
+
+ /* write out any unwritten underflow bits */
+ for (underflowBits++; underflowBits > 0; underflowBits--)
+ {
+ BitFilePutBit((lower & MASK_BIT(1)) == 0, bfpOut);
+ }
+}
+
+/***************************************************************************
+* Function : ArDecodeFile
+* Description: This routine opens an arithmetically encoded file, reads
+* it's header, and builds a list of probability ranges which
+* it then uses to decode the rest of the file.
+* Parameters : inFile - Name of file to decode
+* outFile - Name of file to write decoded output to
+* staticModel - TRUE if decoding with a static model
+* Effects : Encoded file is decoded
+* Returned : TRUE for success, otherwise FALSE.
+***************************************************************************/
+int ArDecodeFile(char *inFile, char *outFile, char staticModel)
+{
+ int c;
+ probability_t unscaled;
+ bit_file_t *bfpIn;
+ FILE *fpOut;
+
+ /* open input and output files */
+ if ((bfpIn = BitFileOpen(inFile, BF_READ)) == NULL)
+ {
+ perror(inFile);
+ return FALSE;
+ }
+
+ if (outFile == NULL)
+ {
+ fpOut = stdout;
+ }
+ else
+ {
+ if ((fpOut = fopen(outFile, "wb")) == NULL)
+ {
+ BitFileClose(bfpIn);
+ perror(outFile);
+ return FALSE;
+ }
+ }
+
+ if (staticModel)
+ {
+ /* build probability ranges from header in file */
+ if (ReadHeader(bfpIn) == FALSE)
+ {
+ BitFileClose(bfpIn);
+ fclose(fpOut);
+ return FALSE;
+ }
+ }
+
+ /* read start of code and initialize bounds, and adaptive ranges */
+ InitializeDecoder(bfpIn, staticModel);
+
+ /* decode one symbol at a time */
+ for (;;)
+ {
+ /* get the unscaled probability of the current symbol */
+ unscaled = GetUnscaledCode();
+
+ /* figure out which symbol has the above probability */
+ if((c = GetSymbolFromProbability(unscaled)) == -1)
+ {
+ /* error: unknown symbol */
+ break;
+ }
+
+ if (c == EOF_CHAR)
+ {
+ /* no more symbols */
+ break;
+ }
+
+ fputc((char)c, fpOut);
+
+ /* factor out symbol */
+ ApplySymbolRange(c, staticModel);
+ ReadEncodedBits(bfpIn);
+ }
+
+ fclose(fpOut);
+ BitFileClose(bfpIn);
+
+ return TRUE;
+}
+
+/****************************************************************************
+* Function : ReadHeader
+* Description: This function reads the header information stored by
+* WriteHeader. The header can then be used to build a
+* probability range list matching the list that was used to
+* encode the file.
+* Parameters : bfpIn - file to read from
+* Effects : Probability range list is built.
+* Returned : TRUE for success, otherwise FALSE
+****************************************************************************/
+int ReadHeader(bit_file_t *bfpIn)
+{
+ int c;
+ probability_t count;
+
+#if BUILD_DEBUG_OUTPUT
+ printf("HEADER:\n");
+#endif
+
+ cumulativeProb = 0;
+
+ for (c = 0; c <= UPPER(EOF_CHAR); c++)
+ {
+ ranges[UPPER(c)] = 0;
+ }
+
+ /* read [character, probability] sets */
+ for (;;)
+ {
+ c = BitFileGetChar(bfpIn);
+ count = 0;
+
+ /* read (PRECISION - 2) bit count */
+ if (BitFileGetBitsInt(bfpIn, &count, (PRECISION - 2),
+ sizeof(probability_t)) == EOF)
+ {
+ /* premature EOF */
+ fprintf(stderr, "Error: unexpected EOF\n");
+ return FALSE;
+ }
+
+#if BUILD_DEBUG_OUTPUT
+ printf("%02X\t%d\n", c, count);
+#endif
+
+ if (count == 0)
+ {
+ /* 0 count means end of header */
+ break;
+ }
+
+ ranges[UPPER(c)] = count;
+ cumulativeProb += count;
+ }
+
+ /* convert counts to range list */
+ SymbolCountToProbabilityRanges();
+ return TRUE;
+}
+
+/****************************************************************************
+* Function : InitializeDecoder
+* Description: This function starts the upper and lower ranges at their
+* max/min values and reads in the most significant encoded
+* bits.
+* Parameters : bfpIn - file to read from
+* staticModel - TRUE if decoding using a staticModel
+* Effects : upper, lower, and code are initialized. The probability
+* range list will also be initialized if an adaptive model
+* will be used.
+* Returned : TRUE for success, otherwise FALSE
+****************************************************************************/
+void InitializeDecoder(bit_file_t *bfpIn, char staticModel)
+{
+ int i;
+
+ if (!staticModel)
+ {
+ /* initialize ranges for adaptive model */
+ InitializeAdaptiveProbabilityRangeList();
+ }
+
+ code = 0;
+
+ /* read PERCISION MSBs of code one bit at a time */
+ for (i = 0; i < PRECISION; i++)
+ {
+ code <<= 1;
+
+ /* treat EOF like 0 */
+ if(BitFileGetBit(bfpIn) == 1)
+ {
+ code |= 1;
+ }
+ }
+
+ /* start with full probability range [0%, 100%) */
+ lower = 0;
+ upper = ~0; /* all ones */
+}
+
+/****************************************************************************
+* Function : GetUnscaledCode
+* Description: This function undoes the scaling that ApplySymbolRange
+* performed before bits were shifted out. The value returned
+* is the probability of the encoded symbol.
+* Parameters : None
+* Effects : None
+* Returned : The probability of the current symbol
+****************************************************************************/
+probability_t GetUnscaledCode(void)
+{
+ unsigned long range; /* must be able to hold max upper + 1 */
+ unsigned long unscaled;
+
+ range = (unsigned long)(upper - lower) + 1;
+
+ /* reverse the scaling operations from ApplySymbolRange */
+ unscaled = (unsigned long)(code - lower) + 1;
+ unscaled = unscaled * (unsigned long)cumulativeProb - 1;
+ unscaled /= range;
+
+ return ((probability_t)unscaled);
+}
+
+/****************************************************************************
+* Function : GetSymbolFromProbability
+* Description: Given a probability, this function will return the symbol
+* whose range includes that probability. Symbol is found
+* binary search on probability ranges.
+* Parameters : probability - probability of symbol.
+* Effects : None
+* Returned : -1 for failure, otherwise encoded symbol
+****************************************************************************/
+int GetSymbolFromProbability(probability_t probability)
+{
+ int first, last, middle; /* indicies for binary search */
+
+ first = 0;
+ last = UPPER(EOF_CHAR);
+ middle = last / 2;
+
+ /* binary search */
+ while (last >= first)
+ {
+ if (probability < ranges[LOWER(middle)])
+ {
+ /* lower bound is higher than probability */
+ last = middle - 1;
+ middle = first + ((last - first) / 2);
+ continue;
+ }
+
+ if (probability >= ranges[UPPER(middle)])
+ {
+ /* upper bound is lower than probability */
+ first = middle + 1;
+ middle = first + ((last - first) / 2);
+ continue;
+ }
+
+ /* we must have found the right value */
+ return middle;
+ }
+
+ /* error: none of the ranges include the probability */
+ fprintf(stderr, "Unknown Symbol: %d (max: %d)\n", probability,
+ ranges[UPPER(EOF_CHAR)]);
+ return -1;
+}
+
+/***************************************************************************
+* Function : ReadEncodedBits
+* Description: This function attempts to shift out as many code bits as
+* possible, as bits are shifted out the coded input is
+* populated with bits from the encoded file. Only bits
+* that will be unchanged when additional symbols are decoded
+* may be shifted out.
+*
+* If the n most significant bits of the lower and upper range
+* bounds match, they will not be changed when additional
+* symbols are decoded, so they may be shifted out.
+*
+* Adjustments are also made to prevent possible underflows
+* that occur when the upper and lower ranges are so close
+* that decoding another symbol won't change their values.
+* Parameters : bfpOut - pointer to open binary file to read from.
+* Effects : The upper and lower code bounds are adjusted so that they
+* only contain only bits that will be affected by the
+* addition of a new symbol. Replacements are read from the
+* encoded stream.
+* Returned : None
+***************************************************************************/
+void ReadEncodedBits(bit_file_t *bfpIn)
+{
+ int nextBit; /* next bit from encoded input */
+
+ for (;;)
+ {
+ if (( upper & MASK_BIT(0)) == (lower & MASK_BIT(0)))
+ {
+ /* MSBs match, allow them to be shifted out*/
+ }
+ else if ((lower & MASK_BIT(1)) && !(upper & MASK_BIT(1)))
+ {
+ /***************************************************************
+ * Possible underflow condition: neither MSBs nor second MSBs
+ * match. It must be the case that lower and upper have MSBs of
+ * 01 and 10. Remove 2nd MSB from lower and upper.
+ ***************************************************************/
+ lower &= ~(MASK_BIT(0) | MASK_BIT(1));
+ upper |= MASK_BIT(1);
+ code ^= MASK_BIT(1);
+
+ /* the shifts below make the rest of the bit removal work */
+ }
+ else
+ {
+ /* nothing to shift out */
+ return;
+ }
+
+ /*******************************************************************
+ * Shift out old MSB and shift in new LSB. Remember that lower has
+ * all 0s beyond it's end and upper has all 1s beyond it's end.
+ *******************************************************************/
+ lower <<= 1;
+ upper <<= 1;
+ upper |= 1;
+ code <<= 1;
+
+ if ((nextBit = BitFileGetBit(bfpIn)) == EOF)
+ {
+ /* either all bits are shifted out or error occurred */
+ }
+ else
+ {
+ code |= nextBit; /* add next encoded bit to code */
+ }
+ }
+
+ return;
+}
diff --git a/test/compression/arcode.h b/test/compression/arcode.h
new file mode 100755
index 0000000..aac3208
--- /dev/null
+++ b/test/compression/arcode.h
@@ -0,0 +1,69 @@
+/***************************************************************************
+* Header for Arithmetic Encoding and Decoding Library
+*
+* File : arcode.h
+* Purpose : Provides prototypes for functions that use arithmetic coding
+* to encode/decode files.
+* Author : Michael Dipperstein
+* Date : April 2, 2004
+*
+****************************************************************************
+* UPDATES
+*
+* $Id: arcode.h,v 1.3 2007/09/08 15:47:02 michael Exp $
+* $Log: arcode.h,v $
+* Revision 1.3 2007/09/08 15:47:02 michael
+* Changes required for LGPL v3.
+*
+* Revision 1.2 2004/08/13 13:09:46 michael
+* Add support for adaptive encoding
+*
+* Revision 1.1.1.1 2004/04/04 14:54:13 michael
+* Initial version
+*
+****************************************************************************
+*
+* Arcode: An ANSI C Arithmetic Encoding/Decoding Routines
+* Copyright (C) 2004, 2006-2007 by Michael Dipperstein (mdipper@cs.ucsb.edu)
+*
+* This file is part of the arcode library.
+*
+* The arcode library is free software; you can redistribute it and/or
+* modify it under the terms of the GNU Lesser General Public License as
+* published by the Free Software Foundation; either version 3 of the
+* License, or (at your option) any later version.
+*
+* The arcode library is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
+* General Public License for more details.
+*
+* You should have received a copy of the GNU Lesser General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*
+***************************************************************************/
+
+#ifndef _ARCODE_H_
+#define _ARCODE_H_
+
+/***************************************************************************
+* CONSTANTS
+***************************************************************************/
+#ifndef FALSE
+#define FALSE 0
+#endif
+
+#ifndef TRUE
+#define TRUE 1
+#endif
+
+/***************************************************************************
+* PROTOTYPES
+***************************************************************************/
+ /* encode inFile */
+int ArEncodeFile(char *inFile, char *outFile, char staticModel);
+
+/* decode inFile*/
+int ArDecodeFile(char *inFile, char *outFile, char staticModel);
+
+#endif /* ndef _ARCODE_H_ */
diff --git a/test/compression/armain.c b/test/compression/armain.c
new file mode 100755
index 0000000..8f37c4f
--- /dev/null
+++ b/test/compression/armain.c
@@ -0,0 +1,266 @@
+/***************************************************************************
+* Sample Program Using Arithmetic Encoding Library
+*
+* File : sample.c
+* Purpose : Demonstrate usage of arithmetic encoding library
+* Author : Michael Dipperstein
+* Date : March 10, 2004
+*
+****************************************************************************
+* UPDATES
+*
+* $Id: sample.c,v 1.3 2007/09/08 15:48:14 michael Exp $
+* $Log: sample.c,v $
+* Revision 1.3 2007/09/08 15:48:14 michael
+* Replace getopt with optlist.
+* Changes required for LGPL v3.
+*
+* Revision 1.2 2004/08/13 13:08:43 michael
+* Add support for adaptive encoding
+*
+* Use executable name in help messages
+*
+* Revision 1.1.1.1 2004/04/04 14:54:13 michael
+* Initial version
+*
+*
+****************************************************************************
+*
+* SAMPLE: Sample usage of the arcode Arithmetic Encoding Library
+* Copyright (C) 2004, 2007 by Michael Dipperstein (mdipper@cs.ucsb.edu)
+*
+* This file is part of the arcode library.
+*
+* The arcode library is free software; you can redistribute it and/or
+* modify it under the terms of the GNU Lesser General Public License as
+* published by the Free Software Foundation; either version 3 of the
+* License, or (at your option) any later version.
+*
+* The arcode library is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
+* General Public License for more details.
+*
+* You should have received a copy of the GNU Lesser General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*
+***************************************************************************/
+
+/***************************************************************************
+* INCLUDED FILES
+***************************************************************************/
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "optlist.h"
+#include "arcode.h"
+
+/***************************************************************************
+* PROTOTYPES
+***************************************************************************/
+char *RemovePath(char *fullPath);
+
+/***************************************************************************
+* FUNCTIONS
+***************************************************************************/
+
+/****************************************************************************
+* Function : main
+* Description: This is the main function for this program, it validates
+* the command line input and, if valid, it will call
+* functions to encode or decode a file using the arithmetic
+* coding algorithm.
+* Parameters : argc - number of parameters
+* argv - parameter list
+* Effects : Encodes/Decodes input file
+* Returned : EXIT_SUCCESS for success, otherwise EXIT_FAILURE.
+****************************************************************************/
+int main(int argc, char *argv[])
+{
+ option_t *optList, *thisOpt;
+ char *inFile, *outFile; /* name of input & output files */
+ char encode; /* encode/decode */
+ char staticModel; /* static/adaptive model*/
+
+ /* initialize data */
+ inFile = NULL;
+ outFile = NULL;
+ encode = TRUE;
+ staticModel = TRUE;
+
+ /* parse command line */
+ optList = GetOptList(argc, argv, "acdi:o:h?");
+ thisOpt = optList;
+
+ while (thisOpt != NULL)
+ {
+ switch(thisOpt->option)
+ {
+ case 'a': /* adaptive model vs. static */
+ staticModel = FALSE;
+ break;
+
+ case 'c': /* compression mode */
+ encode = TRUE;
+ break;
+
+ case 'd': /* decompression mode */
+ encode = FALSE;
+ break;
+
+ case 'i': /* input file name */
+ if (inFile != NULL)
+ {
+ fprintf(stderr, "Multiple input files not allowed.\n");
+ free(inFile);
+
+ if (outFile != NULL)
+ {
+ free(outFile);
+ }
+
+ FreeOptList(optList);
+ exit(EXIT_FAILURE);
+ }
+ else if ((inFile =
+ (char *)malloc(strlen(thisOpt->argument) + 1)) == NULL)
+ {
+ perror("Memory allocation");
+
+ if (outFile != NULL)
+ {
+ free(outFile);
+ }
+
+ FreeOptList(optList);
+ exit(EXIT_FAILURE);
+ }
+
+ strcpy(inFile, thisOpt->argument);
+ break;
+
+ case 'o': /* output file name */
+ if (outFile != NULL)
+ {
+ fprintf(stderr, "Multiple output files not allowed.\n");
+ free(outFile);
+
+ if (inFile != NULL)
+ {
+ free(inFile);
+ }
+
+ FreeOptList(optList);
+ exit(EXIT_FAILURE);
+ }
+ else if ((outFile =
+ (char *)malloc(strlen(thisOpt->argument) + 1)) == NULL)
+ {
+ perror("Memory allocation");
+
+ if (inFile != NULL)
+ {
+ free(inFile);
+ }
+
+ FreeOptList(optList);
+ exit(EXIT_FAILURE);
+ }
+
+ strcpy(outFile, thisOpt->argument);
+ break;
+
+ case 'h':
+ case '?':
+ printf("Usage: %s <options>\n\n", RemovePath(argv[0]));
+ printf("options:\n");
+ printf(" -c : Encode input file to output file.\n");
+ printf(" -d : Decode input file to output file.\n");
+ printf(" -i <filename> : Name of input file.\n");
+ printf(" -o <filename> : Name of output file.\n");
+ printf(" -a : Use adaptive model instead of static.\n");
+ printf(" -h | ? : Print out command line options.\n\n");
+ printf("Default: %s -c\n", RemovePath(argv[0]));
+
+ FreeOptList(optList);
+ return(EXIT_SUCCESS);
+ }
+
+ optList = thisOpt->next;
+ free(thisOpt);
+ thisOpt = optList;
+ }
+
+ /* validate command line */
+ if (inFile == NULL)
+ {
+ fprintf(stderr, "Input file must be provided\n");
+ fprintf(stderr, "Enter \"%s -?\" for help.\n", RemovePath(argv[0]));
+
+ if (outFile != NULL)
+ {
+ free(outFile);
+ }
+
+ exit (EXIT_FAILURE);
+ }
+ else if (outFile == NULL)
+ {
+ fprintf(stderr, "Output file must be provided\n");
+ fprintf(stderr, "Enter \"%s -?\" for help.\n", RemovePath(argv[0]));
+
+ if (inFile != NULL)
+ {
+ free(inFile);
+ }
+
+ exit (EXIT_FAILURE);
+ }
+
+ /* we have valid parameters encode or decode */
+ if (encode)
+ {
+ ArEncodeFile(inFile, outFile, staticModel);
+ }
+ else
+ {
+ ArDecodeFile(inFile, outFile, staticModel);
+ }
+
+ free(inFile);
+ free(outFile);
+ return EXIT_SUCCESS;
+}
+
+/****************************************************************************
+* Function : RemovePath
+* Description: This is function accepts a pointer to the name of a file
+* along with path information and returns a pointer to the
+* character that is not part of the path.
+* Parameters : fullPath - pointer to an array of characters containing
+* a file name and possible path modifiers.
+* Effects : None
+* Returned : Returns a pointer to the first character after any path
+* information.
+****************************************************************************/
+char *RemovePath(char *fullPath)
+{
+ int i;
+ char *start, *tmp; /* start of file name */
+ const char delim[3] = {'\\', '/', ':'}; /* path deliminators */
+
+ start = fullPath;
+
+ /* find the first character after all file path delimiters */
+ for (i = 0; i < 3; i++)
+ {
+ tmp = strrchr(start, delim[i]);
+
+ if (tmp != NULL)
+ {
+ start = tmp + 1;
+ }
+ }
+
+ return start;
+}
diff --git a/test/compression/bitfile.c b/test/compression/bitfile.c
new file mode 100644
index 0000000..7480ce9
--- /dev/null
+++ b/test/compression/bitfile.c
@@ -0,0 +1,1043 @@
+/***************************************************************************
+* Bit Stream File Implementation
+*
+* File : bitfile.c
+* Purpose : This file implements a simple library of I/O functions for
+* files that contain data in sizes that aren't integral bytes.
+* An attempt was made to make the functions in this library
+* analogous to functions provided to manipulate byte streams.
+* The functions contained in this library were created with
+* compression algorithms in mind, but may be suited to other
+* applications.
+* Author : Michael Dipperstein
+* Date : January 9, 2004
+*
+****************************************************************************
+* UPDATES
+*
+* $Id: bitfile.c,v 1.10 2007/08/26 21:53:48 michael Exp $
+* $Log: bitfile.c,v $
+* Revision 1.10 2007/08/26 21:53:48 michael
+* Changes required for LGPL v3.
+*
+* Revision 1.9 2007/07/10 05:34:07 michael
+* Remove ',' after last element in the enum endian_t.
+*
+* Revision 1.8 2007/02/06 06:22:07 michael
+* Used trim program to remove trailing spaces.
+*
+* Revision 1.7 2006/06/03 19:32:38 michael
+* Corrected error reporetd anonymous. The allocation of constants used to
+* open underlying read/write/append files did not account for a terminating
+* null.
+*
+* Used spell checker to correct spelling.
+*
+* Revision 1.6 2005/12/08 06:56:55 michael
+* Minor text corrections.
+*
+* Revision 1.5 2005/12/06 15:06:37 michael
+* Added BitFileGetBitsInt and BitFilePutBitsInt for integer types.
+*
+* Revision 1.4 2005/06/23 04:34:18 michael
+* Prevent BitfileGetBits/PutBits from accessing an extra byte when given
+* an integral number of bytes.
+*
+* Revision 1.3 2004/11/09 14:16:58 michael
+* Added functions to convert open bit_file_t to FILE and to
+* align open bit_file_t to the next byte.
+*
+* Revision 1.2 2004/06/15 13:15:58 michael
+* Use incomplete type to hide definition of bitfile structure
+*
+* Revision 1.1.1.1 2004/02/09 05:31:42 michael
+* Initial release
+*
+*
+****************************************************************************
+*
+* Bitfile: Bit stream File I/O Routines
+* Copyright (C) 2004-2007 by Michael Dipperstein (mdipper@cs.ucsb.edu)
+*
+* This file is part of the bit file library.
+*
+* The bit file library is free software; you can redistribute it and/or
+* modify it under the terms of the GNU Lesser General Public License as
+* published by the Free Software Foundation; either version 3 of the
+* License, or (at your option) any later version.
+*
+* The bit file library is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
+* General Public License for more details.
+*
+* You should have received a copy of the GNU Lesser General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*
+***************************************************************************/
+
+/***************************************************************************
+* INCLUDED FILES
+***************************************************************************/
+#include <stdlib.h>
+#include <errno.h>
+#include "bitfile.h"
+
+/***************************************************************************
+* TYPE DEFINITIONS
+***************************************************************************/
+
+typedef enum
+{
+ BF_UNKNOWN_ENDIAN,
+ BF_LITTLE_ENDIAN,
+ BF_BIG_ENDIAN
+} endian_t;
+
+struct bit_file_t
+{
+ FILE *fp; /* file pointer used by stdio functions */
+ endian_t endian; /* endianess of architecture */
+ unsigned char bitBuffer; /* bits waiting to be read/written */
+ unsigned char bitCount; /* number of bits in bitBuffer */
+ BF_MODES mode; /* open for read, write, or append */
+};
+
+/* union used to test for endianess */
+typedef union
+{
+ unsigned long word;
+ unsigned char bytes[sizeof(unsigned long)];
+} endian_test_t;
+
+/***************************************************************************
+* PROTOTYPES
+***************************************************************************/
+endian_t DetermineEndianess(void);
+
+int BitFilePutBitsLE(bit_file_t *stream, void *bits, const unsigned int count);
+int BitFilePutBitsBE(bit_file_t *stream, void *bits, const unsigned int count,
+ const size_t size);
+
+int BitFileGetBitsLE(bit_file_t *stream, void *bits, const unsigned int count);
+int BitFileGetBitsBE(bit_file_t *stream, void *bits, const unsigned int count,
+ const size_t size);
+
+/***************************************************************************
+* FUNCTIONS
+***************************************************************************/
+
+/***************************************************************************
+* Function : BitFileOpen
+* Description: This function opens a bit file for reading, writing,
+* or appending. If successful, a bit_file_t data
+* structure will be allocated and a pointer to the
+* structure will be returned.
+* Parameters : fileName - NULL terminated string containing the name of
+* the file to be opened.
+* mode - The mode of the file to be opened
+* Effects : The specified file will be opened and file structure will
+* be allocated.
+* Returned : Pointer to the bit_file_t structure for the bit file
+* opened, or NULL on failure. errno will be set for all
+* failure cases.
+***************************************************************************/
+bit_file_t *BitFileOpen(const char *fileName, const BF_MODES mode)
+{
+ char modes[3][3] = {"rb", "wb", "ab"}; /* binary modes for fopen */
+ bit_file_t *bf;
+
+ bf = (bit_file_t *)malloc(sizeof(bit_file_t));
+
+ if (bf == NULL)
+ {
+ /* malloc failed */
+ errno = ENOMEM;
+ }
+ else
+ {
+ bf->fp = fopen(fileName, modes[mode]);
+
+ if (bf->fp == NULL)
+ {
+ /* fopen failed */
+ free(bf);
+ bf = NULL;
+ }
+ else
+ {
+ /* fopen succeeded fill in remaining bf data */
+ bf->bitBuffer = 0;
+ bf->bitCount = 0;
+ bf->mode = mode;
+
+ /***************************************************************
+ * TO DO: Consider using the last byte in a file to indicate
+ * the number of bits in the previous byte that actually have
+ * data. If I do that, I'll need special handling of files
+ * opened with a mode of BF_APPEND.
+ ***************************************************************/
+ }
+ }
+
+ bf->endian = DetermineEndianess();
+ return (bf);
+}
+
+/***************************************************************************
+* Function : MakeBitFile
+* Description: This function naively wraps a standard file in a
+* bit_file_t structure. ANSI-C doesn't support file status
+* functions commonly found in other C variants, so the
+* caller must be passed as a parameter.
+* Parameters : stream - pointer to the standard file being wrapped.
+* mode - The mode of the file being wrapped.
+* Effects : A bit_file_t structure will be created for the stream
+* passed as a parameter.
+* Returned : Pointer to the bit_file_t structure for the bit file
+* or NULL on failure. errno will be set for all failure
+* cases.
+***************************************************************************/
+bit_file_t *MakeBitFile(FILE *stream, const BF_MODES mode)
+{
+ bit_file_t *bf;
+
+ if (stream == NULL)
+ {
+ /* can't wrapper empty steam */
+ errno = EBADF;
+ bf = NULL;
+ }
+ else
+ {
+ bf = (bit_file_t *)malloc(sizeof(bit_file_t));
+
+ if (bf == NULL)
+ {
+ /* malloc failed */
+ errno = ENOMEM;
+ }
+ else
+ {
+ /* set structure data */
+ bf->fp = stream;
+ bf->bitBuffer = 0;
+ bf->bitCount = 0;
+ bf->mode = mode;
+ }
+ }
+
+ bf->endian = DetermineEndianess();
+
+ return (bf);
+}
+
+/***************************************************************************
+* Function : DetermineEndianess
+* Description: This function determines the endianess of the current
+* hardware architecture. An unsigned long is set to 1. If
+* the 1st byte of the unsigned long gets the 1, this is a
+* little endian machine. If the last byte gets the 1, this
+* is a big endian machine.
+* Parameters : None
+* Effects : None
+* Returned : endian_t for current machine architecture
+***************************************************************************/
+endian_t DetermineEndianess(void)
+{
+ endian_t endian;
+ endian_test_t endianTest;
+
+ endianTest.word = 1;
+
+ if (endianTest.bytes[0] == 1)
+ {
+ /* LSB is 1st byte (little endian)*/
+ endian = BF_LITTLE_ENDIAN;
+ }
+ else if (endianTest.bytes[sizeof(unsigned long) - 1] == 1)
+ {
+ /* LSB is last byte (big endian)*/
+ endian = BF_BIG_ENDIAN;
+ }
+ else
+ {
+ endian = BF_UNKNOWN_ENDIAN;
+ }
+
+ return endian;
+}
+
+/***************************************************************************
+* Function : BitFileClose
+* Description: This function closes a bit file and frees all associated
+* data.
+* Parameters : stream - pointer to bit file stream being closed
+* Effects : The specified file will be closed and the file structure
+* will be freed.
+* Returned : 0 for success or EOF for failure.
+***************************************************************************/
+int BitFileClose(bit_file_t *stream)
+{
+ int returnValue = 0;
+
+ if (stream == NULL)
+ {
+ return(EOF);
+ }
+
+ if ((stream->mode == BF_WRITE) || (stream->mode == BF_APPEND))
+ {
+ /* write out any unwritten bits */
+ if (stream->bitCount != 0)
+ {
+ (stream->bitBuffer) <<= 8 - (stream->bitCount);
+ fputc(stream->bitBuffer, stream->fp); /* handle error? */
+ }
+ }
+
+ /***********************************************************************
+ * TO DO: Consider writing an additional byte indicating the number of
+ * valid bits (bitCount) in the previous byte.
+ ***********************************************************************/
+
+ /* close file */
+ returnValue = fclose(stream->fp);
+
+ /* free memory allocated for bit file */
+ free(stream);
+
+ return(returnValue);
+}
+
+/***************************************************************************
+* Function : BitFileToFILE
+* Description: This function flushes and frees the bitfile structure,
+* returning a pointer to a stdio file.
+* Parameters : stream - pointer to bit file stream being closed
+* Effects : The specified bitfile will be made usable as a stdio
+* FILE.
+* Returned : Pointer to FILE. NULL for failure.
+***************************************************************************/
+FILE *BitFileToFILE(bit_file_t *stream)
+{
+ FILE *fp = NULL;
+
+ if (stream == NULL)
+ {
+ return(NULL);
+ }
+
+ if ((stream->mode == BF_WRITE) || (stream->mode == BF_APPEND))
+ {
+ /* write out any unwritten bits */
+ if (stream->bitCount != 0)
+ {
+ (stream->bitBuffer) <<= 8 - (stream->bitCount);
+ fputc(stream->bitBuffer, stream->fp); /* handle error? */
+ }
+ }
+
+ /***********************************************************************
+ * TO DO: Consider writing an additional byte indicating the number of
+ * valid bits (bitCount) in the previous byte.
+ ***********************************************************************/
+
+ /* close file */
+ fp = stream->fp;
+
+ /* free memory allocated for bit file */
+ free(stream);
+
+ return(fp);
+}
+
+/***************************************************************************
+* Function : BitFileByteAlign
+* Description: This function aligns the bitfile to the nearest byte. For
+* output files, this means writing out the bit buffer with
+* extra bits set to 0. For input files, this means flushing
+* the bit buffer.
+* Parameters : stream - pointer to bit file stream to align
+* Effects : Flushes out the bit buffer.
+* Returned : EOF if stream is NULL. Otherwise, the contents of the
+* bit buffer.
+***************************************************************************/
+int BitFileByteAlign(bit_file_t *stream)
+{
+ int returnValue;
+
+ if (stream == NULL)
+ {
+ return(EOF);
+ }
+
+ returnValue = stream->bitBuffer;
+
+ if ((stream->mode == BF_WRITE) || (stream->mode == BF_APPEND))
+ {
+ /* write out any unwritten bits */
+ if (stream->bitCount != 0)
+ {
+ (stream->bitBuffer) <<= 8 - (stream->bitCount);
+ fputc(stream->bitBuffer, stream->fp); /* handle error? */
+ }
+ }
+
+ stream->bitBuffer = 0;
+ stream->bitCount = 0;
+
+ return (returnValue);
+}
+
+/***************************************************************************
+* Function : BitFileGetChar
+* Description: This function returns the next byte from the file passed as
+* a parameter.
+* Parameters : stream - pointer to bit file stream to read from
+* Effects : Reads next byte from file and updates buffer accordingly.
+* Returned : EOF if a whole byte cannot be obtained. Otherwise,
+* the character read.
+***************************************************************************/
+int BitFileGetChar(bit_file_t *stream)
+{
+ int returnValue;
+ unsigned char tmp;
+
+ if (stream == NULL)
+ {
+ return(EOF);
+ }
+
+ returnValue = fgetc(stream->fp);
+
+ if (stream->bitCount == 0)
+ {
+ /* we can just get byte from file */
+ return returnValue;
+ }
+
+ /* we have some buffered bits to return too */
+ if (returnValue != EOF)
+ {
+ /* figure out what to return */
+ tmp = ((unsigned char)returnValue) >> (stream->bitCount);
+ tmp |= ((stream->bitBuffer) << (8 - (stream->bitCount)));
+
+ /* put remaining in buffer. count shouldn't change. */
+ stream->bitBuffer = returnValue;
+
+ returnValue = tmp;
+ }
+
+ return returnValue;
+}
+
+/***************************************************************************
+* Function : BitFilePutChar
+* Description: This function writes the byte passed as a parameter to the
+* file passed a parameter.
+* Parameters : c - the character to be written
+* stream - pointer to bit file stream to write to
+* Effects : Writes a byte to the file and updates buffer accordingly.
+* Returned : On success, the character written, otherwise EOF.
+***************************************************************************/
+int BitFilePutChar(const int c, bit_file_t *stream)
+{
+ unsigned char tmp;
+
+ if (stream == NULL)
+ {
+ return(EOF);
+ }
+
+ if (stream->bitCount == 0)
+ {
+ /* we can just put byte from file */
+ return fputc(c, stream->fp);
+ }
+
+ /* figure out what to write */
+ tmp = ((unsigned char)c) >> (stream->bitCount);
+ tmp = tmp | ((stream->bitBuffer) << (8 - stream->bitCount));
+
+ if (fputc(tmp, stream->fp) != EOF)
+ {
+ /* put remaining in buffer. count shouldn't change. */
+ stream->bitBuffer = c;
+ }
+ else
+ {
+ return EOF;
+ }
+
+ return tmp;
+}
+
+/***************************************************************************
+* Function : BitFileGetBit
+* Description: This function returns the next bit from the file passed as
+* a parameter. The bit value returned is the msb in the
+* bit buffer.
+* Parameters : stream - pointer to bit file stream to read from
+* Effects : Reads next bit from bit buffer. If the buffer is empty,
+* a new byte will be read from the file.
+* Returned : 0 if bit == 0, 1 if bit == 1, and EOF if operation fails.
+***************************************************************************/
+int BitFileGetBit(bit_file_t *stream)
+{
+ int returnValue;
+
+ if (stream == NULL)
+ {
+ return(EOF);
+ }
+
+ if (stream->bitCount == 0)
+ {
+ /* buffer is empty, read another character */
+ if ((returnValue = fgetc(stream->fp)) == EOF)
+ {
+ return EOF;
+ }
+ else
+ {
+ stream->bitCount = 8;
+ stream->bitBuffer = returnValue;
+ }
+ }
+
+ /* bit to return is msb in buffer */
+ stream->bitCount--;
+ returnValue = (stream->bitBuffer) >> (stream->bitCount);
+
+ return (returnValue & 0x01);
+}
+
+/***************************************************************************
+* Function : BitFilePutBit
+* Description: This function writes the bit passed as a parameter to the
+* file passed a parameter.
+* Parameters : c - the bit value to be written
+* stream - pointer to bit file stream to write to
+* Effects : Writes a bit to the bit buffer. If the buffer has a byte,
+* the buffer is written to the file and cleared.
+* Returned : On success, the bit value written, otherwise EOF.
+***************************************************************************/
+int BitFilePutBit(const int c, bit_file_t *stream)
+{
+ int returnValue = c;
+
+ if (stream == NULL)
+ {
+ return(EOF);
+ }
+
+ stream->bitCount++;
+ stream->bitBuffer <<= 1;
+
+ if (c != 0)
+ {
+ stream->bitBuffer |= 1;
+ }
+
+ /* write bit buffer if we have 8 bits */
+ if (stream->bitCount == 8)
+ {
+ if (fputc(stream->bitBuffer, stream->fp) == EOF)
+ {
+ returnValue = EOF;
+ }
+
+ /* reset buffer */
+ stream->bitCount = 0;
+ stream->bitBuffer = 0;
+ }
+
+ return returnValue;
+}
+
+/***************************************************************************
+* Function : BitFileGetBits
+* Description: This function reads the specified number of bits from the
+* file passed as a parameter and writes them to the
+* requested memory location (msb to lsb).
+* Parameters : stream - pointer to bit file stream to read from
+* bits - address to store bits read
+* count - number of bits to read
+* Effects : Reads bits from the bit buffer and file stream. The bit
+* buffer will be modified as necessary.
+* Returned : EOF for failure, otherwise the number of bits read. If
+* an EOF is reached before all the bits are read, bits
+* will contain every bit through the last complete byte.
+***************************************************************************/
+int BitFileGetBits(bit_file_t *stream, void *bits, const unsigned int count)
+{
+ unsigned char *bytes, shifts;
+ int offset, remaining, returnValue;
+
+ bytes = (unsigned char *)bits;
+
+ if ((stream == NULL) || (bits == NULL))
+ {
+ return(EOF);
+ }
+
+ offset = 0;
+ remaining = count;
+
+ /* read whole bytes */
+ while (remaining >= 8)
+ {
+ returnValue = BitFileGetChar(stream);
+
+ if (returnValue == EOF)
+ {
+ return EOF;
+ }
+
+ bytes[offset] = (unsigned char)returnValue;
+ remaining -= 8;
+ offset++;
+ }
+
+ if (remaining != 0)
+ {
+ /* read remaining bits */
+ shifts = 8 - remaining;
+
+ while (remaining > 0)
+ {
+ returnValue = BitFileGetBit(stream);
+
+ if (returnValue == EOF)
+ {
+ return EOF;
+ }
+
+ bytes[offset] <<= 1;
+ bytes[offset] |= (returnValue & 0x01);
+ remaining--;
+ }
+
+ /* shift last bits into position */
+ bytes[offset] <<= shifts;
+ }
+
+ return count;
+}
+
+/***************************************************************************
+* Function : BitFilePutBits
+* Description: This function writes the specified number of bits from the
+* memory location passed as a parameter to the file passed
+* as a parameter. Bits are written msb to lsb.
+* Parameters : stream - pointer to bit file stream to write to
+* bits - pointer to bits to write
+* count - number of bits to write
+* Effects : Writes bits to the bit buffer and file stream. The bit
+* buffer will be modified as necessary.
+* Returned : EOF for failure, otherwise the number of bits written. If
+* an error occurs after a partial write, the partially
+* written bits will not be unwritten.
+***************************************************************************/
+int BitFilePutBits(bit_file_t *stream, void *bits, const unsigned int count)
+{
+ unsigned char *bytes, tmp;
+ int offset, remaining, returnValue;
+
+ bytes = (unsigned char *)bits;
+
+ if ((stream == NULL) || (bits == NULL))
+ {
+ return(EOF);
+ }
+
+ offset = 0;
+ remaining = count;
+
+ /* write whole bytes */
+ while (remaining >= 8)
+ {
+ returnValue = BitFilePutChar(bytes[offset], stream);
+
+ if (returnValue == EOF)
+ {
+ return EOF;
+ }
+
+ remaining -= 8;
+ offset++;
+ }
+
+ if (remaining != 0)
+ {
+ /* write remaining bits */
+ tmp = bytes[offset];
+
+ while (remaining > 0)
+ {
+ returnValue = BitFilePutBit((tmp & 0x80), stream);
+
+ if (returnValue == EOF)
+ {
+ return EOF;
+ }
+
+ tmp <<= 1;
+ remaining--;
+ }
+ }
+
+ return count;
+}
+
+/***************************************************************************
+* Function : BitFileGetBitsInt
+* Description: This function provides a machine independent layer that
+* allows a single function call to stuff an arbitrary number
+* of bits into an integer type variable.
+* Parameters : stream - pointer to bit file stream to read from
+* bits - address to store bits read
+* count - number of bits to read
+* size - sizeof type containing "bits"
+* Effects : Calls a function that reads bits from the bit buffer and
+* file stream. The bit buffer will be modified as necessary.
+* the bits will be written to "bits" from least significant
+* byte to most significant byte.
+* Returned : EOF for failure, otherwise the number of bits read by the
+* called function.
+***************************************************************************/
+int BitFileGetBitsInt(bit_file_t *stream, void *bits, const unsigned int count,
+ const size_t size)
+{
+ int returnValue;
+
+ if ((stream == NULL) || (bits == NULL))
+ {
+ return(EOF);
+ }
+
+ if (stream->endian == BF_LITTLE_ENDIAN)
+ {
+ returnValue = BitFileGetBitsLE(stream, bits, count);
+ }
+ else if (stream->endian == BF_BIG_ENDIAN)
+ {
+ returnValue = BitFileGetBitsBE(stream, bits, count, size);
+ }
+ else
+ {
+ returnValue = EOF;
+ }
+
+ return returnValue;
+}
+
+/***************************************************************************
+* Function : BitFileGetBitsLE (Little Endian)
+* Description: This function reads the specified number of bits from the
+* file passed as a parameter and writes them to the
+* requested memory location (LSB to MSB).
+* Parameters : stream - pointer to bit file stream to read from
+* bits - address to store bits read
+* count - number of bits to read
+* Effects : Reads bits from the bit buffer and file stream. The bit
+* buffer will be modified as necessary. bits is treated as
+* a little endian integer of length >= (count/8) + 1.
+* Returned : EOF for failure, otherwise the number of bits read. If
+* an EOF is reached before all the bits are read, bits
+* will contain every bit through the last successful read.
+***************************************************************************/
+int BitFileGetBitsLE(bit_file_t *stream, void *bits, const unsigned int count)
+{
+ unsigned char *bytes, shifts;
+ int offset, remaining, returnValue;
+
+ bytes = (unsigned char *)bits;
+
+ offset = 0;
+ remaining = count;
+
+ /* read whole bytes */
+ while (remaining >= 8)
+ {
+ returnValue = BitFileGetChar(stream);
+
+ if (returnValue == EOF)
+ {
+ return EOF;
+ }
+
+ bytes[offset] = (unsigned char)returnValue;
+ remaining -= 8;
+ offset++;
+ }
+
+ if (remaining != 0)
+ {
+ /* read remaining bits */
+ shifts = 8 - remaining;
+
+ while (remaining > 0)
+ {
+ returnValue = BitFileGetBit(stream);
+
+ if (returnValue == EOF)
+ {
+ return EOF;
+ }
+
+ bytes[offset] <<= 1;
+ bytes[offset] |= (returnValue & 0x01);
+ remaining--;
+ }
+
+ }
+
+ return count;
+}
+
+/***************************************************************************
+* Function : BitFileGetBitsBE (Big Endian)
+* Description: This function reads the specified number of bits from the
+* file passed as a parameter and writes them to the
+* requested memory location (LSB to MSB).
+* Parameters : stream - pointer to bit file stream to read from
+* bits - address to store bits read
+* count - number of bits to read
+* size - sizeof type containing "bits"
+* Effects : Reads bits from the bit buffer and file stream. The bit
+* buffer will be modified as necessary. bits is treated as
+* a big endian integer of length size.
+* Returned : EOF for failure, otherwise the number of bits read. If
+* an EOF is reached before all the bits are read, bits
+* will contain every bit through the last successful read.
+***************************************************************************/
+int BitFileGetBitsBE(bit_file_t *stream, void *bits, const unsigned int count,
+ const size_t size)
+{
+ unsigned char *bytes, shifts;
+ int offset, remaining, returnValue;
+
+ if (count > (size * 8))
+ {
+ /* too many bits to read */
+ return EOF;
+ }
+
+ bytes = (unsigned char *)bits;
+
+ offset = size - 1;
+ remaining = count;
+
+ /* read whole bytes */
+ while (remaining >= 8)
+ {
+ returnValue = BitFileGetChar(stream);
+
+ if (returnValue == EOF)
+ {
+ return EOF;
+ }
+
+ bytes[offset] = (unsigned char)returnValue;
+ remaining -= 8;
+ offset--;
+ }
+
+ if (remaining != 0)
+ {
+ /* read remaining bits */
+ shifts = 8 - remaining;
+
+ while (remaining > 0)
+ {
+ returnValue = BitFileGetBit(stream);
+
+ if (returnValue == EOF)
+ {
+ return EOF;
+ }
+
+ bytes[offset] <<= 1;
+ bytes[offset] |= (returnValue & 0x01);
+ remaining--;
+ }
+
+ }
+
+ return count;
+}
+
+/***************************************************************************
+* Function : BitFilePutBitsInt
+* Description: This function provides a machine independent layer that
+* allows a single function call to write an arbitrary number
+* of bits from an integer type variable into a file.
+* Parameters : stream - pointer to bit file stream to write to
+* bits - pointer to bits to write
+* count - number of bits to write
+* size - sizeof type containing "bits"
+* Effects : Calls a function that writes bits to the bit buffer and
+* file stream. The bit buffer will be modified as necessary.
+* the bits will be written to the file stream from least
+* significant byte to most significant byte.
+* Returned : EOF for failure, otherwise the number of bits written. If
+* an error occurs after a partial write, the partially
+* written bits will not be unwritten.
+***************************************************************************/
+int BitFilePutBitsInt(bit_file_t *stream, void *bits, const unsigned int count,
+ const size_t size)
+{
+ int returnValue;
+
+ if ((stream == NULL) || (bits == NULL))
+ {
+ return(EOF);
+ }
+
+ if (stream->endian == BF_LITTLE_ENDIAN)
+ {
+ returnValue = BitFilePutBitsLE(stream, bits, count);
+ }
+ else if (stream->endian == BF_BIG_ENDIAN)
+ {
+ returnValue = BitFilePutBitsBE(stream, bits, count, size);
+ }
+ else
+ {
+ returnValue = EOF;
+ }
+
+ return returnValue;
+}
+
+/***************************************************************************
+* Function : BitFilePutBitsLE (Little Endian)
+* Description: This function writes the specified number of bits from the
+* memory location passed as a parameter to the file passed
+* as a parameter. Bits are written LSB to MSB.
+* Parameters : stream - pointer to bit file stream to write to
+* bits - pointer to bits to write
+* count - number of bits to write
+* Effects : Writes bits to the bit buffer and file stream. The bit
+* buffer will be modified as necessary. bits is treated as
+* a little endian integer of length >= (count/8) + 1.
+* Returned : EOF for failure, otherwise the number of bits written. If
+* an error occurs after a partial write, the partially
+* written bits will not be unwritten.
+***************************************************************************/
+int BitFilePutBitsLE(bit_file_t *stream, void *bits, const unsigned int count)
+{
+ unsigned char *bytes, tmp;
+ int offset, remaining, returnValue;
+
+ bytes = (unsigned char *)bits;
+ offset = 0;
+ remaining = count;
+
+ /* write whole bytes */
+ while (remaining >= 8)
+ {
+ returnValue = BitFilePutChar(bytes[offset], stream);
+
+ if (returnValue == EOF)
+ {
+ return EOF;
+ }
+
+ remaining -= 8;
+ offset++;
+ }
+
+ if (remaining != 0)
+ {
+ /* write remaining bits */
+ tmp = bytes[offset];
+ tmp <<= (8 - remaining);
+
+ while (remaining > 0)
+ {
+ returnValue = BitFilePutBit((tmp & 0x80), stream);
+
+ if (returnValue == EOF)
+ {
+ return EOF;
+ }
+
+ tmp <<= 1;
+ remaining--;
+ }
+ }
+
+ return count;
+}
+
+/***************************************************************************
+* Function : BitFilePutBitsBE (Big Endian)
+* Description: This function writes the specified number of bits from the
+* memory location passed as a parameter to the file passed
+* as a parameter. Bits are written LSB to MSB.
+* Parameters : stream - pointer to bit file stream to write to
+* bits - pointer to bits to write
+* count - number of bits to write
+* Effects : Writes bits to the bit buffer and file stream. The bit
+* buffer will be modified as necessary. bits is treated as
+* a big endian integer of length size.
+* Returned : EOF for failure, otherwise the number of bits written. If
+* an error occurs after a partial write, the partially
+* written bits will not be unwritten.
+***************************************************************************/
+int BitFilePutBitsBE(bit_file_t *stream, void *bits, const unsigned int count,
+ const size_t size)
+{
+ unsigned char *bytes, tmp;
+ int offset, remaining, returnValue;
+
+ if (count > (size * 8))
+ {
+ /* too many bits to write */
+ return EOF;
+ }
+
+ bytes = (unsigned char *)bits;
+ offset = size - 1;
+ remaining = count;
+
+ /* write whole bytes */
+ while (remaining >= 8)
+ {
+ returnValue = BitFilePutChar(bytes[offset], stream);
+
+ if (returnValue == EOF)
+ {
+ return EOF;
+ }
+
+ remaining -= 8;
+ offset--;
+ }
+
+ if (remaining != 0)
+ {
+ /* write remaining bits */
+ tmp = bytes[offset];
+ tmp <<= (8 - remaining);
+
+ while (remaining > 0)
+ {
+ returnValue = BitFilePutBit((tmp & 0x80), stream);
+
+ if (returnValue == EOF)
+ {
+ return EOF;
+ }
+
+ tmp <<= 1;
+ remaining--;
+ }
+ }
+
+ return count;
+}
diff --git a/test/compression/bitfile.h b/test/compression/bitfile.h
new file mode 100644
index 0000000..20d102c
--- /dev/null
+++ b/test/compression/bitfile.h
@@ -0,0 +1,121 @@
+/***************************************************************************
+* Bit Stream File Header
+*
+* File : bitfile.h
+* Purpose : Provides definitions and prototypes for a simple library of
+* I/O functions for files that contain data in sizes that aren't
+* integral bytes.
+* An attempt was made to make the functions in this library
+* analogous to functions provided to manipulate byte streams.
+* The functions contained in this library were created with
+* compression algorithms in mind, but may be suited to other
+* applications.
+* Author : Michael Dipperstein
+* Date : January 9, 2004
+*
+****************************************************************************
+* UPDATES
+*
+* $Id: bitfile.h,v 1.6 2007/08/26 21:53:48 michael Exp $
+* $Log: bitfile.h,v $
+* Revision 1.6 2007/08/26 21:53:48 michael
+* Changes required for LGPL v3.
+*
+* Revision 1.5 2006/06/03 19:33:11 michael
+* Used spell checker to correct spelling.
+*
+* Revision 1.4 2005/12/06 15:06:37 michael
+* Added BitFileGetBitsInt and BitFilePutBitsInt for integer types.
+*
+* Revision 1.3 2004/11/09 14:16:58 michael
+* Added functions to convert open bit_file_t to FILE and to
+* align open bit_file_t to the next byte.
+*
+* Revision 1.2 2004/06/15 13:16:10 michael
+* Use incomplete type to hide definition of bitfile structure
+*
+* Revision 1.1.1.1 2004/02/09 05:31:42 michael
+* Initial release
+*
+*
+****************************************************************************
+*
+* Bitfile: Bit stream File I/O Routines
+* Copyright (C) 2004-2007 by Michael Dipperstein (mdipper@cs.ucsb.edu)
+*
+* This file is part of the bit file library.
+*
+* The bit file library is free software; you can redistribute it and/or
+* modify it under the terms of the GNU Lesser General Public License as
+* published by the Free Software Foundation; either version 3 of the
+* License, or (at your option) any later version.
+*
+* The bit file library is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
+* General Public License for more details.
+*
+* You should have received a copy of the GNU Lesser General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*
+***************************************************************************/
+
+#ifndef _BITFILE_H_
+#define _BITFILE_H_
+
+/***************************************************************************
+* INCLUDED FILES
+***************************************************************************/
+#include <stdio.h>
+
+/***************************************************************************
+* TYPE DEFINITIONS
+***************************************************************************/
+typedef enum
+{
+ BF_READ = 0,
+ BF_WRITE = 1,
+ BF_APPEND= 2,
+ BF_NO_MODE
+} BF_MODES;
+
+/* incomplete type to hide implementation */
+struct bit_file_t;
+typedef struct bit_file_t bit_file_t;
+
+/***************************************************************************
+* PROTOTYPES
+***************************************************************************/
+
+/* open/close file */
+bit_file_t *BitFileOpen(const char *fileName, const BF_MODES mode);
+bit_file_t *MakeBitFile(FILE *stream, const BF_MODES mode);
+int BitFileClose(bit_file_t *stream);
+FILE *BitFileToFILE(bit_file_t *stream);
+
+/* toss spare bits and byte align file */
+int BitFileByteAlign(bit_file_t *stream);
+
+/* get/put character */
+int BitFileGetChar(bit_file_t *stream);
+int BitFilePutChar(const int c, bit_file_t *stream);
+
+/* get/put single bit */
+int BitFileGetBit(bit_file_t *stream);
+int BitFilePutBit(const int c, bit_file_t *stream);
+
+/* get/put number of bits (most significant bit to least significat bit) */
+int BitFileGetBits(bit_file_t *stream, void *bits, const unsigned int count);
+int BitFilePutBits(bit_file_t *stream, void *bits, const unsigned int count);
+
+/***************************************************************************
+* get/put number of bits from integer types (short, int, long, ...)
+* machine endiness is accounted for.
+* size is the size of the data structure pointer to by bits.
+***************************************************************************/
+int BitFileGetBitsInt(bit_file_t *stream, void *bits, const unsigned int count,
+ const size_t size);
+int BitFilePutBitsInt(bit_file_t *stream, void *bits, const unsigned int count,
+ const size_t size);
+
+#endif /* _BITFILE_H_ */
diff --git a/test/compression/lzdecode.c b/test/compression/lzdecode.c
new file mode 100644
index 0000000..cb46301
--- /dev/null
+++ b/test/compression/lzdecode.c
@@ -0,0 +1,282 @@
+/***************************************************************************
+* Lempel, Ziv, Storer, and Szymanski Decoding
+*
+* File : lzdecode.c
+* Purpose : Use lzss coding (Storer and Szymanski's modified LZ77) to
+* decompress lzss encoded files.
+* Author : Michael Dipperstein
+* Date : November 07, 2004
+*
+****************************************************************************
+* UPDATES
+*
+* Date Change
+* 12/10/03 Changed handling of sliding window to better match standard
+* algorithm description.
+* 12/11/03 Remebered to copy encoded characters to the sliding window
+* even when there are no more characters in the input stream.
+*
+*
+* Revision 1.2 2004/02/22 17:14:26 michael
+* - Separated encode/decode, match finding, and main.
+* - Use bitfiles for reading/writing files
+* - Use traditional LZSS encoding where the coded/uncoded bits
+* precede the symbol they are associated with, rather than
+* aggregating the bits.
+*
+* Revision 1.1.1.1 2004/01/21 06:25:49 michael
+* Initial version
+*
+* 11/07/04 Separated encode and decode functions for improved
+* modularity.
+*
+* $Id: lzdecode.c,v 1.7 2007/09/20 04:34:25 michael Exp $
+* $Log: lzdecode.c,v $
+* Revision 1.7 2007/09/20 04:34:25 michael
+* Changes required for LGPL v3.
+*
+* Revision 1.6 2007/03/25 05:11:32 michael
+* Corrected file closure error reported by "Carl@Yahoo" . Now both input
+* and output files are closed.
+*
+* Revision 1.5 2006/12/26 04:09:09 michael
+* Updated e-mail address and minor text clean-up.
+*
+* Revision 1.4 2006/12/26 02:05:00 michael
+* Corrected bug identified by Andrej Sinicyn which resulted in stdin being
+* used as the default output for decoded data.
+*
+* Revision 1.3 2005/12/28 06:03:30 michael
+* Use slower but clearer Get/PutBitsInt for reading/writing bits.
+* Replace mod with conditional Wrap macro.
+*
+* Revision 1.2 2004/11/13 22:51:00 michael
+* Provide distinct names for by file and by name functions and add some
+* comments to make their usage clearer.
+*
+* Revision 1.1 2004/11/08 05:54:18 michael
+* 1. Split encode and decode routines for smarter linking
+* 2. Renamed lzsample.c sample.c to match my other samples
+* 3. Makefile now builds code as libraries for better LGPL compliance.
+*
+*
+*
+****************************************************************************
+*
+* LZDecode: An ANSI C LZSS Decoding Routines
+* Copyright (C) 2003-2007 by
+* Michael Dipperstein (mdipper@alumni.engr.ucsb.edu)
+*
+* This file is part of the lzss library.
+*
+* The lzss library is free software; you can redistribute it and/or
+* modify it under the terms of the GNU Lesser General Public License as
+* published by the Free Software Foundation; either version 3 of the
+* License, or (at your option) any later version.
+*
+* The lzss library is distributed in the hope that it will be useful, but
+* WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
+* General Public License for more details.
+*
+* You should have received a copy of the GNU Lesser General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*
+***************************************************************************/
+
+/***************************************************************************
+* INCLUDED FILES
+***************************************************************************/
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "lzlocal.h"
+#include "bitfile.h"
+
+/***************************************************************************
+* TYPE DEFINITIONS
+***************************************************************************/
+
+/***************************************************************************
+* CONSTANTS
+***************************************************************************/
+
+/***************************************************************************
+* GLOBAL VARIABLES
+***************************************************************************/
+/* cyclic buffer sliding window of already read characters */
+extern unsigned char slidingWindow[];
+extern unsigned char uncodedLookahead[];
+
+/***************************************************************************
+* PROTOTYPES
+***************************************************************************/
+
+/***************************************************************************
+* FUNCTIONS
+***************************************************************************/
+
+/****************************************************************************
+* Function : DecodeLZSSByFile
+* Description: This function will read an LZSS encoded input file and
+* write an output file. This algorithm encodes strings as 16
+* bits (a 12 bit offset + a 4 bit length).
+* Parameters : fpIn - pointer to the open binary file to decode
+* fpOut - pointer to the open binary file to write decoded
+* output
+* Effects : fpIn is decoded and written to fpOut. Neither file is
+* closed after exit.
+* Returned : EXIT_SUCCESS or EXIT_FAILURE
+****************************************************************************/
+int DecodeLZSSByFile(FILE *fpIn, FILE *fpOut)
+{
+ bit_file_t *bfpIn;
+
+ int c;
+ unsigned int i, nextChar;
+ encoded_string_t code; /* offset/length code for string */
+
+ if (fpIn == NULL)
+ {
+ /* use stdin if no input file */
+ bfpIn = MakeBitFile(stdin, BF_READ);
+ }
+ else
+ {
+ /* convert output file to bitfile */
+ bfpIn = MakeBitFile(fpIn, BF_READ);
+ }
+
+ /* use stdout if no output file */
+ if (fpOut == NULL)
+ {
+ fpOut = stdout;
+ }
+
+ /************************************************************************
+ * Fill the sliding window buffer with some known vales. EncodeLZSS must
+ * use the same values. If common characters are used, there's an
+ * increased chance of matching to the earlier strings.
+ ************************************************************************/
+ memset(slidingWindow, ' ', WINDOW_SIZE * sizeof(unsigned char));
+
+ nextChar = 0;
+
+ while (TRUE)
+ {
+ if ((c = BitFileGetBit(bfpIn)) == EOF)
+ {
+ /* we hit the EOF */
+ break;
+ }
+
+ if (c == UNCODED)
+ {
+ /* uncoded character */
+ if ((c = BitFileGetChar(bfpIn)) == EOF)
+ {
+ break;
+ }
+
+ /* write out byte and put it in sliding window */
+ putc(c, fpOut);
+ slidingWindow[nextChar] = c;
+ nextChar = Wrap((nextChar + 1), WINDOW_SIZE);
+ }
+ else
+ {
+ /* offset and length */
+ code.offset = 0;
+ code.length = 0;
+
+ if ((BitFileGetBitsInt(bfpIn, &code.offset, OFFSET_BITS,
+ sizeof(unsigned int))) == EOF)
+ {
+ break;
+ }
+
+ if ((BitFileGetBitsInt(bfpIn, &code.length, LENGTH_BITS,
+ sizeof(unsigned int))) == EOF)
+ {
+ break;
+ }
+
+ code.length += MAX_UNCODED + 1;
+
+ /****************************************************************
+ * Write out decoded string to file and lookahead. It would be
+ * nice to write to the sliding window instead of the lookahead,
+ * but we could end up overwriting the matching string with the
+ * new string if abs(offset - next char) < match length.
+ ****************************************************************/
+ for (i = 0; i < code.length; i++)
+ {
+ c = slidingWindow[Wrap((code.offset + i), WINDOW_SIZE)];
+ putc(c, fpOut);
+ uncodedLookahead[i] = c;
+ }
+
+ /* write out decoded string to sliding window */
+ for (i = 0; i < code.length; i++)
+ {
+ slidingWindow[(nextChar + i) % WINDOW_SIZE] =
+ uncodedLookahead[i];
+ }
+
+ nextChar = Wrap((nextChar + code.length), WINDOW_SIZE);
+ }
+ }
+
+ /* we've decoded everything, free bitfile structure */
+ BitFileToFILE(bfpIn);
+
+ return (EXIT_SUCCESS);
+}
+
+/****************************************************************************
+* Function : DecodeLZSSByName
+* Description: This function is provided to maintain compatibility with
+* older versions of my LZSS implementation which used file
+* names instead of file pointers.
+* Parameters : inFile - name of file to decode
+* outFile - name of file to write decoded output
+* Effects : inFile is decoded and written to outFile
+* Returned : EXIT_SUCCESS or EXIT_FAILURE
+****************************************************************************/
+int DecodeLZSSByName(char *inFile, char *outFile)
+{
+ FILE *fpIn, *fpOut;
+ int returnValue;
+
+ if (inFile == NULL)
+ {
+ fpIn = stdin;
+ }
+ if ((fpIn = fopen(inFile, "rb")) == NULL)
+ {
+ perror(inFile);
+ return (EXIT_FAILURE);
+ }
+
+ if (outFile == NULL)
+ {
+ fpOut = stdout;
+ }
+ else
+ {
+ if ((fpOut = fopen(outFile, "wb")) == NULL)
+ {
+ fclose(fpIn);
+ perror(outFile);
+ return (EXIT_FAILURE);
+ }
+ }
+
+ returnValue = DecodeLZSSByFile(fpIn, fpOut);
+
+ /* close files */
+ fclose(fpIn);
+ fclose(fpOut);
+
+ return (returnValue);
+}
diff --git a/test/compression/lzencode.c b/test/compression/lzencode.c
new file mode 100644
index 0000000..35f8b83
--- /dev/null
+++ b/test/compression/lzencode.c
@@ -0,0 +1,300 @@
+/***************************************************************************
+* Lempel, Ziv, Storer, and Szymanski Encoding
+*
+* File : lzdecode.c
+* Purpose : Use lzss coding (Storer and Szymanski's modified LZ77) to
+* compress lzss data files.
+* Author : Michael Dipperstein
+* Date : November 07, 2004
+*
+****************************************************************************
+* UPDATES
+*
+* Date Change
+* 12/10/03 Changed handling of sliding window to better match standard
+* algorithm description.
+* 12/11/03 Remebered to copy encoded characters to the sliding window
+* even when there are no more characters in the input stream.
+*
+*
+* Revision 1.2 2004/02/22 17:14:26 michael
+* - Separated encode/decode, match finding, and main.
+* - Use bitfiles for reading/writing files
+* - Use traditional LZSS encoding where the coded/uncoded bits
+* precede the symbol they are associated with, rather than
+* aggregating the bits.
+*
+* Revision 1.1.1.1 2004/01/21 06:25:49 michael
+* Initial version
+*
+* 11/07/04 Separated encode and decode functions for improved
+* modularity.
+*
+* $Id: lzencode.c,v 1.6 2007/09/20 04:34:25 michael Exp $
+* $Log: lzencode.c,v $
+* Revision 1.6 2007/09/20 04:34:25 michael
+* Changes required for LGPL v3.
+*
+* Revision 1.5 2007/03/25 05:11:32 michael
+* Corrected file closure error reported by "Carl@Yahoo" . Now both input
+* and output files are closed.
+*
+* Revision 1.4 2006/12/26 04:09:09 michael
+* Updated e-mail address and minor text clean-up.
+*
+* Revision 1.3 2005/12/28 06:03:30 michael
+* Use slower but clearer Get/PutBitsInt for reading/writing bits.
+* Replace mod with conditional Wrap macro.
+*
+* Revision 1.2 2004/11/13 22:51:00 michael
+* Provide distinct names for by file and by name functions and add some
+* comments to make their usage clearer.
+*
+* Revision 1.1 2004/11/08 05:54:18 michael
+* 1. Split encode and decode routines for smarter linking
+* 2. Renamed lzsample.c sample.c to match my other samples
+* 3. Makefile now builds code as libraries for better LGPL compliance.
+*
+*
+*
+****************************************************************************
+*
+* LZEncode: An ANSI C LZSS Encoding Routines
+* Copyright (C) 2003-2007 by
+* Michael Dipperstein (mdipper@alumni.engr.ucsb.edu)
+*
+* This file is part of the lzss library.
+*
+* The lzss library is free software; you can redistribute it and/or
+* modify it under the terms of the GNU Lesser General Public License as
+* published by the Free Software Foundation; either version 3 of the
+* License, or (at your option) any later version.
+*
+* The lzss library is distributed in the hope that it will be useful, but
+* WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
+* General Public License for more details.
+*
+* You should have received a copy of the GNU Lesser General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*
+***************************************************************************/
+
+/***************************************************************************
+* INCLUDED FILES
+***************************************************************************/
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "lzlocal.h"
+#include "bitfile.h"
+
+/***************************************************************************
+* TYPE DEFINITIONS
+***************************************************************************/
+
+/***************************************************************************
+* CONSTANTS
+***************************************************************************/
+
+/***************************************************************************
+* GLOBAL VARIABLES
+***************************************************************************/
+/* cyclic buffer sliding window of already read characters */
+extern unsigned char slidingWindow[];
+extern unsigned char uncodedLookahead[];
+
+/***************************************************************************
+* PROTOTYPES
+***************************************************************************/
+
+/***************************************************************************
+* FUNCTIONS
+***************************************************************************/
+
+/****************************************************************************
+* Function : EncodeLZSSByFile
+* Description: This function will read an input file and write an output
+* file encoded according to the traditional LZSS algorithm.
+* This algorithm encodes strings as 16 bits (a 12 bit offset
+* + a 4 bit length).
+* Parameters : fpIn - pointer to the open binary file to encode
+* fpOut - pointer to the open binary file to write encoded
+* output
+* Effects : fpIn is encoded and written to fpOut. Neither file is
+* closed after exit.
+* Returned : EXIT_SUCCESS or EXIT_FAILURE
+****************************************************************************/
+int EncodeLZSSByFile(FILE *fpIn, FILE *fpOut)
+{
+ bit_file_t *bfpOut;
+
+ encoded_string_t matchData;
+ unsigned int i, c;
+ unsigned int len; /* length of string */
+
+ /* head of sliding window and lookahead */
+ unsigned int windowHead, uncodedHead;
+
+ /* use stdin if no input file */
+ if (fpIn == NULL)
+ {
+ fpIn = stdin;
+ }
+
+ if (fpOut == NULL)
+ {
+ /* use stdout if no output file */
+ bfpOut = MakeBitFile(stdout, BF_WRITE);
+ }
+ else
+ {
+ /* convert output file to bitfile */
+ bfpOut = MakeBitFile(fpOut, BF_WRITE);
+ }
+
+ windowHead = 0;
+ uncodedHead = 0;
+
+ /************************************************************************
+ * Fill the sliding window buffer with some known vales. DecodeLZSS must
+ * use the same values. If common characters are used, there's an
+ * increased chance of matching to the earlier strings.
+ ************************************************************************/
+ memset(slidingWindow, ' ', WINDOW_SIZE * sizeof(unsigned char));
+
+ /************************************************************************
+ * Copy MAX_CODED bytes from the input file into the uncoded lookahead
+ * buffer.
+ ************************************************************************/
+ for (len = 0; len < MAX_CODED; len++) {
+ c = getc(fpIn);
+ if (c == EOF) break;
+ uncodedLookahead[len] = c;
+ }
+
+ if (len == 0)
+ {
+ return (EXIT_SUCCESS); /* inFile was empty */
+ }
+
+ /* Look for matching string in sliding window */
+ InitializeSearchStructures();
+ FindMatch(&matchData, windowHead, uncodedHead);
+
+ /* now encoded the rest of the file until an EOF is read */
+ while (len > 0)
+ {
+ if (matchData.length > len)
+ {
+ /* garbage beyond last data happened to extend match length */
+ matchData.length = len;
+ }
+
+ if (matchData.length <= MAX_UNCODED)
+ {
+ /* not long enough match. write uncoded flag and character */
+ BitFilePutBit(UNCODED, bfpOut);
+ BitFilePutChar(uncodedLookahead[uncodedHead], bfpOut);
+
+ matchData.length = 1; /* set to 1 for 1 byte uncoded */
+ }
+ else
+ {
+ unsigned int adjustedLen;
+
+ /* adjust the length of the match so minimun encoded len is 0*/
+ adjustedLen = matchData.length - (MAX_UNCODED + 1);
+
+ /* match length > MAX_UNCODED. Encode as offset and length. */
+ BitFilePutBit(ENCODED, bfpOut);
+ BitFilePutBitsInt(bfpOut, &matchData.offset, OFFSET_BITS,
+ sizeof(unsigned int));
+ BitFilePutBitsInt(bfpOut, &adjustedLen, LENGTH_BITS,
+ sizeof(unsigned int));
+ }
+
+ /********************************************************************
+ * Replace the matchData.length worth of bytes we've matched in the
+ * sliding window with new bytes from the input file.
+ ********************************************************************/
+ i = 0;
+ while ((i < matchData.length) && ((c = getc(fpIn)) != EOF))
+ {
+ /* add old byte into sliding window and new into lookahead */
+ ReplaceChar(windowHead, uncodedLookahead[uncodedHead]);
+ uncodedLookahead[uncodedHead] = c;
+ windowHead = Wrap((windowHead + 1), WINDOW_SIZE);
+ uncodedHead = Wrap((uncodedHead + 1), MAX_CODED);
+ i++;
+ }
+
+ /* handle case where we hit EOF before filling lookahead */
+ while (i < matchData.length)
+ {
+ ReplaceChar(windowHead, uncodedLookahead[uncodedHead]);
+ /* nothing to add to lookahead here */
+ windowHead = Wrap((windowHead + 1), WINDOW_SIZE);
+ uncodedHead = Wrap((uncodedHead + 1), MAX_CODED);
+ len--;
+ i++;
+ }
+
+ /* find match for the remaining characters */
+ FindMatch(&matchData, windowHead, uncodedHead);
+ }
+
+ /* we've decoded everything, free bitfile structure */
+ BitFileToFILE(bfpOut);
+
+ return (EXIT_SUCCESS);
+}
+
+/****************************************************************************
+* Function : EncodeLZSSByName
+* Description: This function is provided to maintain compatibility with
+* older versions of my LZSS implementation which used file
+* names instead of file pointers.
+* Parameters : inFile - name of file to encode
+* outFile - name of file to write encoded output
+* Effects : inFile is encoded and written to outFile
+* Returned : EXIT_SUCCESS or EXIT_FAILURE
+****************************************************************************/
+int EncodeLZSSByName(char *inFile, char *outFile)
+{
+ FILE *fpIn, *fpOut;
+ int returnValue;
+
+ /* open binary input and output files */
+ if (inFile == NULL)
+ {
+ fpIn = stdin;
+ }
+ if ((fpIn = fopen(inFile, "rb")) == NULL)
+ {
+ perror(inFile);
+ return (EXIT_FAILURE);
+ }
+
+ if (outFile == NULL)
+ {
+ fpOut = stdout;
+ }
+ else
+ {
+ if ((fpOut = fopen(outFile, "wb")) == NULL)
+ {
+ fclose(fpIn);
+ perror(outFile);
+ return (EXIT_FAILURE);
+ }
+ }
+
+ returnValue = EncodeLZSSByFile(fpIn, fpOut);
+
+ /* close files */
+ fclose(fpIn);
+ fclose(fpOut);
+
+ return (returnValue);
+}
diff --git a/test/compression/lzhash.c b/test/compression/lzhash.c
new file mode 100644
index 0000000..83cbe94
--- /dev/null
+++ b/test/compression/lzhash.c
@@ -0,0 +1,358 @@
+#include <stdio.h>
+/***************************************************************************
+* Lempel, Ziv, Storer, and Szymanski Encoding and Decoding
+*
+* File : hash.c
+* Purpose : Implement hash table optimized matching of uncoded strings
+* for LZSS algorithm.
+* Author : Michael Dipperstein
+* Date : February 21, 2004
+*
+****************************************************************************
+* UPDATES
+* $Id: hash.c,v 1.7 2007/09/20 04:34:25 michael Exp $
+* $Log: hash.c,v $
+* Revision 1.7 2007/09/20 04:34:25 michael
+* Changes required for LGPL v3.
+*
+* Revision 1.6 2006/12/26 04:09:09 michael
+* Updated e-mail address and minor text clean-up.
+*
+* Revision 1.5 2005/12/29 14:37:56 michael
+* Remove debug statements.
+*
+* Revision 1.4 2005/12/29 14:32:56 michael
+* Correct algorithm for replacing characters in dictionary.
+*
+* Revision 1.3 2005/12/29 07:06:24 michael
+* Let the hash table size vary with the size of the sliding window.
+*
+* Revision 1.2 2005/12/28 06:03:30 michael
+* Use slower but clearer Get/PutBitsInt for reading/writing bits.
+* Replace mod with conditional Wrap macro.
+*
+* Revision 1.1 2004/02/22 17:23:02 michael
+* Initial revision of hash table search. Mostly code from lzhash.c.
+*
+*
+****************************************************************************
+*
+* Hash: Hash table optimized matching routines used by LZSS
+* Encoding/Decoding Routine
+* Copyright (C) 2004-2007 by
+* Michael Dipperstein (mdipper@alumni.engr.ucsb.edu)
+*
+* This file is part of the lzss library.
+*
+* The lzss library is free software; you can redistribute it and/or
+* modify it under the terms of the GNU Lesser General Public License as
+* published by the Free Software Foundation; either version 3 of the
+* License, or (at your option) any later version.
+*
+* The lzss library is distributed in the hope that it will be useful, but
+* WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
+* General Public License for more details.
+*
+* You should have received a copy of the GNU Lesser General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*
+***************************************************************************/
+
+/***************************************************************************
+* INCLUDED FILES
+***************************************************************************/
+#include "lzlocal.h"
+
+/***************************************************************************
+* TYPE DEFINITIONS
+***************************************************************************/
+
+/***************************************************************************
+* CONSTANTS
+***************************************************************************/
+#define NULL_INDEX (WINDOW_SIZE + 1)
+
+#define HASH_SIZE (WINDOW_SIZE >> 2) /* size of hash table */
+
+/***************************************************************************
+* GLOBAL VARIABLES
+***************************************************************************/
+/* cyclic buffer sliding window of already read characters */
+extern unsigned char slidingWindow[];
+extern unsigned char uncodedLookahead[];
+
+unsigned int hashTable[HASH_SIZE]; /* list head for each hash key */
+unsigned int next[WINDOW_SIZE]; /* indices of next in hash list */
+
+/***************************************************************************
+* PROTOTYPES
+***************************************************************************/
+
+/***************************************************************************
+* FUNCTIONS
+***************************************************************************/
+
+/****************************************************************************
+* Function : HashKey
+* Description: This function generates a hash key for a (MAX_UNCODED + 1)
+* long string located either in the sliding window or the
+* uncoded lookahead. The key generation algorithm is
+* supposed to be based on the algorithm used by gzip. As
+* reported in K. Sadakane, H. Imai. "Improving the Speed of
+* LZ77 Compression by Hashing and Suffix Sorting". IEICE
+* Trans. Fundamentals, Vol. E83-A, No. 12 (December 2000)
+* Parameters : offset - offset into either the uncoded lookahead or the
+* sliding window.
+* lookahead - TRUE iff offset is an offset into the uncoded
+* lookahead buffer.
+* Effects : NONE
+* Returned : The sliding window index where the match starts and the
+* length of the match. If there is no match a length of
+* zero will be returned.
+****************************************************************************/
+unsigned int HashKey(unsigned int offset, unsigned char lookahead)
+{
+ int i, hashKey;
+
+ hashKey = 0;
+
+ if (lookahead)
+ {
+ /* string is in the lookahead buffer */
+ for (i = 0; i < (MAX_UNCODED + 1); i++)
+ {
+ hashKey = (hashKey << 5) ^ (uncodedLookahead[offset]);
+ hashKey %= HASH_SIZE;
+ offset = Wrap((offset + 1), MAX_CODED);
+ }
+ }
+ else
+ {
+ /* string is in the sliding window */
+ for (i = 0; i < (MAX_UNCODED + 1); i++)
+ {
+ hashKey = (hashKey << 5) ^ (slidingWindow[offset]);
+ hashKey %= HASH_SIZE;
+ offset = Wrap((offset + 1), WINDOW_SIZE);
+ }
+ }
+
+ return hashKey;
+}
+
+/****************************************************************************
+* Function : InitializeSearchStructures
+* Description: This function initializes structures used to speed up the
+* process of mathcing uncoded strings to strings in the
+* sliding window. For hashed searches, this means that a
+* hash table pointing to linked lists is initialized.
+* Parameters : None
+* Effects : The hash table and next array are initialized.
+* Returned : None
+*
+* NOTE: This function assumes that the sliding window is initially filled
+* with all identical characters.
+****************************************************************************/
+void InitializeSearchStructures()
+{
+ unsigned int i;
+
+ /************************************************************************
+ * Since the encode routine only fills the sliding window with one
+ * character, there is only one hash key for the entier sliding window.
+ * That means all positions are in the same linked list.
+ ************************************************************************/
+ for (i = 0; i < (WINDOW_SIZE - 1); i++)
+ {
+ next[i] = i + 1;
+ }
+
+ /* there's no next for the last character */
+ next[WINDOW_SIZE - 1] = NULL_INDEX;
+
+ /* the only list right now is the " " list */
+ for (i = 0; i < HASH_SIZE; i++)
+ {
+ hashTable[i] = NULL_INDEX;
+ }
+
+ hashTable[HashKey(0, FALSE)] = 0;
+}
+
+/****************************************************************************
+* Function : FindMatch
+* Description: This function will search through the slidingWindow
+* dictionary for the longest sequence matching the MAX_CODED
+* long string stored in uncodedLookahead.
+* Parameters : uncodedHead - head of uncoded lookahead buffer
+* Effects : NONE
+* Returned : The sliding window index where the match starts and the
+* length of the match. If there is no match a length of
+* zero will be returned.
+****************************************************************************/
+/* XL: no struct return */
+void FindMatch(encoded_string_t * matchData,
+ unsigned int windowHead, unsigned int uncodedHead)
+{
+ unsigned int i, j;
+
+ matchData->length = 0;
+ matchData->offset = 0;
+
+ i = hashTable[HashKey(uncodedHead, TRUE)]; /* start of proper list */
+ j = 0;
+
+ while (i != NULL_INDEX)
+ {
+ if (slidingWindow[i] == uncodedLookahead[uncodedHead])
+ {
+ /* we matched one how many more match? */
+ j = 1;
+
+ while(slidingWindow[Wrap((i + j), WINDOW_SIZE)] ==
+ uncodedLookahead[Wrap((uncodedHead + j), MAX_CODED)])
+ {
+ if (j >= MAX_CODED)
+ {
+ break;
+ }
+ j++;
+ }
+
+ if (j > matchData->length)
+ {
+ matchData->length = j;
+ matchData->offset = i;
+ }
+ }
+
+ if (j >= MAX_CODED)
+ {
+ matchData->length = MAX_CODED;
+ break;
+ }
+
+ i = next[i]; /* try next in list */
+ }
+}
+
+/****************************************************************************
+* Function : AddString
+* Description: This function adds the (MAX_UNCODED + 1) long string
+* starting at slidingWindow[charIndex] to the hash table's
+* linked list associated with its hash key.
+* Parameters : charIndex - sliding window index of the string to be
+* added to the linked list.
+* Effects : The string starting at slidingWindow[charIndex] is appended
+* to the end of the appropriate linked list.
+* Returned : NONE
+****************************************************************************/
+void AddString(unsigned int charIndex)
+{
+ unsigned int i, hashKey;
+
+ /* inserted character will be at the end of the list */
+ next[charIndex] = NULL_INDEX;
+
+ hashKey = HashKey(charIndex, FALSE);
+
+ if (hashTable[hashKey] == NULL_INDEX)
+ {
+ /* this is the only character in it's list */
+ hashTable[hashKey] = charIndex;
+ return;
+ }
+
+ /* find the end of the list */
+ i = hashTable[hashKey];
+
+ while(next[i] != NULL_INDEX)
+ {
+ i = next[i];
+ }
+
+ /* add new character to the list end */
+ next[i] = charIndex;
+}
+
+/****************************************************************************
+* Function : RemoveString
+* Description: This function removes the (MAX_UNCODED + 1) long string
+* starting at slidingWindow[charIndex] from the hash table's
+* linked list associated with its hash key.
+* Parameters : charIndex - sliding window index of the string to be
+* removed from the linked list.
+* Effects : The string starting at slidingWindow[charIndex] is removed
+* from its linked list.
+* Returned : NONE
+****************************************************************************/
+void RemoveString(unsigned int charIndex)
+{
+ unsigned int i, hashKey;
+ unsigned int nextIndex;
+
+ nextIndex = next[charIndex]; /* remember where this points to */
+ next[charIndex] = NULL_INDEX;
+
+ hashKey = HashKey(charIndex, FALSE);
+
+ if (hashTable[hashKey] == charIndex)
+ {
+ /* we're deleting a list head */
+ hashTable[hashKey] = nextIndex;
+ return;
+ }
+
+ /* find character pointing to ours */
+ i = hashTable[hashKey];
+
+ while(next[i] != charIndex)
+ {
+ i = next[i];
+ }
+
+ /* point the previous next */
+ next[i] = nextIndex;
+}
+
+/****************************************************************************
+* Function : ReplaceChar
+* Description: This function replaces the character stored in
+* slidingWindow[charIndex] with the one specified by
+* replacement. The hash table entries effected by the
+* replacement are also corrected.
+* Parameters : charIndex - sliding window index of the character to be
+* removed from the linked list.
+* Effects : slidingWindow[charIndex] is replaced by replacement. Old
+* hash entries for strings containing slidingWindow[charIndex]
+* are removed and new ones are added.
+* Returned : NONE
+****************************************************************************/
+void ReplaceChar(unsigned int charIndex, unsigned char replacement)
+{
+ unsigned int firstIndex, i;
+
+ if (charIndex < MAX_UNCODED)
+ {
+ firstIndex = (WINDOW_SIZE + charIndex) - MAX_UNCODED;
+ }
+ else
+ {
+ firstIndex = charIndex - MAX_UNCODED;
+ }
+
+ /* remove all hash entries containing character at char index */
+ for (i = 0; i < (MAX_UNCODED + 1); i++)
+ {
+ RemoveString(Wrap((firstIndex + i), WINDOW_SIZE));
+ }
+
+ slidingWindow[charIndex] = replacement;
+
+ /* add all hash entries containing character at char index */
+ for (i = 0; i < (MAX_UNCODED + 1); i++)
+ {
+ AddString(Wrap((firstIndex + i), WINDOW_SIZE));
+ }
+}
diff --git a/test/compression/lzlocal.h b/test/compression/lzlocal.h
new file mode 100644
index 0000000..876de91
--- /dev/null
+++ b/test/compression/lzlocal.h
@@ -0,0 +1,123 @@
+/***************************************************************************
+* Lempel, Ziv, Storer, and Szymanski Encoding and Decoding
+*
+* File : lzlocal.h
+* Purpose : Internal headers for LZSS encode and decode routines.
+* Contains the prototypes to be used by the different match
+* finding algorithms.
+* Author : Michael Dipperstein
+* Date : February 18, 2004
+*
+****************************************************************************
+* UPDATES
+*
+* $Id: lzlocal.h,v 1.5 2007/09/20 04:34:25 michael Exp $
+* $Log: lzlocal.h,v $
+* Revision 1.5 2007/09/20 04:34:25 michael
+* Changes required for LGPL v3.
+*
+* Revision 1.4 2006/12/26 04:09:09 michael
+* Updated e-mail address and minor text clean-up.
+*
+* Revision 1.3 2005/12/28 05:58:52 michael
+* Add Wrap macro to replace mod when value is less than twice the limit.
+*
+* Revision 1.2 2004/11/08 05:54:18 michael
+* 1. Split encode and decode routines for smarter linking
+* 2. Renamed lzsample.c sample.c to match my other samples
+* 3. Makefile now builds code as libraries for better LGPL compliance.
+*
+* Revision 1.1 2004/02/22 17:32:40 michael
+* Initial revision of header files for sliding window search implementations.
+*
+*
+****************************************************************************
+*
+* LZSS: An ANSI C LZSS Encoding/Decoding Routine
+* Copyright (C) 2004-2007 by
+* Michael Dipperstein (mdipper@alumni.engr.ucsb.edu)
+*
+* This file is part of the lzss library.
+*
+* The lzss library is free software; you can redistribute it and/or
+* modify it under the terms of the GNU Lesser General Public License as
+* published by the Free Software Foundation; either version 3 of the
+* License, or (at your option) any later version.
+*
+* The lzss library is distributed in the hope that it will be useful, but
+* WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
+* General Public License for more details.
+*
+* You should have received a copy of the GNU Lesser General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*
+***************************************************************************/
+#ifndef _LZSS_LOCAL_H
+#define _LZSS_LOCAL_H
+
+/***************************************************************************
+* INCLUDED FILES
+***************************************************************************/
+#include <limits.h>
+
+/***************************************************************************
+* CONSTANTS
+***************************************************************************/
+#define FALSE 0
+#define TRUE 1
+
+#define OFFSET_BITS 12
+#define LENGTH_BITS 4
+
+#if (((1 << (OFFSET_BITS + LENGTH_BITS)) - 1) > UINT_MAX)
+#error "Size of encoded data must not exceed the size of an unsigned int"
+#endif
+
+/* We want a sliding window*/
+#define WINDOW_SIZE (1 << OFFSET_BITS)
+
+/* maximum match length not encoded and maximum length encoded (4 bits) */
+#define MAX_UNCODED 2
+#define MAX_CODED ((1 << LENGTH_BITS) + MAX_UNCODED)
+
+#define ENCODED 0 /* encoded string */
+#define UNCODED 1 /* unencoded character */
+
+/***************************************************************************
+* TYPE DEFINITIONS
+***************************************************************************/
+
+/***************************************************************************
+* This data structure stores an encoded string in (offset, length) format.
+* The actual encoded string is stored using OFFSET_BITS for the offset and
+* LENGTH_BITS for the length.
+***************************************************************************/
+typedef struct encoded_string_t
+{
+ unsigned int offset; /* offset to start of longest match */
+ unsigned int length; /* length of longest match */
+} encoded_string_t;
+
+/***************************************************************************
+* MACROS
+***************************************************************************/
+/* wraps array index within array bounds (assumes value < 2 * limit) */
+#if 0
+#define Wrap(value, limit) (((value) < (limit)) ? (value) : ((value) - (limit)))
+#else
+#define Wrap(value, limit) ((unsigned int)(value) % (unsigned int)(limit))
+#endif
+
+/***************************************************************************
+* PROTOTYPES
+***************************************************************************/
+void InitializeSearchStructures(void);
+
+/* XL: no struct return */
+void FindMatch(encoded_string_t * /*out*/,
+ unsigned int windowHead, unsigned int uncodedHead);
+
+void ReplaceChar(unsigned int charIndex, unsigned char replacement);
+
+#endif /* ndef _LZSS_LOCAL_H */
diff --git a/test/compression/lzss.h b/test/compression/lzss.h
new file mode 100644
index 0000000..990d893
--- /dev/null
+++ b/test/compression/lzss.h
@@ -0,0 +1,96 @@
+/***************************************************************************
+* Lempel, Ziv, Storer, and Szymanski Encoding and Decoding
+*
+* File : lzss.h
+* Purpose : Header for LZSS encode and decode routines. Contains the
+* prototypes to be used by programs linking to the LZSS
+* library.
+* Author : Michael Dipperstein
+* Date : February 21, 2004
+*
+****************************************************************************
+* UPDATES
+*
+* $Id: lzss.h,v 1.5 2007/09/20 04:34:25 michael Exp $
+* $Log: lzss.h,v $
+* Revision 1.5 2007/09/20 04:34:25 michael
+* Changes required for LGPL v3.
+*
+* Revision 1.4 2006/12/26 04:09:09 michael
+* Updated e-mail address and minor text clean-up.
+*
+* Revision 1.3 2004/11/13 22:51:00 michael
+* Provide distinct names for by file and by name functions and add some
+* comments to make their usage clearer.
+*
+* Revision 1.2 2004/11/08 05:54:18 michael
+* 1. Split encode and decode routines for smarter linking
+* 2. Renamed lzsample.c sample.c to match my other samples
+* 3. Makefile now builds code as libraries for better LGPL compliance.
+*
+* Revision 1.1 2004/02/22 17:37:50 michael
+* Initial revision of headers for encode and decode routines.
+*
+*
+****************************************************************************
+*
+* LZSS: An ANSI C LZSS Encoding/Decoding Routine
+* Copyright (C) 2004, 2006, 2007 by
+* Michael Dipperstein (mdipper@alumni.engr.ucsb.edu)
+*
+* This file is part of the lzss library.
+*
+* The lzss library is free software; you can redistribute it and/or
+* modify it under the terms of the GNU Lesser General Public License as
+* published by the Free Software Foundation; either version 3 of the
+* License, or (at your option) any later version.
+*
+* The lzss library is distributed in the hope that it will be useful, but
+* WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
+* General Public License for more details.
+*
+* You should have received a copy of the GNU Lesser General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*
+***************************************************************************/
+#ifndef _LZSS_H
+#define _LZSS_H
+
+/***************************************************************************
+* INCLUDED FILES
+***************************************************************************/
+#include <stdio.h>
+
+/***************************************************************************
+* MACROS
+***************************************************************************/
+/* macros for compatibility with older library */
+#define EncodeLZSS(in, out) EncodeLZSSByName((in), (out))
+#define DecodeLZSS(in, out) DecodeLZSSByName((in), (out))
+
+/***************************************************************************
+* PROTOTYPES
+***************************************************************************/
+/***************************************************************************
+* LZSS encoding and decoding prototypes for functions with file name
+* parameters. Provide these functions with name of the file to be
+* encoded/decoded (inFile) and the name of the target file (outFile).
+* These functions return EXIT_SUCCESS or EXIT_FAILURE.
+***************************************************************************/
+int EncodeLZSSByName(char *inFile, char *outFile);
+int DecodeLZSSByName(char *inFile, char *outFile);
+
+/***************************************************************************
+* LZSS encoding and decoding prototypes for functions with file pointer
+* parameters. Provide these functions with a pointer to the open binary
+* file to be encoded/decoded (fpIn) and pointer to the open binary target
+* file (fpOut). It is the job of the function caller to open the files
+* prior to callings these functions and to close the file after these
+* functions have been called.
+* These functions return EXIT_SUCCESS or EXIT_FAILURE.
+***************************************************************************/
+int EncodeLZSSByFile(FILE *fpIn, FILE *fpOut);
+int DecodeLZSSByFile(FILE *fpIn, FILE *fpOut);
+
+#endif /* ndef _LZSS_H */
diff --git a/test/compression/lzssmain.c b/test/compression/lzssmain.c
new file mode 100644
index 0000000..30fc5ab
--- /dev/null
+++ b/test/compression/lzssmain.c
@@ -0,0 +1,278 @@
+/***************************************************************************
+* Sample Program Using LZSS Library
+*
+* File : sample.c
+* Purpose : Demonstrate usage of LZSS library
+* Author : Michael Dipperstein
+* Date : February 21, 2004
+*
+****************************************************************************
+* UPDATES
+*
+* Revision 1.1 2004/02/22 17:36:30 michael
+* Initial revision. Mostly code form old lzss.c.
+*
+* 11/07/04 Name changed to sample.c
+*
+* $Id: sample.c,v 1.5 2007/09/20 04:34:45 michael Exp $
+* $Log: sample.c,v $
+* Revision 1.5 2007/09/20 04:34:45 michael
+* Replace getopt with optlist.
+* Changes required for LGPL v3.
+*
+* Revision 1.4 2006/12/26 04:09:09 michael
+* Updated e-mail address and minor text clean-up.
+*
+* Revision 1.3 2004/11/13 22:51:01 michael
+* Provide distinct names for by file and by name functions and add some
+* comments to make their usage clearer.
+*
+* Revision 1.2 2004/11/11 14:37:26 michael
+* Open input and output files as binary.
+*
+* Revision 1.1 2004/11/08 05:54:18 michael
+* 1. Split encode and decode routines for smarter linking
+* 2. Renamed lzsample.c sample.c to match my other samples
+* 3. Makefile now builds code as libraries for better LGPL compliance.
+*
+*
+****************************************************************************
+*
+* SAMPLE: Sample usage of LZSS Library
+* Copyright (C) 2004, 2006, 2007 by
+* Michael Dipperstein (mdipper@alumni.engr.ucsb.edu)
+*
+* This file is part of the lzss library.
+*
+* The lzss library is free software; you can redistribute it and/or
+* modify it under the terms of the GNU Lesser General Public License as
+* published by the Free Software Foundation; either version 3 of the
+* License, or (at your option) any later version.
+*
+* The lzss library is distributed in the hope that it will be useful, but
+* WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
+* General Public License for more details.
+*
+* You should have received a copy of the GNU Lesser General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*
+***************************************************************************/
+
+/***************************************************************************
+* INCLUDED FILES
+***************************************************************************/
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "lzss.h"
+#include "optlist.h"
+
+/***************************************************************************
+* TYPE DEFINITIONS
+***************************************************************************/
+typedef enum
+{
+ ENCODE,
+ DECODE
+} MODES;
+
+/***************************************************************************
+* CONSTANTS
+***************************************************************************/
+char *RemovePath(char *fullPath);
+
+/***************************************************************************
+* GLOBAL VARIABLES
+***************************************************************************/
+
+/***************************************************************************
+* PROTOTYPES
+***************************************************************************/
+
+/***************************************************************************
+* FUNCTIONS
+***************************************************************************/
+
+/****************************************************************************
+* Function : main
+* Description: This is the main function for this program, it validates
+* the command line input and, if valid, it will either
+* encode a file using the LZSS algorithm or decode a
+* file encoded with the LZSS algorithm.
+* Parameters : argc - number of parameters
+* argv - parameter list
+* Effects : Encodes/Decodes input file
+* Returned : EXIT_SUCCESS for success, otherwise EXIT_FAILURE.
+****************************************************************************/
+int main(int argc, char *argv[])
+{
+ option_t *optList, *thisOpt;
+ FILE *fpIn, *fpOut; /* pointer to open input & output files */
+ MODES mode;
+
+ /* initialize data */
+ fpIn = NULL;
+ fpOut = NULL;
+ mode = ENCODE;
+
+ /* parse command line */
+ optList = GetOptList(argc, argv, "cdi:o:h?");
+ thisOpt = optList;
+
+ while (thisOpt != NULL)
+ {
+ switch(thisOpt->option)
+ {
+ case 'c': /* compression mode */
+ mode = ENCODE;
+ break;
+
+ case 'd': /* decompression mode */
+ mode = DECODE;
+ break;
+
+ case 'i': /* input file name */
+ if (fpIn != NULL)
+ {
+ fprintf(stderr, "Multiple input files not allowed.\n");
+ fclose(fpIn);
+
+ if (fpOut != NULL)
+ {
+ fclose(fpOut);
+ }
+
+ FreeOptList(optList);
+ exit(EXIT_FAILURE);
+ }
+
+ /* open input file as binary */
+ fpIn = fopen(thisOpt->argument, "rb");
+ if (fpIn == NULL)
+ {
+ perror("Opening input file");
+
+ if (fpOut != NULL)
+ {
+ fclose(fpOut);
+ }
+
+ FreeOptList(optList);
+ exit(EXIT_FAILURE);
+ }
+ break;
+
+ case 'o': /* output file name */
+ if (fpOut != NULL)
+ {
+ fprintf(stderr, "Multiple output files not allowed.\n");
+ fclose(fpOut);
+
+ if (fpIn != NULL)
+ {
+ fclose(fpIn);
+ }
+
+ FreeOptList(optList);
+ exit(EXIT_FAILURE);
+ }
+
+ /* open output file as binary */
+ fpOut = fopen(thisOpt->argument, "wb");
+ if (fpOut == NULL)
+ {
+ perror("Opening output file");
+
+ if (fpIn != NULL)
+ {
+ fclose(fpIn);
+ }
+
+ FreeOptList(optList);
+ exit(EXIT_FAILURE);
+ }
+ break;
+
+ case 'h':
+ case '?':
+ printf("Usage: %s <options>\n\n", RemovePath(argv[0]));
+ printf("options:\n");
+ printf(" -c : Encode input file to output file.\n");
+ printf(" -d : Decode input file to output file.\n");
+ printf(" -i <filename> : Name of input file.\n");
+ printf(" -o <filename> : Name of output file.\n");
+ printf(" -h | ? : Print out command line options.\n\n");
+ printf("Default: %s -c -i stdin -o stdout\n",
+ RemovePath(argv[0]));
+
+ FreeOptList(optList);
+ return(EXIT_SUCCESS);
+ }
+
+ optList = thisOpt->next;
+ free(thisOpt);
+ thisOpt = optList;
+ }
+
+
+ /* use stdin/out if no files are provided */
+ if (fpIn == NULL)
+ {
+ fpIn = stdin;
+ }
+
+
+ if (fpOut == NULL)
+ {
+ fpOut = stdout;
+ }
+
+ /* we have valid parameters encode or decode */
+ if (mode == ENCODE)
+ {
+ EncodeLZSSByFile(fpIn, fpOut);
+ }
+ else
+ {
+ DecodeLZSSByFile(fpIn, fpOut);
+ }
+
+ /* remember to close files */
+ fclose(fpIn);
+ fclose(fpOut);
+ return EXIT_SUCCESS;
+}
+
+/***************************************************************************
+* Function : RemovePath
+* Description: This is function accepts a pointer to the name of a file
+* along with path information and returns a pointer to the
+* character that is not part of the path.
+* Parameters : fullPath - pointer to an array of characters containing
+* a file name and possible path modifiers.
+* Effects : None
+* Returned : Returns a pointer to the first character after any path
+* information.
+***************************************************************************/
+char *RemovePath(char *fullPath)
+{
+ int i;
+ char *start, *tmp; /* start of file name */
+ const char delim[3] = {'\\', '/', ':'}; /* path deliminators */
+
+ start = fullPath;
+
+ /* find the first character after all file path delimiters */
+ for (i = 0; i < 3; i++)
+ {
+ tmp = strrchr(start, delim[i]);
+
+ if (tmp != NULL)
+ {
+ start = tmp + 1;
+ }
+ }
+
+ return start;
+}
diff --git a/test/compression/lzvars.c b/test/compression/lzvars.c
new file mode 100644
index 0000000..cc82a6f
--- /dev/null
+++ b/test/compression/lzvars.c
@@ -0,0 +1,76 @@
+/***************************************************************************
+* Lempel, Ziv, Storer, and Szymanski Global Variables
+*
+* File : lzvars.c
+* Purpose : Declare global variables used lzss coding
+* compress/decompress files.
+* Author : Michael Dipperstein
+* Date : November 07, 2003
+*
+****************************************************************************
+* UPDATES
+*
+* $Id: lzvars.c,v 1.3 2007/09/20 04:34:25 michael Exp $
+* $Log: lzvars.c,v $
+* Revision 1.3 2007/09/20 04:34:25 michael
+* Changes required for LGPL v3.
+*
+* Revision 1.2 2006/12/26 04:09:09 michael
+* Updated e-mail address and minor text clean-up.
+*
+* Revision 1.1 2004/11/08 05:54:18 michael
+* 1. Split encode and decode routines for smarter linking
+* 2. Renamed lzsample.c sample.c to match my other samples
+* 3. Makefile now builds code as libraries for better LGPL compliance.
+*
+*
+****************************************************************************
+*
+* LZvars: Global variables used in ANSI C LZSS Encoding/Decoding Routines
+* Copyright (C) 2003, 2004, 2006, 2007 by
+* Michael Dipperstein (mdipper@alumni.engr.ucsb.edu)
+*
+* This file is part of the lzss library.
+*
+* The lzss library is free software; you can redistribute it and/or
+* modify it under the terms of the GNU Lesser General Public License as
+* published by the Free Software Foundation; either version 3 of the
+* License, or (at your option) any later version.
+*
+* The lzss library is distributed in the hope that it will be useful, but
+* WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
+* General Public License for more details.
+*
+* You should have received a copy of the GNU Lesser General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*
+***************************************************************************/
+
+/***************************************************************************
+* INCLUDED FILES
+***************************************************************************/
+#include "lzlocal.h"
+
+/***************************************************************************
+* TYPE DEFINITIONS
+***************************************************************************/
+
+/***************************************************************************
+* CONSTANTS
+***************************************************************************/
+
+/***************************************************************************
+* GLOBAL VARIABLES
+***************************************************************************/
+/* cyclic buffer sliding window of already read characters */
+unsigned char slidingWindow[WINDOW_SIZE];
+unsigned char uncodedLookahead[MAX_CODED];
+
+/***************************************************************************
+* PROTOTYPES
+***************************************************************************/
+
+/***************************************************************************
+* FUNCTIONS
+***************************************************************************/
diff --git a/test/compression/lzw.h b/test/compression/lzw.h
new file mode 100644
index 0000000..89255f0
--- /dev/null
+++ b/test/compression/lzw.h
@@ -0,0 +1,70 @@
+/***************************************************************************
+* Header for Lempel-Ziv-Welch Encoding and Decoding Library
+*
+* File : arcode.h
+* Purpose : Provides prototypes for functions that use Lempel-Ziv-Welch
+* coding to encode/decode files.
+* Author : Michael Dipperstein
+* Date : January 30, 2004
+*
+****************************************************************************
+* UPDATES
+*
+* $Id: lzw.h,v 1.3 2007/09/29 01:28:09 michael Exp $
+* $Log: lzw.h,v $
+* Revision 1.3 2007/09/29 01:28:09 michael
+* Changes required for LGPL v3.
+*
+* Revision 1.2 2005/03/27 21:12:03 michael
+* Update e-mail address in copyright notice.
+*
+* Revision 1.1.1.1 2005/02/21 03:35:34 michael
+* Initial Release
+*
+****************************************************************************
+*
+* LZW: An ANSI C Lempel-Ziv-Welch Encoding/Decoding Routines
+* Copyright (C) 2005, 2007 by
+* Michael Dipperstein (mdipper@alumni.engr.ucsb.edu)
+*
+* This file is part of the lzw library.
+*
+* The lzw library is free software; you can redistribute it and/or
+* modify it under the terms of the GNU Lesser General Public License as
+* published by the Free Software Foundation; either version 3 of the
+* License, or (at your option) any later version.
+*
+* The lzw library is distributed in the hope that it will be useful, but
+* WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
+* General Public License for more details.
+*
+* You should have received a copy of the GNU Lesser General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*
+***************************************************************************/
+
+#ifndef _LZW_H_
+#define _LZW_H_
+
+/***************************************************************************
+* CONSTANTS
+***************************************************************************/
+#ifndef FALSE
+#define FALSE 0
+#endif
+
+#ifndef TRUE
+#define TRUE 1
+#endif
+
+/***************************************************************************
+* PROTOTYPES
+***************************************************************************/
+ /* encode inFile */
+int LZWEncodeFile(char *inFile, char *outFile);
+
+/* decode inFile*/
+int LZWDecodeFile(char *inFile, char *outFile);
+
+#endif /* ndef _ARCODE_H_ */
diff --git a/test/compression/lzwdecode.c b/test/compression/lzwdecode.c
new file mode 100644
index 0000000..1f8b5ec
--- /dev/null
+++ b/test/compression/lzwdecode.c
@@ -0,0 +1,306 @@
+/***************************************************************************
+* Lempel-Ziv-Welch Decoding Functions
+*
+* File : lzwdecode.c
+* Purpose : Provides a function for decoding Lempel-Ziv-Welch encoded
+* file streams
+* Author : Michael Dipperstein
+* Date : January 30, 2005
+*
+****************************************************************************
+* UPDATES
+*
+* $Id: lzwdecode.c,v 1.2 2007/09/29 01:28:09 michael Exp $
+* $Log: lzwdecode.c,v $
+* Revision 1.2 2007/09/29 01:28:09 michael
+* Changes required for LGPL v3.
+*
+* Revision 1.1 2005/04/09 03:11:22 michael
+* Separated encode and decode routines into two different files in order to
+* make future enhancements easier.
+*
+* Revision 1.4 2005/03/27 15:56:47 michael
+* Use variable length code words.
+* Include new e-mail addres.
+*
+* Revision 1.3 2005/03/10 14:26:58 michael
+* User variable names that match discription in web page.
+*
+* Revision 1.2 2005/03/10 05:44:02 michael
+* Minimize the size of the dictionary.
+*
+* Revision 1.1.1.1 2005/02/21 03:35:34 michael
+* Initial Release
+*
+****************************************************************************
+*
+* LZW: An ANSI C Lempel-Ziv-Welch Encoding/Decoding Routines
+* Copyright (C) 2005, 2007 by
+* Michael Dipperstein (mdipper@alumni.engr.ucsb.edu)
+*
+* This file is part of the lzw library.
+*
+* The lzw library is free software; you can redistribute it and/or
+* modify it under the terms of the GNU Lesser General Public License as
+* published by the Free Software Foundation; either version 3 of the
+* License, or (at your option) any later version.
+*
+* The lzw library is distributed in the hope that it will be useful, but
+* WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
+* General Public License for more details.
+*
+* You should have received a copy of the GNU Lesser General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*
+***************************************************************************/
+
+/***************************************************************************
+* INCLUDED FILES
+***************************************************************************/
+#include <stdio.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <string.h>
+#include "lzw.h"
+#include "bitfile.h"
+
+/***************************************************************************
+* TYPE DEFINITIONS
+***************************************************************************/
+typedef struct
+{
+ unsigned char suffixChar; /* last char in encoded string */
+ unsigned int prefixCode; /* code for remaining chars in string */
+} decode_dictionary_t;
+
+/***************************************************************************
+* CONSTANTS
+***************************************************************************/
+
+#define EMPTY -1
+
+#define MIN_CODE_LEN 9 /* min # bits in a code word */
+#define MAX_CODE_LEN 15 /* max # bits in a code word */
+
+#define FIRST_CODE (1 << CHAR_BIT) /* value of 1st string code */
+
+#define MAX_CODES (1 << MAX_CODE_LEN)
+
+#define DICT_SIZE (MAX_CODES - FIRST_CODE)
+
+#if (MIN_CODE_LEN <= CHAR_BIT)
+#error Code words must be larger than 1 character
+#endif
+
+#if (MAX_CODE_LEN > (2 * CHAR_BIT))
+#error Code words larger than 2 characters require a re-write of GetCodeWord\
+ and PutCodeWord
+#endif
+
+#if ((MAX_CODES - 1) > INT_MAX)
+#error There cannot be more codes than can fit in an integer
+#endif
+
+/***************************************************************************
+* MACROS
+***************************************************************************/
+#define CODE_MS_BITS(BITS) ((BITS) - CHAR_BIT)
+#define MS_BITS_MASK(BITS) (UCHAR_MAX << (CHAR_BIT - CODE_MS_BITS(BITS)))
+#define CURRENT_MAX_CODES(BITS) (1 << (BITS))
+
+/***************************************************************************
+* GLOBAL VARIABLES
+***************************************************************************/
+
+/* dictionary of string the code word is the dictionary index */
+decode_dictionary_t dictionary[DICT_SIZE];
+char currentCodeLen;
+
+/***************************************************************************
+* PROTOTYPES
+***************************************************************************/
+unsigned char DecodeRecursive(unsigned int code, FILE *fpOut);
+
+/* read encoded data */
+int GetCodeWord(bit_file_t *bfpIn);
+
+/***************************************************************************
+* FUNCTIONS
+***************************************************************************/
+
+/***************************************************************************
+* Function : LZWDecodeFile
+* Description: This routine reads an input file 1 encoded string at a
+* time and decodes it using the LZW algorithm.
+* Parameters : inFile - Name of file to decode
+* outFile - Name of file to write decoded output to
+* Effects : File is decoded using the LZW algorithm with CODE_LEN
+* codes.
+* Returned : TRUE for success, otherwise FALSE.
+***************************************************************************/
+int LZWDecodeFile(char *inFile, char *outFile)
+{
+ bit_file_t *bfpIn; /* encoded input */
+ FILE *fpOut; /* decoded output */
+
+ unsigned int nextCode; /* value of next code */
+ unsigned int lastCode; /* last decoded code word */
+ unsigned int code; /* code word to decode */
+ unsigned char c; /* last decoded character */
+
+ /* open input and output files */
+ if (NULL == (bfpIn = BitFileOpen(inFile, BF_READ)))
+ {
+ perror(inFile);
+ return FALSE;
+ }
+
+ if (NULL == outFile)
+ {
+ fpOut = stdout;
+ }
+ else
+ {
+ if (NULL == (fpOut = fopen(outFile, "wb")))
+ {
+ BitFileClose(bfpIn);
+ perror(outFile);
+ return FALSE;
+ }
+ }
+
+ /* start with 9 bit code words */
+ currentCodeLen = 9;
+
+ /* initialize for decoding */
+ nextCode = FIRST_CODE; /* code for next (first) string */
+
+ /* first code from file must be a character. use it for initial values */
+ lastCode = GetCodeWord(bfpIn);
+ c = lastCode;
+ fputc(lastCode, fpOut);
+
+ /* decode rest of file */
+ while ((code = GetCodeWord(bfpIn)) != EOF)
+ {
+
+ /* look for code length increase marker */
+ if (((CURRENT_MAX_CODES(currentCodeLen) - 1) == code) &&
+ (currentCodeLen < MAX_CODE_LEN))
+ {
+ currentCodeLen++;
+ code = GetCodeWord(bfpIn);
+ }
+
+ if (code < nextCode)
+ {
+ /* we have a known code. decode it */
+ c = DecodeRecursive(code, fpOut);
+ }
+ else
+ {
+ /***************************************************************
+ * We got a code that's not in our dictionary. This must be due
+ * to the string + char + string + char + string exception.
+ * Build the decoded string using the last character + the
+ * string from the last code.
+ ***************************************************************/
+ unsigned char tmp;
+
+ tmp = c;
+ c = DecodeRecursive(lastCode, fpOut);
+ fputc(tmp, fpOut);
+ }
+
+ /* if room, add new code to the dictionary */
+ if (nextCode < MAX_CODES)
+ {
+ dictionary[nextCode - FIRST_CODE].prefixCode = lastCode;
+ dictionary[nextCode - FIRST_CODE].suffixChar = c;
+ nextCode++;
+ }
+
+ /* save character and code for use in unknown code word case */
+ lastCode = code;
+ }
+
+ fclose(fpOut);
+ BitFileClose(bfpIn);
+ return TRUE;
+}
+
+/***************************************************************************
+* Function : DecodeRecursive
+* Description: This function uses the dictionary to decode a code word
+* into the string it represents and write it to the output
+* file. The string is actually built in reverse order and
+* recursion is used to write it out in the correct order.
+* Parameters : code - the code word to decode
+* fpOut - the file that the decoded code word is written to
+* Effects : Decoded code word is written to a file
+* Returned : The first character in the decoded string
+***************************************************************************/
+unsigned char DecodeRecursive(unsigned int code, FILE *fpOut)
+{
+ unsigned char c;
+ unsigned char firstChar;
+
+ if (code >= FIRST_CODE)
+ {
+ /* code word is string + c */
+ c = dictionary[code - FIRST_CODE].suffixChar;
+ code = dictionary[code - FIRST_CODE].prefixCode;
+
+ /* evaluate new code word for remaining string */
+ firstChar = DecodeRecursive(code, fpOut);
+ }
+ else
+ {
+ /* code word is just c */
+ c = code;
+ firstChar = code;
+ }
+
+ fputc(c, fpOut);
+
+ return firstChar;
+}
+
+/***************************************************************************
+* Function : GetCodeWord
+* Description: This function reads and returns a code word from an
+* encoded file. In order to deal with endian issue the
+* code word is read least significant byte followed by the
+* remaining bits.
+* Parameters : bfpIn - bit file containing the encoded data
+* Effects : code word is read from encoded input
+* Returned : The next code word in the encoded file. EOF if the end
+* of file has been reached.
+*
+* NOTE: If the code word contains more than 16 bits, this routine should
+* be modified to read in all the bytes from least significant to
+* most significant followed by any left over bits.
+***************************************************************************/
+int GetCodeWord(bit_file_t *bfpIn)
+{
+ unsigned char byte;
+ int code;
+
+ /* get LS character */
+ if ((code = BitFileGetChar(bfpIn)) == EOF)
+ {
+ return EOF;
+ }
+
+
+ /* get remaining bits */
+ if (BitFileGetBits(bfpIn, &byte, CODE_MS_BITS(currentCodeLen)) == EOF)
+ {
+ return EOF;
+ }
+
+ code |= ((int)byte) << CODE_MS_BITS(currentCodeLen);
+
+ return code;
+}
diff --git a/test/compression/lzwencode.c b/test/compression/lzwencode.c
new file mode 100644
index 0000000..efd4a12
--- /dev/null
+++ b/test/compression/lzwencode.c
@@ -0,0 +1,465 @@
+/***************************************************************************
+* Lempel-Ziv-Welch Encoding Functions
+*
+* File : lzwencode.c
+* Purpose : Provides a function for Lempel-Ziv-Welch encoding of file
+* streams
+* Author : Michael Dipperstein
+* Date : January 30, 2005
+*
+****************************************************************************
+* UPDATES
+*
+* $Id: lzwencode.c,v 1.3 2007/09/29 01:28:09 michael Exp $
+* $Log: lzwencode.c,v $
+* Revision 1.3 2007/09/29 01:28:09 michael
+* Changes required for LGPL v3.
+*
+* Revision 1.2 2005/04/21 04:26:57 michael
+* Encoding dictionary is now built using a binary tree.
+*
+* Revision 1.1 2005/04/09 03:11:22 michael
+* Separated encode and decode routines into two different files in order to
+* make future enhancements easier.
+*
+* Revision 1.4 2005/03/27 15:56:47 michael
+* Use variable length code words.
+* Include new e-mail addres.
+*
+* Revision 1.3 2005/03/10 14:26:58 michael
+* User variable names that match discription in web page.
+*
+* Revision 1.2 2005/03/10 05:44:02 michael
+* Minimize the size of the dictionary.
+*
+* Revision 1.1.1.1 2005/02/21 03:35:34 michael
+* Initial Release
+*
+****************************************************************************
+*
+* LZW: An ANSI C Lempel-Ziv-Welch Encoding/Decoding Routines
+* Copyright (C) 2005, 2007 by
+* Michael Dipperstein (mdipper@alumni.engr.ucsb.edu)
+*
+* This file is part of the lzw library.
+*
+* The lzw library is free software; you can redistribute it and/or
+* modify it under the terms of the GNU Lesser General Public License as
+* published by the Free Software Foundation; either version 3 of the
+* License, or (at your option) any later version.
+*
+* The lzw library is distributed in the hope that it will be useful, but
+* WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
+* General Public License for more details.
+*
+* You should have received a copy of the GNU Lesser General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*
+***************************************************************************/
+
+/***************************************************************************
+* INCLUDED FILES
+***************************************************************************/
+#include <stdio.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <string.h>
+#include "lzw.h"
+#include "bitfile.h"
+
+/***************************************************************************
+* TYPE DEFINITIONS
+***************************************************************************/
+/* node in dictionary tree */
+typedef struct dict_node_t
+{
+ unsigned int codeWord; /* code word for this entry */
+ unsigned char suffixChar; /* last char in encoded string */
+ unsigned int prefixCode; /* code for remaining chars in string */
+
+ /* pointer to child nodes */
+ struct dict_node_t *left; /* child with < key */
+ struct dict_node_t *right; /* child with >= key */
+} dict_node_t;
+
+/***************************************************************************
+* CONSTANTS
+***************************************************************************/
+
+#define MIN_CODE_LEN 9 /* min # bits in a code word */
+#define MAX_CODE_LEN 15 /* max # bits in a code word */
+
+#define FIRST_CODE (1 << CHAR_BIT) /* value of 1st string code */
+
+#define MAX_CODES (1 << MAX_CODE_LEN)
+
+#if (MIN_CODE_LEN <= CHAR_BIT)
+#error Code words must be larger than 1 character
+#endif
+
+#if (MAX_CODE_LEN > (2 * CHAR_BIT))
+#error Code words larger than 2 characters require a re-write of GetCodeWord\
+ and PutCodeWord
+#endif
+
+#if ((MAX_CODES - 1) > INT_MAX)
+#error There cannot be more codes than can fit in an integer
+#endif
+
+/***************************************************************************
+* MACROS
+***************************************************************************/
+#define CODE_MS_BITS(BITS) ((BITS) - CHAR_BIT)
+#define MS_BITS_MASK(BITS) (UCHAR_MAX << (CHAR_BIT - CODE_MS_BITS(BITS)))
+#define CURRENT_MAX_CODES(BITS) (1 << (BITS))
+
+/***************************************************************************
+* GLOBAL VARIABLES
+***************************************************************************/
+
+/* dictionary of string codes (encode process uses a hash key) */
+/* XL hack: reuse that of lzwdecode.c */
+extern char currentCodeLen;
+
+/***************************************************************************
+* PROTOTYPES
+***************************************************************************/
+
+/* dictionary tree node create/free */
+dict_node_t *MakeNode(unsigned int codeWord,
+ unsigned int prefixCode, unsigned char suffixChar);
+void FreeTree(dict_node_t *node);
+
+/* searches tree for matching dictionary entry */
+dict_node_t *FindDictionaryEntry(dict_node_t *root, int prefixCode,
+ unsigned char c);
+
+/* makes key from prefix code and character */
+unsigned int MakeKey(unsigned int prefixCode, unsigned char suffixChar);
+
+/* write encoded data */
+void PutCodeWord(int code, bit_file_t *bfpOut);
+
+/***************************************************************************
+* FUNCTIONS
+***************************************************************************/
+
+/***************************************************************************
+* Function : LZWEncodeFile
+* Description: This routine reads an input file 1 character at a time and
+* writes out an LZW encoded version of that file.
+* Parameters : inFile - Name of file to encode
+* outFile - Name of file to write encoded output to
+* Effects : File is encoded using the LZW algorithm with CODE_LEN
+* codes.
+* Returned : TRUE for success, otherwise FALSE.
+***************************************************************************/
+int LZWEncodeFile(char *inFile, char *outFile)
+{
+ FILE *fpIn; /* uncoded input */
+ bit_file_t *bfpOut; /* encoded output */
+
+ int code; /* code for current string */
+ unsigned int nextCode; /* next available code index */
+ int c; /* character to add to string */
+
+ dict_node_t *dictRoot; /* root of dictionary tree */
+ dict_node_t *node; /* node of dictionary tree */
+
+ /* open input and output files */
+ if (NULL == (fpIn = fopen(inFile, "rb")))
+ {
+ perror(inFile);
+ return FALSE;
+ }
+
+ if (NULL == outFile)
+ {
+ bfpOut = MakeBitFile(stdout, BF_WRITE);
+ }
+ else
+ {
+ if (NULL == (bfpOut = BitFileOpen(outFile, BF_WRITE)))
+ {
+ fclose(fpIn);
+ perror(outFile);
+ return FALSE;
+ }
+ }
+
+ /* initialize dictionary as empty */
+ dictRoot = NULL;
+
+ /* start with 9 bit code words */
+ currentCodeLen = 9;
+
+ nextCode = FIRST_CODE; /* code for next (first) string */
+
+ /* now start the actual encoding process */
+
+ code = fgetc(fpIn); /* start with code string = first character */
+
+ /* create a tree root from 1st 2 character string */
+ if ((c = fgetc(fpIn)) != EOF)
+ {
+ /* special case for NULL root */
+ dictRoot = MakeNode(nextCode, code, c);
+ nextCode++;
+
+ /* write code for 1st char */
+ PutCodeWord(code, bfpOut);
+
+ /* new code is just 2nd char */
+ code = c;
+ }
+
+ /* now encode normally */
+ while ((c = fgetc(fpIn)) != EOF)
+ {
+ /* look for code + c in the dictionary */
+ node = FindDictionaryEntry(dictRoot, code, c);
+
+ if ((node->prefixCode == code) &&
+ (node->suffixChar == c))
+ {
+ /* code + c is in the dictionary, make it's code the new code */
+ code = node->codeWord;
+ }
+ else
+ {
+ /* code + c is not in the dictionary, add it if there's room */
+ if (nextCode < MAX_CODES)
+ {
+ dict_node_t *tmp;
+
+ tmp = MakeNode(nextCode, code, c);
+ nextCode++;
+
+ if (MakeKey(code, c) <
+ MakeKey(node->prefixCode, node->suffixChar))
+ {
+ node->left = tmp;
+ }
+ else
+ {
+ node->right = tmp;
+ }
+ }
+
+ /* are we using enough bits to write out this code word? */
+ if ((code >= (CURRENT_MAX_CODES(currentCodeLen) - 1)) &&
+ (currentCodeLen < MAX_CODE_LEN))
+ {
+ /* mark need for bigger code word with all ones */
+ PutCodeWord((CURRENT_MAX_CODES(currentCodeLen) - 1), bfpOut);
+ currentCodeLen++;
+ }
+
+ /* write out code for the string before c was added */
+ PutCodeWord(code, bfpOut);
+
+ /* new code is just c */
+ code = c;
+ }
+ }
+
+ /* no more input. write out last of the code. */
+ PutCodeWord(code, bfpOut);
+
+ fclose(fpIn);
+ BitFileClose(bfpOut);
+
+ /* free the dictionary */
+ if (dictRoot != NULL)
+ {
+ FreeTree(dictRoot);
+ }
+
+ return TRUE;
+}
+
+/***************************************************************************
+* Function : MakeKey
+* Description: This routine creates a simple key from a prefix code and
+* an appended character. The key may be used to establish
+* an order when building/searching a dictionary tree.
+* Parameters : prefixCode - code for all but the last character of a
+* string.
+* suffixChar - the last character of a string
+* Effects : None
+* Returned : Key built from string represented as a prefix + char. Key
+* format is {ms nibble of c} + prefix + {ls nibble of c}
+***************************************************************************/
+unsigned int MakeKey(unsigned int prefixCode, unsigned char suffixChar)
+{
+ unsigned int key;
+
+ /* position ms nibble */
+ key = suffixChar & 0xF0;
+ key <<= MAX_CODE_LEN;
+
+ /* include prefix code */
+ key |= (prefixCode << 4);
+
+ /* inclulde ls nibble */
+ key |= (suffixChar & 0x0F);
+
+ return key;
+}
+
+/***************************************************************************
+* Function : MakeNode
+* Description: This routine creates and initializes a dictionary entry
+* for a string and the code word that encodes it.
+* Parameters : codeWord - code word used to encode the string prefixCode +
+* suffixChar
+* prefixCode - code for all but the last character of a
+* string.
+* suffixChar - the last character of a string
+* Effects : Node is allocated for new dictionary entry
+* Returned : Pointer to newly allocated node
+***************************************************************************/
+dict_node_t *MakeNode(unsigned int codeWord,
+ unsigned int prefixCode, unsigned char suffixChar)
+{
+ dict_node_t *node;
+
+ node = malloc(sizeof(dict_node_t));
+
+ if (NULL != node)
+ {
+ node->codeWord = codeWord;
+ node->prefixCode = prefixCode;
+ node->suffixChar = suffixChar;
+
+ node->left = NULL;
+ node->right = NULL;
+ }
+ else
+ {
+ perror("allocating dictionary node");
+ exit(EXIT_FAILURE);
+ }
+
+ return node;
+}
+
+/***************************************************************************
+* Function : FreeTree
+* Description: This routine will free all nodes of a tree rooted at the
+* node past as a parameter.
+* Parameters : node - root of tree to free
+* Effects : frees allocated tree node from initial parameter down.
+* Returned : none
+***************************************************************************/
+void FreeTree(dict_node_t *node)
+{
+ /* free left branch */
+ if (node->left != NULL)
+ {
+ FreeTree(node->left);
+ }
+
+ /* free right branch */
+ if (node->right != NULL)
+ {
+ FreeTree(node->right);
+ }
+
+ /* free root */
+ free(node);
+}
+
+/***************************************************************************
+* Function : FindDictionaryEntry
+* Description: This routine searches the dictionary tree for an entry
+* with a matching string (prefix code + suffix character).
+* If one isn't found, the parent node for that string is
+* returned.
+* Parameters : prefixCode - code for the prefix of string
+* c - last character in string
+* Effects : None
+* Returned : If string is in dictionary, pointer to node containing
+* string, otherwise pointer to suitable parent node. NULL
+* is returned for an empty tree.
+***************************************************************************/
+dict_node_t *FindDictionaryEntry(dict_node_t *root, int prefixCode,
+ unsigned char c)
+{
+ unsigned int searchKey, key;
+
+ if (NULL == root)
+ {
+ return NULL;
+ }
+
+ searchKey = MakeKey(prefixCode, c); /* key of string to find */
+
+ while (1)
+ {
+ /* key of current node */
+ key = MakeKey(root->prefixCode, root->suffixChar);
+
+ if (key == searchKey)
+ {
+ /* current node contains string */
+ return root;
+ }
+ else if (searchKey < key)
+ {
+ if (NULL != root->left)
+ {
+ /* check left branch for string */
+ root = root->left;
+ }
+ else
+ {
+ /* string isn't in tree, it can be added as a left child */
+ return root;
+ }
+ }
+ else
+ {
+ if (NULL != root->right)
+ {
+ /* check right branch for string */
+ root = root->right;
+ }
+ else
+ {
+ /* string isn't in tree, it can be added as a right child */
+ return root;
+ }
+ }
+ }
+}
+
+/***************************************************************************
+* Function : PutCodeWord
+* Description: This function writes a code word from to an encoded file.
+* In order to deal with endian issue the code word is
+* written least significant byte followed by the remaining
+* bits.
+* Parameters : bfpOut - bit file containing the encoded data
+* code - code word to add to the encoded data
+* Effects : code word is written to the encoded output
+* Returned : None
+*
+* NOTE: If the code word contains more than 16 bits, this routine should
+* be modified to write out all the bytes from least significant to
+* most significant followed by any left over bits.
+***************************************************************************/
+void PutCodeWord(int code, bit_file_t *bfpOut)
+{
+ unsigned char byte;
+
+ /* write LS character */
+ byte = code & 0xFF;
+ BitFilePutChar(byte, bfpOut);
+
+ /* write remaining bits */
+ byte = (code >> CODE_MS_BITS(currentCodeLen)) &
+ MS_BITS_MASK(currentCodeLen);
+ BitFilePutBits(bfpOut, &byte, CODE_MS_BITS(currentCodeLen));
+}
diff --git a/test/compression/lzwmain.c b/test/compression/lzwmain.c
new file mode 100644
index 0000000..9f5a8b8
--- /dev/null
+++ b/test/compression/lzwmain.c
@@ -0,0 +1,260 @@
+/***************************************************************************
+* Sample Program Using LZW Encoding Library
+*
+* File : sample.c
+* Purpose : Demonstrate usage of LZW encoding library
+* Author : Michael Dipperstein
+* Date : January 30, 2005
+*
+****************************************************************************
+* UPDATES
+*
+* $Id: sample.c,v 1.4 2007/09/29 01:28:27 michael Exp $
+* $Log: sample.c,v $
+* Revision 1.4 2007/09/29 01:28:27 michael
+* Replace getopt with optlist.
+* Changes required for LGPL v3.
+*
+* Revision 1.3 2005/03/27 21:10:22 michael
+* Update e-mail address in copyright notice.
+*
+* Revision 1.2 2005/02/21 03:52:06 michael
+* Correct comments in headers.
+*
+* Revision 1.1.1.1 2005/02/21 03:35:34 michael
+* Initial Release
+*
+****************************************************************************
+*
+* SAMPLE: Sample usage of Lempel-Ziv-Welch Encoding Library
+* Copyright (C) 2005, 2007 by
+* Michael Dipperstein (mdipper@alumni.engr.ucsb.edu)
+*
+* This file is part of the lzw library.
+*
+* The lzw library is free software; you can redistribute it and/or
+* modify it under the terms of the GNU Lesser General Public License as
+* published by the Free Software Foundation; either version 3 of the
+* License, or (at your option) any later version.
+*
+* The lzw library is distributed in the hope that it will be useful, but
+* WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
+* General Public License for more details.
+*
+* You should have received a copy of the GNU Lesser General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*
+***************************************************************************/
+
+/***************************************************************************
+* INCLUDED FILES
+***************************************************************************/
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "optlist.h"
+#include "lzw.h"
+
+/***************************************************************************
+* PROTOTYPES
+***************************************************************************/
+char *RemovePath(char *fullPath);
+
+/***************************************************************************
+* FUNCTIONS
+***************************************************************************/
+
+/****************************************************************************
+* Function : main
+* Description: This is the main function for this program, it validates
+* the command line input and, if valid, it will call
+* functions to encode or decode a file using the lzw
+* coding algorithm.
+* Parameters : argc - number of parameters
+* argv - parameter list
+* Effects : Encodes/Decodes input file
+* Returned : EXIT_SUCCESS for success, otherwise EXIT_FAILURE.
+****************************************************************************/
+int main(int argc, char *argv[])
+{
+ option_t *optList, *thisOpt;
+ char *inFile, *outFile; /* name of input & output files */
+ char encode; /* encode/decode */
+
+ /* initialize data */
+ inFile = NULL;
+ outFile = NULL;
+ encode = TRUE;
+
+ /* parse command line */
+ optList = GetOptList(argc, argv, "cdi:o:h?");
+ thisOpt = optList;
+
+ while (thisOpt != NULL)
+ {
+ switch(thisOpt->option)
+ {
+ case 'c': /* compression mode */
+ encode = TRUE;
+ break;
+
+ case 'd': /* decompression mode */
+ encode = FALSE;
+ break;
+
+ case 'i': /* input file name */
+ if (inFile != NULL)
+ {
+ fprintf(stderr, "Multiple input files not allowed.\n");
+ free(inFile);
+
+ if (outFile != NULL)
+ {
+ free(outFile);
+ }
+
+ FreeOptList(optList);
+ exit(EXIT_FAILURE);
+ }
+ else if ((inFile =
+ (char *)malloc(strlen(thisOpt->argument) + 1)) == NULL)
+ {
+ perror("Memory allocation");
+
+ if (outFile != NULL)
+ {
+ free(outFile);
+ }
+
+ FreeOptList(optList);
+ exit(EXIT_FAILURE);
+ }
+
+ strcpy(inFile, thisOpt->argument);
+ break;
+
+ case 'o': /* output file name */
+ if (outFile != NULL)
+ {
+ fprintf(stderr, "Multiple output files not allowed.\n");
+ free(outFile);
+
+ if (inFile != NULL)
+ {
+ free(inFile);
+ }
+
+ FreeOptList(optList);
+ exit(EXIT_FAILURE);
+ }
+ else if ((outFile =
+ (char *)malloc(strlen(thisOpt->argument) + 1)) == NULL)
+ {
+ perror("Memory allocation");
+
+ if (inFile != NULL)
+ {
+ free(inFile);
+ }
+
+ FreeOptList(optList);
+ exit(EXIT_FAILURE);
+ }
+
+ strcpy(outFile, thisOpt->argument);
+ break;
+
+ case 'h':
+ case '?':
+ printf("Usage: %s <options>\n\n", RemovePath(argv[0]));
+ printf("options:\n");
+ printf(" -c : Encode input file to output file.\n");
+ printf(" -d : Decode input file to output file.\n");
+ printf(" -i <filename> : Name of input file.\n");
+ printf(" -o <filename> : Name of output file.\n");
+ printf(" -h | ? : Print out command line options.\n\n");
+ printf("Default: %s -c\n", RemovePath(argv[0]));
+
+ FreeOptList(optList);
+ return(EXIT_SUCCESS);
+ }
+
+ optList = thisOpt->next;
+ free(thisOpt);
+ thisOpt = optList;
+ }
+
+ /* validate command line */
+ if (inFile == NULL)
+ {
+ fprintf(stderr, "Input file must be provided\n");
+ fprintf(stderr, "Enter \"%s -?\" for help.\n", RemovePath(argv[0]));
+
+ if (outFile != NULL)
+ {
+ free(outFile);
+ }
+
+ exit (EXIT_FAILURE);
+ }
+ else if (outFile == NULL)
+ {
+ fprintf(stderr, "Output file must be provided\n");
+ fprintf(stderr, "Enter \"%s -?\" for help.\n", RemovePath(argv[0]));
+
+ if (inFile != NULL)
+ {
+ free(inFile);
+ }
+
+ exit (EXIT_FAILURE);
+ }
+
+ /* we have valid parameters encode or decode */
+ if (encode)
+ {
+ LZWEncodeFile(inFile, outFile);
+ }
+ else
+ {
+ LZWDecodeFile(inFile, outFile);
+ }
+
+ free(inFile);
+ free(outFile);
+ return EXIT_SUCCESS;
+}
+
+/****************************************************************************
+* Function : RemovePath
+* Description: This is function accepts a pointer to the name of a file
+* along with path information and returns a pointer to the
+* character that is not part of the path.
+* Parameters : fullPath - pointer to an array of characters containing
+* a file name and possible path modifiers.
+* Effects : None
+* Returned : Returns a pointer to the first character after any path
+* information.
+****************************************************************************/
+char *RemovePath(char *fullPath)
+{
+ int i;
+ char *start, *tmp; /* start of file name */
+ const char delim[3] = {'\\', '/', ':'}; /* path deliminators */
+
+ start = fullPath;
+
+ /* find the first character after all file path delimiters */
+ for (i = 0; i < 3; i++)
+ {
+ tmp = strrchr(start, delim[i]);
+
+ if (tmp != NULL)
+ {
+ start = tmp + 1;
+ }
+ }
+
+ return start;
+}
diff --git a/test/compression/optlist.c b/test/compression/optlist.c
new file mode 100644
index 0000000..d1fc013
--- /dev/null
+++ b/test/compression/optlist.c
@@ -0,0 +1,228 @@
+/***************************************************************************
+* Command Line Option Parser
+*
+* File : optlist.c
+* Purpose : Provide getopt style command line option parsing
+* Author : Michael Dipperstein
+* Date : August 1, 2007
+*
+****************************************************************************
+* HISTORY
+*
+* $Id: optlist.c,v 1.1.1.2 2007/09/04 04:45:42 michael Exp $
+* $Log: optlist.c,v $
+* Revision 1.1.1.2 2007/09/04 04:45:42 michael
+* Added FreeOptList.
+*
+* Revision 1.1.1.1 2007/08/07 05:01:48 michael
+* Initial Release
+*
+****************************************************************************
+*
+* OptList: A command line option parsing library
+* Copyright (C) 2007 by Michael Dipperstein (mdipper@alumni.engr.ucsb.edu)
+*
+* This file is part of the OptList library.
+*
+* OptList is free software; you can redistribute it and/or modify it
+* under the terms of the GNU Lesser General Public License as published by
+* the Free Software Foundation; either version 3 of the License, or (at
+* your option) any later version.
+*
+* OptList is distributed in the hope that it will be useful, but
+* WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
+* General Public License for more details.
+*
+* You should have received a copy of the GNU Lesser General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*
+***************************************************************************/
+
+/***************************************************************************
+* INCLUDED FILES
+***************************************************************************/
+#include "optlist.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/***************************************************************************
+* TYPE DEFINITIONS
+***************************************************************************/
+
+/***************************************************************************
+* CONSTANTS
+***************************************************************************/
+
+/***************************************************************************
+* GLOBAL VARIABLES
+***************************************************************************/
+
+/***************************************************************************
+* PROTOTYPES
+***************************************************************************/
+option_t *MakeOpt(const char option, char *const argument, const int index);
+
+/***************************************************************************
+* FUNCTIONS
+***************************************************************************/
+
+/****************************************************************************
+* Function : GetOptList
+* Description: This function is similar to the POSIX function getopt. All
+* options and their corresponding arguments are returned in a
+* linked list. This function should only be called once per
+* an option list and it does not modify argv or argc.
+* Parameters : argc - the number of command line arguments (including the
+* name of the executable)
+* argv - pointer to the open binary file to write encoded
+* output
+* options - getopt style option list. A NULL terminated
+* string of single character options. Follow an
+* option with a colon to indicate that it requires
+* an argument.
+* Effects : Creates a link list of command line options and their
+* arguments.
+* Returned : option_t type value where the option and arguement fields
+* contain the next option symbol and its argument (if any).
+* The argument field will be set to NULL if the option is
+* specified as having no arguments or no arguments are found.
+* The option field will be set to PO_NO_OPT if no more
+* options are found.
+*
+* NOTE: The caller is responsible for freeing up the option list when it
+* is no longer needed.
+****************************************************************************/
+option_t *GetOptList(const int argc, char *const argv[], char *const options)
+{
+ int nextArg;
+ option_t *head, *tail;
+ int optIndex;
+
+ /* start with first argument and nothing found */
+ nextArg = 1;
+ head = NULL;
+ tail = NULL;
+
+ /* loop through all of the command line arguments */
+ while (nextArg < argc)
+ {
+ if ((strlen(argv[nextArg]) > 1) && ('-' == argv[nextArg][0]))
+ {
+ /* possible option */
+ optIndex = 0;
+
+ /* attempt to find a matching option */
+ while ((options[optIndex] != '\0') &&
+ (options[optIndex] != argv[nextArg][1]))
+ {
+ do
+ {
+ optIndex++;
+ }
+ while ((options[optIndex] != '\0') &&
+ (':' == options[optIndex]));
+ }
+
+ if (options[optIndex] == argv[nextArg][1])
+ {
+ /* we found the matching option */
+ if (NULL == head)
+ {
+ head = MakeOpt(options[optIndex], NULL, OL_NOINDEX);
+ tail = head;
+ }
+ else
+ {
+ tail->next = MakeOpt(options[optIndex], NULL, OL_NOINDEX);
+ tail = tail->next;
+ }
+
+ if (':' == options[optIndex + 1])
+ {
+ /* the option found should have a text arguement */
+ if (strlen(argv[nextArg]) > 2)
+ {
+ /* no space between argument and option */
+ tail->argument = &(argv[nextArg][2]);
+ tail->argIndex = nextArg;
+ }
+ else if (nextArg < argc)
+ {
+ /* there must be space between the argument option */
+ nextArg++;
+ tail->argument = argv[nextArg];
+ tail->argIndex = nextArg;
+ }
+ }
+ }
+ }
+
+ nextArg++;
+ }
+
+ return head;
+}
+
+/****************************************************************************
+* Function : MakeOpt
+* Description: This function uses malloc to allocate space for an option_t
+* type structure and initailizes the structure with the
+* values passed as a parameter.
+* Parameters : option - this option character
+* argument - pointer string containg the argument for option.
+* Use NULL for no argument
+* index - argv[index] contains argument us OL_NOINDEX for
+* no argument
+* Effects : A new option_t type variable is created on the heap.
+* Returned : Pointer to newly created and initialized option_t type
+* structure. NULL if space for structure can't be allocated.
+****************************************************************************/
+option_t *MakeOpt(const char option, char *const argument, const int index)
+{
+ option_t *opt;
+
+ opt = malloc(sizeof(option_t));
+
+ if (opt != NULL)
+ {
+ opt->option = option;
+ opt->argument = argument;
+ opt->argIndex = index;
+ opt->next = NULL;
+ }
+ else
+ {
+ perror("Failed to Allocate option_t");
+ }
+
+ return opt;
+}
+
+/****************************************************************************
+* Function : FreeOptList
+* Description: This function will free all the elements in an option_t
+* type linked list starting from the node passed as a
+* parameter.
+* Parameters : list - head of linked list to be freed
+* Effects : All elements of the linked list pointed to by list will
+* be freed and list will be set to NULL.
+* Returned : None
+****************************************************************************/
+void FreeOptList(option_t *list)
+{
+ option_t *head, *next;
+
+ head = list;
+ list = NULL;
+
+ while (head != NULL)
+ {
+ next = head->next;
+ free(head);
+ head = next;
+ }
+
+ return;
+}
diff --git a/test/compression/optlist.h b/test/compression/optlist.h
new file mode 100644
index 0000000..0461263
--- /dev/null
+++ b/test/compression/optlist.h
@@ -0,0 +1,74 @@
+/***************************************************************************
+* Command Line Option Parser
+*
+* File : optlist.h
+* Purpose : Header for getopt style command line option parsing
+* Author : Michael Dipperstein
+* Date : August 1, 2007
+*
+****************************************************************************
+* HISTORY
+*
+* $Id: optlist.h,v 1.1.1.2 2007/09/04 04:45:42 michael Exp $
+* $Log: optlist.h,v $
+* Revision 1.1.1.2 2007/09/04 04:45:42 michael
+* Added FreeOptList.
+*
+* Revision 1.1.1.1 2007/08/07 05:01:48 michael
+* Initial Release
+*
+****************************************************************************
+*
+* OptList: A command line option parsing library
+* Copyright (C) 2007 by Michael Dipperstein (mdipper@alumni.engr.ucsb.edu)
+*
+* This file is part of the OptList library.
+*
+* OptList is free software; you can redistribute it and/or modify it
+* under the terms of the GNU Lesser General Public License as published by
+* the Free Software Foundation; either version 3 of the License, or (at
+* your option) any later version.
+*
+* OptList is distributed in the hope that it will be useful, but
+* WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
+* General Public License for more details.
+*
+* You should have received a copy of the GNU Lesser General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*
+***************************************************************************/
+#ifndef OPTLIST_H
+#define OPTLIST_H
+
+/***************************************************************************
+* INCLUDED FILES
+***************************************************************************/
+
+/***************************************************************************
+* MACROS
+***************************************************************************/
+
+/***************************************************************************
+* CONSTANTS
+***************************************************************************/
+#define OL_NOINDEX -1 /* this option has no arguement */
+
+/***************************************************************************
+* TYPE DEFINITIONS
+***************************************************************************/
+typedef struct option_t
+{
+ char option;
+ char *argument;
+ int argIndex;
+ struct option_t *next;
+} option_t;
+
+/***************************************************************************
+* PROTOTYPES
+***************************************************************************/
+option_t *GetOptList(int argc, char *const argv[], char *const options);
+void FreeOptList(option_t *list);
+
+#endif /* ndef OPTLIST_H */