From 917e891d06e16516fe90e286f184062e6b7409fe Mon Sep 17 00:00:00 2001 From: xleroy Date: Thu, 29 Jun 2006 16:07:01 +0000 Subject: Version C des tests Cminor git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@40 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e --- test/c/Makefile | 46 ++++ test/c/aes.c | 728 +++++++++++++++++++++++++++++++++++++++++++++++++++++ test/c/almabench.c | 190 ++++++++++++++ test/c/fft.c | 158 ++++++++++++ test/c/fib.c | 7 + test/c/integr.c | 23 ++ test/c/qsort.c | 16 ++ test/c/sha1.c | 171 +++++++++++++ 8 files changed, 1339 insertions(+) create mode 100644 test/c/Makefile create mode 100644 test/c/aes.c create mode 100644 test/c/almabench.c create mode 100644 test/c/fft.c create mode 100644 test/c/fib.c create mode 100644 test/c/integr.c create mode 100644 test/c/qsort.c create mode 100644 test/c/sha1.c (limited to 'test/c') diff --git a/test/c/Makefile b/test/c/Makefile new file mode 100644 index 0000000..436e222 --- /dev/null +++ b/test/c/Makefile @@ -0,0 +1,46 @@ +CC=gcc +CFLAGS=-O2 -Wall + +VPATH=../harness ../lib + +PROGS=fib integr qsort fft sha1 aes almabench + +all: $(PROGS) + +fib: fib.o mainfib.o + $(CC) $(CFLAGS) -o fib fib.o mainfib.o +clean:: + rm -f fib + +integr: integr.o mainintegr.o + $(CC) $(CFLAGS) -o integr integr.o mainintegr.o +clean:: + rm -f integr + +qsort: qsort.o mainqsort.o + $(CC) $(CFLAGS) -o qsort qsort.o mainqsort.o +clean:: + rm -f qsort + +fft: fft.o mainfft.o staticlib.o + $(CC) $(CFLAGS) -o fft fft.o mainfft.o staticlib.o -lm +clean:: + rm -f fft + +sha1: sha1.o mainsha1.o staticlib.o + $(CC) $(CFLAGS) -o sha1 sha1.o mainsha1.o staticlib.o +clean:: + rm -f sha1 sha1.cm + +aes: aes.o mainaes.o + $(CC) $(CFLAGS) -o aes aes.o mainaes.o +clean:: + rm -f aes aes.cm + +almabench: almabench.o mainalmabench.o staticlib.o + $(CC) $(CFLAGS) -o almabench almabench.o mainalmabench.o staticlib.o -lm +clean:: + rm -f almabench almabench.cm + +clean:: + rm -f *.s *.o *~ diff --git a/test/c/aes.c b/test/c/aes.c new file mode 100644 index 0000000..90abf57 --- /dev/null +++ b/test/c/aes.c @@ -0,0 +1,728 @@ +/** + * rijndael-alg-fst.c + * + * @version 3.0 (December 2000) + * + * Optimised ANSI C code for the Rijndael cipher (now AES) + * + * @author Vincent Rijmen + * @author Antoon Bosselaers + * @author Paulo Barreto + * + * This code is hereby placed in the public domain. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#include +#include + +#define MAXKC (256/32) +#define MAXKB (256/8) +#define MAXNR 14 + +typedef unsigned char u8; +typedef unsigned short u16; +typedef unsigned int u32; + +#define ARCH_BIG_ENDIAN + +#ifdef ARCH_BIG_ENDIAN +#define GETU32(pt) (*(u32 *)(pt)) +#define PUTU32(ct,st) (*(u32 *)(ct) = (st)) +#else +#define GETU32(pt) (((u32)(pt)[0] << 24) ^ ((u32)(pt)[1] << 16) ^ ((u32)(pt)[2] << 8) ^ ((u32)(pt)[3])) +#define PUTU32(ct, st) { (ct)[0] = (u8)((st) >> 24); (ct)[1] = (u8)((st) >> 16); (ct)[2] = (u8)((st) >> 8); (ct)[3] = (u8)(st); } +#endif + +extern u32 Te0[256], Te1[256], Te2[256], Te3[256], Te4[256]; +extern u32 Td0[256], Td1[256], Td2[256], Td3[256], Td4[256]; +extern u32 rcon[10]; + +/** + * Expand the cipher key into the encryption key schedule. + * + * @return the number of rounds for the given cipher key size. + */ +int rijndaelKeySetupEnc(u32 rk[/*4*(Nr + 1)*/], const u8 cipherKey[], int keyBits) { + int i = 0; + u32 temp; + + rk[0] = GETU32(cipherKey ); + rk[1] = GETU32(cipherKey + 4); + rk[2] = GETU32(cipherKey + 8); + rk[3] = GETU32(cipherKey + 12); + if (keyBits == 128) { + for (;;) { + temp = rk[3]; + rk[4] = rk[0] ^ + (Te4[(temp >> 16) & 0xff] & 0xff000000) ^ + (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^ + (Te4[(temp ) & 0xff] & 0x0000ff00) ^ + (Te4[(temp >> 24) ] & 0x000000ff) ^ + rcon[i]; + rk[5] = rk[1] ^ rk[4]; + rk[6] = rk[2] ^ rk[5]; + rk[7] = rk[3] ^ rk[6]; + if (++i == 10) { + return 10; + } + rk += 4; + } + } + rk[4] = GETU32(cipherKey + 16); + rk[5] = GETU32(cipherKey + 20); + if (keyBits == 192) { + for (;;) { + temp = rk[ 5]; + rk[ 6] = rk[ 0] ^ + (Te4[(temp >> 16) & 0xff] & 0xff000000) ^ + (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^ + (Te4[(temp ) & 0xff] & 0x0000ff00) ^ + (Te4[(temp >> 24) ] & 0x000000ff) ^ + rcon[i]; + rk[ 7] = rk[ 1] ^ rk[ 6]; + rk[ 8] = rk[ 2] ^ rk[ 7]; + rk[ 9] = rk[ 3] ^ rk[ 8]; + if (++i == 8) { + return 12; + } + rk[10] = rk[ 4] ^ rk[ 9]; + rk[11] = rk[ 5] ^ rk[10]; + rk += 6; + } + } + rk[6] = GETU32(cipherKey + 24); + rk[7] = GETU32(cipherKey + 28); + if (keyBits == 256) { + for (;;) { + temp = rk[ 7]; + rk[ 8] = rk[ 0] ^ + (Te4[(temp >> 16) & 0xff] & 0xff000000) ^ + (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^ + (Te4[(temp ) & 0xff] & 0x0000ff00) ^ + (Te4[(temp >> 24) ] & 0x000000ff) ^ + rcon[i]; + rk[ 9] = rk[ 1] ^ rk[ 8]; + rk[10] = rk[ 2] ^ rk[ 9]; + rk[11] = rk[ 3] ^ rk[10]; + if (++i == 7) { + return 14; + } + temp = rk[11]; + rk[12] = rk[ 4] ^ + (Te4[(temp >> 24) ] & 0xff000000) ^ + (Te4[(temp >> 16) & 0xff] & 0x00ff0000) ^ + (Te4[(temp >> 8) & 0xff] & 0x0000ff00) ^ + (Te4[(temp ) & 0xff] & 0x000000ff); + rk[13] = rk[ 5] ^ rk[12]; + rk[14] = rk[ 6] ^ rk[13]; + rk[15] = rk[ 7] ^ rk[14]; + + rk += 8; + } + } + return 0; +} + +/** + * Expand the cipher key into the decryption key schedule. + * + * @return the number of rounds for the given cipher key size. 
+ */ +int rijndaelKeySetupDec(u32 rk[/*4*(Nr + 1)*/], const u8 cipherKey[], int keyBits) { + int Nr, i, j; + u32 temp; + + /* expand the cipher key: */ + Nr = rijndaelKeySetupEnc(rk, cipherKey, keyBits); + /* invert the order of the round keys: */ + for (i = 0, j = 4*Nr; i < j; i += 4, j -= 4) { + temp = rk[i ]; rk[i ] = rk[j ]; rk[j ] = temp; + temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp; + temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp; + temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp; + } + /* apply the inverse MixColumn transform to all round keys but the first and the last: */ + for (i = 1; i < Nr; i++) { + rk += 4; + rk[0] = + Td0[Te4[(rk[0] >> 24) ] & 0xff] ^ + Td1[Te4[(rk[0] >> 16) & 0xff] & 0xff] ^ + Td2[Te4[(rk[0] >> 8) & 0xff] & 0xff] ^ + Td3[Te4[(rk[0] ) & 0xff] & 0xff]; + rk[1] = + Td0[Te4[(rk[1] >> 24) ] & 0xff] ^ + Td1[Te4[(rk[1] >> 16) & 0xff] & 0xff] ^ + Td2[Te4[(rk[1] >> 8) & 0xff] & 0xff] ^ + Td3[Te4[(rk[1] ) & 0xff] & 0xff]; + rk[2] = + Td0[Te4[(rk[2] >> 24) ] & 0xff] ^ + Td1[Te4[(rk[2] >> 16) & 0xff] & 0xff] ^ + Td2[Te4[(rk[2] >> 8) & 0xff] & 0xff] ^ + Td3[Te4[(rk[2] ) & 0xff] & 0xff]; + rk[3] = + Td0[Te4[(rk[3] >> 24) ] & 0xff] ^ + Td1[Te4[(rk[3] >> 16) & 0xff] & 0xff] ^ + Td2[Te4[(rk[3] >> 8) & 0xff] & 0xff] ^ + Td3[Te4[(rk[3] ) & 0xff] & 0xff]; + } + return Nr; +} + +void rijndaelEncrypt(const u32 rk[/*4*(Nr + 1)*/], int Nr, const u8 pt[16], u8 ct[16]) { + u32 s0, s1, s2, s3, t0, t1, t2, t3; +#ifndef FULL_UNROLL + int r; +#endif /* ?FULL_UNROLL */ + + /* + * map byte array block to cipher state + * and add initial round key: + */ + s0 = GETU32(pt ) ^ rk[0]; + s1 = GETU32(pt + 4) ^ rk[1]; + s2 = GETU32(pt + 8) ^ rk[2]; + s3 = GETU32(pt + 12) ^ rk[3]; +#ifdef FULL_UNROLL + /* round 1: */ + t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[ 4]; + t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[ 5]; + t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[ 6]; + t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[ 7]; + /* round 2: */ + s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[ 8]; + s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[ 9]; + s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[10]; + s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[11]; + /* round 3: */ + t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[12]; + t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[13]; + t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[14]; + t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[15]; + /* round 4: */ + s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[16]; + s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[17]; + s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[18]; + s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[19]; + /* round 5: */ + t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[20]; + t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) 
& 0xff] ^ Te3[s0 & 0xff] ^ rk[21]; + t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[22]; + t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[23]; + /* round 6: */ + s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[24]; + s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[25]; + s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[26]; + s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[27]; + /* round 7: */ + t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[28]; + t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[29]; + t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[30]; + t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[31]; + /* round 8: */ + s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[32]; + s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[33]; + s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[34]; + s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[35]; + /* round 9: */ + t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[36]; + t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[37]; + t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[38]; + t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[39]; + if (Nr > 10) { + /* round 10: */ + s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[40]; + s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[41]; + s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[42]; + s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[43]; + /* round 11: */ + t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[44]; + t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[45]; + t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[46]; + t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[47]; + if (Nr > 12) { + /* round 12: */ + s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[48]; + s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[49]; + s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[50]; + s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[51]; + /* round 13: */ + t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[52]; + t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[53]; + t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[54]; + t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[55]; + } + } + rk += Nr << 2; +#else /* !FULL_UNROLL 
*/ + /* + * Nr - 1 full rounds: + */ + r = Nr >> 1; + for (;;) { + t0 = + Te0[(s0 >> 24) ] ^ + Te1[(s1 >> 16) & 0xff] ^ + Te2[(s2 >> 8) & 0xff] ^ + Te3[(s3 ) & 0xff] ^ + rk[4]; + t1 = + Te0[(s1 >> 24) ] ^ + Te1[(s2 >> 16) & 0xff] ^ + Te2[(s3 >> 8) & 0xff] ^ + Te3[(s0 ) & 0xff] ^ + rk[5]; + t2 = + Te0[(s2 >> 24) ] ^ + Te1[(s3 >> 16) & 0xff] ^ + Te2[(s0 >> 8) & 0xff] ^ + Te3[(s1 ) & 0xff] ^ + rk[6]; + t3 = + Te0[(s3 >> 24) ] ^ + Te1[(s0 >> 16) & 0xff] ^ + Te2[(s1 >> 8) & 0xff] ^ + Te3[(s2 ) & 0xff] ^ + rk[7]; + + rk += 8; + if (--r == 0) { + break; + } + + s0 = + Te0[(t0 >> 24) ] ^ + Te1[(t1 >> 16) & 0xff] ^ + Te2[(t2 >> 8) & 0xff] ^ + Te3[(t3 ) & 0xff] ^ + rk[0]; + s1 = + Te0[(t1 >> 24) ] ^ + Te1[(t2 >> 16) & 0xff] ^ + Te2[(t3 >> 8) & 0xff] ^ + Te3[(t0 ) & 0xff] ^ + rk[1]; + s2 = + Te0[(t2 >> 24) ] ^ + Te1[(t3 >> 16) & 0xff] ^ + Te2[(t0 >> 8) & 0xff] ^ + Te3[(t1 ) & 0xff] ^ + rk[2]; + s3 = + Te0[(t3 >> 24) ] ^ + Te1[(t0 >> 16) & 0xff] ^ + Te2[(t1 >> 8) & 0xff] ^ + Te3[(t2 ) & 0xff] ^ + rk[3]; + } +#endif /* ?FULL_UNROLL */ + /* + * apply last round and + * map cipher state to byte array block: + */ + s0 = + (Te4[(t0 >> 24) ] & 0xff000000) ^ + (Te4[(t1 >> 16) & 0xff] & 0x00ff0000) ^ + (Te4[(t2 >> 8) & 0xff] & 0x0000ff00) ^ + (Te4[(t3 ) & 0xff] & 0x000000ff) ^ + rk[0]; + PUTU32(ct , s0); + s1 = + (Te4[(t1 >> 24) ] & 0xff000000) ^ + (Te4[(t2 >> 16) & 0xff] & 0x00ff0000) ^ + (Te4[(t3 >> 8) & 0xff] & 0x0000ff00) ^ + (Te4[(t0 ) & 0xff] & 0x000000ff) ^ + rk[1]; + PUTU32(ct + 4, s1); + s2 = + (Te4[(t2 >> 24) ] & 0xff000000) ^ + (Te4[(t3 >> 16) & 0xff] & 0x00ff0000) ^ + (Te4[(t0 >> 8) & 0xff] & 0x0000ff00) ^ + (Te4[(t1 ) & 0xff] & 0x000000ff) ^ + rk[2]; + PUTU32(ct + 8, s2); + s3 = + (Te4[(t3 >> 24) ] & 0xff000000) ^ + (Te4[(t0 >> 16) & 0xff] & 0x00ff0000) ^ + (Te4[(t1 >> 8) & 0xff] & 0x0000ff00) ^ + (Te4[(t2 ) & 0xff] & 0x000000ff) ^ + rk[3]; + PUTU32(ct + 12, s3); +} + +void rijndaelDecrypt(const u32 rk[/*4*(Nr + 1)*/], int Nr, const u8 ct[16], u8 pt[16]) { + u32 s0, s1, s2, s3, t0, t1, t2, t3; +#ifndef FULL_UNROLL + int r; +#endif /* ?FULL_UNROLL */ + + /* + * map byte array block to cipher state + * and add initial round key: + */ + s0 = GETU32(ct ) ^ rk[0]; + s1 = GETU32(ct + 4) ^ rk[1]; + s2 = GETU32(ct + 8) ^ rk[2]; + s3 = GETU32(ct + 12) ^ rk[3]; +#ifdef FULL_UNROLL + /* round 1: */ + t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[ 4]; + t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[ 5]; + t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[ 6]; + t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[ 7]; + /* round 2: */ + s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[ 8]; + s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[ 9]; + s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[10]; + s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[11]; + /* round 3: */ + t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[12]; + t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[13]; + t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[14]; + t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[15]; + /* round 4: */ + s0 = Td0[t0 >> 
24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[16]; + s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[17]; + s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[18]; + s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[19]; + /* round 5: */ + t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[20]; + t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[21]; + t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[22]; + t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[23]; + /* round 6: */ + s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[24]; + s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[25]; + s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[26]; + s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[27]; + /* round 7: */ + t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[28]; + t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[29]; + t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[30]; + t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[31]; + /* round 8: */ + s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[32]; + s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[33]; + s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[34]; + s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[35]; + /* round 9: */ + t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[36]; + t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[37]; + t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[38]; + t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[39]; + if (Nr > 10) { + /* round 10: */ + s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[40]; + s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[41]; + s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[42]; + s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[43]; + /* round 11: */ + t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[44]; + t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[45]; + t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[46]; + t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[47]; + if (Nr > 12) { + /* round 12: */ + s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[48]; + s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[49]; + s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[50]; + 
s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[51]; + /* round 13: */ + t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[52]; + t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[53]; + t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[54]; + t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[55]; + } + } + rk += Nr << 2; +#else /* !FULL_UNROLL */ + /* + * Nr - 1 full rounds: + */ + r = Nr >> 1; + for (;;) { + t0 = + Td0[(s0 >> 24) ] ^ + Td1[(s3 >> 16) & 0xff] ^ + Td2[(s2 >> 8) & 0xff] ^ + Td3[(s1 ) & 0xff] ^ + rk[4]; + t1 = + Td0[(s1 >> 24) ] ^ + Td1[(s0 >> 16) & 0xff] ^ + Td2[(s3 >> 8) & 0xff] ^ + Td3[(s2 ) & 0xff] ^ + rk[5]; + t2 = + Td0[(s2 >> 24) ] ^ + Td1[(s1 >> 16) & 0xff] ^ + Td2[(s0 >> 8) & 0xff] ^ + Td3[(s3 ) & 0xff] ^ + rk[6]; + t3 = + Td0[(s3 >> 24) ] ^ + Td1[(s2 >> 16) & 0xff] ^ + Td2[(s1 >> 8) & 0xff] ^ + Td3[(s0 ) & 0xff] ^ + rk[7]; + + rk += 8; + if (--r == 0) { + break; + } + + s0 = + Td0[(t0 >> 24) ] ^ + Td1[(t3 >> 16) & 0xff] ^ + Td2[(t2 >> 8) & 0xff] ^ + Td3[(t1 ) & 0xff] ^ + rk[0]; + s1 = + Td0[(t1 >> 24) ] ^ + Td1[(t0 >> 16) & 0xff] ^ + Td2[(t3 >> 8) & 0xff] ^ + Td3[(t2 ) & 0xff] ^ + rk[1]; + s2 = + Td0[(t2 >> 24) ] ^ + Td1[(t1 >> 16) & 0xff] ^ + Td2[(t0 >> 8) & 0xff] ^ + Td3[(t3 ) & 0xff] ^ + rk[2]; + s3 = + Td0[(t3 >> 24) ] ^ + Td1[(t2 >> 16) & 0xff] ^ + Td2[(t1 >> 8) & 0xff] ^ + Td3[(t0 ) & 0xff] ^ + rk[3]; + } +#endif /* ?FULL_UNROLL */ + /* + * apply last round and + * map cipher state to byte array block: + */ + s0 = + (Td4[(t0 >> 24) ] & 0xff000000) ^ + (Td4[(t3 >> 16) & 0xff] & 0x00ff0000) ^ + (Td4[(t2 >> 8) & 0xff] & 0x0000ff00) ^ + (Td4[(t1 ) & 0xff] & 0x000000ff) ^ + rk[0]; + PUTU32(pt , s0); + s1 = + (Td4[(t1 >> 24) ] & 0xff000000) ^ + (Td4[(t0 >> 16) & 0xff] & 0x00ff0000) ^ + (Td4[(t3 >> 8) & 0xff] & 0x0000ff00) ^ + (Td4[(t2 ) & 0xff] & 0x000000ff) ^ + rk[1]; + PUTU32(pt + 4, s1); + s2 = + (Td4[(t2 >> 24) ] & 0xff000000) ^ + (Td4[(t1 >> 16) & 0xff] & 0x00ff0000) ^ + (Td4[(t0 >> 8) & 0xff] & 0x0000ff00) ^ + (Td4[(t3 ) & 0xff] & 0x000000ff) ^ + rk[2]; + PUTU32(pt + 8, s2); + s3 = + (Td4[(t3 >> 24) ] & 0xff000000) ^ + (Td4[(t2 >> 16) & 0xff] & 0x00ff0000) ^ + (Td4[(t1 >> 8) & 0xff] & 0x0000ff00) ^ + (Td4[(t0 ) & 0xff] & 0x000000ff) ^ + rk[3]; + PUTU32(pt + 12, s3); +} + +#ifdef INTERMEDIATE_VALUE_KAT + +void rijndaelEncryptRound(const u32 rk[/*4*(Nr + 1)*/], int Nr, u8 block[16], int rounds) { + int r; + u32 s0, s1, s2, s3, t0, t1, t2, t3; + + /* + * map byte array block to cipher state + * and add initial round key: + */ + s0 = GETU32(block ) ^ rk[0]; + s1 = GETU32(block + 4) ^ rk[1]; + s2 = GETU32(block + 8) ^ rk[2]; + s3 = GETU32(block + 12) ^ rk[3]; + rk += 4; + + /* + * Nr - 1 full rounds: + */ + for (r = (rounds < Nr ? 
rounds : Nr - 1); r > 0; r--) { + t0 = + Te0[(s0 >> 24) ] ^ + Te1[(s1 >> 16) & 0xff] ^ + Te2[(s2 >> 8) & 0xff] ^ + Te3[(s3 ) & 0xff] ^ + rk[0]; + t1 = + Te0[(s1 >> 24) ] ^ + Te1[(s2 >> 16) & 0xff] ^ + Te2[(s3 >> 8) & 0xff] ^ + Te3[(s0 ) & 0xff] ^ + rk[1]; + t2 = + Te0[(s2 >> 24) ] ^ + Te1[(s3 >> 16) & 0xff] ^ + Te2[(s0 >> 8) & 0xff] ^ + Te3[(s1 ) & 0xff] ^ + rk[2]; + t3 = + Te0[(s3 >> 24) ] ^ + Te1[(s0 >> 16) & 0xff] ^ + Te2[(s1 >> 8) & 0xff] ^ + Te3[(s2 ) & 0xff] ^ + rk[3]; + + s0 = t0; + s1 = t1; + s2 = t2; + s3 = t3; + rk += 4; + + } + + /* + * apply last round and + * map cipher state to byte array block: + */ + if (rounds == Nr) { + t0 = + (Te4[(s0 >> 24) ] & 0xff000000) ^ + (Te4[(s1 >> 16) & 0xff] & 0x00ff0000) ^ + (Te4[(s2 >> 8) & 0xff] & 0x0000ff00) ^ + (Te4[(s3 ) & 0xff] & 0x000000ff) ^ + rk[0]; + t1 = + (Te4[(s1 >> 24) ] & 0xff000000) ^ + (Te4[(s2 >> 16) & 0xff] & 0x00ff0000) ^ + (Te4[(s3 >> 8) & 0xff] & 0x0000ff00) ^ + (Te4[(s0 ) & 0xff] & 0x000000ff) ^ + rk[1]; + t2 = + (Te4[(s2 >> 24) ] & 0xff000000) ^ + (Te4[(s3 >> 16) & 0xff] & 0x00ff0000) ^ + (Te4[(s0 >> 8) & 0xff] & 0x0000ff00) ^ + (Te4[(s1 ) & 0xff] & 0x000000ff) ^ + rk[2]; + t3 = + (Te4[(s3 >> 24) ] & 0xff000000) ^ + (Te4[(s0 >> 16) & 0xff] & 0x00ff0000) ^ + (Te4[(s1 >> 8) & 0xff] & 0x0000ff00) ^ + (Te4[(s2 ) & 0xff] & 0x000000ff) ^ + rk[3]; + + s0 = t0; + s1 = t1; + s2 = t2; + s3 = t3; + } + + PUTU32(block , s0); + PUTU32(block + 4, s1); + PUTU32(block + 8, s2); + PUTU32(block + 12, s3); +} + +void rijndaelDecryptRound(const u32 rk[/*4*(Nr + 1)*/], int Nr, u8 block[16], int rounds) { + int r; + u32 s0, s1, s2, s3, t0, t1, t2, t3; + + /* + * map byte array block to cipher state + * and add initial round key: + */ + s0 = GETU32(block ) ^ rk[0]; + s1 = GETU32(block + 4) ^ rk[1]; + s2 = GETU32(block + 8) ^ rk[2]; + s3 = GETU32(block + 12) ^ rk[3]; + rk += 4; + + /* + * Nr - 1 full rounds: + */ + for (r = (rounds < Nr ? 
rounds : Nr) - 1; r > 0; r--) { + t0 = + Td0[(s0 >> 24) ] ^ + Td1[(s3 >> 16) & 0xff] ^ + Td2[(s2 >> 8) & 0xff] ^ + Td3[(s1 ) & 0xff] ^ + rk[0]; + t1 = + Td0[(s1 >> 24) ] ^ + Td1[(s0 >> 16) & 0xff] ^ + Td2[(s3 >> 8) & 0xff] ^ + Td3[(s2 ) & 0xff] ^ + rk[1]; + t2 = + Td0[(s2 >> 24) ] ^ + Td1[(s1 >> 16) & 0xff] ^ + Td2[(s0 >> 8) & 0xff] ^ + Td3[(s3 ) & 0xff] ^ + rk[2]; + t3 = + Td0[(s3 >> 24) ] ^ + Td1[(s2 >> 16) & 0xff] ^ + Td2[(s1 >> 8) & 0xff] ^ + Td3[(s0 ) & 0xff] ^ + rk[3]; + + s0 = t0; + s1 = t1; + s2 = t2; + s3 = t3; + rk += 4; + + } + + /* + * complete the last round and + * map cipher state to byte array block: + */ + t0 = + (Td4[(s0 >> 24) ] & 0xff000000) ^ + (Td4[(s3 >> 16) & 0xff] & 0x00ff0000) ^ + (Td4[(s2 >> 8) & 0xff] & 0x0000ff00) ^ + (Td4[(s1 ) & 0xff] & 0x000000ff); + t1 = + (Td4[(s1 >> 24) ] & 0xff000000) ^ + (Td4[(s0 >> 16) & 0xff] & 0x00ff0000) ^ + (Td4[(s3 >> 8) & 0xff] & 0x0000ff00) ^ + (Td4[(s2 ) & 0xff] & 0x000000ff); + t2 = + (Td4[(s2 >> 24) ] & 0xff000000) ^ + (Td4[(s1 >> 16) & 0xff] & 0x00ff0000) ^ + (Td4[(s0 >> 8) & 0xff] & 0x0000ff00) ^ + (Td4[(s3 ) & 0xff] & 0x000000ff); + t3 = + (Td4[(s3 >> 24) ] & 0xff000000) ^ + (Td4[(s2 >> 16) & 0xff] & 0x00ff0000) ^ + (Td4[(s1 >> 8) & 0xff] & 0x0000ff00) ^ + (Td4[(s0 ) & 0xff] & 0x000000ff); + + if (rounds == Nr) { + t0 ^= rk[0]; + t1 ^= rk[1]; + t2 ^= rk[2]; + t3 ^= rk[3]; + } + + PUTU32(block , t0); + PUTU32(block + 4, t1); + PUTU32(block + 8, t2); + PUTU32(block + 12, t3); +} + +#endif /* INTERMEDIATE_VALUE_KAT */ diff --git a/test/c/almabench.c b/test/c/almabench.c new file mode 100644 index 0000000..ef92a59 --- /dev/null +++ b/test/c/almabench.c @@ -0,0 +1,190 @@ +/* + almabench + Standard C version + 17 April 2003 + + Written by Scott Robert Ladd. + No rights reserved. This is public domain software, for use by anyone. + + A number-crunching benchmark that can be used as a fitness test for + evolving optimal compiler options via genetic algorithm. + + This program calculates the daily ephemeris (at noon) for the years + 2000-2099 using an algorithm developed by J.L. Simon, P. Bretagnon, J. + Chapront, M. Chapront-Touze, G. Francou and J. Laskar of the Bureau des + Longitudes, Paris, France), as detailed in Astronomy & Astrophysics + 282, 663 (1994) + + Note that the code herein is design for the purpose of testing + computational performance; error handling and other such "niceties" + is virtually non-existent. + + Actual benchmark results can be found at: + http://www.coyotegulch.com + + Please do not use this information or algorithm in any way that might + upset the balance of the universe or otherwise cause planets to impact + upon one another. 
+*/ + +#include +#include + +#define PI 3.14159265358979323846 +#define J2000 2451545.0 +#define JCENTURY 36525.0 +#define JMILLENIA 365250.0 +#define TWOPI (2.0 * PI) +#define A2R (PI / 648000.0) +#define R2H (12.0 / PI) +#define R2D (180.0 / PI) +#define GAUSSK 0.01720209895 +#define TEST_LOOPS 20 +#define TEST_LENGTH 36525 +#define sineps 0.3977771559319137 +#define coseps 0.9174820620691818 + +extern double amas[8]; +extern double a[8][3], dlm[8][3], e[8][3], pi[8][3], dinc[8][3], omega[8][3]; +extern double kp[8][9], ca[8][9], sa[8][9], kq[8][10], cl[8][10], sl[8][10]; + +extern double cos_static(double), sin_static(double), + atan2_static(double, double), asin_static(double), sqrt_static(double), + fmod_static(double, double), fabs_static(double); + +#define cos cos_static +#define sin sin_static +#define atan2 atan2_static +#define asin asin_static +#define sqrt sqrt_static +#define fmod fmod_static +#define fabs fabs_static + +//--------------------------------------------------------------------------- +// Normalize angle into the range -pi <= A < +pi. +double anpm (double a) +{ + double w = fmod(a,TWOPI); + + if (fabs(w) >= PI) + w = w - ((a < 0) ? -TWOPI : TWOPI); + + return w; +} + +//--------------------------------------------------------------------------- +// The reference frame is equatorial and is with respect to the +// mean equator and equinox of epoch j2000. +void planetpv (double epoch[2], int np, double pv[2][3]) +{ + // working storage + int i, j, k; + double t, da, dl, de, dp, di, doh, dmu, arga, argl, am; + double ae, dae, ae2, at, r, v, si2, xq, xp, tl, xsw; + double xcw, xm2, xf, ci2, xms, xmc, xpxq2, x, y, z; + + // time: julian millennia since j2000. + t = ((epoch[0] - J2000) + epoch[1]) / JMILLENIA; + + // compute the mean elements. + da = a[np][0] + (a[np][1] + a[np][2] * t ) * t; + dl = (3600.0 * dlm[np][0] + (dlm[np][1] + dlm[np][2] * t ) * t ) * A2R; + de = e[np][0] + (e[np][1] + e[np][2] * t ) * t; + dp = anpm((3600.0 * pi[np][0] + (pi[np][1] + pi[np][2] * t ) * t ) * A2R ); + di = (3600.0 * dinc[np][0] + (dinc[np][1] + dinc[np][2] * t ) * t ) * A2R; + doh = anpm((3600.0 * omega[np][0] + (omega[np][1] + omega[np][2] * t ) * t ) * A2R ); + + // apply the trigonometric terms. + dmu = 0.35953620 * t; + + for (k = 0; k < 8; ++k) + { + arga = kp[np][k] * dmu; + argl = kq[np][k] * dmu; + da = da + (ca[np][k] * cos(arga) + sa[np][k] * sin(arga)) * 0.0000001; + dl = dl + (cl[np][k] * cos(argl) + sl[np][k] * sin(argl)) * 0.0000001; + } + + arga = kp[np][8] * dmu; + da = da + t * (ca[np][8] * cos(arga) + sa[np][8] * sin(arga)) * 0.0000001; + + for (k = 8; k <= 9; ++k) + { + argl = kq[np][k] * dmu; + dl = dl + t * ( cl[np][k] * cos(argl) + sl[np][k] * sin(argl) ) * 0.0000001; + } + + dl = fmod(dl,TWOPI); + + // iterative solution of kepler's equation to get eccentric anomaly. + am = dl - dp; + ae = am + de * sin(am); + k = 0; + + while (1) + { + dae = (am - ae + de * sin(ae)) / (1.0 - de * cos(ae)); + ae = ae + dae; + k = k + 1; + + if ((k >= 10) || (fabs(dae) < 1e-12)) + break; + } + + // true anomaly. + ae2 = ae / 2.0; + at = 2.0 * atan2(sqrt((1.0 + de)/(1.0 - de)) * sin(ae2), cos(ae2)); + + // distance (au) and speed (radians per day). 
+ r = da * (1.0 - de * cos(ae)); + v = GAUSSK * sqrt((1.0 + 1.0 / amas[np] ) / (da * da * da)); + + si2 = sin(di / 2.0); + xq = si2 * cos(doh); + xp = si2 * sin(doh); + tl = at + dp; + xsw = sin(tl); + xcw = cos(tl); + xm2 = 2.0 * (xp * xcw - xq * xsw ); + xf = da / sqrt(1.0 - de*de); + ci2 = cos(di / 2.0); + xms = (de * sin(dp) + xsw) * xf; + xmc = (de * cos(dp) + xcw) * xf; + xpxq2 = 2.0 * xp * xq; + + // position (j2000 ecliptic x,y,z in au). + x = r * (xcw - xm2 * xp); + y = r * (xsw + xm2 * xq); + z = r * (-xm2 * ci2); + + // rotate to equatorial. + pv[0][0] = x; + pv[0][1] = y * coseps - z * sineps; + pv[0][2] = y * sineps + z * coseps; + + // velocity (j2000 ecliptic xdot,ydot,zdot in au/d). + x = v * ((-1.0 + 2.0 * xp * xp) * xms + xpxq2 * xmc); + y = v * (( 1.0 - 2.0 * xq * xq ) * xmc - xpxq2 * xms); + z = v * (2.0 * ci2 * (xp * xms + xq * xmc)); + + // rotate to equatorial. + pv[1][0] = x; + pv[1][1] = y * coseps - z * sineps; + pv[1][2] = y * sineps + z * coseps; +} + +//--------------------------------------------------------------------------- +// Computes RA, Declination, and distance from a state vector returned by +// planetpv. +void radecdist(double state[2][3], double rdd[3]) +{ + // distance + rdd[2] = sqrt(state[0][0] * state[0][0] + state[0][1] * state[0][1] + state[0][2] * state[0][2]); + + // RA + rdd[0] = atan2(state[0][1], state[0][0]) * R2H; + if (rdd[0] < 0.0) rdd[0] += 24.0; + + // Declination + rdd[1] = asin(state[0][2] / rdd[2]) * R2D; +} diff --git a/test/c/fft.c b/test/c/fft.c new file mode 100644 index 0000000..6f24af5 --- /dev/null +++ b/test/c/fft.c @@ -0,0 +1,158 @@ +/* + C Program: Test_Fast_Fourier_Transform_in_Double_Precision (TFFTDP.c) + by: Dave Edelblute, edelblut@cod.nosc.mil, 05 Jan 1993 +*/ + +#include + +#ifndef PI +#define PI 3.14159265358979323846 +#endif + +extern double cos_static(double), sin_static(double); + +#define cos cos_static +#define sin sin_static + +/********************************************************/ +/* A Duhamel-Hollman split-radix dif fft */ +/* Ref: Electronics Letters, Jan. 5, 1984 */ +/* Complex input and output data in arrays x and y */ +/* Length is n. 
*/ +/********************************************************/ + +int dfft(double x[], double y[], int np) +{ +double *px,*py; +int i,j,k,m,n,i0,i1,i2,i3,is,id,n1,n2,n4; +double a,e,a3,cc1,ss1,cc3,ss3,r1,r2,s1,s2,s3,xt,tpi; + + px = x - 1; + py = y - 1; + i = 2; + m = 1; + + while (i < np) + { + i = i+i; + m = m+1; + } + + n = i; + + if (n != np) + { + for (i = np+1; i <= n; i++) + { + *(px + i) = 0.0; + *(py + i) = 0.0; + } + /*printf("nuse %d point fft",n); */ + } + + n2 = n+n; + tpi = 2.0 * PI; + for (k = 1; k <= m-1; k++ ) + { + n2 = n2 / 2; + n4 = n2 / 4; + e = tpi / (double)n2; + a = 0.0; + + for (j = 1; j<= n4 ; j++) + { + a3 = 3.0 * a; + cc1 = cos(a); + ss1 = sin(a); + + cc3 = cos(a3); + ss3 = sin(a3); + a = e * (double)j; + is = j; + id = 2 * n2; + + while ( is < n ) + { + for (i0 = is; i0 <= n-1; i0 = i0 + id) + { + i1 = i0 + n4; + i2 = i1 + n4; + i3 = i2 + n4; + r1 = *(px+i0) - *(px+i2); + *(px+i0) = *(px+i0) + *(px+i2); + r2 = *(px+i1) - *(px+i3); + *(px+i1) = *(px+i1) + *(px+i3); + s1 = *(py+i0) - *(py+i2); + *(py+i0) = *(py+i0) + *(py+i2); + s2 = *(py+i1) - *(py+i3); + *(py+i1) = *(py+i1) + *(py+i3); + s3 = r1 - s2; r1 = r1 + s2; + s2 = r2 - s1; r2 = r2 + s1; + *(px+i2) = r1*cc1 - s2*ss1; + *(py+i2) = -s2*cc1 - r1*ss1; + *(px+i3) = s3*cc3 + r2*ss3; + *(py+i3) = r2*cc3 - s3*ss3; + } + is = 2 * id - n2 + j; + id = 4 * id; + } + } + } + +/************************************/ +/* Last stage, length=2 butterfly */ +/************************************/ + is = 1; + id = 4; + + while ( is < n) + { + for (i0 = is; i0 <= n; i0 = i0 + id) + { + i1 = i0 + 1; + r1 = *(px+i0); + *(px+i0) = r1 + *(px+i1); + *(px+i1) = r1 - *(px+i1); + r1 = *(py+i0); + *(py+i0) = r1 + *(py+i1); + *(py+i1) = r1 - *(py+i1); + } + is = 2*id - 1; + id = 4 * id; + } + +/*************************/ +/* Bit reverse counter */ +/*************************/ + j = 1; + n1 = n - 1; + + for (i = 1; i <= n1; i++) + { + if (i < j) + { + xt = *(px+j); + *(px+j) = *(px+i); + *(px+i) = xt; + xt = *(py+j); + *(py+j) = *(py+i); + *(py+i) = xt; + } + + k = n / 2; + while (k < j) + { + j = j - k; + k = k / 2; + } + j = j + k; + } + +/* + for (i = 1; i<=16; i++) printf("%d %g %gn",i,*(px+i),(py+i)); +*/ + + return(n); +} + + diff --git a/test/c/fib.c b/test/c/fib.c new file mode 100644 index 0000000..34e7baa --- /dev/null +++ b/test/c/fib.c @@ -0,0 +1,7 @@ +int fib(int n) +{ + if (n < 2) + return 1; + else + return fib(n-1) + fib(n-2); +} diff --git a/test/c/integr.c b/test/c/integr.c new file mode 100644 index 0000000..abcbd28 --- /dev/null +++ b/test/c/integr.c @@ -0,0 +1,23 @@ +static double square(double x) +{ + return x * x; +} + +static double integr(double (*f)(double), double low, double high, int n) +{ + double h, x, s; + int i; + + h = (high - low) / n; + s = 0; + for (i = n, x = low; i > 0; i--, x += h) s += f(x); + return s * h; +} + +double test(int n) +{ + return integr(square, 0.0, 1.0, n); +} + + + diff --git a/test/c/qsort.c b/test/c/qsort.c new file mode 100644 index 0000000..9f8e5b1 --- /dev/null +++ b/test/c/qsort.c @@ -0,0 +1,16 @@ +void quicksort(int lo, int hi, int base[]) +{ + int i,j; + int pivot,temp; + + if (lolo && base[j]>=pivot) j--; + if (i + +extern void memcpy_static(void *, void *, int), + memset_static(void *, int, char); + +#define memcpy memcpy_static +#define memset memset_static + +#define ARCH_BIG_ENDIAN + +typedef unsigned int u32; + +struct SHA1Context { + u32 state[5]; + u32 length[2]; + int numbytes; + unsigned char buffer[64]; +}; + +#define rol1(x) (((x) << 1) | ((x) >> 31)) +#define 
rol5(x) (((x) << 5) | ((x) >> 27)) +#define rol30(x) (((x) << 30) | ((x) >> 2)) + +static void SHA1_copy_and_swap(void * src, void * dst, int numwords) +{ +#ifdef ARCH_BIG_ENDIAN + memcpy(dst, src, numwords * sizeof(u32)); +#else + unsigned char * s, * d; + unsigned char a, b; + for (s = src, d = dst; numwords > 0; s += 4, d += 4, numwords--) { + a = s[0]; + b = s[1]; + d[0] = s[3]; + d[1] = s[2]; + d[2] = b; + d[3] = a; + } +#endif +} + +#define F(x,y,z) ( z ^ (x & (y ^ z) ) ) +#define G(x,y,z) ( (x & y) | (z & (x | y) ) ) +#define H(x,y,z) ( x ^ y ^ z ) + +#define Y1 0x5A827999U +#define Y2 0x6ED9EBA1U +#define Y3 0x8F1BBCDCU +#define Y4 0xCA62C1D6U + +static void SHA1_transform(struct SHA1Context * ctx) +{ + int i; + register u32 a, b, c, d, e, t; + u32 data[80]; + + /* Convert buffer data to 16 big-endian integers */ + SHA1_copy_and_swap(ctx->buffer, data, 16); + + /* Expand into 80 integers */ + for (i = 16; i < 80; i++) { + t = data[i-3] ^ data[i-8] ^ data[i-14] ^ data[i-16]; + data[i] = rol1(t); + } + + /* Initialize working variables */ + a = ctx->state[0]; + b = ctx->state[1]; + c = ctx->state[2]; + d = ctx->state[3]; + e = ctx->state[4]; + + /* Perform rounds */ + for (i = 0; i < 20; i++) { + t = F(b, c, d) + Y1 + rol5(a) + e + data[i]; + e = d; d = c; c = rol30(b); b = a; a = t; + } + for (/*nothing*/; i < 40; i++) { + t = H(b, c, d) + Y2 + rol5(a) + e + data[i]; + e = d; d = c; c = rol30(b); b = a; a = t; + } + for (/*nothing*/; i < 60; i++) { + t = G(b, c, d) + Y3 + rol5(a) + e + data[i]; + e = d; d = c; c = rol30(b); b = a; a = t; + } + for (/*nothing*/; i < 80; i++) { + t = H(b, c, d) + Y4 + rol5(a) + e + data[i]; + e = d; d = c; c = rol30(b); b = a; a = t; + } + + /* Update chaining values */ + ctx->state[0] += a; + ctx->state[1] += b; + ctx->state[2] += c; + ctx->state[3] += d; + ctx->state[4] += e; +} + +void SHA1_init(struct SHA1Context * ctx) +{ + ctx->state[0] = 0x67452301U; + ctx->state[1] = 0xEFCDAB89U; + ctx->state[2] = 0x98BADCFEU; + ctx->state[3] = 0x10325476U; + ctx->state[4] = 0xC3D2E1F0U; + ctx->numbytes = 0; + ctx->length[0] = 0; + ctx->length[1] = 0; +} + +void SHA1_add_data(struct SHA1Context * ctx, unsigned char * data, + unsigned long len) +{ + u32 t; + + /* Update length */ + t = ctx->length[1]; + if ((ctx->length[1] = t + (u32) (len << 3)) < t) + ctx->length[0]++; /* carry from low 32 bits to high 32 bits */ + ctx->length[0] += (u32) (len >> 29); + + /* If data was left in buffer, pad it with fresh data and munge block */ + if (ctx->numbytes != 0) { + t = 64 - ctx->numbytes; + if (len < t) { + memcpy(ctx->buffer + ctx->numbytes, data, len); + ctx->numbytes += len; + return; + } + memcpy(ctx->buffer + ctx->numbytes, data, t); + SHA1_transform(ctx); + data += t; + len -= t; + } + /* Munge data in 64-byte chunks */ + while (len >= 64) { + memcpy(ctx->buffer, data, 64); + SHA1_transform(ctx); + data += 64; + len -= 64; + } + /* Save remaining data */ + memcpy(ctx->buffer, data, len); + ctx->numbytes = len; +} + +void SHA1_finish(struct SHA1Context * ctx, unsigned char output[20]) +{ + int i = ctx->numbytes; + + /* Set first char of padding to 0x80. There is always room. 
*/ + ctx->buffer[i++] = 0x80; + /* If we do not have room for the length (8 bytes), pad to 64 bytes + with zeroes and munge the data block */ + if (i > 56) { + memset(ctx->buffer + i, 0, 64 - i); + SHA1_transform(ctx); + i = 0; + } + /* Pad to byte 56 with zeroes */ + memset(ctx->buffer + i, 0, 56 - i); + /* Add length in big-endian */ + SHA1_copy_and_swap(ctx->length, ctx->buffer + 56, 2); + /* Munge the final block */ + SHA1_transform(ctx); + /* Final hash value is in ctx->state modulo big-endian conversion */ + SHA1_copy_and_swap(ctx->state, output, 5); +}
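
The Makefile above links each test with a small harness (mainfib.o, mainaes.o, and so on, found through VPATH in ../harness) plus, for some tests, a staticlib.o that provides the *_static math and memory helpers declared extern in the sources. Those harnesses are not part of this change, so the sketches below are only illustrations of how each entry point might be driven: every name, test value and expected result that does not appear in the files above is an assumption of the sketch. For aes.c, a minimal round-trip check, assuming the Te*/Td*/rcon tables are supplied by another object at link time, could be:

/* Hypothetical harness in the spirit of mainaes.c (not shown in this patch). */
#include <stdio.h>
#include <string.h>

typedef unsigned char u8;
typedef unsigned int u32;

#define MAXNR 14

/* Entry points defined in aes.c. */
int rijndaelKeySetupEnc(u32 rk[], const u8 cipherKey[], int keyBits);
int rijndaelKeySetupDec(u32 rk[], const u8 cipherKey[], int keyBits);
void rijndaelEncrypt(const u32 rk[], int Nr, const u8 pt[16], u8 ct[16]);
void rijndaelDecrypt(const u32 rk[], int Nr, const u8 ct[16], u8 pt[16]);

int main(void)
{
  u8 key[16] = { 0 };                 /* arbitrary 128-bit test key */
  u8 pt[16] = "0123456789abcde";      /* 15 characters + NUL = one 16-byte block */
  u8 ct[16], back[16];
  u32 ek[4 * (MAXNR + 1)], dk[4 * (MAXNR + 1)];
  int nr;

  nr = rijndaelKeySetupEnc(ek, key, 128);   /* returns 10 rounds for 128-bit keys */
  rijndaelEncrypt(ek, nr, pt, ct);

  rijndaelKeySetupDec(dk, key, 128);
  rijndaelDecrypt(dk, nr, ct, back);

  /* Decryption must undo encryption whatever GETU32/PUTU32 variant is compiled in. */
  printf("aes round trip: %s\n", memcmp(pt, back, 16) == 0 ? "OK" : "FAIL");
  return 0;
}

Because aes.c hard-codes ARCH_BIG_ENDIAN, the ciphertext on a little-endian machine will not match the FIPS-197 test vectors, but the round trip still holds since GETU32 and PUTU32 remain inverses of each other.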
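
For almabench.c, the orbital-element tables (amas, a, dlm, e, pi, dinc, omega, kp, ca, sa, kq, cl, sl) and the *_static trigonometric functions are also expected at link time. A possible driver that prints right ascension (hours), declination (degrees) and distance (au) for each of the eight bodies at J2000, assuming the planet index simply runs over the [8] dimension of the tables:

/* Hypothetical driver in the spirit of mainalmabench.c (not shown). */
#include <stdio.h>

void planetpv(double epoch[2], int np, double pv[2][3]);   /* from almabench.c */
void radecdist(double state[2][3], double rdd[3]);

int main(void)
{
  /* Two-part Julian date, as planetpv expects: J2000.0 exactly. */
  double epoch[2] = { 2451545.0, 0.0 };
  double pv[2][3], rdd[3];
  int np;

  /* The element tables are dimensioned [8], so body indices run 0..7. */
  for (np = 0; np < 8; np++) {
    planetpv(epoch, np, pv);
    radecdist(pv, rdd);
    printf("body %d: RA %8.4f h   Dec %+8.4f deg   dist %8.4f au\n",
           np, rdd[0], rdd[1], rdd[2]);
  }
  return 0;
}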
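
dfft in fft.c transforms the complex signal held in two parallel arrays in place, padding the length up to the next power of two and returning the length it used. A possible driver follows; the transform length, the test signal and the expected peak location are choices of this sketch, and like the Makefile's fft rule it needs -lm:

/* Hypothetical driver in the spirit of mainfft.c (not shown). */
#include <math.h>
#include <stdio.h>

#define N 256                       /* already a power of two, so no padding */
#define PI 3.14159265358979323846

int dfft(double x[], double y[], int np);   /* from fft.c */

int main(void)
{
  static double re[N], im[N];
  int i, n;

  /* Real cosine at frequency 8: a standard DFT puts peaks in bins 8 and N-8. */
  for (i = 0; i < N; i++) {
    re[i] = cos(2.0 * PI * 8.0 * i / N);
    im[i] = 0.0;
  }

  n = dfft(re, im, N);              /* arrays now hold the spectrum in place */

  for (i = 0; i < 16; i++)
    printf("bin %2d: %10.3f %10.3f\n", i, re[i], im[i]);
  printf("transform length: %d\n", n);
  return 0;
}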
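
fib.c and integr.c each export a single function, so their drivers can be a few lines; the arguments and expected values below are this sketch's own choices:

/* Hypothetical driver for fib.c and integr.c (the real main*.c may differ). */
#include <stdio.h>

int fib(int n);        /* from fib.c: fib(0) = fib(1) = 1 */
double test(int n);    /* from integr.c: rectangle rule for the integral of x^2 over [0,1] */

int main(void)
{
  /* With this definition fib(10) = 89. */
  printf("fib(10) = %d\n", fib(10));

  /* The sum should approach 1/3 as the step count grows. */
  printf("integr(square, 0, 1, 1000000) = %.9f\n", test(1000000));
  return 0;
}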
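
qsort.c declares void quicksort(int lo, int hi, int base[]) and partitions around a pivot with a temp swap. Purely as an illustration of that interface, and not necessarily the exact code shipped in qsort.c, an in-place quicksort over base[lo..hi] can be written as:

/* Illustrative in-place quicksort with the same interface as qsort.c;
   not necessarily identical to the code in this patch. */
#include <stdio.h>

void quicksort(int lo, int hi, int base[])
{
  int i, j;
  int pivot, temp;

  if (lo < hi) {
    /* Partition around base[hi]. */
    for (i = lo, j = hi, pivot = base[hi]; i < j; ) {
      while (i < j && base[i] <= pivot) i++;
      while (j > i && base[j] >= pivot) j--;
      if (i < j) { temp = base[i]; base[i] = base[j]; base[j] = temp; }
    }
    /* Drop the pivot into its final slot, then sort both halves. */
    temp = base[i]; base[i] = base[hi]; base[hi] = temp;
    quicksort(lo, i - 1, base);
    quicksort(i + 1, hi, base);
  }
}

int main(void)
{
  int a[8] = { 5, 3, 8, 1, 9, 2, 7, 4 };
  int i;

  quicksort(0, 7, a);
  for (i = 0; i < 8; i++) printf("%d ", a[i]);
  printf("\n");
  return 0;
}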
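
sha1.c follows the usual init / add_data / finish pattern over a caller-allocated context, with memcpy_static and memset_static again coming from the static library object that the Makefile links. A possible driver follows; the message is an arbitrary choice, and the digest only matches the standard SHA-1 test vectors when ARCH_BIG_ENDIAN agrees with the target:

/* Hypothetical driver in the spirit of mainsha1.c (not shown). */
#include <stdio.h>

typedef unsigned int u32;

struct SHA1Context {          /* must match the layout in sha1.c */
  u32 state[5];
  u32 length[2];
  int numbytes;
  unsigned char buffer[64];
};

void SHA1_init(struct SHA1Context * ctx);
void SHA1_add_data(struct SHA1Context * ctx, unsigned char * data,
                   unsigned long len);
void SHA1_finish(struct SHA1Context * ctx, unsigned char output[20]);

int main(void)
{
  struct SHA1Context ctx;
  unsigned char digest[20];
  unsigned char msg[] = "abc";
  int i;

  SHA1_init(&ctx);
  SHA1_add_data(&ctx, msg, 3);      /* hash the three bytes "abc" */
  SHA1_finish(&ctx, digest);

  for (i = 0; i < 20; i++) printf("%02x", digest[i]);
  printf("\n");
  return 0;
}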