Diffstat (limited to 'test/c')
-rw-r--r--  test/c/Makefile      46
-rw-r--r--  test/c/aes.c        728
-rw-r--r--  test/c/almabench.c  190
-rw-r--r--  test/c/fft.c        158
-rw-r--r--  test/c/fib.c          7
-rw-r--r--  test/c/integr.c      23
-rw-r--r--  test/c/qsort.c       16
-rw-r--r--  test/c/sha1.c       171
8 files changed, 1339 insertions, 0 deletions
diff --git a/test/c/Makefile b/test/c/Makefile
new file mode 100644
index 0000000..436e222
--- /dev/null
+++ b/test/c/Makefile
@@ -0,0 +1,46 @@
+CC=gcc
+CFLAGS=-O2 -Wall
+
+VPATH=../harness ../lib
+
+PROGS=fib integr qsort fft sha1 aes almabench
+
+all: $(PROGS)
+
+fib: fib.o mainfib.o
+ $(CC) $(CFLAGS) -o fib fib.o mainfib.o
+clean::
+ rm -f fib
+
+integr: integr.o mainintegr.o
+ $(CC) $(CFLAGS) -o integr integr.o mainintegr.o
+clean::
+ rm -f integr
+
+qsort: qsort.o mainqsort.o
+ $(CC) $(CFLAGS) -o qsort qsort.o mainqsort.o
+clean::
+ rm -f qsort
+
+fft: fft.o mainfft.o staticlib.o
+ $(CC) $(CFLAGS) -o fft fft.o mainfft.o staticlib.o -lm
+clean::
+ rm -f fft
+
+sha1: sha1.o mainsha1.o staticlib.o
+ $(CC) $(CFLAGS) -o sha1 sha1.o mainsha1.o staticlib.o
+clean::
+ rm -f sha1 sha1.cm
+
+aes: aes.o mainaes.o
+ $(CC) $(CFLAGS) -o aes aes.o mainaes.o
+clean::
+ rm -f aes aes.cm
+
+almabench: almabench.o mainalmabench.o staticlib.o
+ $(CC) $(CFLAGS) -o almabench almabench.o mainalmabench.o staticlib.o -lm
+clean::
+ rm -f almabench almabench.cm
+
+clean::
+ rm -f *.s *.o *~
diff --git a/test/c/aes.c b/test/c/aes.c
new file mode 100644
index 0000000..90abf57
--- /dev/null
+++ b/test/c/aes.c
@@ -0,0 +1,728 @@
+/**
+ * rijndael-alg-fst.c
+ *
+ * @version 3.0 (December 2000)
+ *
+ * Optimised ANSI C code for the Rijndael cipher (now AES)
+ *
+ * @author Vincent Rijmen <vincent.rijmen@esat.kuleuven.ac.be>
+ * @author Antoon Bosselaers <antoon.bosselaers@esat.kuleuven.ac.be>
+ * @author Paulo Barreto <paulo.barreto@terra.com.br>
+ *
+ * This code is hereby placed in the public domain.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
+ * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <assert.h>
+#include <stdlib.h>
+
+#define MAXKC (256/32)
+#define MAXKB (256/8)
+#define MAXNR 14
+
+typedef unsigned char u8;
+typedef unsigned short u16;
+typedef unsigned int u32;
+
+#define ARCH_BIG_ENDIAN
+
+#ifdef ARCH_BIG_ENDIAN
+#define GETU32(pt) (*(u32 *)(pt))
+#define PUTU32(ct,st) (*(u32 *)(ct) = (st))
+#else
+#define GETU32(pt) (((u32)(pt)[0] << 24) ^ ((u32)(pt)[1] << 16) ^ ((u32)(pt)[2] << 8) ^ ((u32)(pt)[3]))
+#define PUTU32(ct, st) { (ct)[0] = (u8)((st) >> 24); (ct)[1] = (u8)((st) >> 16); (ct)[2] = (u8)((st) >> 8); (ct)[3] = (u8)(st); }
+#endif
+
+extern u32 Te0[256], Te1[256], Te2[256], Te3[256], Te4[256];
+extern u32 Td0[256], Td1[256], Td2[256], Td3[256], Td4[256];
+extern u32 rcon[10];
+
+/**
+ * Expand the cipher key into the encryption key schedule.
+ *
+ * @return the number of rounds for the given cipher key size.
+ */
+int rijndaelKeySetupEnc(u32 rk[/*4*(Nr + 1)*/], const u8 cipherKey[], int keyBits) {
+ int i = 0;
+ u32 temp;
+
+ rk[0] = GETU32(cipherKey );
+ rk[1] = GETU32(cipherKey + 4);
+ rk[2] = GETU32(cipherKey + 8);
+ rk[3] = GETU32(cipherKey + 12);
+ if (keyBits == 128) {
+ for (;;) {
+ temp = rk[3];
+ rk[4] = rk[0] ^
+ (Te4[(temp >> 16) & 0xff] & 0xff000000) ^
+ (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^
+ (Te4[(temp ) & 0xff] & 0x0000ff00) ^
+ (Te4[(temp >> 24) ] & 0x000000ff) ^
+ rcon[i];
+ rk[5] = rk[1] ^ rk[4];
+ rk[6] = rk[2] ^ rk[5];
+ rk[7] = rk[3] ^ rk[6];
+ if (++i == 10) {
+ return 10;
+ }
+ rk += 4;
+ }
+ }
+ rk[4] = GETU32(cipherKey + 16);
+ rk[5] = GETU32(cipherKey + 20);
+ if (keyBits == 192) {
+ for (;;) {
+ temp = rk[ 5];
+ rk[ 6] = rk[ 0] ^
+ (Te4[(temp >> 16) & 0xff] & 0xff000000) ^
+ (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^
+ (Te4[(temp ) & 0xff] & 0x0000ff00) ^
+ (Te4[(temp >> 24) ] & 0x000000ff) ^
+ rcon[i];
+ rk[ 7] = rk[ 1] ^ rk[ 6];
+ rk[ 8] = rk[ 2] ^ rk[ 7];
+ rk[ 9] = rk[ 3] ^ rk[ 8];
+ if (++i == 8) {
+ return 12;
+ }
+ rk[10] = rk[ 4] ^ rk[ 9];
+ rk[11] = rk[ 5] ^ rk[10];
+ rk += 6;
+ }
+ }
+ rk[6] = GETU32(cipherKey + 24);
+ rk[7] = GETU32(cipherKey + 28);
+ if (keyBits == 256) {
+ for (;;) {
+ temp = rk[ 7];
+ rk[ 8] = rk[ 0] ^
+ (Te4[(temp >> 16) & 0xff] & 0xff000000) ^
+ (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^
+ (Te4[(temp ) & 0xff] & 0x0000ff00) ^
+ (Te4[(temp >> 24) ] & 0x000000ff) ^
+ rcon[i];
+ rk[ 9] = rk[ 1] ^ rk[ 8];
+ rk[10] = rk[ 2] ^ rk[ 9];
+ rk[11] = rk[ 3] ^ rk[10];
+ if (++i == 7) {
+ return 14;
+ }
+ temp = rk[11];
+ rk[12] = rk[ 4] ^
+ (Te4[(temp >> 24) ] & 0xff000000) ^
+ (Te4[(temp >> 16) & 0xff] & 0x00ff0000) ^
+ (Te4[(temp >> 8) & 0xff] & 0x0000ff00) ^
+ (Te4[(temp ) & 0xff] & 0x000000ff);
+ rk[13] = rk[ 5] ^ rk[12];
+ rk[14] = rk[ 6] ^ rk[13];
+ rk[15] = rk[ 7] ^ rk[14];
+
+ rk += 8;
+ }
+ }
+ return 0;
+}
+
+/**
+ * Expand the cipher key into the decryption key schedule.
+ *
+ * @return the number of rounds for the given cipher key size.
+ */
+int rijndaelKeySetupDec(u32 rk[/*4*(Nr + 1)*/], const u8 cipherKey[], int keyBits) {
+ int Nr, i, j;
+ u32 temp;
+
+ /* expand the cipher key: */
+ Nr = rijndaelKeySetupEnc(rk, cipherKey, keyBits);
+ /* invert the order of the round keys: */
+ for (i = 0, j = 4*Nr; i < j; i += 4, j -= 4) {
+ temp = rk[i ]; rk[i ] = rk[j ]; rk[j ] = temp;
+ temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
+ temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
+ temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
+ }
+ /* apply the inverse MixColumn transform to all round keys but the first and the last: */
+ for (i = 1; i < Nr; i++) {
+ rk += 4;
+ rk[0] =
+ Td0[Te4[(rk[0] >> 24) ] & 0xff] ^
+ Td1[Te4[(rk[0] >> 16) & 0xff] & 0xff] ^
+ Td2[Te4[(rk[0] >> 8) & 0xff] & 0xff] ^
+ Td3[Te4[(rk[0] ) & 0xff] & 0xff];
+ rk[1] =
+ Td0[Te4[(rk[1] >> 24) ] & 0xff] ^
+ Td1[Te4[(rk[1] >> 16) & 0xff] & 0xff] ^
+ Td2[Te4[(rk[1] >> 8) & 0xff] & 0xff] ^
+ Td3[Te4[(rk[1] ) & 0xff] & 0xff];
+ rk[2] =
+ Td0[Te4[(rk[2] >> 24) ] & 0xff] ^
+ Td1[Te4[(rk[2] >> 16) & 0xff] & 0xff] ^
+ Td2[Te4[(rk[2] >> 8) & 0xff] & 0xff] ^
+ Td3[Te4[(rk[2] ) & 0xff] & 0xff];
+ rk[3] =
+ Td0[Te4[(rk[3] >> 24) ] & 0xff] ^
+ Td1[Te4[(rk[3] >> 16) & 0xff] & 0xff] ^
+ Td2[Te4[(rk[3] >> 8) & 0xff] & 0xff] ^
+ Td3[Te4[(rk[3] ) & 0xff] & 0xff];
+ }
+ return Nr;
+}
+
+void rijndaelEncrypt(const u32 rk[/*4*(Nr + 1)*/], int Nr, const u8 pt[16], u8 ct[16]) {
+ u32 s0, s1, s2, s3, t0, t1, t2, t3;
+#ifndef FULL_UNROLL
+ int r;
+#endif /* ?FULL_UNROLL */
+
+ /*
+ * map byte array block to cipher state
+ * and add initial round key:
+ */
+ s0 = GETU32(pt ) ^ rk[0];
+ s1 = GETU32(pt + 4) ^ rk[1];
+ s2 = GETU32(pt + 8) ^ rk[2];
+ s3 = GETU32(pt + 12) ^ rk[3];
+#ifdef FULL_UNROLL
+ /* round 1: */
+ t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[ 4];
+ t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[ 5];
+ t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[ 6];
+ t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[ 7];
+ /* round 2: */
+ s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[ 8];
+ s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[ 9];
+ s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[10];
+ s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[11];
+ /* round 3: */
+ t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[12];
+ t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[13];
+ t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[14];
+ t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[15];
+ /* round 4: */
+ s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[16];
+ s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[17];
+ s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[18];
+ s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[19];
+ /* round 5: */
+ t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[20];
+ t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[21];
+ t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[22];
+ t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[23];
+ /* round 6: */
+ s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[24];
+ s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[25];
+ s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[26];
+ s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[27];
+ /* round 7: */
+ t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[28];
+ t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[29];
+ t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[30];
+ t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[31];
+ /* round 8: */
+ s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[32];
+ s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[33];
+ s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[34];
+ s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[35];
+ /* round 9: */
+ t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[36];
+ t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[37];
+ t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[38];
+ t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[39];
+ if (Nr > 10) {
+ /* round 10: */
+ s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[40];
+ s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[41];
+ s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[42];
+ s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[43];
+ /* round 11: */
+ t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[44];
+ t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[45];
+ t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[46];
+ t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[47];
+ if (Nr > 12) {
+ /* round 12: */
+ s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[48];
+ s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[49];
+ s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[50];
+ s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[51];
+ /* round 13: */
+ t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[52];
+ t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[53];
+ t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[54];
+ t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[55];
+ }
+ }
+ rk += Nr << 2;
+#else /* !FULL_UNROLL */
+ /*
+ * Nr - 1 full rounds:
+ */
+ r = Nr >> 1;
+ for (;;) {
+ t0 =
+ Te0[(s0 >> 24) ] ^
+ Te1[(s1 >> 16) & 0xff] ^
+ Te2[(s2 >> 8) & 0xff] ^
+ Te3[(s3 ) & 0xff] ^
+ rk[4];
+ t1 =
+ Te0[(s1 >> 24) ] ^
+ Te1[(s2 >> 16) & 0xff] ^
+ Te2[(s3 >> 8) & 0xff] ^
+ Te3[(s0 ) & 0xff] ^
+ rk[5];
+ t2 =
+ Te0[(s2 >> 24) ] ^
+ Te1[(s3 >> 16) & 0xff] ^
+ Te2[(s0 >> 8) & 0xff] ^
+ Te3[(s1 ) & 0xff] ^
+ rk[6];
+ t3 =
+ Te0[(s3 >> 24) ] ^
+ Te1[(s0 >> 16) & 0xff] ^
+ Te2[(s1 >> 8) & 0xff] ^
+ Te3[(s2 ) & 0xff] ^
+ rk[7];
+
+ rk += 8;
+ if (--r == 0) {
+ break;
+ }
+
+ s0 =
+ Te0[(t0 >> 24) ] ^
+ Te1[(t1 >> 16) & 0xff] ^
+ Te2[(t2 >> 8) & 0xff] ^
+ Te3[(t3 ) & 0xff] ^
+ rk[0];
+ s1 =
+ Te0[(t1 >> 24) ] ^
+ Te1[(t2 >> 16) & 0xff] ^
+ Te2[(t3 >> 8) & 0xff] ^
+ Te3[(t0 ) & 0xff] ^
+ rk[1];
+ s2 =
+ Te0[(t2 >> 24) ] ^
+ Te1[(t3 >> 16) & 0xff] ^
+ Te2[(t0 >> 8) & 0xff] ^
+ Te3[(t1 ) & 0xff] ^
+ rk[2];
+ s3 =
+ Te0[(t3 >> 24) ] ^
+ Te1[(t0 >> 16) & 0xff] ^
+ Te2[(t1 >> 8) & 0xff] ^
+ Te3[(t2 ) & 0xff] ^
+ rk[3];
+ }
+#endif /* ?FULL_UNROLL */
+ /*
+ * apply last round and
+ * map cipher state to byte array block:
+ */
+ s0 =
+ (Te4[(t0 >> 24) ] & 0xff000000) ^
+ (Te4[(t1 >> 16) & 0xff] & 0x00ff0000) ^
+ (Te4[(t2 >> 8) & 0xff] & 0x0000ff00) ^
+ (Te4[(t3 ) & 0xff] & 0x000000ff) ^
+ rk[0];
+ PUTU32(ct , s0);
+ s1 =
+ (Te4[(t1 >> 24) ] & 0xff000000) ^
+ (Te4[(t2 >> 16) & 0xff] & 0x00ff0000) ^
+ (Te4[(t3 >> 8) & 0xff] & 0x0000ff00) ^
+ (Te4[(t0 ) & 0xff] & 0x000000ff) ^
+ rk[1];
+ PUTU32(ct + 4, s1);
+ s2 =
+ (Te4[(t2 >> 24) ] & 0xff000000) ^
+ (Te4[(t3 >> 16) & 0xff] & 0x00ff0000) ^
+ (Te4[(t0 >> 8) & 0xff] & 0x0000ff00) ^
+ (Te4[(t1 ) & 0xff] & 0x000000ff) ^
+ rk[2];
+ PUTU32(ct + 8, s2);
+ s3 =
+ (Te4[(t3 >> 24) ] & 0xff000000) ^
+ (Te4[(t0 >> 16) & 0xff] & 0x00ff0000) ^
+ (Te4[(t1 >> 8) & 0xff] & 0x0000ff00) ^
+ (Te4[(t2 ) & 0xff] & 0x000000ff) ^
+ rk[3];
+ PUTU32(ct + 12, s3);
+}
+
+void rijndaelDecrypt(const u32 rk[/*4*(Nr + 1)*/], int Nr, const u8 ct[16], u8 pt[16]) {
+ u32 s0, s1, s2, s3, t0, t1, t2, t3;
+#ifndef FULL_UNROLL
+ int r;
+#endif /* ?FULL_UNROLL */
+
+ /*
+ * map byte array block to cipher state
+ * and add initial round key:
+ */
+ s0 = GETU32(ct ) ^ rk[0];
+ s1 = GETU32(ct + 4) ^ rk[1];
+ s2 = GETU32(ct + 8) ^ rk[2];
+ s3 = GETU32(ct + 12) ^ rk[3];
+#ifdef FULL_UNROLL
+ /* round 1: */
+ t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[ 4];
+ t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[ 5];
+ t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[ 6];
+ t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[ 7];
+ /* round 2: */
+ s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[ 8];
+ s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[ 9];
+ s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[10];
+ s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[11];
+ /* round 3: */
+ t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[12];
+ t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[13];
+ t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[14];
+ t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[15];
+ /* round 4: */
+ s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[16];
+ s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[17];
+ s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[18];
+ s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[19];
+ /* round 5: */
+ t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[20];
+ t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[21];
+ t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[22];
+ t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[23];
+ /* round 6: */
+ s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[24];
+ s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[25];
+ s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[26];
+ s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[27];
+ /* round 7: */
+ t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[28];
+ t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[29];
+ t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[30];
+ t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[31];
+ /* round 8: */
+ s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[32];
+ s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[33];
+ s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[34];
+ s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[35];
+ /* round 9: */
+ t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[36];
+ t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[37];
+ t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[38];
+ t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[39];
+ if (Nr > 10) {
+ /* round 10: */
+ s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[40];
+ s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[41];
+ s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[42];
+ s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[43];
+ /* round 11: */
+ t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[44];
+ t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[45];
+ t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[46];
+ t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[47];
+ if (Nr > 12) {
+ /* round 12: */
+ s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[48];
+ s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[49];
+ s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[50];
+ s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[51];
+ /* round 13: */
+ t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[52];
+ t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[53];
+ t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[54];
+ t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[55];
+ }
+ }
+ rk += Nr << 2;
+#else /* !FULL_UNROLL */
+ /*
+ * Nr - 1 full rounds:
+ */
+ r = Nr >> 1;
+ for (;;) {
+ t0 =
+ Td0[(s0 >> 24) ] ^
+ Td1[(s3 >> 16) & 0xff] ^
+ Td2[(s2 >> 8) & 0xff] ^
+ Td3[(s1 ) & 0xff] ^
+ rk[4];
+ t1 =
+ Td0[(s1 >> 24) ] ^
+ Td1[(s0 >> 16) & 0xff] ^
+ Td2[(s3 >> 8) & 0xff] ^
+ Td3[(s2 ) & 0xff] ^
+ rk[5];
+ t2 =
+ Td0[(s2 >> 24) ] ^
+ Td1[(s1 >> 16) & 0xff] ^
+ Td2[(s0 >> 8) & 0xff] ^
+ Td3[(s3 ) & 0xff] ^
+ rk[6];
+ t3 =
+ Td0[(s3 >> 24) ] ^
+ Td1[(s2 >> 16) & 0xff] ^
+ Td2[(s1 >> 8) & 0xff] ^
+ Td3[(s0 ) & 0xff] ^
+ rk[7];
+
+ rk += 8;
+ if (--r == 0) {
+ break;
+ }
+
+ s0 =
+ Td0[(t0 >> 24) ] ^
+ Td1[(t3 >> 16) & 0xff] ^
+ Td2[(t2 >> 8) & 0xff] ^
+ Td3[(t1 ) & 0xff] ^
+ rk[0];
+ s1 =
+ Td0[(t1 >> 24) ] ^
+ Td1[(t0 >> 16) & 0xff] ^
+ Td2[(t3 >> 8) & 0xff] ^
+ Td3[(t2 ) & 0xff] ^
+ rk[1];
+ s2 =
+ Td0[(t2 >> 24) ] ^
+ Td1[(t1 >> 16) & 0xff] ^
+ Td2[(t0 >> 8) & 0xff] ^
+ Td3[(t3 ) & 0xff] ^
+ rk[2];
+ s3 =
+ Td0[(t3 >> 24) ] ^
+ Td1[(t2 >> 16) & 0xff] ^
+ Td2[(t1 >> 8) & 0xff] ^
+ Td3[(t0 ) & 0xff] ^
+ rk[3];
+ }
+#endif /* ?FULL_UNROLL */
+ /*
+ * apply last round and
+ * map cipher state to byte array block:
+ */
+ s0 =
+ (Td4[(t0 >> 24) ] & 0xff000000) ^
+ (Td4[(t3 >> 16) & 0xff] & 0x00ff0000) ^
+ (Td4[(t2 >> 8) & 0xff] & 0x0000ff00) ^
+ (Td4[(t1 ) & 0xff] & 0x000000ff) ^
+ rk[0];
+ PUTU32(pt , s0);
+ s1 =
+ (Td4[(t1 >> 24) ] & 0xff000000) ^
+ (Td4[(t0 >> 16) & 0xff] & 0x00ff0000) ^
+ (Td4[(t3 >> 8) & 0xff] & 0x0000ff00) ^
+ (Td4[(t2 ) & 0xff] & 0x000000ff) ^
+ rk[1];
+ PUTU32(pt + 4, s1);
+ s2 =
+ (Td4[(t2 >> 24) ] & 0xff000000) ^
+ (Td4[(t1 >> 16) & 0xff] & 0x00ff0000) ^
+ (Td4[(t0 >> 8) & 0xff] & 0x0000ff00) ^
+ (Td4[(t3 ) & 0xff] & 0x000000ff) ^
+ rk[2];
+ PUTU32(pt + 8, s2);
+ s3 =
+ (Td4[(t3 >> 24) ] & 0xff000000) ^
+ (Td4[(t2 >> 16) & 0xff] & 0x00ff0000) ^
+ (Td4[(t1 >> 8) & 0xff] & 0x0000ff00) ^
+ (Td4[(t0 ) & 0xff] & 0x000000ff) ^
+ rk[3];
+ PUTU32(pt + 12, s3);
+}
+
+#ifdef INTERMEDIATE_VALUE_KAT
+
+void rijndaelEncryptRound(const u32 rk[/*4*(Nr + 1)*/], int Nr, u8 block[16], int rounds) {
+ int r;
+ u32 s0, s1, s2, s3, t0, t1, t2, t3;
+
+ /*
+ * map byte array block to cipher state
+ * and add initial round key:
+ */
+ s0 = GETU32(block ) ^ rk[0];
+ s1 = GETU32(block + 4) ^ rk[1];
+ s2 = GETU32(block + 8) ^ rk[2];
+ s3 = GETU32(block + 12) ^ rk[3];
+ rk += 4;
+
+ /*
+ * Nr - 1 full rounds:
+ */
+ for (r = (rounds < Nr ? rounds : Nr - 1); r > 0; r--) {
+ t0 =
+ Te0[(s0 >> 24) ] ^
+ Te1[(s1 >> 16) & 0xff] ^
+ Te2[(s2 >> 8) & 0xff] ^
+ Te3[(s3 ) & 0xff] ^
+ rk[0];
+ t1 =
+ Te0[(s1 >> 24) ] ^
+ Te1[(s2 >> 16) & 0xff] ^
+ Te2[(s3 >> 8) & 0xff] ^
+ Te3[(s0 ) & 0xff] ^
+ rk[1];
+ t2 =
+ Te0[(s2 >> 24) ] ^
+ Te1[(s3 >> 16) & 0xff] ^
+ Te2[(s0 >> 8) & 0xff] ^
+ Te3[(s1 ) & 0xff] ^
+ rk[2];
+ t3 =
+ Te0[(s3 >> 24) ] ^
+ Te1[(s0 >> 16) & 0xff] ^
+ Te2[(s1 >> 8) & 0xff] ^
+ Te3[(s2 ) & 0xff] ^
+ rk[3];
+
+ s0 = t0;
+ s1 = t1;
+ s2 = t2;
+ s3 = t3;
+ rk += 4;
+
+ }
+
+ /*
+ * apply last round and
+ * map cipher state to byte array block:
+ */
+ if (rounds == Nr) {
+ t0 =
+ (Te4[(s0 >> 24) ] & 0xff000000) ^
+ (Te4[(s1 >> 16) & 0xff] & 0x00ff0000) ^
+ (Te4[(s2 >> 8) & 0xff] & 0x0000ff00) ^
+ (Te4[(s3 ) & 0xff] & 0x000000ff) ^
+ rk[0];
+ t1 =
+ (Te4[(s1 >> 24) ] & 0xff000000) ^
+ (Te4[(s2 >> 16) & 0xff] & 0x00ff0000) ^
+ (Te4[(s3 >> 8) & 0xff] & 0x0000ff00) ^
+ (Te4[(s0 ) & 0xff] & 0x000000ff) ^
+ rk[1];
+ t2 =
+ (Te4[(s2 >> 24) ] & 0xff000000) ^
+ (Te4[(s3 >> 16) & 0xff] & 0x00ff0000) ^
+ (Te4[(s0 >> 8) & 0xff] & 0x0000ff00) ^
+ (Te4[(s1 ) & 0xff] & 0x000000ff) ^
+ rk[2];
+ t3 =
+ (Te4[(s3 >> 24) ] & 0xff000000) ^
+ (Te4[(s0 >> 16) & 0xff] & 0x00ff0000) ^
+ (Te4[(s1 >> 8) & 0xff] & 0x0000ff00) ^
+ (Te4[(s2 ) & 0xff] & 0x000000ff) ^
+ rk[3];
+
+ s0 = t0;
+ s1 = t1;
+ s2 = t2;
+ s3 = t3;
+ }
+
+ PUTU32(block , s0);
+ PUTU32(block + 4, s1);
+ PUTU32(block + 8, s2);
+ PUTU32(block + 12, s3);
+}
+
+void rijndaelDecryptRound(const u32 rk[/*4*(Nr + 1)*/], int Nr, u8 block[16], int rounds) {
+ int r;
+ u32 s0, s1, s2, s3, t0, t1, t2, t3;
+
+ /*
+ * map byte array block to cipher state
+ * and add initial round key:
+ */
+ s0 = GETU32(block ) ^ rk[0];
+ s1 = GETU32(block + 4) ^ rk[1];
+ s2 = GETU32(block + 8) ^ rk[2];
+ s3 = GETU32(block + 12) ^ rk[3];
+ rk += 4;
+
+ /*
+ * Nr - 1 full rounds:
+ */
+ for (r = (rounds < Nr ? rounds : Nr) - 1; r > 0; r--) {
+ t0 =
+ Td0[(s0 >> 24) ] ^
+ Td1[(s3 >> 16) & 0xff] ^
+ Td2[(s2 >> 8) & 0xff] ^
+ Td3[(s1 ) & 0xff] ^
+ rk[0];
+ t1 =
+ Td0[(s1 >> 24) ] ^
+ Td1[(s0 >> 16) & 0xff] ^
+ Td2[(s3 >> 8) & 0xff] ^
+ Td3[(s2 ) & 0xff] ^
+ rk[1];
+ t2 =
+ Td0[(s2 >> 24) ] ^
+ Td1[(s1 >> 16) & 0xff] ^
+ Td2[(s0 >> 8) & 0xff] ^
+ Td3[(s3 ) & 0xff] ^
+ rk[2];
+ t3 =
+ Td0[(s3 >> 24) ] ^
+ Td1[(s2 >> 16) & 0xff] ^
+ Td2[(s1 >> 8) & 0xff] ^
+ Td3[(s0 ) & 0xff] ^
+ rk[3];
+
+ s0 = t0;
+ s1 = t1;
+ s2 = t2;
+ s3 = t3;
+ rk += 4;
+
+ }
+
+ /*
+ * complete the last round and
+ * map cipher state to byte array block:
+ */
+ t0 =
+ (Td4[(s0 >> 24) ] & 0xff000000) ^
+ (Td4[(s3 >> 16) & 0xff] & 0x00ff0000) ^
+ (Td4[(s2 >> 8) & 0xff] & 0x0000ff00) ^
+ (Td4[(s1 ) & 0xff] & 0x000000ff);
+ t1 =
+ (Td4[(s1 >> 24) ] & 0xff000000) ^
+ (Td4[(s0 >> 16) & 0xff] & 0x00ff0000) ^
+ (Td4[(s3 >> 8) & 0xff] & 0x0000ff00) ^
+ (Td4[(s2 ) & 0xff] & 0x000000ff);
+ t2 =
+ (Td4[(s2 >> 24) ] & 0xff000000) ^
+ (Td4[(s1 >> 16) & 0xff] & 0x00ff0000) ^
+ (Td4[(s0 >> 8) & 0xff] & 0x0000ff00) ^
+ (Td4[(s3 ) & 0xff] & 0x000000ff);
+ t3 =
+ (Td4[(s3 >> 24) ] & 0xff000000) ^
+ (Td4[(s2 >> 16) & 0xff] & 0x00ff0000) ^
+ (Td4[(s1 >> 8) & 0xff] & 0x0000ff00) ^
+ (Td4[(s0 ) & 0xff] & 0x000000ff);
+
+ if (rounds == Nr) {
+ t0 ^= rk[0];
+ t1 ^= rk[1];
+ t2 ^= rk[2];
+ t3 ^= rk[3];
+ }
+
+ PUTU32(block , t0);
+ PUTU32(block + 4, t1);
+ PUTU32(block + 8, t2);
+ PUTU32(block + 12, t3);
+}
+
+#endif /* INTERMEDIATE_VALUE_KAT */
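For reference, a minimal usage sketch of the routines above, assuming a driver along the lines of mainaes.o from ../harness (which is not part of this diff); the all-zero key and plaintext are purely illustrative:

/* Hypothetical driver sketch for aes.c; not the actual mainaes.c. */
#include <stdio.h>

typedef unsigned char u8;
typedef unsigned int u32;

extern int  rijndaelKeySetupEnc(u32 rk[], const u8 cipherKey[], int keyBits);
extern int  rijndaelKeySetupDec(u32 rk[], const u8 cipherKey[], int keyBits);
extern void rijndaelEncrypt(const u32 rk[], int Nr, const u8 pt[16], u8 ct[16]);
extern void rijndaelDecrypt(const u32 rk[], int Nr, const u8 ct[16], u8 pt[16]);

int main(void)
{
  u8 key[16] = {0}, pt[16] = {0}, ct[16], out[16];
  u32 erk[4 * (14 + 1)], drk[4 * (14 + 1)];   /* large enough for any key size */
  int Nr, i;

  Nr = rijndaelKeySetupEnc(erk, key, 128);    /* returns 10 for a 128-bit key */
  rijndaelEncrypt(erk, Nr, pt, ct);
  Nr = rijndaelKeySetupDec(drk, key, 128);
  rijndaelDecrypt(drk, Nr, ct, out);
  for (i = 0; i < 16; i++)
    if (out[i] != pt[i]) { printf("round trip failed\n"); return 1; }
  printf("round trip ok\n");
  return 0;
}

The encrypt/decrypt round trip is endianness-neutral, so it behaves the same whether or not ARCH_BIG_ENDIAN matches the host.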
diff --git a/test/c/almabench.c b/test/c/almabench.c
new file mode 100644
index 0000000..ef92a59
--- /dev/null
+++ b/test/c/almabench.c
@@ -0,0 +1,190 @@
+/*
+ almabench
+ Standard C version
+ 17 April 2003
+
+ Written by Scott Robert Ladd.
+ No rights reserved. This is public domain software, for use by anyone.
+
+ A number-crunching benchmark that can be used as a fitness test for
+ evolving optimal compiler options via a genetic algorithm.
+
+ This program calculates the daily ephemeris (at noon) for the years
+ 2000-2099 using an algorithm developed by J.L. Simon, P. Bretagnon, J.
+ Chapront, M. Chapront-Touze, G. Francou and J. Laskar of the Bureau des
+ Longitudes, Paris, France, as detailed in Astronomy & Astrophysics
+ 282, 663 (1994).
+
+ Note that the code herein is designed for the purpose of testing
+ computational performance; error handling and other such "niceties"
+ are virtually non-existent.
+
+ Actual benchmark results can be found at:
+ http://www.coyotegulch.com
+
+ Please do not use this information or algorithm in any way that might
+ upset the balance of the universe or otherwise cause planets to impact
+ upon one another.
+*/
+
+#include <math.h>
+#include <string.h>
+
+#define PI 3.14159265358979323846
+#define J2000 2451545.0
+#define JCENTURY 36525.0
+#define JMILLENIA 365250.0
+#define TWOPI (2.0 * PI)
+#define A2R (PI / 648000.0)
+#define R2H (12.0 / PI)
+#define R2D (180.0 / PI)
+#define GAUSSK 0.01720209895
+#define TEST_LOOPS 20
+#define TEST_LENGTH 36525
+#define sineps 0.3977771559319137
+#define coseps 0.9174820620691818
+
+extern double amas[8];
+extern double a[8][3], dlm[8][3], e[8][3], pi[8][3], dinc[8][3], omega[8][3];
+extern double kp[8][9], ca[8][9], sa[8][9], kq[8][10], cl[8][10], sl[8][10];
+
+extern double cos_static(double), sin_static(double),
+ atan2_static(double, double), asin_static(double), sqrt_static(double),
+ fmod_static(double, double), fabs_static(double);
+
+#define cos cos_static
+#define sin sin_static
+#define atan2 atan2_static
+#define asin asin_static
+#define sqrt sqrt_static
+#define fmod fmod_static
+#define fabs fabs_static
+
+//---------------------------------------------------------------------------
+// Normalize angle into the range -pi <= A < +pi.
+double anpm (double a)
+{
+ double w = fmod(a,TWOPI);
+
+ if (fabs(w) >= PI)
+ w = w - ((a < 0) ? -TWOPI : TWOPI);
+
+ return w;
+}
+
+//---------------------------------------------------------------------------
+// The reference frame is equatorial and is with respect to the
+// mean equator and equinox of epoch j2000.
+void planetpv (double epoch[2], int np, double pv[2][3])
+{
+ // working storage
+ int i, j, k;
+ double t, da, dl, de, dp, di, doh, dmu, arga, argl, am;
+ double ae, dae, ae2, at, r, v, si2, xq, xp, tl, xsw;
+ double xcw, xm2, xf, ci2, xms, xmc, xpxq2, x, y, z;
+
+ // time: julian millennia since j2000.
+ t = ((epoch[0] - J2000) + epoch[1]) / JMILLENIA;
+
+ // compute the mean elements.
+ da = a[np][0] + (a[np][1] + a[np][2] * t ) * t;
+ dl = (3600.0 * dlm[np][0] + (dlm[np][1] + dlm[np][2] * t ) * t ) * A2R;
+ de = e[np][0] + (e[np][1] + e[np][2] * t ) * t;
+ dp = anpm((3600.0 * pi[np][0] + (pi[np][1] + pi[np][2] * t ) * t ) * A2R );
+ di = (3600.0 * dinc[np][0] + (dinc[np][1] + dinc[np][2] * t ) * t ) * A2R;
+ doh = anpm((3600.0 * omega[np][0] + (omega[np][1] + omega[np][2] * t ) * t ) * A2R );
+
+ // apply the trigonometric terms.
+ dmu = 0.35953620 * t;
+
+ for (k = 0; k < 8; ++k)
+ {
+ arga = kp[np][k] * dmu;
+ argl = kq[np][k] * dmu;
+ da = da + (ca[np][k] * cos(arga) + sa[np][k] * sin(arga)) * 0.0000001;
+ dl = dl + (cl[np][k] * cos(argl) + sl[np][k] * sin(argl)) * 0.0000001;
+ }
+
+ arga = kp[np][8] * dmu;
+ da = da + t * (ca[np][8] * cos(arga) + sa[np][8] * sin(arga)) * 0.0000001;
+
+ for (k = 8; k <= 9; ++k)
+ {
+ argl = kq[np][k] * dmu;
+ dl = dl + t * ( cl[np][k] * cos(argl) + sl[np][k] * sin(argl) ) * 0.0000001;
+ }
+
+ dl = fmod(dl,TWOPI);
+
+ // iterative solution of kepler's equation to get eccentric anomaly.
+ am = dl - dp;
+ ae = am + de * sin(am);
+ k = 0;
+
+ while (1)
+ {
+ dae = (am - ae + de * sin(ae)) / (1.0 - de * cos(ae));
+ ae = ae + dae;
+ k = k + 1;
+
+ if ((k >= 10) || (fabs(dae) < 1e-12))
+ break;
+ }
+
+ // true anomaly.
+ ae2 = ae / 2.0;
+ at = 2.0 * atan2(sqrt((1.0 + de)/(1.0 - de)) * sin(ae2), cos(ae2));
+
+ // distance (au) and speed (radians per day).
+ r = da * (1.0 - de * cos(ae));
+ v = GAUSSK * sqrt((1.0 + 1.0 / amas[np] ) / (da * da * da));
+
+ si2 = sin(di / 2.0);
+ xq = si2 * cos(doh);
+ xp = si2 * sin(doh);
+ tl = at + dp;
+ xsw = sin(tl);
+ xcw = cos(tl);
+ xm2 = 2.0 * (xp * xcw - xq * xsw );
+ xf = da / sqrt(1.0 - de*de);
+ ci2 = cos(di / 2.0);
+ xms = (de * sin(dp) + xsw) * xf;
+ xmc = (de * cos(dp) + xcw) * xf;
+ xpxq2 = 2.0 * xp * xq;
+
+ // position (j2000 ecliptic x,y,z in au).
+ x = r * (xcw - xm2 * xp);
+ y = r * (xsw + xm2 * xq);
+ z = r * (-xm2 * ci2);
+
+ // rotate to equatorial.
+ pv[0][0] = x;
+ pv[0][1] = y * coseps - z * sineps;
+ pv[0][2] = y * sineps + z * coseps;
+
+ // velocity (j2000 ecliptic xdot,ydot,zdot in au/d).
+ x = v * ((-1.0 + 2.0 * xp * xp) * xms + xpxq2 * xmc);
+ y = v * (( 1.0 - 2.0 * xq * xq ) * xmc - xpxq2 * xms);
+ z = v * (2.0 * ci2 * (xp * xms + xq * xmc));
+
+ // rotate to equatorial.
+ pv[1][0] = x;
+ pv[1][1] = y * coseps - z * sineps;
+ pv[1][2] = y * sineps + z * coseps;
+}
+
+//---------------------------------------------------------------------------
+// Computes RA, Declination, and distance from a state vector returned by
+// planetpv.
+void radecdist(double state[2][3], double rdd[3])
+{
+ // distance
+ rdd[2] = sqrt(state[0][0] * state[0][0] + state[0][1] * state[0][1] + state[0][2] * state[0][2]);
+
+ // RA
+ rdd[0] = atan2(state[0][1], state[0][0]) * R2H;
+ if (rdd[0] < 0.0) rdd[0] += 24.0;
+
+ // Declination
+ rdd[1] = asin(state[0][2] / rdd[2]) * R2D;
+}
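A hedged sketch of how a driver such as mainalmabench.o might call planetpv and radecdist; the epoch (J2000.0) and the printout format are illustrative assumptions, not taken from the real harness:

/* Hypothetical driver sketch for almabench.c; not the actual mainalmabench.c. */
#include <stdio.h>

extern void planetpv(double epoch[2], int np, double pv[2][3]);
extern void radecdist(double state[2][3], double rdd[3]);

int main(void)
{
  double jd[2] = { 2451545.0, 0.0 };   /* two-part Julian date: J2000.0 */
  double pv[2][3], rdd[3];
  int np;

  /* np indexes the eight bodies covered by the coefficient tables. */
  for (np = 0; np < 8; np++) {
    planetpv(jd, np, pv);
    radecdist(pv, rdd);
    printf("body %d: RA %.4f h  Dec %.4f deg  dist %.6f au\n",
           np, rdd[0], rdd[1], rdd[2]);
  }
  return 0;
}

Linking needs staticlib.o from ../lib, since the file redirects cos, sin and friends to the *_static wrappers.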
diff --git a/test/c/fft.c b/test/c/fft.c
new file mode 100644
index 0000000..6f24af5
--- /dev/null
+++ b/test/c/fft.c
@@ -0,0 +1,158 @@
+/*
+ C Program: Test_Fast_Fourier_Transform_in_Double_Precision (TFFTDP.c)
+ by: Dave Edelblute, edelblut@cod.nosc.mil, 05 Jan 1993
+*/
+
+#include <math.h>
+
+#ifndef PI
+#define PI 3.14159265358979323846
+#endif
+
+extern double cos_static(double), sin_static(double);
+
+#define cos cos_static
+#define sin sin_static
+
+/********************************************************/
+/* A Duhamel-Hollman split-radix dif fft */
+/* Ref: Electronics Letters, Jan. 5, 1984 */
+/* Complex input and output data in arrays x and y */
+/* Length is n. */
+/********************************************************/
+
+int dfft(double x[], double y[], int np)
+{
+double *px,*py;
+int i,j,k,m,n,i0,i1,i2,i3,is,id,n1,n2,n4;
+double a,e,a3,cc1,ss1,cc3,ss3,r1,r2,s1,s2,s3,xt,tpi;
+
+ px = x - 1;
+ py = y - 1;
+ i = 2;
+ m = 1;
+
+ while (i < np)
+ {
+ i = i+i;
+ m = m+1;
+ }
+
+ n = i;
+
+ if (n != np)
+ {
+ for (i = np+1; i <= n; i++)
+ {
+ *(px + i) = 0.0;
+ *(py + i) = 0.0;
+ }
+ /* printf("\nuse %d point fft", n); */
+ }
+
+ n2 = n+n;
+ tpi = 2.0 * PI;
+ for (k = 1; k <= m-1; k++ )
+ {
+ n2 = n2 / 2;
+ n4 = n2 / 4;
+ e = tpi / (double)n2;
+ a = 0.0;
+
+ for (j = 1; j<= n4 ; j++)
+ {
+ a3 = 3.0 * a;
+ cc1 = cos(a);
+ ss1 = sin(a);
+
+ cc3 = cos(a3);
+ ss3 = sin(a3);
+ a = e * (double)j;
+ is = j;
+ id = 2 * n2;
+
+ while ( is < n )
+ {
+ for (i0 = is; i0 <= n-1; i0 = i0 + id)
+ {
+ i1 = i0 + n4;
+ i2 = i1 + n4;
+ i3 = i2 + n4;
+ r1 = *(px+i0) - *(px+i2);
+ *(px+i0) = *(px+i0) + *(px+i2);
+ r2 = *(px+i1) - *(px+i3);
+ *(px+i1) = *(px+i1) + *(px+i3);
+ s1 = *(py+i0) - *(py+i2);
+ *(py+i0) = *(py+i0) + *(py+i2);
+ s2 = *(py+i1) - *(py+i3);
+ *(py+i1) = *(py+i1) + *(py+i3);
+ s3 = r1 - s2; r1 = r1 + s2;
+ s2 = r2 - s1; r2 = r2 + s1;
+ *(px+i2) = r1*cc1 - s2*ss1;
+ *(py+i2) = -s2*cc1 - r1*ss1;
+ *(px+i3) = s3*cc3 + r2*ss3;
+ *(py+i3) = r2*cc3 - s3*ss3;
+ }
+ is = 2 * id - n2 + j;
+ id = 4 * id;
+ }
+ }
+ }
+
+/************************************/
+/* Last stage, length=2 butterfly */
+/************************************/
+ is = 1;
+ id = 4;
+
+ while ( is < n)
+ {
+ for (i0 = is; i0 <= n; i0 = i0 + id)
+ {
+ i1 = i0 + 1;
+ r1 = *(px+i0);
+ *(px+i0) = r1 + *(px+i1);
+ *(px+i1) = r1 - *(px+i1);
+ r1 = *(py+i0);
+ *(py+i0) = r1 + *(py+i1);
+ *(py+i1) = r1 - *(py+i1);
+ }
+ is = 2*id - 1;
+ id = 4 * id;
+ }
+
+/*************************/
+/* Bit reverse counter */
+/*************************/
+ j = 1;
+ n1 = n - 1;
+
+ for (i = 1; i <= n1; i++)
+ {
+ if (i < j)
+ {
+ xt = *(px+j);
+ *(px+j) = *(px+i);
+ *(px+i) = xt;
+ xt = *(py+j);
+ *(py+j) = *(py+i);
+ *(py+i) = xt;
+ }
+
+ k = n / 2;
+ while (k < j)
+ {
+ j = j - k;
+ k = k / 2;
+ }
+ j = j + k;
+ }
+
+/*
+ for (i = 1; i <= 16; i++) printf("%d %g %g\n", i, *(px+i), *(py+i));
+*/
+
+ return(n);
+}
+
+
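dfft rounds the length up to the next power of two, zero-padding the tail, and returns that padded length, so the caller's arrays must be at least that large. A minimal, hypothetical driver (the real mainfft.c from ../harness is not shown) might look like:

/* Hypothetical driver sketch for fft.c; not the actual mainfft.c. */
#include <stdio.h>

extern int dfft(double x[], double y[], int np);

#define NPTS 16   /* illustrative size, already a power of two */

int main(void)
{
  double re[NPTS], im[NPTS];
  int i, n;

  /* Impulse input: its transform is a constant spectrum. */
  for (i = 0; i < NPTS; i++) { re[i] = 0.0; im[i] = 0.0; }
  re[0] = 1.0;

  n = dfft(re, im, NPTS);   /* returns the (possibly padded) length */
  for (i = 0; i < n; i++)
    printf("%2d  %g  %g\n", i, re[i], im[i]);
  return 0;
}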
diff --git a/test/c/fib.c b/test/c/fib.c
new file mode 100644
index 0000000..34e7baa
--- /dev/null
+++ b/test/c/fib.c
@@ -0,0 +1,7 @@
+int fib(int n)
+{
+ if (n < 2)
+ return 1;
+ else
+ return fib(n-1) + fib(n-2);
+}
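The Makefile links each kernel against a driver object from ../harness (mainfib.o in this case); a hypothetical stand-in for that driver, with an illustrative argument, could be:

/* Hypothetical driver sketch for fib.c; not the actual mainfib.c. */
#include <stdio.h>

extern int fib(int n);

int main(void)
{
  /* Doubly recursive, so the call count grows exponentially with n. */
  printf("fib(30) = %d\n", fib(30));
  return 0;
}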
diff --git a/test/c/integr.c b/test/c/integr.c
new file mode 100644
index 0000000..abcbd28
--- /dev/null
+++ b/test/c/integr.c
@@ -0,0 +1,23 @@
+static double square(double x)
+{
+ return x * x;
+}
+
+static double integr(double (*f)(double), double low, double high, int n)
+{
+ double h, x, s;
+ int i;
+
+ h = (high - low) / n;
+ s = 0;
+ for (i = n, x = low; i > 0; i--, x += h) s += f(x);
+ return s * h;
+}
+
+double test(int n)
+{
+ return integr(square, 0.0, 1.0, n);
+}
+
+
+
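test(n) applies an n-step left-endpoint rectangle rule to x*x on [0,1], so it should approach 1/3 as n grows; a quick hypothetical check (not the real mainintegr.c):

/* Hypothetical driver sketch for integr.c; not the actual mainintegr.c. */
#include <stdio.h>

extern double test(int n);

int main(void)
{
  /* The exact integral of x*x over [0,1] is 1/3 = 0.333333... */
  printf("n = 100      -> %.6f\n", test(100));
  printf("n = 10000000 -> %.6f\n", test(10000000));
  return 0;
}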
diff --git a/test/c/qsort.c b/test/c/qsort.c
new file mode 100644
index 0000000..9f8e5b1
--- /dev/null
+++ b/test/c/qsort.c
@@ -0,0 +1,16 @@
+void quicksort(int lo, int hi, int base[])
+{
+ int i,j;
+ int pivot,temp;
+
+ if (lo<hi) {
+ for (i=lo,j=hi,pivot=base[hi];i<j;) {
+ while (i<hi && base[i]<=pivot) i++;
+ while (j>lo && base[j]>=pivot) j--;
+ if (i<j) { temp=base[i]; base[i]=base[j]; base[j]=temp; }
+ }
+ temp=base[i]; base[i]=base[hi]; base[hi]=temp;
+ quicksort(lo,i-1,base); quicksort(i+1,hi,base);
+ }
+}
+
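quicksort sorts in place over the inclusive index range [lo, hi]; a small hypothetical driver (the real mainqsort.c is not included here) might be:

/* Hypothetical driver sketch for qsort.c; not the actual mainqsort.c. */
#include <stdio.h>

extern void quicksort(int lo, int hi, int base[]);

int main(void)
{
  int a[8] = { 5, 3, 8, 1, 9, 2, 7, 4 };
  int i;

  quicksort(0, 7, a);   /* bounds are inclusive indices, not a length */
  for (i = 0; i < 8; i++)
    printf("%d ", a[i]);
  printf("\n");
  return 0;
}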
diff --git a/test/c/sha1.c b/test/c/sha1.c
new file mode 100644
index 0000000..024e8ad
--- /dev/null
+++ b/test/c/sha1.c
@@ -0,0 +1,171 @@
+/* SHA-1 cryptographic hash function */
+/* Ref: Handbook of Applied Cryptography, section 9.4.2, algorithm 9.53 */
+
+#include <string.h>
+
+extern void memcpy_static(void *, void *, int),
+ memset_static(void *, int, char);
+
+#define memcpy memcpy_static
+#define memset memset_static
+
+#define ARCH_BIG_ENDIAN
+
+typedef unsigned int u32;
+
+struct SHA1Context {
+ u32 state[5];
+ u32 length[2];
+ int numbytes;
+ unsigned char buffer[64];
+};
+
+#define rol1(x) (((x) << 1) | ((x) >> 31))
+#define rol5(x) (((x) << 5) | ((x) >> 27))
+#define rol30(x) (((x) << 30) | ((x) >> 2))
+
+static void SHA1_copy_and_swap(void * src, void * dst, int numwords)
+{
+#ifdef ARCH_BIG_ENDIAN
+ memcpy(dst, src, numwords * sizeof(u32));
+#else
+ unsigned char * s, * d;
+ unsigned char a, b;
+ for (s = src, d = dst; numwords > 0; s += 4, d += 4, numwords--) {
+ a = s[0];
+ b = s[1];
+ d[0] = s[3];
+ d[1] = s[2];
+ d[2] = b;
+ d[3] = a;
+ }
+#endif
+}
+
+#define F(x,y,z) ( z ^ (x & (y ^ z) ) )
+#define G(x,y,z) ( (x & y) | (z & (x | y) ) )
+#define H(x,y,z) ( x ^ y ^ z )
+
+#define Y1 0x5A827999U
+#define Y2 0x6ED9EBA1U
+#define Y3 0x8F1BBCDCU
+#define Y4 0xCA62C1D6U
+
+static void SHA1_transform(struct SHA1Context * ctx)
+{
+ int i;
+ register u32 a, b, c, d, e, t;
+ u32 data[80];
+
+ /* Convert buffer data to 16 big-endian integers */
+ SHA1_copy_and_swap(ctx->buffer, data, 16);
+
+ /* Expand into 80 integers */
+ for (i = 16; i < 80; i++) {
+ t = data[i-3] ^ data[i-8] ^ data[i-14] ^ data[i-16];
+ data[i] = rol1(t);
+ }
+
+ /* Initialize working variables */
+ a = ctx->state[0];
+ b = ctx->state[1];
+ c = ctx->state[2];
+ d = ctx->state[3];
+ e = ctx->state[4];
+
+ /* Perform rounds */
+ for (i = 0; i < 20; i++) {
+ t = F(b, c, d) + Y1 + rol5(a) + e + data[i];
+ e = d; d = c; c = rol30(b); b = a; a = t;
+ }
+ for (/*nothing*/; i < 40; i++) {
+ t = H(b, c, d) + Y2 + rol5(a) + e + data[i];
+ e = d; d = c; c = rol30(b); b = a; a = t;
+ }
+ for (/*nothing*/; i < 60; i++) {
+ t = G(b, c, d) + Y3 + rol5(a) + e + data[i];
+ e = d; d = c; c = rol30(b); b = a; a = t;
+ }
+ for (/*nothing*/; i < 80; i++) {
+ t = H(b, c, d) + Y4 + rol5(a) + e + data[i];
+ e = d; d = c; c = rol30(b); b = a; a = t;
+ }
+
+ /* Update chaining values */
+ ctx->state[0] += a;
+ ctx->state[1] += b;
+ ctx->state[2] += c;
+ ctx->state[3] += d;
+ ctx->state[4] += e;
+}
+
+void SHA1_init(struct SHA1Context * ctx)
+{
+ ctx->state[0] = 0x67452301U;
+ ctx->state[1] = 0xEFCDAB89U;
+ ctx->state[2] = 0x98BADCFEU;
+ ctx->state[3] = 0x10325476U;
+ ctx->state[4] = 0xC3D2E1F0U;
+ ctx->numbytes = 0;
+ ctx->length[0] = 0;
+ ctx->length[1] = 0;
+}
+
+void SHA1_add_data(struct SHA1Context * ctx, unsigned char * data,
+ unsigned long len)
+{
+ u32 t;
+
+ /* Update length */
+ t = ctx->length[1];
+ if ((ctx->length[1] = t + (u32) (len << 3)) < t)
+ ctx->length[0]++; /* carry from low 32 bits to high 32 bits */
+ ctx->length[0] += (u32) (len >> 29);
+
+ /* If data was left in buffer, pad it with fresh data and munge block */
+ if (ctx->numbytes != 0) {
+ t = 64 - ctx->numbytes;
+ if (len < t) {
+ memcpy(ctx->buffer + ctx->numbytes, data, len);
+ ctx->numbytes += len;
+ return;
+ }
+ memcpy(ctx->buffer + ctx->numbytes, data, t);
+ SHA1_transform(ctx);
+ data += t;
+ len -= t;
+ }
+ /* Munge data in 64-byte chunks */
+ while (len >= 64) {
+ memcpy(ctx->buffer, data, 64);
+ SHA1_transform(ctx);
+ data += 64;
+ len -= 64;
+ }
+ /* Save remaining data */
+ memcpy(ctx->buffer, data, len);
+ ctx->numbytes = len;
+}
+
+void SHA1_finish(struct SHA1Context * ctx, unsigned char output[20])
+{
+ int i = ctx->numbytes;
+
+ /* Set first char of padding to 0x80. There is always room. */
+ ctx->buffer[i++] = 0x80;
+ /* If we do not have room for the length (8 bytes), pad to 64 bytes
+ with zeroes and munge the data block */
+ if (i > 56) {
+ memset(ctx->buffer + i, 0, 64 - i);
+ SHA1_transform(ctx);
+ i = 0;
+ }
+ /* Pad to byte 56 with zeroes */
+ memset(ctx->buffer + i, 0, 56 - i);
+ /* Add length in big-endian */
+ SHA1_copy_and_swap(ctx->length, ctx->buffer + 56, 2);
+ /* Munge the final block */
+ SHA1_transform(ctx);
+ /* Final hash value is in ctx->state modulo big-endian conversion */
+ SHA1_copy_and_swap(ctx->state, output, 5);
+}
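Finally, a minimal sketch of the init/add/finish sequence exposed by sha1.c. Since this diff ships no header, the sketch repeats the SHA1Context layout, which is an assumption about how the real mainsha1.c gets at the type; the "abc" message is illustrative:

/* Hypothetical driver sketch for sha1.c; not the actual mainsha1.c. */
#include <stdio.h>
#include <string.h>

typedef unsigned int u32;

struct SHA1Context {          /* mirrors the definition in sha1.c */
  u32 state[5];
  u32 length[2];
  int numbytes;
  unsigned char buffer[64];
};

extern void SHA1_init(struct SHA1Context *ctx);
extern void SHA1_add_data(struct SHA1Context *ctx, unsigned char *data,
                          unsigned long len);
extern void SHA1_finish(struct SHA1Context *ctx, unsigned char output[20]);

int main(void)
{
  struct SHA1Context ctx;
  unsigned char msg[] = "abc";
  unsigned char digest[20];
  int i;

  SHA1_init(&ctx);
  SHA1_add_data(&ctx, msg, strlen((char *) msg));
  SHA1_finish(&ctx, digest);
  for (i = 0; i < 20; i++)
    printf("%02x", digest[i]);
  printf("\n");
  return 0;
}

As with the other programs that use the *_static wrappers, linking needs staticlib.o from ../lib.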