From 917e891d06e16516fe90e286f184062e6b7409fe Mon Sep 17 00:00:00 2001 From: xleroy Date: Thu, 29 Jun 2006 16:07:01 +0000 Subject: Version C des tests Cminor git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@40 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e --- test/c/Makefile | 46 ++++ test/c/aes.c | 728 +++++++++++++++++++++++++++++++++++++++++++++++++++++ test/c/almabench.c | 190 ++++++++++++++ test/c/fft.c | 158 ++++++++++++ test/c/fib.c | 7 + test/c/integr.c | 23 ++ test/c/qsort.c | 16 ++ test/c/sha1.c | 171 +++++++++++++ 8 files changed, 1339 insertions(+) create mode 100644 test/c/Makefile create mode 100644 test/c/aes.c create mode 100644 test/c/almabench.c create mode 100644 test/c/fft.c create mode 100644 test/c/fib.c create mode 100644 test/c/integr.c create mode 100644 test/c/qsort.c create mode 100644 test/c/sha1.c (limited to 'test/c') diff --git a/test/c/Makefile b/test/c/Makefile new file mode 100644 index 0000000..436e222 --- /dev/null +++ b/test/c/Makefile @@ -0,0 +1,46 @@ +CC=gcc +CFLAGS=-O2 -Wall + +VPATH=../harness ../lib + +PROGS=fib integr qsort fft sha1 aes almabench + +all: $(PROGS) + +fib: fib.o mainfib.o + $(CC) $(CFLAGS) -o fib fib.o mainfib.o +clean:: + rm -f fib + +integr: integr.o mainintegr.o + $(CC) $(CFLAGS) -o integr integr.o mainintegr.o +clean:: + rm -f integr + +qsort: qsort.o mainqsort.o + $(CC) $(CFLAGS) -o qsort qsort.o mainqsort.o +clean:: + rm -f qsort + +fft: fft.o mainfft.o staticlib.o + $(CC) $(CFLAGS) -o fft fft.o mainfft.o staticlib.o -lm +clean:: + rm -f fft + +sha1: sha1.o mainsha1.o staticlib.o + $(CC) $(CFLAGS) -o sha1 sha1.o mainsha1.o staticlib.o +clean:: + rm -f sha1 sha1.cm + +aes: aes.o mainaes.o + $(CC) $(CFLAGS) -o aes aes.o mainaes.o +clean:: + rm -f aes aes.cm + +almabench: almabench.o mainalmabench.o staticlib.o + $(CC) $(CFLAGS) -o almabench almabench.o mainalmabench.o staticlib.o -lm +clean:: + rm -f almabench almabench.cm + +clean:: + rm -f *.s *.o *~ diff --git a/test/c/aes.c b/test/c/aes.c new file mode 100644 index 0000000..90abf57 --- /dev/null +++ b/test/c/aes.c @@ -0,0 +1,728 @@ +/** + * rijndael-alg-fst.c + * + * @version 3.0 (December 2000) + * + * Optimised ANSI C code for the Rijndael cipher (now AES) + * + * @author Vincent Rijmen + * @author Antoon Bosselaers + * @author Paulo Barreto + * + * This code is hereby placed in the public domain. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#include +#include + +#define MAXKC (256/32) +#define MAXKB (256/8) +#define MAXNR 14 + +typedef unsigned char u8; +typedef unsigned short u16; +typedef unsigned int u32; + +#define ARCH_BIG_ENDIAN + +#ifdef ARCH_BIG_ENDIAN +#define GETU32(pt) (*(u32 *)(pt)) +#define PUTU32(ct,st) (*(u32 *)(ct) = (st)) +#else +#define GETU32(pt) (((u32)(pt)[0] << 24) ^ ((u32)(pt)[1] << 16) ^ ((u32)(pt)[2] << 8) ^ ((u32)(pt)[3])) +#define PUTU32(ct, st) { (ct)[0] = (u8)((st) >> 24); (ct)[1] = (u8)((st) >> 16); (ct)[2] = (u8)((st) >> 8); (ct)[3] = (u8)(st); } +#endif + +extern u32 Te0[256], Te1[256], Te2[256], Te3[256], Te4[256]; +extern u32 Td0[256], Td1[256], Td2[256], Td3[256], Td4[256]; +extern u32 rcon[10]; + +/** + * Expand the cipher key into the encryption key schedule. + * + * @return the number of rounds for the given cipher key size. + */ +int rijndaelKeySetupEnc(u32 rk[/*4*(Nr + 1)*/], const u8 cipherKey[], int keyBits) { + int i = 0; + u32 temp; + + rk[0] = GETU32(cipherKey ); + rk[1] = GETU32(cipherKey + 4); + rk[2] = GETU32(cipherKey + 8); + rk[3] = GETU32(cipherKey + 12); + if (keyBits == 128) { + for (;;) { + temp = rk[3]; + rk[4] = rk[0] ^ + (Te4[(temp >> 16) & 0xff] & 0xff000000) ^ + (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^ + (Te4[(temp ) & 0xff] & 0x0000ff00) ^ + (Te4[(temp >> 24) ] & 0x000000ff) ^ + rcon[i]; + rk[5] = rk[1] ^ rk[4]; + rk[6] = rk[2] ^ rk[5]; + rk[7] = rk[3] ^ rk[6]; + if (++i == 10) { + return 10; + } + rk += 4; + } + } + rk[4] = GETU32(cipherKey + 16); + rk[5] = GETU32(cipherKey + 20); + if (keyBits == 192) { + for (;;) { + temp = rk[ 5]; + rk[ 6] = rk[ 0] ^ + (Te4[(temp >> 16) & 0xff] & 0xff000000) ^ + (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^ + (Te4[(temp ) & 0xff] & 0x0000ff00) ^ + (Te4[(temp >> 24) ] & 0x000000ff) ^ + rcon[i]; + rk[ 7] = rk[ 1] ^ rk[ 6]; + rk[ 8] = rk[ 2] ^ rk[ 7]; + rk[ 9] = rk[ 3] ^ rk[ 8]; + if (++i == 8) { + return 12; + } + rk[10] = rk[ 4] ^ rk[ 9]; + rk[11] = rk[ 5] ^ rk[10]; + rk += 6; + } + } + rk[6] = GETU32(cipherKey + 24); + rk[7] = GETU32(cipherKey + 28); + if (keyBits == 256) { + for (;;) { + temp = rk[ 7]; + rk[ 8] = rk[ 0] ^ + (Te4[(temp >> 16) & 0xff] & 0xff000000) ^ + (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^ + (Te4[(temp ) & 0xff] & 0x0000ff00) ^ + (Te4[(temp >> 24) ] & 0x000000ff) ^ + rcon[i]; + rk[ 9] = rk[ 1] ^ rk[ 8]; + rk[10] = rk[ 2] ^ rk[ 9]; + rk[11] = rk[ 3] ^ rk[10]; + if (++i == 7) { + return 14; + } + temp = rk[11]; + rk[12] = rk[ 4] ^ + (Te4[(temp >> 24) ] & 0xff000000) ^ + (Te4[(temp >> 16) & 0xff] & 0x00ff0000) ^ + (Te4[(temp >> 8) & 0xff] & 0x0000ff00) ^ + (Te4[(temp ) & 0xff] & 0x000000ff); + rk[13] = rk[ 5] ^ rk[12]; + rk[14] = rk[ 6] ^ rk[13]; + rk[15] = rk[ 7] ^ rk[14]; + + rk += 8; + } + } + return 0; +} + +/** + * Expand the cipher key into the decryption key schedule. + * + * @return the number of rounds for the given cipher key size. 
+ */ +int rijndaelKeySetupDec(u32 rk[/*4*(Nr + 1)*/], const u8 cipherKey[], int keyBits) { + int Nr, i, j; + u32 temp; + + /* expand the cipher key: */ + Nr = rijndaelKeySetupEnc(rk, cipherKey, keyBits); + /* invert the order of the round keys: */ + for (i = 0, j = 4*Nr; i < j; i += 4, j -= 4) { + temp = rk[i ]; rk[i ] = rk[j ]; rk[j ] = temp; + temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp; + temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp; + temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp; + } + /* apply the inverse MixColumn transform to all round keys but the first and the last: */ + for (i = 1; i < Nr; i++) { + rk += 4; + rk[0] = + Td0[Te4[(rk[0] >> 24) ] & 0xff] ^ + Td1[Te4[(rk[0] >> 16) & 0xff] & 0xff] ^ + Td2[Te4[(rk[0] >> 8) & 0xff] & 0xff] ^ + Td3[Te4[(rk[0] ) & 0xff] & 0xff]; + rk[1] = + Td0[Te4[(rk[1] >> 24) ] & 0xff] ^ + Td1[Te4[(rk[1] >> 16) & 0xff] & 0xff] ^ + Td2[Te4[(rk[1] >> 8) & 0xff] & 0xff] ^ + Td3[Te4[(rk[1] ) & 0xff] & 0xff]; + rk[2] = + Td0[Te4[(rk[2] >> 24) ] & 0xff] ^ + Td1[Te4[(rk[2] >> 16) & 0xff] & 0xff] ^ + Td2[Te4[(rk[2] >> 8) & 0xff] & 0xff] ^ + Td3[Te4[(rk[2] ) & 0xff] & 0xff]; + rk[3] = + Td0[Te4[(rk[3] >> 24) ] & 0xff] ^ + Td1[Te4[(rk[3] >> 16) & 0xff] & 0xff] ^ + Td2[Te4[(rk[3] >> 8) & 0xff] & 0xff] ^ + Td3[Te4[(rk[3] ) & 0xff] & 0xff]; + } + return Nr; +} + +void rijndaelEncrypt(const u32 rk[/*4*(Nr + 1)*/], int Nr, const u8 pt[16], u8 ct[16]) { + u32 s0, s1, s2, s3, t0, t1, t2, t3; +#ifndef FULL_UNROLL + int r; +#endif /* ?FULL_UNROLL */ + + /* + * map byte array block to cipher state + * and add initial round key: + */ + s0 = GETU32(pt ) ^ rk[0]; + s1 = GETU32(pt + 4) ^ rk[1]; + s2 = GETU32(pt + 8) ^ rk[2]; + s3 = GETU32(pt + 12) ^ rk[3]; +#ifdef FULL_UNROLL + /* round 1: */ + t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[ 4]; + t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[ 5]; + t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[ 6]; + t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[ 7]; + /* round 2: */ + s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[ 8]; + s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[ 9]; + s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[10]; + s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[11]; + /* round 3: */ + t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[12]; + t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[13]; + t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[14]; + t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[15]; + /* round 4: */ + s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[16]; + s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[17]; + s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[18]; + s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[19]; + /* round 5: */ + t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[20]; + t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) 
& 0xff] ^ Te3[s0 & 0xff] ^ rk[21]; + t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[22]; + t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[23]; + /* round 6: */ + s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[24]; + s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[25]; + s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[26]; + s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[27]; + /* round 7: */ + t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[28]; + t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[29]; + t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[30]; + t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[31]; + /* round 8: */ + s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[32]; + s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[33]; + s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[34]; + s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[35]; + /* round 9: */ + t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[36]; + t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[37]; + t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[38]; + t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[39]; + if (Nr > 10) { + /* round 10: */ + s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[40]; + s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[41]; + s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[42]; + s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[43]; + /* round 11: */ + t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[44]; + t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[45]; + t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[46]; + t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[47]; + if (Nr > 12) { + /* round 12: */ + s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[48]; + s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[49]; + s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[50]; + s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[51]; + /* round 13: */ + t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[52]; + t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[53]; + t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[54]; + t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[55]; + } + } + rk += Nr << 2; +#else /* !FULL_UNROLL 
*/ + /* + * Nr - 1 full rounds: + */ + r = Nr >> 1; + for (;;) { + t0 = + Te0[(s0 >> 24) ] ^ + Te1[(s1 >> 16) & 0xff] ^ + Te2[(s2 >> 8) & 0xff] ^ + Te3[(s3 ) & 0xff] ^ + rk[4]; + t1 = + Te0[(s1 >> 24) ] ^ + Te1[(s2 >> 16) & 0xff] ^ + Te2[(s3 >> 8) & 0xff] ^ + Te3[(s0 ) & 0xff] ^ + rk[5]; + t2 = + Te0[(s2 >> 24) ] ^ + Te1[(s3 >> 16) & 0xff] ^ + Te2[(s0 >> 8) & 0xff] ^ + Te3[(s1 ) & 0xff] ^ + rk[6]; + t3 = + Te0[(s3 >> 24) ] ^ + Te1[(s0 >> 16) & 0xff] ^ + Te2[(s1 >> 8) & 0xff] ^ + Te3[(s2 ) & 0xff] ^ + rk[7]; + + rk += 8; + if (--r == 0) { + break; + } + + s0 = + Te0[(t0 >> 24) ] ^ + Te1[(t1 >> 16) & 0xff] ^ + Te2[(t2 >> 8) & 0xff] ^ + Te3[(t3 ) & 0xff] ^ + rk[0]; + s1 = + Te0[(t1 >> 24) ] ^ + Te1[(t2 >> 16) & 0xff] ^ + Te2[(t3 >> 8) & 0xff] ^ + Te3[(t0 ) & 0xff] ^ + rk[1]; + s2 = + Te0[(t2 >> 24) ] ^ + Te1[(t3 >> 16) & 0xff] ^ + Te2[(t0 >> 8) & 0xff] ^ + Te3[(t1 ) & 0xff] ^ + rk[2]; + s3 = + Te0[(t3 >> 24) ] ^ + Te1[(t0 >> 16) & 0xff] ^ + Te2[(t1 >> 8) & 0xff] ^ + Te3[(t2 ) & 0xff] ^ + rk[3]; + } +#endif /* ?FULL_UNROLL */ + /* + * apply last round and + * map cipher state to byte array block: + */ + s0 = + (Te4[(t0 >> 24) ] & 0xff000000) ^ + (Te4[(t1 >> 16) & 0xff] & 0x00ff0000) ^ + (Te4[(t2 >> 8) & 0xff] & 0x0000ff00) ^ + (Te4[(t3 ) & 0xff] & 0x000000ff) ^ + rk[0]; + PUTU32(ct , s0); + s1 = + (Te4[(t1 >> 24) ] & 0xff000000) ^ + (Te4[(t2 >> 16) & 0xff] & 0x00ff0000) ^ + (Te4[(t3 >> 8) & 0xff] & 0x0000ff00) ^ + (Te4[(t0 ) & 0xff] & 0x000000ff) ^ + rk[1]; + PUTU32(ct + 4, s1); + s2 = + (Te4[(t2 >> 24) ] & 0xff000000) ^ + (Te4[(t3 >> 16) & 0xff] & 0x00ff0000) ^ + (Te4[(t0 >> 8) & 0xff] & 0x0000ff00) ^ + (Te4[(t1 ) & 0xff] & 0x000000ff) ^ + rk[2]; + PUTU32(ct + 8, s2); + s3 = + (Te4[(t3 >> 24) ] & 0xff000000) ^ + (Te4[(t0 >> 16) & 0xff] & 0x00ff0000) ^ + (Te4[(t1 >> 8) & 0xff] & 0x0000ff00) ^ + (Te4[(t2 ) & 0xff] & 0x000000ff) ^ + rk[3]; + PUTU32(ct + 12, s3); +} + +void rijndaelDecrypt(const u32 rk[/*4*(Nr + 1)*/], int Nr, const u8 ct[16], u8 pt[16]) { + u32 s0, s1, s2, s3, t0, t1, t2, t3; +#ifndef FULL_UNROLL + int r; +#endif /* ?FULL_UNROLL */ + + /* + * map byte array block to cipher state + * and add initial round key: + */ + s0 = GETU32(ct ) ^ rk[0]; + s1 = GETU32(ct + 4) ^ rk[1]; + s2 = GETU32(ct + 8) ^ rk[2]; + s3 = GETU32(ct + 12) ^ rk[3]; +#ifdef FULL_UNROLL + /* round 1: */ + t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[ 4]; + t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[ 5]; + t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[ 6]; + t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[ 7]; + /* round 2: */ + s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[ 8]; + s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[ 9]; + s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[10]; + s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[11]; + /* round 3: */ + t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[12]; + t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[13]; + t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[14]; + t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[15]; + /* round 4: */ + s0 = Td0[t0 >> 
24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[16]; + s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[17]; + s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[18]; + s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[19]; + /* round 5: */ + t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[20]; + t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[21]; + t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[22]; + t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[23]; + /* round 6: */ + s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[24]; + s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[25]; + s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[26]; + s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[27]; + /* round 7: */ + t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[28]; + t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[29]; + t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[30]; + t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[31]; + /* round 8: */ + s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[32]; + s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[33]; + s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[34]; + s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[35]; + /* round 9: */ + t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[36]; + t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[37]; + t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[38]; + t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[39]; + if (Nr > 10) { + /* round 10: */ + s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[40]; + s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[41]; + s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[42]; + s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[43]; + /* round 11: */ + t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[44]; + t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[45]; + t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[46]; + t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[47]; + if (Nr > 12) { + /* round 12: */ + s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[48]; + s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[49]; + s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[50]; + 
s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[51]; + /* round 13: */ + t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[52]; + t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[53]; + t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[54]; + t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[55]; + } + } + rk += Nr << 2; +#else /* !FULL_UNROLL */ + /* + * Nr - 1 full rounds: + */ + r = Nr >> 1; + for (;;) { + t0 = + Td0[(s0 >> 24) ] ^ + Td1[(s3 >> 16) & 0xff] ^ + Td2[(s2 >> 8) & 0xff] ^ + Td3[(s1 ) & 0xff] ^ + rk[4]; + t1 = + Td0[(s1 >> 24) ] ^ + Td1[(s0 >> 16) & 0xff] ^ + Td2[(s3 >> 8) & 0xff] ^ + Td3[(s2 ) & 0xff] ^ + rk[5]; + t2 = + Td0[(s2 >> 24) ] ^ + Td1[(s1 >> 16) & 0xff] ^ + Td2[(s0 >> 8) & 0xff] ^ + Td3[(s3 ) & 0xff] ^ + rk[6]; + t3 = + Td0[(s3 >> 24) ] ^ + Td1[(s2 >> 16) & 0xff] ^ + Td2[(s1 >> 8) & 0xff] ^ + Td3[(s0 ) & 0xff] ^ + rk[7]; + + rk += 8; + if (--r == 0) { + break; + } + + s0 = + Td0[(t0 >> 24) ] ^ + Td1[(t3 >> 16) & 0xff] ^ + Td2[(t2 >> 8) & 0xff] ^ + Td3[(t1 ) & 0xff] ^ + rk[0]; + s1 = + Td0[(t1 >> 24) ] ^ + Td1[(t0 >> 16) & 0xff] ^ + Td2[(t3 >> 8) & 0xff] ^ + Td3[(t2 ) & 0xff] ^ + rk[1]; + s2 = + Td0[(t2 >> 24) ] ^ + Td1[(t1 >> 16) & 0xff] ^ + Td2[(t0 >> 8) & 0xff] ^ + Td3[(t3 ) & 0xff] ^ + rk[2]; + s3 = + Td0[(t3 >> 24) ] ^ + Td1[(t2 >> 16) & 0xff] ^ + Td2[(t1 >> 8) & 0xff] ^ + Td3[(t0 ) & 0xff] ^ + rk[3]; + } +#endif /* ?FULL_UNROLL */ + /* + * apply last round and + * map cipher state to byte array block: + */ + s0 = + (Td4[(t0 >> 24) ] & 0xff000000) ^ + (Td4[(t3 >> 16) & 0xff] & 0x00ff0000) ^ + (Td4[(t2 >> 8) & 0xff] & 0x0000ff00) ^ + (Td4[(t1 ) & 0xff] & 0x000000ff) ^ + rk[0]; + PUTU32(pt , s0); + s1 = + (Td4[(t1 >> 24) ] & 0xff000000) ^ + (Td4[(t0 >> 16) & 0xff] & 0x00ff0000) ^ + (Td4[(t3 >> 8) & 0xff] & 0x0000ff00) ^ + (Td4[(t2 ) & 0xff] & 0x000000ff) ^ + rk[1]; + PUTU32(pt + 4, s1); + s2 = + (Td4[(t2 >> 24) ] & 0xff000000) ^ + (Td4[(t1 >> 16) & 0xff] & 0x00ff0000) ^ + (Td4[(t0 >> 8) & 0xff] & 0x0000ff00) ^ + (Td4[(t3 ) & 0xff] & 0x000000ff) ^ + rk[2]; + PUTU32(pt + 8, s2); + s3 = + (Td4[(t3 >> 24) ] & 0xff000000) ^ + (Td4[(t2 >> 16) & 0xff] & 0x00ff0000) ^ + (Td4[(t1 >> 8) & 0xff] & 0x0000ff00) ^ + (Td4[(t0 ) & 0xff] & 0x000000ff) ^ + rk[3]; + PUTU32(pt + 12, s3); +} + +#ifdef INTERMEDIATE_VALUE_KAT + +void rijndaelEncryptRound(const u32 rk[/*4*(Nr + 1)*/], int Nr, u8 block[16], int rounds) { + int r; + u32 s0, s1, s2, s3, t0, t1, t2, t3; + + /* + * map byte array block to cipher state + * and add initial round key: + */ + s0 = GETU32(block ) ^ rk[0]; + s1 = GETU32(block + 4) ^ rk[1]; + s2 = GETU32(block + 8) ^ rk[2]; + s3 = GETU32(block + 12) ^ rk[3]; + rk += 4; + + /* + * Nr - 1 full rounds: + */ + for (r = (rounds < Nr ? 
rounds : Nr - 1); r > 0; r--) { + t0 = + Te0[(s0 >> 24) ] ^ + Te1[(s1 >> 16) & 0xff] ^ + Te2[(s2 >> 8) & 0xff] ^ + Te3[(s3 ) & 0xff] ^ + rk[0]; + t1 = + Te0[(s1 >> 24) ] ^ + Te1[(s2 >> 16) & 0xff] ^ + Te2[(s3 >> 8) & 0xff] ^ + Te3[(s0 ) & 0xff] ^ + rk[1]; + t2 = + Te0[(s2 >> 24) ] ^ + Te1[(s3 >> 16) & 0xff] ^ + Te2[(s0 >> 8) & 0xff] ^ + Te3[(s1 ) & 0xff] ^ + rk[2]; + t3 = + Te0[(s3 >> 24) ] ^ + Te1[(s0 >> 16) & 0xff] ^ + Te2[(s1 >> 8) & 0xff] ^ + Te3[(s2 ) & 0xff] ^ + rk[3]; + + s0 = t0; + s1 = t1; + s2 = t2; + s3 = t3; + rk += 4; + + } + + /* + * apply last round and + * map cipher state to byte array block: + */ + if (rounds == Nr) { + t0 = + (Te4[(s0 >> 24) ] & 0xff000000) ^ + (Te4[(s1 >> 16) & 0xff] & 0x00ff0000) ^ + (Te4[(s2 >> 8) & 0xff] & 0x0000ff00) ^ + (Te4[(s3 ) & 0xff] & 0x000000ff) ^ + rk[0]; + t1 = + (Te4[(s1 >> 24) ] & 0xff000000) ^ + (Te4[(s2 >> 16) & 0xff] & 0x00ff0000) ^ + (Te4[(s3 >> 8) & 0xff] & 0x0000ff00) ^ + (Te4[(s0 ) & 0xff] & 0x000000ff) ^ + rk[1]; + t2 = + (Te4[(s2 >> 24) ] & 0xff000000) ^ + (Te4[(s3 >> 16) & 0xff] & 0x00ff0000) ^ + (Te4[(s0 >> 8) & 0xff] & 0x0000ff00) ^ + (Te4[(s1 ) & 0xff] & 0x000000ff) ^ + rk[2]; + t3 = + (Te4[(s3 >> 24) ] & 0xff000000) ^ + (Te4[(s0 >> 16) & 0xff] & 0x00ff0000) ^ + (Te4[(s1 >> 8) & 0xff] & 0x0000ff00) ^ + (Te4[(s2 ) & 0xff] & 0x000000ff) ^ + rk[3]; + + s0 = t0; + s1 = t1; + s2 = t2; + s3 = t3; + } + + PUTU32(block , s0); + PUTU32(block + 4, s1); + PUTU32(block + 8, s2); + PUTU32(block + 12, s3); +} + +void rijndaelDecryptRound(const u32 rk[/*4*(Nr + 1)*/], int Nr, u8 block[16], int rounds) { + int r; + u32 s0, s1, s2, s3, t0, t1, t2, t3; + + /* + * map byte array block to cipher state + * and add initial round key: + */ + s0 = GETU32(block ) ^ rk[0]; + s1 = GETU32(block + 4) ^ rk[1]; + s2 = GETU32(block + 8) ^ rk[2]; + s3 = GETU32(block + 12) ^ rk[3]; + rk += 4; + + /* + * Nr - 1 full rounds: + */ + for (r = (rounds < Nr ? 
rounds : Nr) - 1; r > 0; r--) { + t0 = + Td0[(s0 >> 24) ] ^ + Td1[(s3 >> 16) & 0xff] ^ + Td2[(s2 >> 8) & 0xff] ^ + Td3[(s1 ) & 0xff] ^ + rk[0]; + t1 = + Td0[(s1 >> 24) ] ^ + Td1[(s0 >> 16) & 0xff] ^ + Td2[(s3 >> 8) & 0xff] ^ + Td3[(s2 ) & 0xff] ^ + rk[1]; + t2 = + Td0[(s2 >> 24) ] ^ + Td1[(s1 >> 16) & 0xff] ^ + Td2[(s0 >> 8) & 0xff] ^ + Td3[(s3 ) & 0xff] ^ + rk[2]; + t3 = + Td0[(s3 >> 24) ] ^ + Td1[(s2 >> 16) & 0xff] ^ + Td2[(s1 >> 8) & 0xff] ^ + Td3[(s0 ) & 0xff] ^ + rk[3]; + + s0 = t0; + s1 = t1; + s2 = t2; + s3 = t3; + rk += 4; + + } + + /* + * complete the last round and + * map cipher state to byte array block: + */ + t0 = + (Td4[(s0 >> 24) ] & 0xff000000) ^ + (Td4[(s3 >> 16) & 0xff] & 0x00ff0000) ^ + (Td4[(s2 >> 8) & 0xff] & 0x0000ff00) ^ + (Td4[(s1 ) & 0xff] & 0x000000ff); + t1 = + (Td4[(s1 >> 24) ] & 0xff000000) ^ + (Td4[(s0 >> 16) & 0xff] & 0x00ff0000) ^ + (Td4[(s3 >> 8) & 0xff] & 0x0000ff00) ^ + (Td4[(s2 ) & 0xff] & 0x000000ff); + t2 = + (Td4[(s2 >> 24) ] & 0xff000000) ^ + (Td4[(s1 >> 16) & 0xff] & 0x00ff0000) ^ + (Td4[(s0 >> 8) & 0xff] & 0x0000ff00) ^ + (Td4[(s3 ) & 0xff] & 0x000000ff); + t3 = + (Td4[(s3 >> 24) ] & 0xff000000) ^ + (Td4[(s2 >> 16) & 0xff] & 0x00ff0000) ^ + (Td4[(s1 >> 8) & 0xff] & 0x0000ff00) ^ + (Td4[(s0 ) & 0xff] & 0x000000ff); + + if (rounds == Nr) { + t0 ^= rk[0]; + t1 ^= rk[1]; + t2 ^= rk[2]; + t3 ^= rk[3]; + } + + PUTU32(block , t0); + PUTU32(block + 4, t1); + PUTU32(block + 8, t2); + PUTU32(block + 12, t3); +} + +#endif /* INTERMEDIATE_VALUE_KAT */ diff --git a/test/c/almabench.c b/test/c/almabench.c new file mode 100644 index 0000000..ef92a59 --- /dev/null +++ b/test/c/almabench.c @@ -0,0 +1,190 @@ +/* + almabench + Standard C version + 17 April 2003 + + Written by Scott Robert Ladd. + No rights reserved. This is public domain software, for use by anyone. + + A number-crunching benchmark that can be used as a fitness test for + evolving optimal compiler options via genetic algorithm. + + This program calculates the daily ephemeris (at noon) for the years + 2000-2099 using an algorithm developed by J.L. Simon, P. Bretagnon, J. + Chapront, M. Chapront-Touze, G. Francou and J. Laskar of the Bureau des + Longitudes, Paris, France), as detailed in Astronomy & Astrophysics + 282, 663 (1994) + + Note that the code herein is design for the purpose of testing + computational performance; error handling and other such "niceties" + is virtually non-existent. + + Actual benchmark results can be found at: + http://www.coyotegulch.com + + Please do not use this information or algorithm in any way that might + upset the balance of the universe or otherwise cause planets to impact + upon one another. 
+*/ + +#include +#include + +#define PI 3.14159265358979323846 +#define J2000 2451545.0 +#define JCENTURY 36525.0 +#define JMILLENIA 365250.0 +#define TWOPI (2.0 * PI) +#define A2R (PI / 648000.0) +#define R2H (12.0 / PI) +#define R2D (180.0 / PI) +#define GAUSSK 0.01720209895 +#define TEST_LOOPS 20 +#define TEST_LENGTH 36525 +#define sineps 0.3977771559319137 +#define coseps 0.9174820620691818 + +extern double amas[8]; +extern double a[8][3], dlm[8][3], e[8][3], pi[8][3], dinc[8][3], omega[8][3]; +extern double kp[8][9], ca[8][9], sa[8][9], kq[8][10], cl[8][10], sl[8][10]; + +extern double cos_static(double), sin_static(double), + atan2_static(double, double), asin_static(double), sqrt_static(double), + fmod_static(double, double), fabs_static(double); + +#define cos cos_static +#define sin sin_static +#define atan2 atan2_static +#define asin asin_static +#define sqrt sqrt_static +#define fmod fmod_static +#define fabs fabs_static + +//--------------------------------------------------------------------------- +// Normalize angle into the range -pi <= A < +pi. +double anpm (double a) +{ + double w = fmod(a,TWOPI); + + if (fabs(w) >= PI) + w = w - ((a < 0) ? -TWOPI : TWOPI); + + return w; +} + +//--------------------------------------------------------------------------- +// The reference frame is equatorial and is with respect to the +// mean equator and equinox of epoch j2000. +void planetpv (double epoch[2], int np, double pv[2][3]) +{ + // working storage + int i, j, k; + double t, da, dl, de, dp, di, doh, dmu, arga, argl, am; + double ae, dae, ae2, at, r, v, si2, xq, xp, tl, xsw; + double xcw, xm2, xf, ci2, xms, xmc, xpxq2, x, y, z; + + // time: julian millennia since j2000. + t = ((epoch[0] - J2000) + epoch[1]) / JMILLENIA; + + // compute the mean elements. + da = a[np][0] + (a[np][1] + a[np][2] * t ) * t; + dl = (3600.0 * dlm[np][0] + (dlm[np][1] + dlm[np][2] * t ) * t ) * A2R; + de = e[np][0] + (e[np][1] + e[np][2] * t ) * t; + dp = anpm((3600.0 * pi[np][0] + (pi[np][1] + pi[np][2] * t ) * t ) * A2R ); + di = (3600.0 * dinc[np][0] + (dinc[np][1] + dinc[np][2] * t ) * t ) * A2R; + doh = anpm((3600.0 * omega[np][0] + (omega[np][1] + omega[np][2] * t ) * t ) * A2R ); + + // apply the trigonometric terms. + dmu = 0.35953620 * t; + + for (k = 0; k < 8; ++k) + { + arga = kp[np][k] * dmu; + argl = kq[np][k] * dmu; + da = da + (ca[np][k] * cos(arga) + sa[np][k] * sin(arga)) * 0.0000001; + dl = dl + (cl[np][k] * cos(argl) + sl[np][k] * sin(argl)) * 0.0000001; + } + + arga = kp[np][8] * dmu; + da = da + t * (ca[np][8] * cos(arga) + sa[np][8] * sin(arga)) * 0.0000001; + + for (k = 8; k <= 9; ++k) + { + argl = kq[np][k] * dmu; + dl = dl + t * ( cl[np][k] * cos(argl) + sl[np][k] * sin(argl) ) * 0.0000001; + } + + dl = fmod(dl,TWOPI); + + // iterative solution of kepler's equation to get eccentric anomaly. + am = dl - dp; + ae = am + de * sin(am); + k = 0; + + while (1) + { + dae = (am - ae + de * sin(ae)) / (1.0 - de * cos(ae)); + ae = ae + dae; + k = k + 1; + + if ((k >= 10) || (fabs(dae) < 1e-12)) + break; + } + + // true anomaly. + ae2 = ae / 2.0; + at = 2.0 * atan2(sqrt((1.0 + de)/(1.0 - de)) * sin(ae2), cos(ae2)); + + // distance (au) and speed (radians per day). 
+ r = da * (1.0 - de * cos(ae)); + v = GAUSSK * sqrt((1.0 + 1.0 / amas[np] ) / (da * da * da)); + + si2 = sin(di / 2.0); + xq = si2 * cos(doh); + xp = si2 * sin(doh); + tl = at + dp; + xsw = sin(tl); + xcw = cos(tl); + xm2 = 2.0 * (xp * xcw - xq * xsw ); + xf = da / sqrt(1.0 - de*de); + ci2 = cos(di / 2.0); + xms = (de * sin(dp) + xsw) * xf; + xmc = (de * cos(dp) + xcw) * xf; + xpxq2 = 2.0 * xp * xq; + + // position (j2000 ecliptic x,y,z in au). + x = r * (xcw - xm2 * xp); + y = r * (xsw + xm2 * xq); + z = r * (-xm2 * ci2); + + // rotate to equatorial. + pv[0][0] = x; + pv[0][1] = y * coseps - z * sineps; + pv[0][2] = y * sineps + z * coseps; + + // velocity (j2000 ecliptic xdot,ydot,zdot in au/d). + x = v * ((-1.0 + 2.0 * xp * xp) * xms + xpxq2 * xmc); + y = v * (( 1.0 - 2.0 * xq * xq ) * xmc - xpxq2 * xms); + z = v * (2.0 * ci2 * (xp * xms + xq * xmc)); + + // rotate to equatorial. + pv[1][0] = x; + pv[1][1] = y * coseps - z * sineps; + pv[1][2] = y * sineps + z * coseps; +} + +//--------------------------------------------------------------------------- +// Computes RA, Declination, and distance from a state vector returned by +// planetpv. +void radecdist(double state[2][3], double rdd[3]) +{ + // distance + rdd[2] = sqrt(state[0][0] * state[0][0] + state[0][1] * state[0][1] + state[0][2] * state[0][2]); + + // RA + rdd[0] = atan2(state[0][1], state[0][0]) * R2H; + if (rdd[0] < 0.0) rdd[0] += 24.0; + + // Declination + rdd[1] = asin(state[0][2] / rdd[2]) * R2D; +} diff --git a/test/c/fft.c b/test/c/fft.c new file mode 100644 index 0000000..6f24af5 --- /dev/null +++ b/test/c/fft.c @@ -0,0 +1,158 @@ +/* + C Program: Test_Fast_Fourier_Transform_in_Double_Precision (TFFTDP.c) + by: Dave Edelblute, edelblut@cod.nosc.mil, 05 Jan 1993 +*/ + +#include + +#ifndef PI +#define PI 3.14159265358979323846 +#endif + +extern double cos_static(double), sin_static(double); + +#define cos cos_static +#define sin sin_static + +/********************************************************/ +/* A Duhamel-Hollman split-radix dif fft */ +/* Ref: Electronics Letters, Jan. 5, 1984 */ +/* Complex input and output data in arrays x and y */ +/* Length is n. 
*/ +/********************************************************/ + +int dfft(double x[], double y[], int np) +{ +double *px,*py; +int i,j,k,m,n,i0,i1,i2,i3,is,id,n1,n2,n4; +double a,e,a3,cc1,ss1,cc3,ss3,r1,r2,s1,s2,s3,xt,tpi; + + px = x - 1; + py = y - 1; + i = 2; + m = 1; + + while (i < np) + { + i = i+i; + m = m+1; + } + + n = i; + + if (n != np) + { + for (i = np+1; i <= n; i++) + { + *(px + i) = 0.0; + *(py + i) = 0.0; + } + /*printf("nuse %d point fft",n); */ + } + + n2 = n+n; + tpi = 2.0 * PI; + for (k = 1; k <= m-1; k++ ) + { + n2 = n2 / 2; + n4 = n2 / 4; + e = tpi / (double)n2; + a = 0.0; + + for (j = 1; j<= n4 ; j++) + { + a3 = 3.0 * a; + cc1 = cos(a); + ss1 = sin(a); + + cc3 = cos(a3); + ss3 = sin(a3); + a = e * (double)j; + is = j; + id = 2 * n2; + + while ( is < n ) + { + for (i0 = is; i0 <= n-1; i0 = i0 + id) + { + i1 = i0 + n4; + i2 = i1 + n4; + i3 = i2 + n4; + r1 = *(px+i0) - *(px+i2); + *(px+i0) = *(px+i0) + *(px+i2); + r2 = *(px+i1) - *(px+i3); + *(px+i1) = *(px+i1) + *(px+i3); + s1 = *(py+i0) - *(py+i2); + *(py+i0) = *(py+i0) + *(py+i2); + s2 = *(py+i1) - *(py+i3); + *(py+i1) = *(py+i1) + *(py+i3); + s3 = r1 - s2; r1 = r1 + s2; + s2 = r2 - s1; r2 = r2 + s1; + *(px+i2) = r1*cc1 - s2*ss1; + *(py+i2) = -s2*cc1 - r1*ss1; + *(px+i3) = s3*cc3 + r2*ss3; + *(py+i3) = r2*cc3 - s3*ss3; + } + is = 2 * id - n2 + j; + id = 4 * id; + } + } + } + +/************************************/ +/* Last stage, length=2 butterfly */ +/************************************/ + is = 1; + id = 4; + + while ( is < n) + { + for (i0 = is; i0 <= n; i0 = i0 + id) + { + i1 = i0 + 1; + r1 = *(px+i0); + *(px+i0) = r1 + *(px+i1); + *(px+i1) = r1 - *(px+i1); + r1 = *(py+i0); + *(py+i0) = r1 + *(py+i1); + *(py+i1) = r1 - *(py+i1); + } + is = 2*id - 1; + id = 4 * id; + } + +/*************************/ +/* Bit reverse counter */ +/*************************/ + j = 1; + n1 = n - 1; + + for (i = 1; i <= n1; i++) + { + if (i < j) + { + xt = *(px+j); + *(px+j) = *(px+i); + *(px+i) = xt; + xt = *(py+j); + *(py+j) = *(py+i); + *(py+i) = xt; + } + + k = n / 2; + while (k < j) + { + j = j - k; + k = k / 2; + } + j = j + k; + } + +/* + for (i = 1; i<=16; i++) printf("%d %g %gn",i,*(px+i),(py+i)); +*/ + + return(n); +} + + diff --git a/test/c/fib.c b/test/c/fib.c new file mode 100644 index 0000000..34e7baa --- /dev/null +++ b/test/c/fib.c @@ -0,0 +1,7 @@ +int fib(int n) +{ + if (n < 2) + return 1; + else + return fib(n-1) + fib(n-2); +} diff --git a/test/c/integr.c b/test/c/integr.c new file mode 100644 index 0000000..abcbd28 --- /dev/null +++ b/test/c/integr.c @@ -0,0 +1,23 @@ +static double square(double x) +{ + return x * x; +} + +static double integr(double (*f)(double), double low, double high, int n) +{ + double h, x, s; + int i; + + h = (high - low) / n; + s = 0; + for (i = n, x = low; i > 0; i--, x += h) s += f(x); + return s * h; +} + +double test(int n) +{ + return integr(square, 0.0, 1.0, n); +} + + + diff --git a/test/c/qsort.c b/test/c/qsort.c new file mode 100644 index 0000000..9f8e5b1 --- /dev/null +++ b/test/c/qsort.c @@ -0,0 +1,16 @@ +void quicksort(int lo, int hi, int base[]) +{ + int i,j; + int pivot,temp; + + if (lolo && base[j]>=pivot) j--; + if (i + +extern void memcpy_static(void *, void *, int), + memset_static(void *, int, char); + +#define memcpy memcpy_static +#define memset memset_static + +#define ARCH_BIG_ENDIAN + +typedef unsigned int u32; + +struct SHA1Context { + u32 state[5]; + u32 length[2]; + int numbytes; + unsigned char buffer[64]; +}; + +#define rol1(x) (((x) << 1) | ((x) >> 31)) +#define 
rol5(x) (((x) << 5) | ((x) >> 27)) +#define rol30(x) (((x) << 30) | ((x) >> 2)) + +static void SHA1_copy_and_swap(void * src, void * dst, int numwords) +{ +#ifdef ARCH_BIG_ENDIAN + memcpy(dst, src, numwords * sizeof(u32)); +#else + unsigned char * s, * d; + unsigned char a, b; + for (s = src, d = dst; numwords > 0; s += 4, d += 4, numwords--) { + a = s[0]; + b = s[1]; + d[0] = s[3]; + d[1] = s[2]; + d[2] = b; + d[3] = a; + } +#endif +} + +#define F(x,y,z) ( z ^ (x & (y ^ z) ) ) +#define G(x,y,z) ( (x & y) | (z & (x | y) ) ) +#define H(x,y,z) ( x ^ y ^ z ) + +#define Y1 0x5A827999U +#define Y2 0x6ED9EBA1U +#define Y3 0x8F1BBCDCU +#define Y4 0xCA62C1D6U + +static void SHA1_transform(struct SHA1Context * ctx) +{ + int i; + register u32 a, b, c, d, e, t; + u32 data[80]; + + /* Convert buffer data to 16 big-endian integers */ + SHA1_copy_and_swap(ctx->buffer, data, 16); + + /* Expand into 80 integers */ + for (i = 16; i < 80; i++) { + t = data[i-3] ^ data[i-8] ^ data[i-14] ^ data[i-16]; + data[i] = rol1(t); + } + + /* Initialize working variables */ + a = ctx->state[0]; + b = ctx->state[1]; + c = ctx->state[2]; + d = ctx->state[3]; + e = ctx->state[4]; + + /* Perform rounds */ + for (i = 0; i < 20; i++) { + t = F(b, c, d) + Y1 + rol5(a) + e + data[i]; + e = d; d = c; c = rol30(b); b = a; a = t; + } + for (/*nothing*/; i < 40; i++) { + t = H(b, c, d) + Y2 + rol5(a) + e + data[i]; + e = d; d = c; c = rol30(b); b = a; a = t; + } + for (/*nothing*/; i < 60; i++) { + t = G(b, c, d) + Y3 + rol5(a) + e + data[i]; + e = d; d = c; c = rol30(b); b = a; a = t; + } + for (/*nothing*/; i < 80; i++) { + t = H(b, c, d) + Y4 + rol5(a) + e + data[i]; + e = d; d = c; c = rol30(b); b = a; a = t; + } + + /* Update chaining values */ + ctx->state[0] += a; + ctx->state[1] += b; + ctx->state[2] += c; + ctx->state[3] += d; + ctx->state[4] += e; +} + +void SHA1_init(struct SHA1Context * ctx) +{ + ctx->state[0] = 0x67452301U; + ctx->state[1] = 0xEFCDAB89U; + ctx->state[2] = 0x98BADCFEU; + ctx->state[3] = 0x10325476U; + ctx->state[4] = 0xC3D2E1F0U; + ctx->numbytes = 0; + ctx->length[0] = 0; + ctx->length[1] = 0; +} + +void SHA1_add_data(struct SHA1Context * ctx, unsigned char * data, + unsigned long len) +{ + u32 t; + + /* Update length */ + t = ctx->length[1]; + if ((ctx->length[1] = t + (u32) (len << 3)) < t) + ctx->length[0]++; /* carry from low 32 bits to high 32 bits */ + ctx->length[0] += (u32) (len >> 29); + + /* If data was left in buffer, pad it with fresh data and munge block */ + if (ctx->numbytes != 0) { + t = 64 - ctx->numbytes; + if (len < t) { + memcpy(ctx->buffer + ctx->numbytes, data, len); + ctx->numbytes += len; + return; + } + memcpy(ctx->buffer + ctx->numbytes, data, t); + SHA1_transform(ctx); + data += t; + len -= t; + } + /* Munge data in 64-byte chunks */ + while (len >= 64) { + memcpy(ctx->buffer, data, 64); + SHA1_transform(ctx); + data += 64; + len -= 64; + } + /* Save remaining data */ + memcpy(ctx->buffer, data, len); + ctx->numbytes = len; +} + +void SHA1_finish(struct SHA1Context * ctx, unsigned char output[20]) +{ + int i = ctx->numbytes; + + /* Set first char of padding to 0x80. There is always room. 
*/ + ctx->buffer[i++] = 0x80; + /* If we do not have room for the length (8 bytes), pad to 64 bytes + with zeroes and munge the data block */ + if (i > 56) { + memset(ctx->buffer + i, 0, 64 - i); + SHA1_transform(ctx); + i = 0; + } + /* Pad to byte 56 with zeroes */ + memset(ctx->buffer + i, 0, 56 - i); + /* Add length in big-endian */ + SHA1_copy_and_swap(ctx->length, ctx->buffer + 56, 2); + /* Munge the final block */ + SHA1_transform(ctx); + /* Final hash value is in ctx->state modulo big-endian conversion */ + SHA1_copy_and_swap(ctx->state, output, 5); +}
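
The Makefile above links each test with a small harness (mainfib.o, mainaes.o, and so on, found through VPATH in ../harness) plus, for some tests, a staticlib.o that provides the *_static math and memory helpers declared extern in the sources. Those harnesses are not part of this change, so the sketches below are only illustrations of how each entry point might be driven: every name, test value and expected result that does not appear in the files above is an assumption of the sketch. For aes.c, a minimal round-trip check, assuming the Te*/Td*/rcon tables are supplied by another object at link time, could be:

/* Hypothetical harness in the spirit of mainaes.c (not shown in this patch). */
#include <stdio.h>
#include <string.h>

typedef unsigned char u8;
typedef unsigned int u32;

#define MAXNR 14

/* Entry points defined in aes.c. */
int rijndaelKeySetupEnc(u32 rk[], const u8 cipherKey[], int keyBits);
int rijndaelKeySetupDec(u32 rk[], const u8 cipherKey[], int keyBits);
void rijndaelEncrypt(const u32 rk[], int Nr, const u8 pt[16], u8 ct[16]);
void rijndaelDecrypt(const u32 rk[], int Nr, const u8 ct[16], u8 pt[16]);

int main(void)
{
  u8 key[16] = { 0 };                 /* arbitrary 128-bit test key */
  u8 pt[16] = "0123456789abcde";      /* 15 characters + NUL = one 16-byte block */
  u8 ct[16], back[16];
  u32 ek[4 * (MAXNR + 1)], dk[4 * (MAXNR + 1)];
  int nr;

  nr = rijndaelKeySetupEnc(ek, key, 128);   /* returns 10 rounds for 128-bit keys */
  rijndaelEncrypt(ek, nr, pt, ct);

  rijndaelKeySetupDec(dk, key, 128);
  rijndaelDecrypt(dk, nr, ct, back);

  /* Decryption must undo encryption whatever GETU32/PUTU32 variant is compiled in. */
  printf("aes round trip: %s\n", memcmp(pt, back, 16) == 0 ? "OK" : "FAIL");
  return 0;
}

Because aes.c hard-codes ARCH_BIG_ENDIAN, the ciphertext on a little-endian machine will not match the FIPS-197 test vectors, but the round trip still holds since GETU32 and PUTU32 remain inverses of each other.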
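
For almabench.c, the orbital-element tables (amas, a, dlm, e, pi, dinc, omega, kp, ca, sa, kq, cl, sl) and the *_static trigonometric functions are also expected at link time. A possible driver that prints right ascension (hours), declination (degrees) and distance (au) for each of the eight bodies at J2000, assuming the planet index simply runs over the [8] dimension of the tables:

/* Hypothetical driver in the spirit of mainalmabench.c (not shown). */
#include <stdio.h>

void planetpv(double epoch[2], int np, double pv[2][3]);   /* from almabench.c */
void radecdist(double state[2][3], double rdd[3]);

int main(void)
{
  /* Two-part Julian date, as planetpv expects: J2000.0 exactly. */
  double epoch[2] = { 2451545.0, 0.0 };
  double pv[2][3], rdd[3];
  int np;

  /* The element tables are dimensioned [8], so body indices run 0..7. */
  for (np = 0; np < 8; np++) {
    planetpv(epoch, np, pv);
    radecdist(pv, rdd);
    printf("body %d: RA %8.4f h   Dec %+8.4f deg   dist %8.4f au\n",
           np, rdd[0], rdd[1], rdd[2]);
  }
  return 0;
}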
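
dfft in fft.c transforms the complex signal held in two parallel arrays in place, padding the length up to the next power of two and returning the length it used. A possible driver follows; the transform length, the test signal and the expected peak location are choices of this sketch, and like the Makefile's fft rule it needs -lm:

/* Hypothetical driver in the spirit of mainfft.c (not shown). */
#include <math.h>
#include <stdio.h>

#define N 256                       /* already a power of two, so no padding */
#define PI 3.14159265358979323846

int dfft(double x[], double y[], int np);   /* from fft.c */

int main(void)
{
  static double re[N], im[N];
  int i, n;

  /* Real cosine at frequency 8: a standard DFT puts peaks in bins 8 and N-8. */
  for (i = 0; i < N; i++) {
    re[i] = cos(2.0 * PI * 8.0 * i / N);
    im[i] = 0.0;
  }

  n = dfft(re, im, N);              /* arrays now hold the spectrum in place */

  for (i = 0; i < 16; i++)
    printf("bin %2d: %10.3f %10.3f\n", i, re[i], im[i]);
  printf("transform length: %d\n", n);
  return 0;
}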
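
fib.c and integr.c each export a single function, so their drivers can be a few lines; the arguments and expected values below are this sketch's own choices:

/* Hypothetical driver for fib.c and integr.c (the real main*.c may differ). */
#include <stdio.h>

int fib(int n);        /* from fib.c: fib(0) = fib(1) = 1 */
double test(int n);    /* from integr.c: rectangle rule for the integral of x^2 over [0,1] */

int main(void)
{
  /* With this definition fib(10) = 89. */
  printf("fib(10) = %d\n", fib(10));

  /* The sum should approach 1/3 as the step count grows. */
  printf("integr(square, 0, 1, 1000000) = %.9f\n", test(1000000));
  return 0;
}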
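
qsort.c declares void quicksort(int lo, int hi, int base[]) and partitions around a pivot with a temp swap. Purely as an illustration of that interface, and not necessarily the exact code shipped in qsort.c, an in-place quicksort over base[lo..hi] can be written as:

/* Illustrative in-place quicksort with the same interface as qsort.c;
   not necessarily identical to the code in this patch. */
#include <stdio.h>

void quicksort(int lo, int hi, int base[])
{
  int i, j;
  int pivot, temp;

  if (lo < hi) {
    /* Partition around base[hi]. */
    for (i = lo, j = hi, pivot = base[hi]; i < j; ) {
      while (i < j && base[i] <= pivot) i++;
      while (j > i && base[j] >= pivot) j--;
      if (i < j) { temp = base[i]; base[i] = base[j]; base[j] = temp; }
    }
    /* Drop the pivot into its final slot, then sort both halves. */
    temp = base[i]; base[i] = base[hi]; base[hi] = temp;
    quicksort(lo, i - 1, base);
    quicksort(i + 1, hi, base);
  }
}

int main(void)
{
  int a[8] = { 5, 3, 8, 1, 9, 2, 7, 4 };
  int i;

  quicksort(0, 7, a);
  for (i = 0; i < 8; i++) printf("%d ", a[i]);
  printf("\n");
  return 0;
}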
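
sha1.c follows the usual init / add_data / finish pattern over a caller-allocated context, with memcpy_static and memset_static again coming from the static library object that the Makefile links. A possible driver follows; the message is an arbitrary choice, and the digest only matches the standard SHA-1 test vectors when ARCH_BIG_ENDIAN agrees with the target:

/* Hypothetical driver in the spirit of mainsha1.c (not shown). */
#include <stdio.h>

typedef unsigned int u32;

struct SHA1Context {          /* must match the layout in sha1.c */
  u32 state[5];
  u32 length[2];
  int numbytes;
  unsigned char buffer[64];
};

void SHA1_init(struct SHA1Context * ctx);
void SHA1_add_data(struct SHA1Context * ctx, unsigned char * data,
                   unsigned long len);
void SHA1_finish(struct SHA1Context * ctx, unsigned char output[20]);

int main(void)
{
  struct SHA1Context ctx;
  unsigned char digest[20];
  unsigned char msg[] = "abc";
  int i;

  SHA1_init(&ctx);
  SHA1_add_data(&ctx, msg, 3);      /* hash the three bytes "abc" */
  SHA1_finish(&ctx, digest);

  for (i = 0; i < 20; i++) printf("%02x", digest[i]);
  printf("\n");
  return 0;
}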