Diffstat (limited to 'src/library/bcrypt/x86.S')
-rw-r--r--	src/library/bcrypt/x86.S	203
1 file changed, 203 insertions, 0 deletions
diff --git a/src/library/bcrypt/x86.S b/src/library/bcrypt/x86.S
new file mode 100644
index 0000000..b0f1cd2
--- /dev/null
+++ b/src/library/bcrypt/x86.S
@@ -0,0 +1,203 @@
+/*
+ * Written by Solar Designer <solar at openwall.com> in 1998-2010.
+ * No copyright is claimed, and the software is hereby placed in the public
+ * domain. In case this attempt to disclaim copyright and place the software
+ * in the public domain is deemed null and void, then the software is
+ * Copyright (c) 1998-2010 Solar Designer and it is hereby released to the
+ * general public under the following terms:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted.
+ *
+ * There's ABSOLUTELY NO WARRANTY, express or implied.
+ *
+ * See crypt_blowfish.c for more information.
+ */
+
+#ifdef __i386__
+
+#if defined(__OpenBSD__) && !defined(__ELF__)
+#define UNDERSCORES
+#define ALIGN_LOG
+#endif
+
+#if defined(__CYGWIN32__) || defined(__MINGW32__)
+#define UNDERSCORES
+#endif
+
+#ifdef __DJGPP__
+#define UNDERSCORES
+#define ALIGN_LOG
+#endif
+
+#ifdef UNDERSCORES
+#define _BF_body_r __BF_body_r
+#endif
+
+#ifdef ALIGN_LOG
+#define DO_ALIGN(log) .align (log)
+#elif defined(DUMBAS)
+#define DO_ALIGN(log) .align 1 << log
+#else
+#define DO_ALIGN(log) .align (1 << (log))
+#endif
+
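+/*
+ * The context is addressed relative to %esp: BF_body_r relocates the stack
+ * pointer to BF_FRAME bytes below the context passed in by the caller, so
+ * that the S() and P() macros below can reach the S-boxes and subkeys with
+ * plain %esp-based addressing.
+ */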
+#define BF_FRAME 0x200
+#define ctx %esp
+
+#define BF_ptr (ctx)
+
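+/*
+ * S(N, r) indexes one of the four 1 KB S-boxes (at byte offset N: 0, 0x400,
+ * 0x800 or 0xC00) by the word index held in register r.  P(N) addresses
+ * subkey N of the 18-word P array that follows the 4 KB of S-boxes.
+ */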
+#define S(N, r) N+BF_FRAME(ctx,r,4)
+#ifdef DUMBAS
+#define P(N) 0x1000+N+N+N+N+BF_FRAME(ctx)
+#else
+#define P(N) 0x1000+4*N+BF_FRAME(ctx)
+#endif
+
+/*
+ * This version of the assembly code is optimized primarily for the original
+ * Intel Pentium but is also careful to avoid partial register stalls on the
+ * Pentium Pro family of processors (tested up to Pentium III Coppermine).
+ *
+ * It is possible to go about 15% faster on the Pentium Pro family, and
+ * probably on many non-Intel x86 processors as well, but unfortunately
+ * that would make things twice as slow on the original Pentium.
+ *
+ * An additional 2% speedup may be achieved with non-reentrant code.
+ */
+
+#define L %esi
+#define R %edi
+#define tmp1 %eax
+#define tmp1_lo %al
+#define tmp2 %ecx
+#define tmp2_hi %ch
+#define tmp3 %edx
+#define tmp3_lo %dl
+#define tmp4 %ebx
+#define tmp4_hi %bh
+#define tmp5 %ebp
+
+.text
+
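+/*
+ * One Blowfish round.  On entry tmp2 holds this round's subkey P[N] and the
+ * upper 24 bits of tmp3 are zero.  The macro XORs the subkey into L, splits
+ * the result into its four bytes a, b, c, d (most significant first),
+ * computes F = ((S0[a] + S1[b]) ^ S2[c]) + S3[d], XORs F into R, and
+ * preloads the next subkey P[N+1] into tmp2.  The swapping of halves is
+ * handled by alternating the L and R arguments.
+ */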
+#define BF_ROUND(L, R, N) \
+ xorl L,tmp2; \
+ xorl tmp1,tmp1; \
+ movl tmp2,L; \
+ shrl $16,tmp2; \
+ movl L,tmp4; \
+ movb tmp2_hi,tmp1_lo; \
+ andl $0xFF,tmp2; \
+ movb tmp4_hi,tmp3_lo; \
+ andl $0xFF,tmp4; \
+ movl S(0,tmp1),tmp1; \
+ movl S(0x400,tmp2),tmp5; \
+ addl tmp5,tmp1; \
+ movl S(0x800,tmp3),tmp5; \
+ xorl tmp5,tmp1; \
+ movl S(0xC00,tmp4),tmp5; \
+ addl tmp1,tmp5; \
+ movl 4+P(N),tmp2; \
+ xorl tmp5,R
+
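+/*
+ * BF_ENCRYPT_START runs the 16 Blowfish rounds on the L:R block; tmp2 must
+ * hold P[0] and the upper 24 bits of tmp3 must be zero on entry.  Its tail
+ * reloads the current output pointer from BF_ptr into tmp5 and fetches
+ * P[17].  BF_ENCRYPT_END then completes the final swap and whitening of the
+ * two halves with P[16] and P[17].
+ */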
+#define BF_ENCRYPT_START \
+ BF_ROUND(L, R, 0); \
+ BF_ROUND(R, L, 1); \
+ BF_ROUND(L, R, 2); \
+ BF_ROUND(R, L, 3); \
+ BF_ROUND(L, R, 4); \
+ BF_ROUND(R, L, 5); \
+ BF_ROUND(L, R, 6); \
+ BF_ROUND(R, L, 7); \
+ BF_ROUND(L, R, 8); \
+ BF_ROUND(R, L, 9); \
+ BF_ROUND(L, R, 10); \
+ BF_ROUND(R, L, 11); \
+ BF_ROUND(L, R, 12); \
+ BF_ROUND(R, L, 13); \
+ BF_ROUND(L, R, 14); \
+ BF_ROUND(R, L, 15); \
+ movl BF_ptr,tmp5; \
+ xorl L,tmp2; \
+ movl P(17),L
+
+#define BF_ENCRYPT_END \
+ xorl R,L; \
+ movl tmp2,R
+
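+/*
+ * BF_body_r: the key schedule body.  Repeatedly encrypts the all-zero
+ * 64-bit block with the current subkeys, storing each ciphertext back over
+ * the 18-word P array and then over the four S-boxes (the C equivalent
+ * lives in crypt_blowfish.c).  The single stack argument is a pointer to
+ * the S-box/P-array context; %esp is relocated to BF_FRAME bytes below it,
+ * and BF_die is reached if the context lies too far above the incoming
+ * stack pointer for that relocation to be safe.
+ */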
+DO_ALIGN(5)
+.globl _BF_body_r
+_BF_body_r:
+ movl 4(%esp),%eax
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ subl $BF_FRAME-8,%eax
+ xorl L,L
+ cmpl %esp,%eax
+ ja BF_die
+ xchgl %eax,%esp
+ xorl R,R
+ pushl %eax
+ leal 0x1000+BF_FRAME-4(ctx),%eax
+ movl 0x1000+BF_FRAME-4(ctx),tmp2
+ pushl %eax
+ xorl tmp3,tmp3
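+/* First pass: overwrite the 18 P subkeys, two words per encryption. */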
+BF_loop_P:
+ BF_ENCRYPT_START
+ addl $8,tmp5
+ BF_ENCRYPT_END
+ leal 0x1000+18*4+BF_FRAME(ctx),tmp1
+ movl tmp5,BF_ptr
+ cmpl tmp5,tmp1
+ movl L,-8(tmp5)
+ movl R,-4(tmp5)
+ movl P(0),tmp2
+ ja BF_loop_P
+ leal BF_FRAME(ctx),tmp5
+ xorl tmp3,tmp3
+ movl tmp5,BF_ptr
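+/* Second pass: overwrite the 0x1000 bytes of S-boxes, 32 bytes (four
+   encryptions) per loop iteration. */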
+BF_loop_S:
+ BF_ENCRYPT_START
+ BF_ENCRYPT_END
+ movl P(0),tmp2
+ movl L,(tmp5)
+ movl R,4(tmp5)
+ BF_ENCRYPT_START
+ BF_ENCRYPT_END
+ movl P(0),tmp2
+ movl L,8(tmp5)
+ movl R,12(tmp5)
+ BF_ENCRYPT_START
+ BF_ENCRYPT_END
+ movl P(0),tmp2
+ movl L,16(tmp5)
+ movl R,20(tmp5)
+ BF_ENCRYPT_START
+ addl $32,tmp5
+ BF_ENCRYPT_END
+ leal 0x1000+BF_FRAME(ctx),tmp1
+ movl tmp5,BF_ptr
+ cmpl tmp5,tmp1
+ movl P(0),tmp2
+ movl L,-8(tmp5)
+ movl R,-4(tmp5)
+ ja BF_loop_S
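+/* Restore the caller's stack pointer saved at function entry. */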
+ movl 4(%esp),%esp
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+
+BF_die:
+/* Oops, need to re-compile with a larger BF_FRAME. */
+ hlt
+ jmp BF_die
+
+#endif
+
+#if defined(__ELF__) && defined(__linux__)
+.section .note.GNU-stack,"",@progbits
+#endif