summaryrefslogtreecommitdiff
path: root/src/library/bcrypt/x86.S
blob: b0f1cd2ef124b893cbd0e6c174244691e6b9a61d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
/*
 * Written by Solar Designer <solar at openwall.com> in 1998-2010.
 * No copyright is claimed, and the software is hereby placed in the public
 * domain.  In case this attempt to disclaim copyright and place the software
 * in the public domain is deemed null and void, then the software is
 * Copyright (c) 1998-2010 Solar Designer and it is hereby released to the
 * general public under the following terms:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted.
 *
 * There's ABSOLUTELY NO WARRANTY, express or implied.
 *
 * See crypt_blowfish.c for more information.
 */

#if defined(__i386__)

/*
 * Per-platform assembler switches.
 *
 *   UNDERSCORES - export the entry point with one more leading underscore
 *                 (see the _BF_body_r rename at the end of this group).
 *   ALIGN_LOG   - read by DO_ALIGN to choose the form of its .align operand.
 *
 * Non-ELF OpenBSD and DJGPP get both switches; Cygwin and MinGW get only
 * the extra underscore.
 */
#if (defined(__OpenBSD__) && !defined(__ELF__)) || defined(__DJGPP__)
#define UNDERSCORES
#define ALIGN_LOG
#elif defined(__CYGWIN32__) || defined(__MINGW32__)
#define UNDERSCORES
#endif

#if defined(UNDERSCORES)
#define _BF_body_r			__BF_body_r
#endif

/*
 * DO_ALIGN(shift): emit a .align directive for an intended 2^shift
 * boundary.  With ALIGN_LOG the exponent is passed to .align unchanged;
 * otherwise .align receives 1 << shift.  DUMBAS selects a spelling of that
 * expression without any parentheses.
 */
#if !defined(ALIGN_LOG) && !defined(DUMBAS)
#define DO_ALIGN(shift)			.align (1 << (shift))
#elif defined(ALIGN_LOG)
#define DO_ALIGN(shift)			.align (shift)
#else
#define DO_ALIGN(shift)			.align 1 << shift
#endif

/*
 * Addressing of the working data.  _BF_body_r repoints %esp so that the
 * data block it was given starts BF_FRAME bytes above the stack pointer;
 * everything here is therefore written relative to ctx (%esp).
 *
 *   BF_ptr      - the word at the very top of the relocated stack
 *   S(tbl, idx) - 32-bit entry number idx (a register) of the table that
 *                 starts at byte offset tbl of the data block
 *   P(n)        - 32-bit word number n of the array that starts at byte
 *                 offset 0x1000 of the data block
 *
 * With DUMBAS, 4*n is spelled n+n+n+n (presumably for assemblers that do
 * not accept multiplication in a displacement).
 */
#define ctx				%esp
#define BF_FRAME			0x200

#define BF_ptr				(ctx)

#define S(tbl, idx)			tbl+BF_FRAME(ctx,idx,4)

#ifndef DUMBAS
#define P(n)				0x1000+4*n+BF_FRAME(ctx)
#else
#define P(n)				0x1000+n+n+n+n+BF_FRAME(ctx)
#endif

/*
 * This version of the assembly code is optimized primarily for the original
 * Intel Pentium but is also careful to avoid partial register stalls on the
 * Pentium Pro family of processors (tested up to Pentium III Coppermine).
 *
 * It is possible to run 15% faster on the Pentium Pro family and probably on
 * many non-Intel x86 processors, but, unfortunately, that would make things
 * twice as slow on the original Pentium.
 *
 * An additional 2% speedup may be achieved with non-reentrant code.
 */

/*
 * Register assignments used by the macros and by _BF_body_r.
 *
 *   L, R      - the pair of 32-bit words that the rounds transform and that
 *               is written out after every 16-round pass.
 *   tmp1-tmp4 - scratch; each ends up holding one byte-sized table index
 *               inside BF_ROUND.  tmp2 additionally carries the current P
 *               word from one round into the next.
 *   tmpN_lo / tmpN_hi
 *             - the 8-bit sub-registers (bits 7..0 / bits 15..8) of the
 *               corresponding 32-bit register, used for byte extraction.
 *   tmp5      - scratch inside BF_ROUND; between passes it holds the store
 *               pointer that is reloaded from BF_ptr.
 *
 * Together with %esp (ctx) this uses every general-purpose register, which
 * is why _BF_body_r saves %ebx, %esi, %edi and %ebp on entry.
 */
#define L				%esi
#define R				%edi
#define tmp1				%eax
#define tmp1_lo				%al
#define tmp2				%ecx
#define tmp2_hi				%ch
#define tmp3				%edx
#define tmp3_lo				%dl
#define tmp4				%ebx
#define tmp4_hi				%bh
#define tmp5				%ebp

/* Everything that follows is emitted into the code section. */
.text

/*
 * BF_ROUND(L, R, N): one round applied to the register pair (L, R), using
 * the P word with index N.  Inside the body, L and R are this macro's own
 * parameters, so BF_ROUND(R, L, n) runs the same code with the roles of the
 * two registers exchanged.
 *
 * On entry: tmp2 already holds P(N), and the upper 24 bits of tmp3 are zero
 *           (_BF_body_r clears tmp3 before each loop; only tmp3_lo is
 *           written here).
 * On exit:  L has been XORed with P(N); R has been XORed with the value
 *           built from the four table lookups described below; tmp2 holds
 *           the word at 4+P(N), i.e. the next P word, which is exactly the
 *           entry condition of the following round.
 * Clobbers: tmp1, tmp4, tmp5 and the flags.
 *
 * The updated L is split into its four bytes, each of which indexes one of
 * the four tables reached through S():
 *     tmp1 = bits 31..24  ->  S(0, tmp1)
 *     tmp2 = bits 23..16  ->  S(0x400, tmp2)
 *     tmp3 = bits 15..8   ->  S(0x800, tmp3)
 *     tmp4 = bits  7..0   ->  S(0xC00, tmp4)
 * The four entries are combined as ((e0 + e1) ^ e2) + e3 and the result is
 * XORed into R.
 *
 * Bits 31..24 and 15..8 are obtained with byte moves from tmp2_hi and
 * tmp4_hi into registers whose upper bits are already zero (tmp1 is cleared
 * by the xorl at the top).  The instructions are interleaved rather than
 * grouped by purpose, presumably for the CPU scheduling described in the
 * tuning note earlier in this file; do not reorder them casually.
 */
#define BF_ROUND(L, R, N) \
	xorl L,tmp2; \
	xorl tmp1,tmp1; \
	movl tmp2,L; \
	shrl $16,tmp2; \
	movl L,tmp4; \
	movb tmp2_hi,tmp1_lo; \
	andl $0xFF,tmp2; \
	movb tmp4_hi,tmp3_lo; \
	andl $0xFF,tmp4; \
	movl S(0,tmp1),tmp1; \
	movl S(0x400,tmp2),tmp5; \
	addl tmp5,tmp1; \
	movl S(0x800,tmp3),tmp5; \
	xorl tmp5,tmp1; \
	movl S(0xC00,tmp4),tmp5; \
	addl tmp1,tmp5; \
	movl 4+P(N),tmp2; \
	xorl tmp5,R

/*
 * BF_ENCRYPT_START: sixteen BF_ROUNDs over the pair (L, R), followed by the
 * first half of the closing step.  Successive rounds alternate which of the
 * two registers is passed first, so no explicit swap is needed between
 * rounds.
 *
 * On entry: tmp2 = P(0); the upper 24 bits of tmp3 are zero.
 * After round 15, tmp2 holds P(16).  The three trailing instructions then
 *   - reload tmp5 from BF_ptr (the rounds used tmp5 as scratch),
 *   - leave L ^ P(16) in tmp2, and
 *   - load P(17) into L.
 * BF_ENCRYPT_END completes the step.  The split lets a caller slip an
 * instruction between the two macros, as _BF_body_r does when it advances
 * tmp5.
 */
#define BF_ENCRYPT_START \
	BF_ROUND(L, R, 0); \
	BF_ROUND(R, L, 1); \
	BF_ROUND(L, R, 2); \
	BF_ROUND(R, L, 3); \
	BF_ROUND(L, R, 4); \
	BF_ROUND(R, L, 5); \
	BF_ROUND(L, R, 6); \
	BF_ROUND(R, L, 7); \
	BF_ROUND(L, R, 8); \
	BF_ROUND(R, L, 9); \
	BF_ROUND(L, R, 10); \
	BF_ROUND(R, L, 11); \
	BF_ROUND(L, R, 12); \
	BF_ROUND(R, L, 13); \
	BF_ROUND(L, R, 14); \
	BF_ROUND(R, L, 15); \
	movl BF_ptr,tmp5; \
	xorl L,tmp2; \
	movl P(17),L

/*
 * BF_ENCRYPT_END: second half of the closing step begun by
 * BF_ENCRYPT_START.  On entry L = P(17) and tmp2 = (previous L) ^ P(16).
 * On exit L = R ^ P(17) and R = (previous L) ^ P(16), i.e. the two words
 * come out exchanged.  tmp2 is left unchanged and must be reloaded with
 * P(0) before the next BF_ENCRYPT_START.
 */
#define BF_ENCRYPT_END \
	xorl R,L; \
	movl tmp2,R

/*
 * _BF_body_r
 *
 * In:       one pointer argument on the stack, read from 4(%esp) on entry.
 *           As used below, it addresses a data block holding 0x1000 bytes
 *           of tables (reached through S()) followed by eighteen 32-bit
 *           words at offset 0x1000 (reached through P()).
 * Effect:   starting from L = R = 0, repeatedly runs a 16-round pass and
 *           stores the resulting (L, R) pair over the next eight bytes:
 *           first across the eighteen P words, then across the 0x1000 table
 *           bytes.  Each stored pair is also the input of the next pass.
 *           This appears to be the table-regeneration part of the Blowfish
 *           key setup; see crypt_blowfish.c for the caller's view.
 * Out:      nothing is set up as a return value.
 * Saves:    %ebp, %ebx, %esi, %edi (push/pop) and %esp.
 * Clobbers: %eax, %ecx, %edx and the flags.
 *
 * While running, %esp points BF_FRAME bytes below the data block, so all
 * data is addressed off %esp and %ebp is free as a scratch register.  Two
 * words sit at the relocated stack top:
 *     0(ctx) = BF_ptr, the current store position
 *     4(ctx) = the %esp value to restore before returning
 */
DO_ALIGN(5)
.globl _BF_body_r
_BF_body_r:
	movl 4(%esp),%eax
	pushl %ebp
	pushl %ebx
	pushl %esi
	pushl %edi
/*
 * %eax = prospective stack pointer: BF_FRAME below the data block, less the
 * 8 bytes that the two pushes after the switch will consume.
 */
	subl $BF_FRAME-8,%eax
	xorl L,L
/*
 * Refuse to continue if that pointer lies above the current %esp (unsigned
 * compare).  Presumably this keeps the registers saved above from ending up
 * below the live stack pointer; BF_die notes that a larger BF_FRAME is the
 * remedy.
 */
	cmpl %esp,%eax
	ja BF_die
/* Switch stacks; %eax now holds the previous %esp. */
	xchgl %eax,%esp
	xorl R,R
/* Save the previous %esp; after the second push it is found at 4(ctx). */
	pushl %eax
/*
 * %esp is still 4 bytes above its final position here, hence the -4:
 * %eax = address of P(0), tmp2 = contents of P(0).
 */
	leal 0x1000+BF_FRAME-4(ctx),%eax
	movl 0x1000+BF_FRAME-4(ctx),tmp2
/* BF_ptr = address of P(0): the first store position. */
	pushl %eax
/* BF_ROUND relies on the upper 24 bits of tmp3 being zero. */
	xorl tmp3,tmp3
/*
 * Loop 1: overwrite P(0)..P(17), two words per pass (nine passes).
 */
BF_loop_P:
	BF_ENCRYPT_START
/* tmp5 was reloaded from BF_ptr by the macro above; step past this slot. */
	addl $8,tmp5
	BF_ENCRYPT_END
/* tmp1 = address just past P(17), the end of this loop's range. */
	leal 0x1000+18*4+BF_FRAME(ctx),tmp1
	movl tmp5,BF_ptr
/* Set the flags for the ja below; the movl instructions in between leave
 * them untouched. */
	cmpl tmp5,tmp1
	movl L,-8(tmp5)
	movl R,-4(tmp5)
/* Reload P(0) only after the stores: the first pass has just replaced it. */
	movl P(0),tmp2
	ja BF_loop_P
/* Loop 2 starts at the beginning of the data block (the tables). */
	leal BF_FRAME(ctx),tmp5
	xorl tmp3,tmp3
	movl tmp5,BF_ptr
/*
 * Loop 2: overwrite the 0x1000 table bytes.  The body is unrolled four
 * times, so each iteration writes 32 bytes (128 iterations).  BF_ptr is
 * updated only once per iteration; the first three passes store at fixed
 * offsets from the unchanged pointer.
 */
BF_loop_S:
	BF_ENCRYPT_START
	BF_ENCRYPT_END
	movl P(0),tmp2
	movl L,(tmp5)
	movl R,4(tmp5)
	BF_ENCRYPT_START
	BF_ENCRYPT_END
	movl P(0),tmp2
	movl L,8(tmp5)
	movl R,12(tmp5)
	BF_ENCRYPT_START
	BF_ENCRYPT_END
	movl P(0),tmp2
	movl L,16(tmp5)
	movl R,20(tmp5)
	BF_ENCRYPT_START
/* Fourth pass: advance the pointer, then store at negative offsets. */
	addl $32,tmp5
	BF_ENCRYPT_END
/* tmp1 = address of P(0), i.e. the end of the table area. */
	leal 0x1000+BF_FRAME(ctx),tmp1
	movl tmp5,BF_ptr
/* Flags for the ja below; the following movl instructions preserve them. */
	cmpl tmp5,tmp1
	movl P(0),tmp2
	movl L,-8(tmp5)
	movl R,-4(tmp5)
	ja BF_loop_S
/* Back to the original stack, then restore the saved registers. */
	movl 4(%esp),%esp
	popl %edi
	popl %esi
	popl %ebx
	popl %ebp
	ret

/*
 * BF_die: reached from _BF_body_r when its stack-placement check fails.
 * There is no error return to the caller; execution is stopped here.
 */
BF_die:
/* Oops, need to re-compile with a larger BF_FRAME. */
/*
 * hlt is a privileged x86 instruction, so in unprivileged code it is
 * expected to trap rather than halt -- TODO confirm for each target OS.
 */
	hlt
/* Should execution ever resume, come straight back instead of running on. */
	jmp BF_die

#endif /* __i386__ */

/*
 * Emit an empty .note.GNU-stack section so that GNU toolchains do not treat
 * this object as needing an executable stack.
 *
 * This block is outside the __i386__ guard, so it is assembled on every
 * Linux/ELF target, including those where the rest of the file is empty.
 * The section type is therefore written as %progbits: gas accepts the '%'
 * form on all ELF targets, whereas '@' begins a comment on some of them
 * (ARM, for example), which would cut the directive short.
 */
#if defined(__ELF__) && defined(__linux__)
.section .note.GNU-stack,"",%progbits
#endif