1 // -------------------------------------------------------------------------
2 // Copyright (c) 2001, Dr Brian Gladman < >, Worcester, UK.
3 // All rights reserved.
7 // The free distribution and use of this software in both source and binary
8 // form is allowed (with or without changes) provided that:
10 // 1. distributions of this source code include the above copyright
11 // notice, this list of conditions and the following disclaimer;
13 // 2. distributions in binary form include the above copyright
14 // notice, this list of conditions and the following disclaimer
15 // in the documentation and/or other associated materials;
17 // 3. the copyright holder's name is not used to endorse products
18 // built using this software without specific written permission.
21 // ALTERNATIVELY, provided that this notice is retained in full, this product
22 // may be distributed under the terms of the GNU General Public License (GPL),
23 // in which case the provisions of the GPL apply INSTEAD OF those given above.
25 // Copyright (c) 2004 Linus Torvalds <torvalds@osdl.org>
26 // Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
30 // This software is provided 'as is' with no explicit or implied warranties
31 // in respect of its properties including, but not limited to, correctness
32 // and fitness for purpose.
33 // -------------------------------------------------------------------------
34 // Issue Date: 29/07/2002
36 .file "aes-i586-asm.S"
39 // aes_rval aes_enc_blk(const unsigned char in_blk[], unsigned char out_blk[], const aes_ctx cx[1]);
40 // aes_rval aes_dec_blk(const unsigned char in_blk[], unsigned char out_blk[], const aes_ctx cx[1]);
42 #define tlen 1024 // length in bytes of each of 4 'xor' arrays (256 32-bit words)
// The in_blk / out_blk / ctx values below are byte offsets from %esp.  The
// subroutines add a further displacement on top of them where the stack
// pointer has moved since entry (e.g. in_blk+4(%esp), out_blk+12(%esp)), so
// these values must be kept in step with every push and stack adjustment.
44 // offsets to parameters with one register pushed onto stack
46 #define in_blk 8 // input byte array address parameter
47 #define out_blk 12 // output byte array address parameter
48 #define ctx 16 // AES context structure
50 // offsets in context structure
// NOTE(review): these byte offsets must match the layout of the C-side
// context structure, which is not visible from this file -- confirm whenever
// either side changes.
52 #define ekey 0 // encryption key schedule base address
53 #define nrnd 256 // number of rounds
54 #define dkey 260 // decryption key schedule base address
56 // register mapping for encrypt and decrypt subroutines
// h(reg) / l(reg) append the suffix "h" / "l" to a register name by token
// pasting.  The extra level of indirection through _h / _l makes the
// preprocessor expand `reg` first, so that a register alias macro passed as
// the argument is replaced by the name it stands for before the suffix is
// pasted on.
75 #define _h(reg) reg##h
76 #define h(reg) _h(reg)
78 #define _l(reg) reg##l
79 #define l(reg) _l(reg)
81 // This macro takes a 32-bit word representing a column and uses
82 // each of its four bytes to index into four tables of 256 32-bit
83 // words to obtain values that are then xored into the appropriate
84 // output registers r0, r1, r4 or r5.
// In the parameter list, %N refers to macro argument aN.  The four tables
// are addressed at a5, a5+tlen, a5+2*tlen and a5+3*tlen, i.e. they are
// spaced tlen bytes apart, and each lookup scales the index register by 4
// (one 32-bit word per table entry).
91 // %5 table base address
92 // %6 input register for the round (destroyed)
93 // %7 scratch register for the round
95 #define do_col(a1, a2, a3, a4, a5, a6, a7) \
100 xor a5+tlen(,%a7,4),%a2; \
103 xor a5+2*tlen(,%a7,4),%a3; \
104 xor a5+3*tlen(,%a6,4),%a4;
106 // initialise output registers from the key schedule
// Forward-round variant of the column lookup, taking one extra argument:
//   a1..a4  registers that the four table values are xored into
//   a5      table base address (tables spaced tlen bytes apart)
//   a6, a7  registers used as table indices (scaled by 4)
//   a8      round-key operand, supplied by fwd_rnd as its `arg`
// Note that the lookup from the fourth table (a5+3*tlen, into a4) is issued
// before the lookup from the third table (a5+2*tlen, into a3).
108 #define do_fcol(a1, a2, a3, a4, a5, a6, a7, a8) \
112 xor a5(,%a7,4),%a1; \
116 xor a5+tlen(,%a7,4),%a2; \
119 xor a5+3*tlen(,%a6,4),%a4; \
122 xor a5+2*tlen(,%a7,4),%a3;
124 // initialise output registers from the key schedule
// Inverse-round variant of the column lookup, taking one extra argument:
//   a1..a4  registers that the four table values are xored into
//   a5      table base address (tables spaced tlen bytes apart)
//   a6, a7  registers used as table indices (scaled by 4)
//   a8      round-key operand, supplied by inv_rnd as its `arg`
// Note that the lookup from the fourth table (a5+3*tlen, into a4) is issued
// before the lookup from the third table (a5+2*tlen, into a3).
126 #define do_icol(a1, a2, a3, a4, a5, a6, a7, a8) \
130 xor a5(,%a7,4),%a1; \
134 xor a5+tlen(,%a7,4),%a2; \
137 xor a5+3*tlen(,%a6,4),%a4; \
140 xor a5+2*tlen(,%a7,4),%a3;
143 // original Gladman had conditional saves to MMX regs.
144 #define save(a1, a2) \
147 #define restore(a1, a2) \
150 // This macro performs a forward encryption cycle. It is entered with
151 // the first previous round column values in r0, r1, r4 and r5 and
152 // exits with the final values in the same registers, using the MMX
153 // registers mm0-mm1 or the stack for temporary storage
//
//   arg    round-key operand for this round, forwarded to do_fcol
//   table  lookup-table base, forwarded to do_fcol and to every do_col
//          (the callers pass ft_tab, or fl_tab for the last round)
//
// r2 and r3 are handed to the column macros as the input and scratch
// registers.
// NOTE(review): the MMX wording above looks inherited from the original
// Gladman code; the subroutines in this file reserve stack space for
// register saves instead -- confirm and reword if MMX is no longer used.
155 // mov current column values into the MMX registers
156 #define fwd_rnd(arg, table) \
157 /* mov current column values into the MMX registers */ \
162 /* compute new column values */ \
163 do_fcol(r0,r5,r4,r1,table, r2,r3, arg); \
164 do_col (r4,r1,r0,r5,table, r2,r3); \
166 do_col (r1,r0,r5,r4,table, r2,r3); \
168 do_col (r5,r4,r1,r0,table, r2,r3);
170 // This macro performs an inverse (decryption) cycle. It is entered with
171 // the first previous round column values in r0, r1, r4 and r5 and
172 // exits with the final values in the same registers, using the MMX
173 // registers mm0-mm1 or the stack for temporary storage
//
//   arg    round-key operand for this round, forwarded to do_icol
//   table  lookup-table base, forwarded to do_icol and to every do_col
//          (the callers pass it_tab, or il_tab for the last round)
//
// r2 and r3 are handed to the column macros as the input and scratch
// registers.
// NOTE(review): the MMX wording above looks inherited from the original
// Gladman code; the subroutines in this file reserve stack space for
// register saves instead -- confirm and reword if MMX is no longer used.
175 #define inv_rnd(arg, table) \
176 /* mov current column values into the MMX registers */ \
181 /* compute new column values */ \
182 do_icol(r0,r1,r4,r5, table, r2,r3, arg); \
183 do_col (r4,r5,r0,r1, table, r2,r3); \
185 do_col (r1,r4,r5,r0, table, r2,r3); \
187 do_col (r5,r0,r1,r4, table, r2,r3);
189 // AES (Rijndael) Encryption Subroutine
// The rounds are fully unrolled.  The branches below pick the entry point
// into the sequence: entering at label 2 runs 14 rounds (256-bit key), at
// label 3 runs 12 rounds (192-bit key) and at label 4 runs 10 rounds
// (128-bit key).  The key pointer r6 is positioned so that all three paths
// share the same displacements for their common final rounds.
200 mov ctx(%esp),%ebp // pointer to context
203 // CAUTION: the order and the values used in these assigns
204 // rely on the register mappings
207 mov in_blk+4(%esp),%r2
209 mov nrnd(%ebp),%r3 // number of rounds
211 lea ekey(%ebp),%r6 // key pointer
213 // input four columns and xor in first round key
224 sub $8,%esp // space for register saves on stack
225 add $16,%r6 // increment to next round key
227 je 4f // 10 rounds for 128-bit key
230 je 3f // 12 rounds for 192-bit key
233 2: fwd_rnd( -64(%r6) ,ft_tab) // 14 rounds for 256-bit key
234 fwd_rnd( -48(%r6) ,ft_tab)
235 3: fwd_rnd( -32(%r6) ,ft_tab) // 12 rounds for 192-bit key
236 fwd_rnd( -16(%r6) ,ft_tab)
237 4: fwd_rnd( (%r6) ,ft_tab) // 10 rounds for 128-bit key
238 fwd_rnd( +16(%r6) ,ft_tab)
239 fwd_rnd( +32(%r6) ,ft_tab)
240 fwd_rnd( +48(%r6) ,ft_tab)
241 fwd_rnd( +64(%r6) ,ft_tab)
242 fwd_rnd( +80(%r6) ,ft_tab)
243 fwd_rnd( +96(%r6) ,ft_tab)
244 fwd_rnd(+112(%r6) ,ft_tab)
245 fwd_rnd(+128(%r6) ,ft_tab)
246 fwd_rnd(+144(%r6) ,fl_tab) // last round uses a different table
248 // move final values to the output array. CAUTION: the
249 // order of these assigns rely on the register mappings
252 mov out_blk+12(%esp),%r6
264 // AES (Rijndael) Decryption Subroutine
// The rounds are fully unrolled.  The branches below pick the entry point
// into the sequence: entering at label 2 runs 14 rounds (256-bit key), at
// label 3 runs 12 rounds (192-bit key) and at label 4 runs 10 rounds
// (128-bit key).  Unlike encryption, the round-key displacements descend
// from round to round, i.e. the key schedule is walked downwards.
275 mov ctx(%esp),%ebp // pointer to context
278 // CAUTION: the order and the values used in these assigns
279 // rely on the register mappings
282 mov in_blk+4(%esp),%r2
284 mov nrnd(%ebp),%r3 // number of rounds
286 lea dkey(%ebp),%r6 // key pointer
291 // input four columns and xor in first round key
302 sub $8,%esp // space for register saves on stack
303 sub $16,%r6 // step back to next round key (schedule is walked downwards)
305 je 4f // 10 rounds for 128-bit key
308 je 3f // 12 rounds for 192-bit key
311 2: inv_rnd( +64(%r6), it_tab) // 14 rounds for 256-bit key
312 inv_rnd( +48(%r6), it_tab)
313 3: inv_rnd( +32(%r6), it_tab) // 12 rounds for 192-bit key
314 inv_rnd( +16(%r6), it_tab)
315 4: inv_rnd( (%r6), it_tab) // 10 rounds for 128-bit key
316 inv_rnd( -16(%r6), it_tab)
317 inv_rnd( -32(%r6), it_tab)
318 inv_rnd( -48(%r6), it_tab)
319 inv_rnd( -64(%r6), it_tab)
320 inv_rnd( -80(%r6), it_tab)
321 inv_rnd( -96(%r6), it_tab)
322 inv_rnd(-112(%r6), it_tab)
323 inv_rnd(-128(%r6), it_tab)
324 inv_rnd(-144(%r6), il_tab) // last round uses a different table
326 // move final values to the output array. CAUTION: the
327 // order of these assigns rely on the register mappings
330 mov out_blk+12(%esp),%r6