X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=arch%2Fi386%2Fcrypto%2Faes-i586-asm.S;h=f942f0c8f6306d19875b069a419743efe010c979;hb=97bf2856c6014879bd04983a3e9dfcdac1e7fe85;hp=e8a04713df26e5ed550935dc671097a553fb45cb;hpb=5fc42a6ed0ec81088c37caadb45898ae6cd0ad2c;p=linux-2.6.git diff --git a/arch/i386/crypto/aes-i586-asm.S b/arch/i386/crypto/aes-i586-asm.S index e8a04713d..f942f0c8f 100644 --- a/arch/i386/crypto/aes-i586-asm.S +++ b/arch/i386/crypto/aes-i586-asm.S @@ -36,22 +36,19 @@ .file "aes-i586-asm.S" .text -// aes_rval aes_enc_blk(const unsigned char in_blk[], unsigned char out_blk[], const aes_ctx cx[1])// -// aes_rval aes_dec_blk(const unsigned char in_blk[], unsigned char out_blk[], const aes_ctx cx[1])// - -#define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words) - -// offsets to parameters with one register pushed onto stack +#include -#define in_blk 8 // input byte array address parameter -#define out_blk 12 // output byte array address parameter -#define ctx 16 // AES context structure +#define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words) -// offsets in context structure +/* offsets to parameters with one register pushed onto stack */ +#define tfm 8 +#define out_blk 12 +#define in_blk 16 -#define ekey 0 // encryption key schedule base address -#define nrnd 256 // number of rounds -#define dkey 260 // decryption key schedule base address +/* offsets in crypto_tfm structure */ +#define ekey (crypto_tfm_ctx_offset + 0) +#define nrnd (crypto_tfm_ctx_offset + 256) +#define dkey (crypto_tfm_ctx_offset + 260) // register mapping for encrypt and decrypt subroutines @@ -61,7 +58,6 @@ #define r3 edx #define r4 esi #define r5 edi -#define r6 ebp #define eaxl al #define eaxh ah @@ -84,60 +80,63 @@ // output registers r0, r1, r4 or r5. // Parameters: +// table table base address // %1 out_state[0] // %2 out_state[1] // %3 out_state[2] // %4 out_state[3] -// %5 table base address -// %6 input register for the round (destroyed) -// %7 scratch register for the round - -#define do_col(a1, a2, a3, a4, a5, a6, a7) \ - movzx %l(a6),%a7; \ - xor a5(,%a7,4),%a1; \ - movzx %h(a6),%a7; \ - shr $16,%a6; \ - xor a5+tlen(,%a7,4),%a2; \ - movzx %l(a6),%a7; \ - movzx %h(a6),%a6; \ - xor a5+2*tlen(,%a7,4),%a3; \ - xor a5+3*tlen(,%a6,4),%a4; +// idx input register for the round (destroyed) +// tmp scratch register for the round +// sched key schedule + +#define do_col(table, a1,a2,a3,a4, idx, tmp) \ + movzx %l(idx),%tmp; \ + xor table(,%tmp,4),%a1; \ + movzx %h(idx),%tmp; \ + shr $16,%idx; \ + xor table+tlen(,%tmp,4),%a2; \ + movzx %l(idx),%tmp; \ + movzx %h(idx),%idx; \ + xor table+2*tlen(,%tmp,4),%a3; \ + xor table+3*tlen(,%idx,4),%a4; // initialise output registers from the key schedule - -#define do_fcol(a1, a2, a3, a4, a5, a6, a7, a8) \ - mov 0 a8,%a1; \ - movzx %l(a6),%a7; \ - mov 12 a8,%a2; \ - xor a5(,%a7,4),%a1; \ - mov 4 a8,%a4; \ - movzx %h(a6),%a7; \ - shr $16,%a6; \ - xor a5+tlen(,%a7,4),%a2; \ - movzx %l(a6),%a7; \ - movzx %h(a6),%a6; \ - xor a5+3*tlen(,%a6,4),%a4; \ - mov %a3,%a6; \ - mov 8 a8,%a3; \ - xor a5+2*tlen(,%a7,4),%a3; +// NB1: original value of a3 is in idx on exit +// NB2: original values of a1,a2,a4 aren't used +#define do_fcol(table, a1,a2,a3,a4, idx, tmp, sched) \ + mov 0 sched,%a1; \ + movzx %l(idx),%tmp; \ + mov 12 sched,%a2; \ + xor table(,%tmp,4),%a1; \ + mov 4 sched,%a4; \ + movzx %h(idx),%tmp; \ + shr $16,%idx; \ + xor table+tlen(,%tmp,4),%a2; \ + movzx %l(idx),%tmp; \ + movzx %h(idx),%idx; \ + xor table+3*tlen(,%idx,4),%a4; \ + mov %a3,%idx; \ + mov 8 sched,%a3; \ + xor table+2*tlen(,%tmp,4),%a3; // initialise output registers from the key schedule - -#define do_icol(a1, a2, a3, a4, a5, a6, a7, a8) \ - mov 0 a8,%a1; \ - movzx %l(a6),%a7; \ - mov 4 a8,%a2; \ - xor a5(,%a7,4),%a1; \ - mov 12 a8,%a4; \ - movzx %h(a6),%a7; \ - shr $16,%a6; \ - xor a5+tlen(,%a7,4),%a2; \ - movzx %l(a6),%a7; \ - movzx %h(a6),%a6; \ - xor a5+3*tlen(,%a6,4),%a4; \ - mov %a3,%a6; \ - mov 8 a8,%a3; \ - xor a5+2*tlen(,%a7,4),%a3; +// NB1: original value of a3 is in idx on exit +// NB2: original values of a1,a2,a4 aren't used +#define do_icol(table, a1,a2,a3,a4, idx, tmp, sched) \ + mov 0 sched,%a1; \ + movzx %l(idx),%tmp; \ + mov 4 sched,%a2; \ + xor table(,%tmp,4),%a1; \ + mov 12 sched,%a4; \ + movzx %h(idx),%tmp; \ + shr $16,%idx; \ + xor table+tlen(,%tmp,4),%a2; \ + movzx %l(idx),%tmp; \ + movzx %h(idx),%idx; \ + xor table+3*tlen(,%idx,4),%a4; \ + mov %a3,%idx; \ + mov 8 sched,%a3; \ + xor table+2*tlen(,%tmp,4),%a3; // original Gladman had conditional saves to MMX regs. @@ -147,46 +146,78 @@ #define restore(a1, a2) \ mov 4*a2(%esp),%a1 -// This macro performs a forward encryption cycle. It is entered with -// the first previous round column values in r0, r1, r4 and r5 and -// exits with the final values in the same registers, using the MMX -// registers mm0-mm1 or the stack for temporary storage - -// mov current column values into the MMX registers -#define fwd_rnd(arg, table) \ - /* mov current column values into the MMX registers */ \ - mov %r0,%r2; \ - save (0,r1); \ - save (1,r5); \ - \ - /* compute new column values */ \ - do_fcol(r0,r5,r4,r1,table, r2,r3, arg); \ - do_col (r4,r1,r0,r5,table, r2,r3); \ - restore(r2,0); \ - do_col (r1,r0,r5,r4,table, r2,r3); \ - restore(r2,1); \ - do_col (r5,r4,r1,r0,table, r2,r3); - -// This macro performs an inverse encryption cycle. It is entered with -// the first previous round column values in r0, r1, r4 and r5 and -// exits with the final values in the same registers, using the MMX -// registers mm0-mm1 or the stack for temporary storage - -#define inv_rnd(arg, table) \ - /* mov current column values into the MMX registers */ \ - mov %r0,%r2; \ - save (0,r1); \ - save (1,r5); \ - \ - /* compute new column values */ \ - do_icol(r0,r1,r4,r5, table, r2,r3, arg); \ - do_col (r4,r5,r0,r1, table, r2,r3); \ - restore(r2,0); \ - do_col (r1,r4,r5,r0, table, r2,r3); \ - restore(r2,1); \ - do_col (r5,r0,r1,r4, table, r2,r3); +// These macros perform a forward encryption cycle. They are entered with +// the first previous round column values in r0,r1,r4,r5 and +// exit with the final values in the same registers, using stack +// for temporary storage. + +// round column values +// on entry: r0,r1,r4,r5 +// on exit: r2,r1,r4,r5 +#define fwd_rnd1(arg, table) \ + save (0,r1); \ + save (1,r5); \ + \ + /* compute new column values */ \ + do_fcol(table, r2,r5,r4,r1, r0,r3, arg); /* idx=r0 */ \ + do_col (table, r4,r1,r2,r5, r0,r3); /* idx=r4 */ \ + restore(r0,0); \ + do_col (table, r1,r2,r5,r4, r0,r3); /* idx=r1 */ \ + restore(r0,1); \ + do_col (table, r5,r4,r1,r2, r0,r3); /* idx=r5 */ + +// round column values +// on entry: r2,r1,r4,r5 +// on exit: r0,r1,r4,r5 +#define fwd_rnd2(arg, table) \ + save (0,r1); \ + save (1,r5); \ + \ + /* compute new column values */ \ + do_fcol(table, r0,r5,r4,r1, r2,r3, arg); /* idx=r2 */ \ + do_col (table, r4,r1,r0,r5, r2,r3); /* idx=r4 */ \ + restore(r2,0); \ + do_col (table, r1,r0,r5,r4, r2,r3); /* idx=r1 */ \ + restore(r2,1); \ + do_col (table, r5,r4,r1,r0, r2,r3); /* idx=r5 */ + +// These macros performs an inverse encryption cycle. They are entered with +// the first previous round column values in r0,r1,r4,r5 and +// exit with the final values in the same registers, using stack +// for temporary storage + +// round column values +// on entry: r0,r1,r4,r5 +// on exit: r2,r1,r4,r5 +#define inv_rnd1(arg, table) \ + save (0,r1); \ + save (1,r5); \ + \ + /* compute new column values */ \ + do_icol(table, r2,r1,r4,r5, r0,r3, arg); /* idx=r0 */ \ + do_col (table, r4,r5,r2,r1, r0,r3); /* idx=r4 */ \ + restore(r0,0); \ + do_col (table, r1,r4,r5,r2, r0,r3); /* idx=r1 */ \ + restore(r0,1); \ + do_col (table, r5,r2,r1,r4, r0,r3); /* idx=r5 */ + +// round column values +// on entry: r2,r1,r4,r5 +// on exit: r0,r1,r4,r5 +#define inv_rnd2(arg, table) \ + save (0,r1); \ + save (1,r5); \ + \ + /* compute new column values */ \ + do_icol(table, r0,r1,r4,r5, r2,r3, arg); /* idx=r2 */ \ + do_col (table, r4,r5,r0,r1, r2,r3); /* idx=r4 */ \ + restore(r2,0); \ + do_col (table, r1,r4,r5,r0, r2,r3); /* idx=r1 */ \ + restore(r2,1); \ + do_col (table, r5,r0,r1,r4, r2,r3); /* idx=r5 */ // AES (Rijndael) Encryption Subroutine +/* void aes_enc_blk(struct crypto_tfm *tfm, u8 *out_blk, const u8 *in_blk) */ .global aes_enc_blk @@ -197,8 +228,7 @@ aes_enc_blk: push %ebp - mov ctx(%esp),%ebp // pointer to context - xor %eax,%eax + mov tfm(%esp),%ebp // CAUTION: the order and the values used in these assigns // rely on the register mappings @@ -208,7 +238,9 @@ aes_enc_blk: push %esi mov nrnd(%ebp),%r3 // number of rounds push %edi - lea ekey(%ebp),%r6 // key pointer +#if ekey != 0 + lea ekey(%ebp),%ebp // key pointer +#endif // input four columns and xor in first round key @@ -216,52 +248,52 @@ aes_enc_blk: mov 4(%r2),%r1 mov 8(%r2),%r4 mov 12(%r2),%r5 - xor (%r6),%r0 - xor 4(%r6),%r1 - xor 8(%r6),%r4 - xor 12(%r6),%r5 - - sub $8,%esp // space for register saves on stack - add $16,%r6 // increment to next round key - sub $10,%r3 - je 4f // 10 rounds for 128-bit key - add $32,%r6 - sub $2,%r3 - je 3f // 12 rounds for 128-bit key - add $32,%r6 - -2: fwd_rnd( -64(%r6) ,ft_tab) // 14 rounds for 128-bit key - fwd_rnd( -48(%r6) ,ft_tab) -3: fwd_rnd( -32(%r6) ,ft_tab) // 12 rounds for 128-bit key - fwd_rnd( -16(%r6) ,ft_tab) -4: fwd_rnd( (%r6) ,ft_tab) // 10 rounds for 128-bit key - fwd_rnd( +16(%r6) ,ft_tab) - fwd_rnd( +32(%r6) ,ft_tab) - fwd_rnd( +48(%r6) ,ft_tab) - fwd_rnd( +64(%r6) ,ft_tab) - fwd_rnd( +80(%r6) ,ft_tab) - fwd_rnd( +96(%r6) ,ft_tab) - fwd_rnd(+112(%r6) ,ft_tab) - fwd_rnd(+128(%r6) ,ft_tab) - fwd_rnd(+144(%r6) ,fl_tab) // last round uses a different table + xor (%ebp),%r0 + xor 4(%ebp),%r1 + xor 8(%ebp),%r4 + xor 12(%ebp),%r5 + + sub $8,%esp // space for register saves on stack + add $16,%ebp // increment to next round key + cmp $12,%r3 + jb 4f // 10 rounds for 128-bit key + lea 32(%ebp),%ebp + je 3f // 12 rounds for 192-bit key + lea 32(%ebp),%ebp + +2: fwd_rnd1( -64(%ebp) ,ft_tab) // 14 rounds for 256-bit key + fwd_rnd2( -48(%ebp) ,ft_tab) +3: fwd_rnd1( -32(%ebp) ,ft_tab) // 12 rounds for 192-bit key + fwd_rnd2( -16(%ebp) ,ft_tab) +4: fwd_rnd1( (%ebp) ,ft_tab) // 10 rounds for 128-bit key + fwd_rnd2( +16(%ebp) ,ft_tab) + fwd_rnd1( +32(%ebp) ,ft_tab) + fwd_rnd2( +48(%ebp) ,ft_tab) + fwd_rnd1( +64(%ebp) ,ft_tab) + fwd_rnd2( +80(%ebp) ,ft_tab) + fwd_rnd1( +96(%ebp) ,ft_tab) + fwd_rnd2(+112(%ebp) ,ft_tab) + fwd_rnd1(+128(%ebp) ,ft_tab) + fwd_rnd2(+144(%ebp) ,fl_tab) // last round uses a different table // move final values to the output array. CAUTION: the // order of these assigns rely on the register mappings add $8,%esp - mov out_blk+12(%esp),%r6 - mov %r5,12(%r6) + mov out_blk+12(%esp),%ebp + mov %r5,12(%ebp) pop %edi - mov %r4,8(%r6) + mov %r4,8(%ebp) pop %esi - mov %r1,4(%r6) + mov %r1,4(%ebp) pop %ebx - mov %r0,(%r6) + mov %r0,(%ebp) pop %ebp mov $1,%eax ret // AES (Rijndael) Decryption Subroutine +/* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out_blk, const u8 *in_blk) */ .global aes_dec_blk @@ -272,8 +304,7 @@ aes_enc_blk: aes_dec_blk: push %ebp - mov ctx(%esp),%ebp // pointer to context - xor %eax,%eax + mov tfm(%esp),%ebp // CAUTION: the order and the values used in these assigns // rely on the register mappings @@ -283,10 +314,12 @@ aes_dec_blk: push %esi mov nrnd(%ebp),%r3 // number of rounds push %edi - lea dkey(%ebp),%r6 // key pointer +#if dkey != 0 + lea dkey(%ebp),%ebp // key pointer +#endif mov %r3,%r0 shl $4,%r0 - add %r0,%r6 + add %r0,%ebp // input four columns and xor in first round key @@ -294,47 +327,46 @@ aes_dec_blk: mov 4(%r2),%r1 mov 8(%r2),%r4 mov 12(%r2),%r5 - xor (%r6),%r0 - xor 4(%r6),%r1 - xor 8(%r6),%r4 - xor 12(%r6),%r5 - - sub $8,%esp // space for register saves on stack - sub $16,%r6 // increment to next round key - sub $10,%r3 - je 4f // 10 rounds for 128-bit key - sub $32,%r6 - sub $2,%r3 - je 3f // 12 rounds for 128-bit key - sub $32,%r6 - -2: inv_rnd( +64(%r6), it_tab) // 14 rounds for 128-bit key - inv_rnd( +48(%r6), it_tab) -3: inv_rnd( +32(%r6), it_tab) // 12 rounds for 128-bit key - inv_rnd( +16(%r6), it_tab) -4: inv_rnd( (%r6), it_tab) // 10 rounds for 128-bit key - inv_rnd( -16(%r6), it_tab) - inv_rnd( -32(%r6), it_tab) - inv_rnd( -48(%r6), it_tab) - inv_rnd( -64(%r6), it_tab) - inv_rnd( -80(%r6), it_tab) - inv_rnd( -96(%r6), it_tab) - inv_rnd(-112(%r6), it_tab) - inv_rnd(-128(%r6), it_tab) - inv_rnd(-144(%r6), il_tab) // last round uses a different table + xor (%ebp),%r0 + xor 4(%ebp),%r1 + xor 8(%ebp),%r4 + xor 12(%ebp),%r5 + + sub $8,%esp // space for register saves on stack + sub $16,%ebp // increment to next round key + cmp $12,%r3 + jb 4f // 10 rounds for 128-bit key + lea -32(%ebp),%ebp + je 3f // 12 rounds for 192-bit key + lea -32(%ebp),%ebp + +2: inv_rnd1( +64(%ebp), it_tab) // 14 rounds for 256-bit key + inv_rnd2( +48(%ebp), it_tab) +3: inv_rnd1( +32(%ebp), it_tab) // 12 rounds for 192-bit key + inv_rnd2( +16(%ebp), it_tab) +4: inv_rnd1( (%ebp), it_tab) // 10 rounds for 128-bit key + inv_rnd2( -16(%ebp), it_tab) + inv_rnd1( -32(%ebp), it_tab) + inv_rnd2( -48(%ebp), it_tab) + inv_rnd1( -64(%ebp), it_tab) + inv_rnd2( -80(%ebp), it_tab) + inv_rnd1( -96(%ebp), it_tab) + inv_rnd2(-112(%ebp), it_tab) + inv_rnd1(-128(%ebp), it_tab) + inv_rnd2(-144(%ebp), il_tab) // last round uses a different table // move final values to the output array. CAUTION: the // order of these assigns rely on the register mappings add $8,%esp - mov out_blk+12(%esp),%r6 - mov %r5,12(%r6) + mov out_blk+12(%esp),%ebp + mov %r5,12(%ebp) pop %edi - mov %r4,8(%r6) + mov %r4,8(%ebp) pop %esi - mov %r1,4(%r6) + mov %r1,4(%ebp) pop %ebx - mov %r0,(%r6) + mov %r0,(%ebp) pop %ebp mov $1,%eax ret