/* SPDX-License-Identifier: GPL-2.0-or-later */
/***************************************************************************
*   Copyright (C) 2006 by Joachim Fritschi, <jfritschi@freenet.de>        *
*                                                                         *
***************************************************************************/

/*
 * Twofish block encryption/decryption for 32-bit x86.
 * Syntax: GNU as, AT&T operand order (op src, dst), run through the
 * C preprocessor.
 */

.file "twofish-i586-asm.S"
.text

#include <linux/linkage.h>
#include <asm/asm-offsets.h>

/*
 * Stack layout on function entry (return address at 0).
 * Both entry points push four registers before reading their arguments,
 * which is why every argument access below adds 16 to these offsets.
 */

#define in_blk    12  /* input byte array address parameter */
#define out_blk   8   /* output byte array address parameter */
#define ctx       4   /* Twofish context structure */

/* Byte offsets of the four 32-bit words a, b, c, d inside a block */

#define a_offset	0
#define b_offset	4
#define c_offset	8
#define d_offset	12

/* Structure of the crypto context struct (byte offsets from its base) */

#define s0	0	/* S0 Array 256 Words each */
#define s1	1024	/* S1 Array */
#define s2	2048	/* S2 Array */
#define s3	3072	/* S3 Array */
#define w	4096	/* 8 whitening keys (word) */
#define k	4128	/* key 1-32 ( word ) */

/*
 * Define a few register aliases to allow macro substitution:
 * RnD is the 32-bit register, RnB its low byte, RnH its second byte.
 * The round macros paste the D/B/H suffix onto the Rn argument.
 */

#define R0D    %eax
#define R0B    %al
#define R0H    %ah

#define R1D    %ebx
#define R1B    %bl
#define R1H    %bh

#define R2D    %ecx
#define R2B    %cl
#define R2H    %ch

#define R3D    %edx
#define R3B    %dl
#define R3H    %dh


/*
 * performs input whitening:
 * xors src with whitening key word w[offset/4] (first four words of w)
 */
#define input_whitening(src,context,offset)\
	xor	w+offset(context),	src;

/*
 * performs output whitening:
 * xors src with whitening key word w[4 + offset/4] (last four words of w)
 */
#define output_whitening(src,context,offset)\
	xor	w+16+offset(context),	src;

/*
 * One encryption round.
 *
 * a input register containing a (rotated 16)
 * b input register containing b
 * c input register containing c
 * d input register containing d (already rol $1)
 * round byte offset of this round's key pair inside k
 * operations on a and b are interleaved to increase performance
 *
 * Expects %ebp to hold the context base address.
 * Clobbers %esi and %edi and temporarily uses one stack slot (push/pop)
 * to keep the incoming value of d while d is reused as an accumulator.
 *
 * On exit: a has been rotated right by 16 in total, b right by 31
 * (i.e. rol $1), c holds (c ^ esi-sum) rol $15 and d holds the new d.
 * This is the form the next round expects when it is invoked with the
 * register pairs swapped (a<->c, b<->d).
 */
#define encrypt_round(a,b,c,d,round)\
	push	d ## D;\
	movzx	b ## B,		%edi;\
	mov	s1(%ebp,%edi,4),d ## D;\
	movzx	a ## B,		%edi;\
	mov	s2(%ebp,%edi,4),%esi;\
	movzx	b ## H,		%edi;\
	ror	$16,		b ## D;\
	xor	s2(%ebp,%edi,4),d ## D;\
	movzx	a ## H,		%edi;\
	ror	$16,		a ## D;\
	xor	s3(%ebp,%edi,4),%esi;\
	movzx	b ## B,		%edi;\
	xor	s3(%ebp,%edi,4),d ## D;\
	movzx	a ## B,		%edi;\
	xor	(%ebp,%edi,4),	%esi;\
	movzx	b ## H,		%edi;\
	ror	$15,		b ## D;\
	xor	(%ebp,%edi,4),	d ## D;\
	movzx	a ## H,		%edi;\
	xor	s1(%ebp,%edi,4),%esi;\
	pop	%edi;\
	add	d ## D,		%esi;\
	add	%esi,		d ## D;\
	add	k+round(%ebp),	%esi;\
	xor	%esi,		c ## D;\
	rol	$15,		c ## D;\
	add	k+4+round(%ebp),d ## D;\
	xor	%edi,		d ## D;

/*
 * Final encryption round.
 *
 * a input register containing a (rotated 16)
 * b input register containing b
 * c input register containing c
 * d input register containing d (already rol $1)
 * round byte offset of this round's key pair inside k
 * operations on a and b are interleaved to increase performance
 * last round has different rotations for the output preparation:
 * b is rotated right by 16 twice (net unchanged) and c is only
 * rotated right by 1, so no register is left in a pre-rotated state.
 *
 * Expects %ebp to hold the context base address.
 * Clobbers %esi and %edi and temporarily uses one stack slot.
 */
#define encrypt_last_round(a,b,c,d,round)\
	push	d ## D;\
	movzx	b ## B,		%edi;\
	mov	s1(%ebp,%edi,4),d ## D;\
	movzx	a ## B,		%edi;\
	mov	s2(%ebp,%edi,4),%esi;\
	movzx	b ## H,		%edi;\
	ror	$16,		b ## D;\
	xor	s2(%ebp,%edi,4),d ## D;\
	movzx	a ## H,		%edi;\
	ror	$16,		a ## D;\
	xor	s3(%ebp,%edi,4),%esi;\
	movzx	b ## B,		%edi;\
	xor	s3(%ebp,%edi,4),d ## D;\
	movzx	a ## B,		%edi;\
	xor	(%ebp,%edi,4),	%esi;\
	movzx	b ## H,		%edi;\
	ror	$16,		b ## D;\
	xor	(%ebp,%edi,4),	d ## D;\
	movzx	a ## H,		%edi;\
	xor	s1(%ebp,%edi,4),%esi;\
	pop	%edi;\
	add	d ## D,		%esi;\
	add	%esi,		d ## D;\
	add	k+round(%ebp),	%esi;\
	xor	%esi,		c ## D;\
	ror	$1,		c ## D;\
	add	k+4+round(%ebp),d ## D;\
	xor	%edi,		d ## D;

/*
 * One decryption round.
 *
 * a input register containing a
 * b input register containing b (rotated 16)
 * c input register containing c (already rol $1)
 * d input register containing d
 * round byte offset of this round's key pair inside k
 * operations on a and b are interleaved to increase performance
 *
 * Expects %ebp to hold the context base address.
 * Clobbers %esi and %edi and temporarily uses one stack slot (push/pop)
 * to keep the incoming value of c while c is reused as an accumulator.
 *
 * On exit: a has been rotated right by 31 in total (i.e. rol $1),
 * b right by 16, c holds the new c and d holds (d ^ esi-sum) rol $15.
 * This is the form the next round expects when it is invoked with the
 * register pairs swapped (a<->c, b<->d).
 */
#define decrypt_round(a,b,c,d,round)\
	push	c ## D;\
	movzx	a ## B,		%edi;\
	mov	(%ebp,%edi,4),	c ## D;\
	movzx	b ## B,		%edi;\
	mov	s3(%ebp,%edi,4),%esi;\
	movzx	a ## H,		%edi;\
	ror	$16,		a ## D;\
	xor	s1(%ebp,%edi,4),c ## D;\
	movzx	b ## H,		%edi;\
	ror	$16,		b ## D;\
	xor	(%ebp,%edi,4),	%esi;\
	movzx	a ## B,		%edi;\
	xor	s2(%ebp,%edi,4),c ## D;\
	movzx	b ## B,		%edi;\
	xor	s1(%ebp,%edi,4),%esi;\
	movzx	a ## H,		%edi;\
	ror	$15,		a ## D;\
	xor	s3(%ebp,%edi,4),c ## D;\
	movzx	b ## H,		%edi;\
	xor	s2(%ebp,%edi,4),%esi;\
	pop	%edi;\
	add	%esi,		c ## D;\
	add	c ## D,		%esi;\
	add	k+round(%ebp),	c ## D;\
	xor	%edi,		c ## D;\
	add	k+4+round(%ebp),%esi;\
	xor	%esi,		d ## D;\
	rol	$15,		d ## D;

/*
 * Final decryption round.
 *
 * a input register containing a
 * b input register containing b (rotated 16)
 * c input register containing c (already rol $1)
 * d input register containing d
 * round byte offset of this round's key pair inside k
 * operations on a and b are interleaved to increase performance
 * last round has different rotations for the output preparation:
 * a is rotated right by 16 twice (net unchanged) and d is only
 * rotated right by 1, so no register is left in a pre-rotated state.
 *
 * Expects %ebp to hold the context base address.
 * Clobbers %esi and %edi and temporarily uses one stack slot.
 */
#define decrypt_last_round(a,b,c,d,round)\
	push	c ## D;\
	movzx	a ## B,		%edi;\
	mov	(%ebp,%edi,4),	c ## D;\
	movzx	b ## B,		%edi;\
	mov	s3(%ebp,%edi,4),%esi;\
	movzx	a ## H,		%edi;\
	ror	$16,		a ## D;\
	xor	s1(%ebp,%edi,4),c ## D;\
	movzx	b ## H,		%edi;\
	ror	$16,		b ## D;\
	xor	(%ebp,%edi,4),	%esi;\
	movzx	a ## B,		%edi;\
	xor	s2(%ebp,%edi,4),c ## D;\
	movzx	b ## B,		%edi;\
	xor	s1(%ebp,%edi,4),%esi;\
	movzx	a ## H,		%edi;\
	ror	$16,		a ## D;\
	xor	s3(%ebp,%edi,4),c ## D;\
	movzx	b ## H,		%edi;\
	xor	s2(%ebp,%edi,4),%esi;\
	pop	%edi;\
	add	%esi,		c ## D;\
	add	c ## D,		%esi;\
	add	k+round(%ebp),	c ## D;\
	xor	%edi,		c ## D;\
	add	k+4+round(%ebp),%esi;\
	xor	%esi,		d ## D;\
	ror	$1,		d ## D;

/*
 * twofish_enc_blk - encrypt one 16-byte block.
 *
 * Stack arguments (offsets relative to the return address):
 *   ctx     (4)  address of the Twofish context (s0..s3, w, k)
 *   out_blk (8)  address the four result words are stored to
 *   in_blk  (12) address the four input words are loaded from
 *
 * Register use: %ebp = ctx base, %eax/%ebx/%ecx/%edx = a/b/c/d,
 * %esi/%edi = scratch.  %ebp, %ebx, %esi and %edi are saved on entry
 * and restored before returning.
 * Returns 1 in %eax.
 */
SYM_FUNC_START(twofish_enc_blk)
	push	%ebp			/* save registers according to calling convention*/
	push	%ebx
	push	%esi
	push	%edi

	mov	ctx + 16(%esp),	%ebp	/* abuse the base pointer: set new base
					 * pointer to the ctx address */
	mov	in_blk+16(%esp),%edi	/* input address in edi */

	mov	(%edi),		%eax
	mov	b_offset(%edi),	%ebx
	mov	c_offset(%edi),	%ecx
	mov	d_offset(%edi),	%edx
	input_whitening(%eax,%ebp,a_offset)
	ror	$16,	%eax		/* round macro expects a rotated by 16 */
	input_whitening(%ebx,%ebp,b_offset)
	input_whitening(%ecx,%ebp,c_offset)
	input_whitening(%edx,%ebp,d_offset)
	rol	$1,	%edx		/* round macro expects d already rol $1 */

	/* 16 rounds; the (a,b) and (c,d) register pairs swap roles each round */
	encrypt_round(R0,R1,R2,R3,0);
	encrypt_round(R2,R3,R0,R1,8);
	encrypt_round(R0,R1,R2,R3,2*8);
	encrypt_round(R2,R3,R0,R1,3*8);
	encrypt_round(R0,R1,R2,R3,4*8);
	encrypt_round(R2,R3,R0,R1,5*8);
	encrypt_round(R0,R1,R2,R3,6*8);
	encrypt_round(R2,R3,R0,R1,7*8);
	encrypt_round(R0,R1,R2,R3,8*8);
	encrypt_round(R2,R3,R0,R1,9*8);
	encrypt_round(R0,R1,R2,R3,10*8);
	encrypt_round(R2,R3,R0,R1,11*8);
	encrypt_round(R0,R1,R2,R3,12*8);
	encrypt_round(R2,R3,R0,R1,13*8);
	encrypt_round(R0,R1,R2,R3,14*8);
	encrypt_last_round(R2,R3,R0,R1,15*8);

	/*
	 * After the last round %eax/%ebx hold output words c/d and
	 * %ecx/%edx hold output words a/b, hence the swapped offsets.
	 */
	output_whitening(%eax,%ebp,c_offset)
	output_whitening(%ebx,%ebp,d_offset)
	output_whitening(%ecx,%ebp,a_offset)
	output_whitening(%edx,%ebp,b_offset)
	mov	out_blk+16(%esp),%edi;
	mov	%eax,		c_offset(%edi)
	mov	%ebx,		d_offset(%edi)
	mov	%ecx,		(%edi)
	mov	%edx,		b_offset(%edi)

	pop	%edi
	pop	%esi
	pop	%ebx
	pop	%ebp
	mov	$1,	%eax
	RET
SYM_FUNC_END(twofish_enc_blk)

/*
 * twofish_dec_blk - decrypt one 16-byte block.
 *
 * Stack arguments (offsets relative to the return address):
 *   ctx     (4)  address of the Twofish context (s0..s3, w, k)
 *   out_blk (8)  address the four result words are stored to
 *   in_blk  (12) address the four input words are loaded from
 *
 * Register use: %ebp = ctx base, %eax/%ebx/%ecx/%edx = a/b/c/d,
 * %esi/%edi = scratch.  %ebp, %ebx, %esi and %edi are saved on entry
 * and restored before returning.
 * Returns 1 in %eax.
 */
SYM_FUNC_START(twofish_dec_blk)
	push	%ebp			/* save registers according to calling convention*/
	push	%ebx
	push	%esi
	push	%edi


	mov	ctx + 16(%esp),	%ebp	/* abuse the base pointer: set new base
					 * pointer to the ctx address */
	mov	in_blk+16(%esp),%edi	/* input address in edi */

	mov	(%edi),		%eax
	mov	b_offset(%edi),	%ebx
	mov	c_offset(%edi),	%ecx
	mov	d_offset(%edi),	%edx
	/* decryption undoes the output whitening first */
	output_whitening(%eax,%ebp,a_offset)
	output_whitening(%ebx,%ebp,b_offset)
	ror	$16,	%ebx		/* round macro expects b rotated by 16 */
	output_whitening(%ecx,%ebp,c_offset)
	output_whitening(%edx,%ebp,d_offset)
	rol	$1,	%ecx		/* round macro expects c already rol $1 */

	/* 16 rounds with the round keys applied in reverse order */
	decrypt_round(R0,R1,R2,R3,15*8);
	decrypt_round(R2,R3,R0,R1,14*8);
	decrypt_round(R0,R1,R2,R3,13*8);
	decrypt_round(R2,R3,R0,R1,12*8);
	decrypt_round(R0,R1,R2,R3,11*8);
	decrypt_round(R2,R3,R0,R1,10*8);
	decrypt_round(R0,R1,R2,R3,9*8);
	decrypt_round(R2,R3,R0,R1,8*8);
	decrypt_round(R0,R1,R2,R3,7*8);
	decrypt_round(R2,R3,R0,R1,6*8);
	decrypt_round(R0,R1,R2,R3,5*8);
	decrypt_round(R2,R3,R0,R1,4*8);
	decrypt_round(R0,R1,R2,R3,3*8);
	decrypt_round(R2,R3,R0,R1,2*8);
	decrypt_round(R0,R1,R2,R3,1*8);
	decrypt_last_round(R2,R3,R0,R1,0);

	/*
	 * After the last round %eax/%ebx hold output words c/d and
	 * %ecx/%edx hold output words a/b, hence the swapped offsets.
	 */
	input_whitening(%eax,%ebp,c_offset)
	input_whitening(%ebx,%ebp,d_offset)
	input_whitening(%ecx,%ebp,a_offset)
	input_whitening(%edx,%ebp,b_offset)
	mov	out_blk+16(%esp),%edi;
	mov	%eax,		c_offset(%edi)
	mov	%ebx,		d_offset(%edi)
	mov	%ecx,		(%edi)
	mov	%edx,		b_offset(%edi)

	pop	%edi
	pop	%esi
	pop	%ebx
	pop	%ebp
	mov	$1,	%eax
	RET
SYM_FUNC_END(twofish_dec_blk)