/* SPDX-License-Identifier: GPL-2.0-or-later */
/***************************************************************************
*   Copyright (C) 2006 by Joachim Fritschi, <jfritschi@freenet.de>        *
*                                                                         *
***************************************************************************/

.file "twofish-i586-asm.S"
.text

#include <linux/linkage.h>
#include <asm/asm-offsets.h>

/*
 * Stack offsets of the arguments, relative to %esp at function entry
 * (the return address is at offset 0). Both entry points push four
 * registers before reading the arguments, hence the "+16" at the use sites.
 */

#define in_blk    12  /* address of the input block */
#define out_blk   8  /* address of the output block */
#define ctx       4  /* address of the Twofish context structure */

/* Byte offsets of the four 32-bit words inside a block */
#define a_offset	0
#define b_offset	4
#define c_offset	8
#define d_offset	12

/* Byte offsets of the members of the crypto context structure */

#define s0	0	/* S0 array, 256 words (as is each S array) */
#define s1	1024	/* S1 array */
#define s2	2048	/* S2 array */
#define s3	3072	/* S3 array */
#define w	4096	/* 8 whitening keys (words) */
#define k	4128	/* round keys 1-32 (words) */

/*
 * Register aliases to allow macro substitution: the round macros receive a
 * stem (R0..R3) and paste D, B or H onto it to name the 32-bit register,
 * its low byte (bits 0-7) or its second byte (bits 8-15).
 */

#define R0D    %eax
#define R0B    %al
#define R0H    %ah

#define R1D    %ebx
#define R1B    %bl
#define R1H    %bh

#define R2D    %ecx
#define R2B    %cl
#define R2H    %ch

#define R3D    %edx
#define R3B    %dl
#define R3H    %dh


/*
 * Performs input whitening: XORs the whitening key word located at byte
 * offset "offset" within the first four words of the w array into src.
 *   src     - 32-bit register holding the block word (modified in place)
 *   context - register holding the context address
 *   offset  - a_offset .. d_offset
 */
#define input_whitening(src,context,offset)\
	xor	w+offset(context),	src;

/*
 * Performs output whitening: same as input_whitening, but the key word is
 * taken 16 bytes further into the w array, i.e. from its second group of
 * four words.
 *   src     - 32-bit register holding the block word (modified in place)
 *   context - register holding the context address
 *   offset  - a_offset .. d_offset
 */
#define output_whitening(src,context,offset)\
	xor	w+16+offset(context),	src;

/*
 * encrypt_round(a,b,c,d,round) - one encryption round
 *
 * a, b, c, d are register stems (R0..R3); round is the byte offset of this
 * round's pair of key words inside the k array. %ebp must hold the context
 * address.
 *
 * On entry:
 *   a  input register containing a (rotated 16)
 *   b  input register containing b
 *   c  input register containing c
 *   d  input register containing d (already rol $1)
 *
 * With a0..a3 and b0..b3 the bytes of the unrotated a and b, least
 * significant first, the macro computes
 *   ta = s0[a0] ^ s1[a1] ^ s2[a2] ^ s3[a3]		(built in %esi)
 *   tb = s1[b0] ^ s2[b1] ^ s3[b2] ^ s0[b3]		(built in d)
 *   c  = rol(c ^ (ta + tb + word at k+round), 15)
 *   d  = old d ^ (ta + 2*tb + word at k+4+round)
 *
 * On exit a is unrotated again (ror 16) and b has been rotated right by
 * 16 + 15 = 31 bits, i.e. left by 1. The rol $15 on c equals a rotate
 * right by 1 followed by a rotate by 16. The registers are therefore
 * already in the form this macro expects when the next round is invoked
 * with the (a,b) and (c,d) pairs swapped.
 *
 * Clobbers %esi, %edi and the flags; old d is kept on the stack meanwhile.
 * Operations on a and b are interleaved to increase performance.
 */
#define encrypt_round(a,b,c,d,round)\
	push	d ## D;				/* save old d */\
	movzx	b ## B,		%edi;\
	mov	s1(%ebp,%edi,4),d ## D;		/* tb  = s1[b0] */\
	movzx	a ## B,		%edi;\
	mov	s2(%ebp,%edi,4),%esi;		/* ta  = s2[a2] */\
	movzx	b ## H,		%edi;\
	ror	$16,		b ## D;		/* bring b2, b3 into the low bytes */\
	xor	s2(%ebp,%edi,4),d ## D;		/* tb ^= s2[b1] */\
	movzx	a ## H,		%edi;\
	ror	$16,		a ## D;		/* bring a0, a1 into the low bytes */\
	xor	s3(%ebp,%edi,4),%esi;		/* ta ^= s3[a3] */\
	movzx	b ## B,		%edi;\
	xor	s3(%ebp,%edi,4),d ## D;		/* tb ^= s3[b2] */\
	movzx	a ## B,		%edi;\
	xor	(%ebp,%edi,4),	%esi;		/* ta ^= s0[a0] */\
	movzx	b ## H,		%edi;\
	ror	$15,		b ## D;		/* b is now rotated left by 1 overall */\
	xor	(%ebp,%edi,4),	d ## D;		/* tb ^= s0[b3] */\
	movzx	a ## H,		%edi;\
	xor	s1(%ebp,%edi,4),%esi;		/* ta ^= s1[a1] */\
	pop	%edi;				/* edi = old d */\
	add	d ## D,		%esi;		/* esi = ta + tb */\
	add	%esi,		d ## D;		/* d   = ta + 2*tb */\
	add	k+round(%ebp),	%esi;\
	xor	%esi,		c ## D;\
	rol	$15,		c ## D;\
	add	k+4+round(%ebp),d ## D;\
	xor	%edi,		d ## D;

/*
 * encrypt_last_round(a,b,c,d,round) - final encryption round
 *
 * Same arguments, entry state and table lookups as encrypt_round:
 *   a  input register containing a (rotated 16)
 *   b  input register containing b
 *   c  input register containing c
 *   d  input register containing d (already rol $1)
 *
 * With a0..a3 and b0..b3 the bytes of the unrotated a and b, least
 * significant first:
 *   ta = s0[a0] ^ s1[a1] ^ s2[a2] ^ s3[a3]		(built in %esi)
 *   tb = s1[b0] ^ s2[b1] ^ s3[b2] ^ s0[b3]		(built in d)
 *   c  = ror(c ^ (ta + tb + word at k+round), 1)
 *   d  = old d ^ (ta + 2*tb + word at k+4+round)
 *
 * The last round has different rotations for the output preparation:
 * b is rotated by 16 twice and so ends up unchanged, a is unrotated, and c
 * only gets the rotate right by 1. No register is left pre-rotated for a
 * following round.
 *
 * Clobbers %esi, %edi and the flags; old d is kept on the stack meanwhile.
 * Operations on a and b are interleaved to increase performance.
 */
#define encrypt_last_round(a,b,c,d,round)\
	push	d ## D;				/* save old d */\
	movzx	b ## B,		%edi;\
	mov	s1(%ebp,%edi,4),d ## D;		/* tb  = s1[b0] */\
	movzx	a ## B,		%edi;\
	mov	s2(%ebp,%edi,4),%esi;		/* ta  = s2[a2] */\
	movzx	b ## H,		%edi;\
	ror	$16,		b ## D;		/* bring b2, b3 into the low bytes */\
	xor	s2(%ebp,%edi,4),d ## D;		/* tb ^= s2[b1] */\
	movzx	a ## H,		%edi;\
	ror	$16,		a ## D;		/* bring a0, a1 into the low bytes */\
	xor	s3(%ebp,%edi,4),%esi;		/* ta ^= s3[a3] */\
	movzx	b ## B,		%edi;\
	xor	s3(%ebp,%edi,4),d ## D;		/* tb ^= s3[b2] */\
	movzx	a ## B,		%edi;\
	xor	(%ebp,%edi,4),	%esi;		/* ta ^= s0[a0] */\
	movzx	b ## H,		%edi;\
	ror	$16,		b ## D;		/* second ror 16: b is unrotated again */\
	xor	(%ebp,%edi,4),	d ## D;		/* tb ^= s0[b3] */\
	movzx	a ## H,		%edi;\
	xor	s1(%ebp,%edi,4),%esi;		/* ta ^= s1[a1] */\
	pop	%edi;				/* edi = old d */\
	add	d ## D,		%esi;		/* esi = ta + tb */\
	add	%esi,		d ## D;		/* d   = ta + 2*tb */\
	add	k+round(%ebp),	%esi;\
	xor	%esi,		c ## D;\
	ror	$1,		c ## D;\
	add	k+4+round(%ebp),d ## D;\
	xor	%edi,		d ## D;

/*
 * decrypt_round(a,b,c,d,round) - one decryption round
 *
 * a, b, c, d are register stems (R0..R3); round is the byte offset of this
 * round's pair of key words inside the k array. %ebp must hold the context
 * address.
 *
 * On entry:
 *   a  input register containing a
 *   b  input register containing b (rotated 16)
 *   c  input register containing c (already rol $1)
 *   d  input register containing d
 *
 * With a0..a3 and b0..b3 the bytes of the unrotated a and b, least
 * significant first, the macro computes
 *   ta = s0[a0] ^ s1[a1] ^ s2[a2] ^ s3[a3]		(built in c)
 *   tb = s1[b0] ^ s2[b1] ^ s3[b2] ^ s0[b3]		(built in %esi)
 *   c  = old c ^ (ta + tb + word at k+round)
 *   d  = rol(d ^ (ta + 2*tb + word at k+4+round), 15)
 *
 * On exit b is unrotated again (ror 16) and a has been rotated right by
 * 16 + 15 = 31 bits, i.e. left by 1. The rol $15 on d equals a rotate
 * right by 1 followed by a rotate by 16. The registers are therefore
 * already in the form this macro expects when the next round is invoked
 * with the (a,b) and (c,d) pairs swapped.
 *
 * Clobbers %esi, %edi and the flags; old c is kept on the stack meanwhile.
 * Operations on a and b are interleaved to increase performance.
 */
#define decrypt_round(a,b,c,d,round)\
	push	c ## D;				/* save old c */\
	movzx	a ## B,		%edi;\
	mov	(%ebp,%edi,4),	c ## D;		/* ta  = s0[a0] */\
	movzx	b ## B,		%edi;\
	mov	s3(%ebp,%edi,4),%esi;		/* tb  = s3[b2] */\
	movzx	a ## H,		%edi;\
	ror	$16,		a ## D;		/* bring a2, a3 into the low bytes */\
	xor	s1(%ebp,%edi,4),c ## D;		/* ta ^= s1[a1] */\
	movzx	b ## H,		%edi;\
	ror	$16,		b ## D;		/* bring b0, b1 into the low bytes */\
	xor	(%ebp,%edi,4),	%esi;		/* tb ^= s0[b3] */\
	movzx	a ## B,		%edi;\
	xor	s2(%ebp,%edi,4),c ## D;		/* ta ^= s2[a2] */\
	movzx	b ## B,		%edi;\
	xor	s1(%ebp,%edi,4),%esi;		/* tb ^= s1[b0] */\
	movzx	a ## H,		%edi;\
	ror	$15,		a ## D;		/* a is now rotated left by 1 overall */\
	xor	s3(%ebp,%edi,4),c ## D;		/* ta ^= s3[a3] */\
	movzx	b ## H,		%edi;\
	xor	s2(%ebp,%edi,4),%esi;		/* tb ^= s2[b1] */\
	pop	%edi;				/* edi = old c */\
	add	%esi,		c ## D;		/* c   = ta + tb */\
	add	c ## D,		%esi;		/* esi = ta + 2*tb */\
	add	k+round(%ebp),	c ## D;\
	xor	%edi,		c ## D;\
	add	k+4+round(%ebp),%esi;\
	xor	%esi,		d ## D;\
	rol	$15,		d ## D;

/*
 * decrypt_last_round(a,b,c,d,round) - final decryption round
 *
 * Same arguments, entry state and table lookups as decrypt_round:
 *   a  input register containing a
 *   b  input register containing b (rotated 16)
 *   c  input register containing c (already rol $1)
 *   d  input register containing d
 *
 * With a0..a3 and b0..b3 the bytes of the unrotated a and b, least
 * significant first:
 *   ta = s0[a0] ^ s1[a1] ^ s2[a2] ^ s3[a3]		(built in c)
 *   tb = s1[b0] ^ s2[b1] ^ s3[b2] ^ s0[b3]		(built in %esi)
 *   c  = old c ^ (ta + tb + word at k+round)
 *   d  = ror(d ^ (ta + 2*tb + word at k+4+round), 1)
 *
 * The last round has different rotations for the output preparation:
 * a is rotated by 16 twice and so ends up unchanged, b is unrotated, and d
 * only gets the rotate right by 1. No register is left pre-rotated for a
 * following round.
 *
 * Clobbers %esi, %edi and the flags; old c is kept on the stack meanwhile.
 * Operations on a and b are interleaved to increase performance.
 */
#define decrypt_last_round(a,b,c,d,round)\
	push	c ## D;				/* save old c */\
	movzx	a ## B,		%edi;\
	mov	(%ebp,%edi,4),	c ## D;		/* ta  = s0[a0] */\
	movzx	b ## B,		%edi;\
	mov	s3(%ebp,%edi,4),%esi;		/* tb  = s3[b2] */\
	movzx	a ## H,		%edi;\
	ror	$16,		a ## D;		/* bring a2, a3 into the low bytes */\
	xor	s1(%ebp,%edi,4),c ## D;		/* ta ^= s1[a1] */\
	movzx	b ## H,		%edi;\
	ror	$16,		b ## D;		/* bring b0, b1 into the low bytes */\
	xor	(%ebp,%edi,4),	%esi;		/* tb ^= s0[b3] */\
	movzx	a ## B,		%edi;\
	xor	s2(%ebp,%edi,4),c ## D;		/* ta ^= s2[a2] */\
	movzx	b ## B,		%edi;\
	xor	s1(%ebp,%edi,4),%esi;		/* tb ^= s1[b0] */\
	movzx	a ## H,		%edi;\
	ror	$16,		a ## D;		/* second ror 16: a is unrotated again */\
	xor	s3(%ebp,%edi,4),c ## D;		/* ta ^= s3[a3] */\
	movzx	b ## H,		%edi;\
	xor	s2(%ebp,%edi,4),%esi;		/* tb ^= s2[b1] */\
	pop	%edi;				/* edi = old c */\
	add	%esi,		c ## D;		/* c   = ta + tb */\
	add	c ## D,		%esi;		/* esi = ta + 2*tb */\
	add	k+round(%ebp),	c ## D;\
	xor	%edi,		c ## D;\
	add	k+4+round(%ebp),%esi;\
	xor	%esi,		d ## D;\
	ror	$1,		d ## D;

/*
 * twofish_enc_blk - encrypt one 16-byte block
 *
 * Arguments are read from the stack (offsets ctx, out_blk, in_blk relative
 * to %esp at entry):
 *   ctx     - address of the context (S arrays, whitening keys, round keys)
 *   out_blk - address the four result words are stored to
 *   in_blk  - address the four input words are loaded from
 *
 * Returns 1 in %eax. %ebp, %ebx, %esi and %edi are saved and restored;
 * %ecx and %edx are clobbered. The round macros push and pop one word each,
 * so %esp is the same before and after the rounds and the "+16" argument
 * offsets stay valid.
 */
SYM_FUNC_START(twofish_enc_blk)
	push	%ebp			/* save registers according to calling convention */
	push    %ebx
	push    %esi
	push    %edi

	mov	ctx + 16(%esp),	%ebp	/* the base pointer is used as a plain
					 * register holding the ctx address;
					 * +16 skips the four saved registers */
	mov     in_blk+16(%esp),%edi	/* input address in edi */

	/* load a, b, c, d and apply the input whitening keys */
	mov	(%edi),		%eax
	mov	b_offset(%edi),	%ebx
	mov	c_offset(%edi),	%ecx
	mov	d_offset(%edi),	%edx
	input_whitening(%eax,%ebp,a_offset)
	ror	$16,	%eax		/* encrypt_round expects a rotated 16 */
	input_whitening(%ebx,%ebp,b_offset)
	input_whitening(%ecx,%ebp,c_offset)
	input_whitening(%edx,%ebp,d_offset)
	rol	$1,	%edx		/* encrypt_round expects d already rol 1 */

	/*
	 * 16 rounds; the register pairs swap roles every round and each
	 * round uses the next 8 bytes (two words) of the k array.
	 */
	encrypt_round(R0,R1,R2,R3,0);
	encrypt_round(R2,R3,R0,R1,8);
	encrypt_round(R0,R1,R2,R3,2*8);
	encrypt_round(R2,R3,R0,R1,3*8);
	encrypt_round(R0,R1,R2,R3,4*8);
	encrypt_round(R2,R3,R0,R1,5*8);
	encrypt_round(R0,R1,R2,R3,6*8);
	encrypt_round(R2,R3,R0,R1,7*8);
	encrypt_round(R0,R1,R2,R3,8*8);
	encrypt_round(R2,R3,R0,R1,9*8);
	encrypt_round(R0,R1,R2,R3,10*8);
	encrypt_round(R2,R3,R0,R1,11*8);
	encrypt_round(R0,R1,R2,R3,12*8);
	encrypt_round(R2,R3,R0,R1,13*8);
	encrypt_round(R0,R1,R2,R3,14*8);
	encrypt_last_round(R2,R3,R0,R1,15*8);

	/*
	 * Output whitening and store: %eax/%ebx become output words 2/3
	 * and %ecx/%edx become output words 0/1.
	 */
	output_whitening(%eax,%ebp,c_offset)
	output_whitening(%ebx,%ebp,d_offset)
	output_whitening(%ecx,%ebp,a_offset)
	output_whitening(%edx,%ebp,b_offset)
	mov	out_blk+16(%esp),%edi;
	mov	%eax,		c_offset(%edi)
	mov	%ebx,		d_offset(%edi)
	mov	%ecx,		(%edi)
	mov	%edx,		b_offset(%edi)

	pop	%edi
	pop	%esi
	pop	%ebx
	pop	%ebp
	mov	$1,	%eax		/* return value */
	RET
SYM_FUNC_END(twofish_enc_blk)

/*
 * twofish_dec_blk - decrypt one 16-byte block
 *
 * Arguments are read from the stack (offsets ctx, out_blk, in_blk relative
 * to %esp at entry):
 *   ctx     - address of the context (S arrays, whitening keys, round keys)
 *   out_blk - address the four result words are stored to
 *   in_blk  - address the four input words are loaded from
 *
 * Returns 1 in %eax. %ebp, %ebx, %esi and %edi are saved and restored;
 * %ecx and %edx are clobbered. The round macros push and pop one word each,
 * so %esp is the same before and after the rounds and the "+16" argument
 * offsets stay valid.
 */
SYM_FUNC_START(twofish_dec_blk)
	push	%ebp			/* save registers according to calling convention */
	push    %ebx
	push    %esi
	push    %edi


	mov	ctx + 16(%esp),	%ebp	/* the base pointer is used as a plain
					 * register holding the ctx address;
					 * +16 skips the four saved registers */
	mov     in_blk+16(%esp),%edi	/* input address in edi */

	/*
	 * load a, b, c, d and remove the output whitening, i.e. the
	 * second group of four words of the w array
	 */
	mov	(%edi),		%eax
	mov	b_offset(%edi),	%ebx
	mov	c_offset(%edi),	%ecx
	mov	d_offset(%edi),	%edx
	output_whitening(%eax,%ebp,a_offset)
	output_whitening(%ebx,%ebp,b_offset)
	ror	$16,	%ebx		/* decrypt_round expects b rotated 16 */
	output_whitening(%ecx,%ebp,c_offset)
	output_whitening(%edx,%ebp,d_offset)
	rol	$1,	%ecx		/* decrypt_round expects c already rol 1 */

	/*
	 * 16 rounds with the key offsets in descending order; the register
	 * pairs swap roles every round.
	 */
	decrypt_round(R0,R1,R2,R3,15*8);
	decrypt_round(R2,R3,R0,R1,14*8);
	decrypt_round(R0,R1,R2,R3,13*8);
	decrypt_round(R2,R3,R0,R1,12*8);
	decrypt_round(R0,R1,R2,R3,11*8);
	decrypt_round(R2,R3,R0,R1,10*8);
	decrypt_round(R0,R1,R2,R3,9*8);
	decrypt_round(R2,R3,R0,R1,8*8);
	decrypt_round(R0,R1,R2,R3,7*8);
	decrypt_round(R2,R3,R0,R1,6*8);
	decrypt_round(R0,R1,R2,R3,5*8);
	decrypt_round(R2,R3,R0,R1,4*8);
	decrypt_round(R0,R1,R2,R3,3*8);
	decrypt_round(R2,R3,R0,R1,2*8);
	decrypt_round(R0,R1,R2,R3,1*8);
	decrypt_last_round(R2,R3,R0,R1,0);

	/*
	 * Remove the input whitening and store: %eax/%ebx become output
	 * words 2/3 and %ecx/%edx become output words 0/1.
	 */
	input_whitening(%eax,%ebp,c_offset)
	input_whitening(%ebx,%ebp,d_offset)
	input_whitening(%ecx,%ebp,a_offset)
	input_whitening(%edx,%ebp,b_offset)
	mov	out_blk+16(%esp),%edi;
	mov	%eax,		c_offset(%edi)
	mov	%ebx,		d_offset(%edi)
	mov	%ecx,		(%edi)
	mov	%edx,		b_offset(%edi)

	pop	%edi
	pop	%esi
	pop	%ebx
	pop	%ebp
	mov	$1,	%eax		/* return value */
	RET
SYM_FUNC_END(twofish_dec_blk)