/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
 * Copyright (C) 2016-2018 René van Dorst <opensource@vdorst.com>. All Rights Reserved.
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */

/* Mask that rounds a byte count (< 64) down to a whole number of 32-bit words. */
#define MASK_U32		0x3c
#define CHACHA20_BLOCK_SIZE	64
/* Stack frame: room for the eight saved registers $s0-$s7. */
#define STACK_SIZE		32

/* Working copy of the 16 state words, one register per word. */
#define X0	$t0
#define X1	$t1
#define X2	$t2
#define X3	$t3
#define X4	$t4
#define X5	$t5
#define X6	$t6
#define X7	$t7
#define X8	$t8
#define X9	$t9
#define X10	$v1
#define X11	$s6
#define X12	$s5
#define X13	$s4
#define X14	$s3
#define X15	$s2
/* Use regs which are overwritten on exit for Tx so we don't leak clear data. */
#define T0	$s1
#define T1	$s0
#define T(n)	T ## n
#define X(n)	X ## n

/* Input arguments */
#define STATE		$a0
#define OUT		$a1
#define IN		$a2
#define BYTES		$a3

/* Output argument */
/* NONCE[0] is kept in a register and not in memory.
 * The value in memory is left untouched while blocks are being processed.
 * Must be incremented every loop iteration.
 */
#define NONCE_0		$v0

/* SAVED_X and SAVED_CA are set in the jump table.
 * Use regs which are overwritten on exit so that we don't leak clear data.
 * They are used for handling the trailing bytes which are not a multiple of 4.
 */
#define SAVED_X		X15
#define SAVED_CA	$s7

/* Shares $s7 with SAVED_CA; SAVED_CA is only loaded on the partial-block path. */
#define IS_UNALIGNED	$s7

/*
 * Endian helpers.
 * MSB/LSB are the byte offsets used with lwl/swl and lwr/swr.
 * CPU_TO_LE32 byte-swaps a register on big-endian and is empty on
 * little-endian. ROTR and ROTx are used by the trailing-byte code to bring
 * successive bytes of SAVED_X into the low 8 bits.
 */
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define MSB 0
#define LSB 3
#define ROTx rotl
#define ROTR(n) rotr n, 24
#define	CPU_TO_LE32(n) \
	wsbh	n; \
	rotr	n, 16;
#else
#define MSB 3
#define LSB 0
#define ROTx rotr
#define CPU_TO_LE32(n)
#define ROTR(n)
#endif

/* Expand x(n) for every state word, ascending. */
#define FOR_EACH_WORD(x) \
	x( 0); \
	x( 1); \
	x( 2); \
	x( 3); \
	x( 4); \
	x( 5); \
	x( 6); \
	x( 7); \
	x( 8); \
	x( 9); \
	x(10); \
	x(11); \
	x(12); \
	x(13); \
	x(14); \
	x(15);

/* Expand x(n) for every state word, descending. */
#define FOR_EACH_WORD_REV(x) \
	x(15); \
	x(14); \
	x(13); \
	x(12); \
	x(11); \
	x(10); \
	x( 9); \
	x( 8); \
	x( 7); \
	x( 6); \
	x( 5); \
	x( 4); \
	x( 3); \
	x( 2); \
	x( 1); \
	x( 0);

/* Preprocessor-time n + 1, used to build the entry labels of STORE_*. */
#define PLUS_ONE_0	1
#define PLUS_ONE_1	2
#define PLUS_ONE_2	3
#define PLUS_ONE_3	4
#define PLUS_ONE_4	5
#define PLUS_ONE_5	6
#define PLUS_ONE_6	7
#define PLUS_ONE_7	8
#define PLUS_ONE_8	9
#define PLUS_ONE_9	10
#define PLUS_ONE_10	11
#define PLUS_ONE_11	12
#define PLUS_ONE_12	13
#define PLUS_ONE_13	14
#define PLUS_ONE_14	15
#define PLUS_ONE_15	16
#define PLUS_ONE(x)	PLUS_ONE_ ## x
#define _CONCAT3(a,b,c)	a ## b ## c
#define CONCAT3(a,b,c)	_CONCAT3(a,b,c)

/* Produce output word x through unaligned accesses.
 * Emits the label .Lchacha_mips_xor_unaligned_<x+1>_b, then adds the input
 * state word to Xx (NONCE_0 is used for word 12 instead of the copy in
 * memory), converts to little-endian, XORs with the word read from IN using
 * lwl/lwr and writes the result to OUT using swl/swr.
 * Expanded with FOR_EACH_WORD_REV, so entering at label _<n>_b processes
 * words n-1 down to 0.
 * Clobbers T0 and T1.
 */
#define STORE_UNALIGNED(x) \
CONCAT3(.Lchacha_mips_xor_unaligned_, PLUS_ONE(x), _b: ;) \
	.if (x != 12); \
		lw	T0, (x*4)(STATE); \
	.endif; \
	lwl	T1, (x*4)+MSB ## (IN); \
	lwr	T1, (x*4)+LSB ## (IN); \
	.if (x == 12); \
		addu	X ## x, NONCE_0; \
	.else; \
		addu	X ## x, T0; \
	.endif; \
	CPU_TO_LE32(X ## x); \
	xor	X ## x, T1; \
	swl	X ## x, (x*4)+MSB ## (OUT); \
	swr	X ## x, (x*4)+LSB ## (OUT);

/* Same as STORE_UNALIGNED, but IN and OUT are accessed with plain lw/sw.
 * Emits the label .Lchacha_mips_xor_aligned_<x+1>_b.
 * Clobbers T0 and T1.
 */
#define STORE_ALIGNED(x) \
CONCAT3(.Lchacha_mips_xor_aligned_, PLUS_ONE(x), _b: ;) \
	.if (x != 12); \
		lw	T0, (x*4)(STATE); \
	.endif; \
	lw	T1, (x*4) ## (IN); \
	.if (x == 12); \
		addu	X ## x, NONCE_0; \
	.else; \
		addu	X ## x, T0; \
	.endif; \
	CPU_TO_LE32(X ## x); \
	xor	X ## x, T1; \
	sw	X ## x, (x*4) ## (OUT);

/* Jump table macro.
 * Used for setup and handling the last bytes, which are not a multiple of 4.
 * X15 is free to store Xn.
 * Every jump table entry must be equal in size: each entry is one branch plus
 * one instruction in its delay slot (hence .set noreorder). The delay slot
 * computes SAVED_X = Xn + input state word n, where the input word is
 * SAVED_CA, or NONCE_0 for word 12.
 */
#define JMPTBL_ALIGNED(x) \
.Lchacha_mips_jmptbl_aligned_ ## x: ; \
	.set	noreorder; \
	b	.Lchacha_mips_xor_aligned_ ## x ## _b; \
	.if (x == 12); \
		addu	SAVED_X, X ## x, NONCE_0; \
	.else; \
		addu	SAVED_X, X ## x, SAVED_CA; \
	.endif; \
	.set	reorder

/* Jump table entry for the unaligned store chain; same layout as above. */
#define JMPTBL_UNALIGNED(x) \
.Lchacha_mips_jmptbl_unaligned_ ## x: ; \
	.set	noreorder; \
	b	.Lchacha_mips_xor_unaligned_ ## x ## _b; \
	.if (x == 12); \
		addu	SAVED_X, X ## x, NONCE_0; \
	.else; \
		addu	SAVED_X, X ## x, SAVED_CA; \
	.endif; \
	.set	reorder

/* One add/xor/rotate step applied to four word triples at once:
 *   X(A..D) += X(K..N);  X(V..Z) ^= X(A..D);  X(V..Z) = rotl(X(V..Z), S)
 */
#define AXR(A, B, C, D,  K, L, M, N,  V, W, Y, Z,  S) \
	addu	X(A), X(K); \
	addu	X(B), X(L); \
	addu	X(C), X(M); \
	addu	X(D), X(N); \
	xor	X(V), X(A); \
	xor	X(W), X(B); \
	xor	X(Y), X(C); \
	xor	X(Z), X(D); \
	rotl	X(V), S;    \
	rotl	X(W), S;    \
	rotl	X(Y), S;    \
	rotl	X(Z), S;

.text
.set	reorder
.set	noat
.globl	chacha_crypt_arch
.ent	chacha_crypt_arch
/*
 * chacha_crypt_arch
 *
 * XORs BYTES bytes from IN with generated keystream and writes them to OUT.
 *
 * In:
 *   STATE ($a0)  pointer to the 16-word input state; word 12 (offset 48) is
 *                loaded into NONCE_0, incremented once per block and written
 *                back on exit
 *   OUT   ($a1)  destination buffer
 *   IN    ($a2)  source buffer
 *   BYTES ($a3)  number of bytes to process; 0 returns immediately
 *   16($sp)      number of rounds, read from the caller's frame on entry
 *                NOTE(review): presumably the o32 fifth-argument slot - confirm.
 *                The round loop counts down by 2 and exits at exactly 0.
 *
 * $s0-$s7 are saved on entry and restored on exit. $at is used as a scratch
 * register (.set noat). IN, OUT and BYTES are modified.
 */
chacha_crypt_arch:
	.frame	$sp, STACK_SIZE, $ra

	/* Load number of rounds */
	lw	$at, 16($sp)

	addiu	$sp, -STACK_SIZE

	/* Nothing to do when BYTES == 0. */
	beqz	BYTES, .Lchacha_mips_end

	lw	NONCE_0, 48(STATE)

	/* Save s0-s7 */
	sw	$s0,  0($sp)
	sw	$s1,  4($sp)
	sw	$s2,  8($sp)
	sw	$s3, 12($sp)
	sw	$s4, 16($sp)
	sw	$s5, 20($sp)
	sw	$s6, 24($sp)
	sw	$s7, 28($sp)

	/* Test IN or OUT is unaligned.
	 * IS_UNALIGNED = ( IN | OUT ) & 0x00000003
	 */
	or	IS_UNALIGNED, IN, OUT
	andi	IS_UNALIGNED, 0x3

	b	.Lchacha_rounds_start

.align 4
.Loop_chacha_rounds:
	/* Advance to the next block. */
	addiu	IN,  CHACHA20_BLOCK_SIZE
	addiu	OUT, CHACHA20_BLOCK_SIZE
	addiu	NONCE_0, 1

.Lchacha_rounds_start:
	/* Load a fresh working copy of the state; word 12 comes from NONCE_0. */
	lw	X0,  0(STATE)
	lw	X1,  4(STATE)
	lw	X2,  8(STATE)
	lw	X3,  12(STATE)

	lw	X4,  16(STATE)
	lw	X5,  20(STATE)
	lw	X6,  24(STATE)
	lw	X7,  28(STATE)
	lw	X8,  32(STATE)
	lw	X9,  36(STATE)
	lw	X10, 40(STATE)
	lw	X11, 44(STATE)

	move	X12, NONCE_0
	lw	X13, 52(STATE)
	lw	X14, 56(STATE)
	lw	X15, 60(STATE)

.Loop_chacha_xor_rounds:
	/* $at = rounds remaining; each iteration performs two rounds. */
	addiu	$at, -2
	AXR( 0, 1, 2, 3,  4, 5, 6, 7, 12,13,14,15, 16);
	AXR( 8, 9,10,11, 12,13,14,15,  4, 5, 6, 7, 12);
	AXR( 0, 1, 2, 3,  4, 5, 6, 7, 12,13,14,15,  8);
	AXR( 8, 9,10,11, 12,13,14,15,  4, 5, 6, 7,  7);
	AXR( 0, 1, 2, 3,  5, 6, 7, 4, 15,12,13,14, 16);
	AXR(10,11, 8, 9, 15,12,13,14,  5, 6, 7, 4, 12);
	AXR( 0, 1, 2, 3,  5, 6, 7, 4, 15,12,13,14,  8);
	AXR(10,11, 8, 9, 15,12,13,14,  5, 6, 7, 4,  7);
	bnez	$at, .Loop_chacha_xor_rounds

	/* BYTES becomes negative when less than a full block remains. */
	addiu	BYTES, -(CHACHA20_BLOCK_SIZE)

	/* Is data src/dst unaligned? Jump */
	bnez	IS_UNALIGNED, .Loop_chacha_unaligned

	/* Set number rounds here to fill delayslot. */
	lw	$at, (STACK_SIZE+16)($sp)

	/* BYTES < 0, it has no full block. */
	bltz	BYTES, .Lchacha_mips_no_full_block_aligned

	FOR_EACH_WORD_REV(STORE_ALIGNED)

	/* BYTES > 0? Loop again. */
	bgtz	BYTES, .Loop_chacha_rounds

	/* Place this here to fill delay slot */
	addiu	NONCE_0, 1

	/* BYTES < 0? Handle last bytes */
	bltz	BYTES, .Lchacha_mips_xor_bytes

.Lchacha_mips_xor_done:
	/* Restore used registers */
	lw	$s0,  0($sp)
	lw	$s1,  4($sp)
	lw	$s2,  8($sp)
	lw	$s3, 12($sp)
	lw	$s4, 16($sp)
	lw	$s5, 20($sp)
	lw	$s6, 24($sp)
	lw	$s7, 28($sp)

	/* Write NONCE_0 back to right location in state */
	sw	NONCE_0, 48(STATE)

.Lchacha_mips_end:
	addiu	$sp, STACK_SIZE
	jr	$ra

.Lchacha_mips_no_full_block_aligned:
	/* Restore the offset on BYTES */
	addiu	BYTES, CHACHA20_BLOCK_SIZE

	/* Get number of full WORDS (as a byte offset, a multiple of 4) */
	andi	$at, BYTES, MASK_U32

	/* Load upper half of jump table addr */
	lui	T0, %hi(.Lchacha_mips_jmptbl_aligned_0)

	/* Calculate lower half jump table offset.
	 * Inserting $at at bit 1 doubles it: 8 bytes of table per word.
	 */
	ins	T0, $at, 1, 6

	/* Add offset to STATE */
	addu	T1, STATE, $at

	/* Add lower half jump table addr */
	addiu	T0, %lo(.Lchacha_mips_jmptbl_aligned_0)

	/* Read value from STATE.
	 * This overwrites IS_UNALIGNED (same register); it is not tested
	 * again because a partial block is always the last one.
	 */
	lw	SAVED_CA, 0(T1)

	/* Store remaining bytecounter as negative value */
	subu	BYTES, $at, BYTES

	jr	T0

	/* Jump table */
	FOR_EACH_WORD(JMPTBL_ALIGNED)


.Loop_chacha_unaligned:
	/* Set number rounds here to fill delayslot. */
	lw	$at, (STACK_SIZE+16)($sp)

	/* BYTES < 0, it has no full block. */
	bltz	BYTES, .Lchacha_mips_no_full_block_unaligned

	FOR_EACH_WORD_REV(STORE_UNALIGNED)

	/* BYTES > 0? Loop again. */
	bgtz	BYTES, .Loop_chacha_rounds

	/* Write NONCE_0 back to right location in state.
	 * NOTE(review): every path from here reaches .Lchacha_mips_xor_done,
	 * which stores NONCE_0 again after the increment below, so this
	 * store appears to be redundant.
	 */
	sw	NONCE_0, 48(STATE)

	.set noreorder
	/* Fall through to byte handling */
	bgez	BYTES, .Lchacha_mips_xor_done
.Lchacha_mips_xor_unaligned_0_b:
.Lchacha_mips_xor_aligned_0_b:
	/* Place this here to fill delay slot.
	 * Jump table entry 0 of both tables also lands here, when the
	 * partial block has no full word.
	 */
	addiu	NONCE_0, 1
	.set reorder

.Lchacha_mips_xor_bytes:
	/* $at = byte offset of the partial word, BYTES = -(trailing bytes).
	 * sb stores the low byte of T1, so SAVED_X is rotated to bring each
	 * successive byte into the low 8 bits.
	 */
	addu	IN, $at
	addu	OUT, $at
	/* First byte */
	lbu	T1, 0(IN)
	addiu	$at, BYTES, 1
	CPU_TO_LE32(SAVED_X)
	ROTR(SAVED_X)
	xor	T1, SAVED_X
	sb	T1, 0(OUT)
	beqz	$at, .Lchacha_mips_xor_done
	/* Second byte */
	lbu	T1, 1(IN)
	addiu	$at, BYTES, 2
	ROTx	SAVED_X, 8
	xor	T1, SAVED_X
	sb	T1, 1(OUT)
	beqz	$at, .Lchacha_mips_xor_done
	/* Third byte */
	lbu	T1, 2(IN)
	ROTx	SAVED_X, 8
	xor	T1, SAVED_X
	sb	T1, 2(OUT)
	b	.Lchacha_mips_xor_done

.Lchacha_mips_no_full_block_unaligned:
	/* Restore the offset on BYTES */
	addiu	BYTES, CHACHA20_BLOCK_SIZE

	/* Get number of full WORDS (as a byte offset, a multiple of 4) */
	andi	$at, BYTES, MASK_U32

	/* Load upper half of jump table addr */
	lui	T0, %hi(.Lchacha_mips_jmptbl_unaligned_0)

	/* Calculate lower half jump table offset.
	 * Inserting $at at bit 1 doubles it: 8 bytes of table per word.
	 */
	ins	T0, $at, 1, 6

	/* Add offset to STATE */
	addu	T1, STATE, $at

	/* Add lower half jump table addr */
	addiu	T0, %lo(.Lchacha_mips_jmptbl_unaligned_0)

	/* Read value from STATE (overwrites IS_UNALIGNED, same register) */
	lw	SAVED_CA, 0(T1)

	/* Store remaining bytecounter as negative value */
	subu	BYTES, $at, BYTES

	jr	T0

	/* Jump table */
	FOR_EACH_WORD(JMPTBL_UNALIGNED)
.end chacha_crypt_arch
.set at

/*
 * hchacha_block_arch
 *
 * Runs the round function on a copy of the 16-word state and writes words
 * 0-3 and 12-15 of the result to OUT. The input state is not added back.
 *
 * Input arguments
 * STATE	$a0	pointer to the 16-word input state
 * OUT		$a1	receives 8 words (32 bytes)
 * NROUND	$a2	number of rounds; the loop counts down by 2 and
 *			exits at exactly 0
 *
 * X12-X15 are remapped to caller-saved registers below so that only
 * X11 ($s6) has to be saved. $a0, $a2, $a3, $at and $v0 are overwritten.
 */

#undef X12
#undef X13
#undef X14
#undef X15

#define X12	$a3
#define X13	$at
#define X14	$v0
#define X15	STATE

.set noat
.globl	hchacha_block_arch
.ent	hchacha_block_arch
hchacha_block_arch:
	.frame	$sp, STACK_SIZE, $ra

	addiu	$sp, -STACK_SIZE

	/* Save X11(s6) */
	sw	X11, 0($sp)

	lw	X0,  0(STATE)
	lw	X1,  4(STATE)
	lw	X2,  8(STATE)
	lw	X3,  12(STATE)
	lw	X4,  16(STATE)
	lw	X5,  20(STATE)
	lw	X6,  24(STATE)
	lw	X7,  28(STATE)
	lw	X8,  32(STATE)
	lw	X9,  36(STATE)
	lw	X10, 40(STATE)
	lw	X11, 44(STATE)
	lw	X12, 48(STATE)
	lw	X13, 52(STATE)
	lw	X14, 56(STATE)
	/* X15 aliases STATE, so this load must come last. */
	lw	X15, 60(STATE)

.Loop_hchacha_xor_rounds:
	/* $a2 = rounds remaining; each iteration performs two rounds. */
	addiu	$a2, -2
	AXR( 0, 1, 2, 3,  4, 5, 6, 7, 12,13,14,15, 16);
	AXR( 8, 9,10,11, 12,13,14,15,  4, 5, 6, 7, 12);
	AXR( 0, 1, 2, 3,  4, 5, 6, 7, 12,13,14,15,  8);
	AXR( 8, 9,10,11, 12,13,14,15,  4, 5, 6, 7,  7);
	AXR( 0, 1, 2, 3,  5, 6, 7, 4, 15,12,13,14, 16);
	AXR(10,11, 8, 9, 15,12,13,14,  5, 6, 7, 4, 12);
	AXR( 0, 1, 2, 3,  5, 6, 7, 4, 15,12,13,14,  8);
	AXR(10,11, 8, 9, 15,12,13,14,  5, 6, 7, 4,  7);
	bnez	$a2, .Loop_hchacha_xor_rounds

	/* Restore used register */
	lw	X11, 0($sp)

	sw	X0,  0(OUT)
	sw	X1,  4(OUT)
	sw	X2,  8(OUT)
	sw	X3,  12(OUT)
	sw	X12, 16(OUT)
	sw	X13, 20(OUT)
	sw	X14, 24(OUT)
	sw	X15, 28(OUT)

	addiu	$sp, STACK_SIZE
	jr	$ra
.end hchacha_block_arch
.set at