/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Cast6 Cipher 8-way parallel algorithm (AVX/x86_64)
 *
 * Copyright (C) 2012 Johannes Goetzfried
 *     <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
 *
 * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
 */

#include <linux/linkage.h>
#include <asm/frame.h>
#include "glue_helper-asm-avx.S"

.file "cast6-avx-x86_64-asm_64.S"

/* S-box tables are defined outside this file; 32-bit entries, indexed *4. */
.extern cast_s1
.extern cast_s2
.extern cast_s3
.extern cast_s4

/*
 * structure of crypto context
 *
 * km: 32-bit masking keys, read as km + 4*round.
 * kr: rotation keys, one byte per round, read 16 bytes at a time;
 *     starts 12*4*4 = 192 bytes into the context.
 */
#define km	0
#define kr	(12*4*4)

/* s-boxes */
#define s1	cast_s1
#define s2	cast_s2
#define s3	cast_s3
#define s4	cast_s4

/**********************************************************************
  8-way AVX cast6
 **********************************************************************/
/* Context pointer; kept in %r15 because %rdi is reused as a scratch index. */
#define CTX %r15

/* Block state, first group of four registers (A, B, C, D). */
#define RA1 %xmm0
#define RB1 %xmm1
#define RC1 %xmm2
#define RD1 %xmm3

/* Block state, second group (continues with RC2/RD2). */
#define RA2 %xmm4
#define RB2 %xmm5
#define RC2 %xmm6
#define RD2 %xmm7

/* Working value of the round function. */
#define RX %xmm8

/*
 * Per-round key material:
 *   RKM  - masking key of the current round, broadcast to all lanes
 *   RKR  - rotation-key bytes still to be consumed (one byte per round)
 *   RKRF - left-shift count  (low 5 bits of the first RKR byte)
 *   RKRR - right-shift count (32 - RKRF)
 *   R32  - constant 32
 *   R1ST - constant 0x1f in the lowest byte, used to isolate RKRF
 */
#define RKM  %xmm9
#define RKR  %xmm10
#define RKRF %xmm11
#define RKRR %xmm12
#define R32  %xmm13
#define R1ST %xmm14

/* Vector scratch register. */
#define RTMP %xmm15

/*
 * S-box index / table base scratch registers.  These alias %rdi and %rsi,
 * so incoming arguments in those registers are destroyed by the rounds.
 */
#define RID1  %rdi
#define RID1d %edi
#define RID2  %rsi
#define RID2d %esi

/*
 * General purpose copies of the vector lanes.  Only registers that have
 * both a low-byte and a high-byte alias (bl/bh) can be used here.
 */
#define RGI1   %rdx
#define RGI1bl %dl
#define RGI1bh %dh
#define RGI2   %rcx
#define RGI2bl %cl
#define RGI2bh %ch

#define RGI3   %rax
#define RGI3bl %al
#define RGI3bh %ah
#define RGI4   %rbx
#define RGI4bl %bl
#define RGI4bh %bh

/* Accumulators for the s-box lookup results. */
#define RFS1  %r8
#define RFS1d %r8d
#define RFS2  %r9
#define RFS2d %r9d
#define RFS3  %r10
#define RFS3d %r10d


/*
 * lookup_32bit: combine four s-box entries selected by the low 32 bits
 * of src into the 32-bit register dst:
 *
 *   dst = ((s1[bits 8..15] op1 s2[bits 0..7]) op2 s3[bits 24..31])
 *							op3 s4[bits 16..23]
 *
 * src and dst are register alias names (not expanded registers) because
 * the bl/bh/d suffixes are pasted onto them.  src is shifted right by 16
 * as a side effect; interleave_op(il_reg) is emitted between the last two
 * lookups (shr_next to expose the next 32-bit half, or dummy).
 * Clobbers RID1, RID2 and flags.
 */
#define lookup_32bit(src, dst, op1, op2, op3, interleave_op, il_reg) \
	movzbl		src ## bh,     RID1d;    \
	leaq		s1(%rip),      RID2;     \
	movl		(RID2,RID1,4), dst ## d; \
	movzbl		src ## bl,     RID2d;    \
	leaq		s2(%rip),      RID1;     \
	op1		(RID1,RID2,4), dst ## d; \
	shrq $16,	src;                     \
	movzbl		src ## bh,     RID1d;    \
	leaq		s3(%rip),      RID2;     \
	op2		(RID2,RID1,4), dst ## d; \
	movzbl		src ## bl,     RID2d;    \
	interleave_op(il_reg);                   \
	leaq		s4(%rip),      RID1;     \
	op3		(RID1,RID2,4), dst ## d;

/* No-op interleave operation for lookup_32bit. */
#define dummy(d) /* do nothing */

/* Interleave operation: shift the next 16 bits of reg into the low bytes. */
#define shr_next(reg) \
	shrq $16,	reg;

/*
 * F_head: x = rotate_left(RKM op0 a, RKRF) on every 32-bit lane, then
 * copy the low and high 64 bits of x into the GPRs gi1 and gi2.
 * Clobbers RTMP.
 */
#define F_head(a, x, gi1, gi2, op0) \
	op0	a,	RKM,  x;                 \
	vpslld	RKRF,	x,    RTMP;              \
	vpsrld	RKRR,	x,    x;                 \
	vpor	RTMP,	x,    x;                 \
	\
	vmovq		x,    gi1;               \
	vpextrq $1,	x,    gi2;

/*
 * F_tail: run lookup_32bit on the four 32-bit values held in gi1/gi2 and
 * reassemble the four results into x (lanes 0,1 from gi1, lanes 2,3 from
 * gi2).  The parameter a is not used.  Clobbers gi1, gi2, RFS1-RFS3,
 * RID1, RID2 and flags.
 *
 * NOTE(review): the leading ## on gi1/gi2 appears to be there to keep the
 * alias name unexpanded for the suffix pasting in lookup_32bit; this relies
 * on the preprocessor being run in assembler mode - confirm before touching.
 */
#define F_tail(a, x, gi1, gi2, op1, op2, op3) \
	lookup_32bit(##gi1, RFS1, op1, op2, op3, shr_next, ##gi1); \
	lookup_32bit(##gi2, RFS3, op1, op2, op3, shr_next, ##gi2); \
	\
	lookup_32bit(##gi1, RFS2, op1, op2, op3, dummy, none);     \
	shlq $32,	RFS2;                                      \
	orq		RFS1, RFS2;                                \
	lookup_32bit(##gi2, RFS1, op1, op2, op3, dummy, none);     \
	shlq $32,	RFS1;                                      \
	orq		RFS1, RFS3;                                \
	\
	vmovq		RFS2, x;                                   \
	vpinsrq $1,	RFS3, x, x;

/*
 * F_2: one round on both register groups: a1 ^= F(b1), a2 ^= F(b2).
 * RX can be shared by both F_head calls because each result is moved to
 * GPRs before the next one is computed.
 */
#define F_2(a1, b1, a2, b2, op0, op1, op2, op3) \
	F_head(b1, RX, RGI1, RGI2, op0);              \
	F_head(b2, RX, RGI3, RGI4, op0);              \
	\
	F_tail(b1, RX, RGI1, RGI2, op1, op2, op3);    \
	F_tail(b2, RTMP, RGI3, RGI4, op1, op2, op3);  \
	\
	vpxor		a1, RX,   a1;                 \
	vpxor		a2, RTMP, a2;

/* The three round function variants differ only in the operators used. */
#define F1_2(a1, b1, a2, b2) \
	F_2(a1, b1, a2, b2, vpaddd, xorl, subl, addl)
#define F2_2(a1, b1, a2, b2) \
	F_2(a1, b1, a2, b2, vpxor, subl, addl, xorl)
#define F3_2(a1, b1, a2, b2) \
	F_2(a1, b1, a2, b2, vpsubd, addl, xorl, subl)

/* qop: out ^= F<f>(in), applied to both groups (suffix 1 and 2). */
#define qop(in, out, f) \
	F ## f ## _2(out ## 1, in ## 1, out ## 2, in ## 2);

/*
 * get_round_keys: set up RKM/RKRF/RKRR for round nn and drop the consumed
 * rotation byte from RKR (byte-wise shift right by one).
 */
#define get_round_keys(nn) \
	vbroadcastss	(km+(4*(nn)))(CTX), RKM;        \
	vpand		R1ST,               RKR,  RKRF; \
	vpsubq		RKRF,               R32,  RKRR; \
	vpsrldq $1,	RKR,                RKR;

/* Q: four rounds using keys 4n+0 .. 4n+3, in ascending order. */
#define Q(n) \
	get_round_keys(4*n+0); \
	qop(RD, RC, 1);        \
	\
	get_round_keys(4*n+1); \
	qop(RC, RB, 2);        \
	\
	get_round_keys(4*n+2); \
	qop(RB, RA, 3);        \
	\
	get_round_keys(4*n+3); \
	qop(RA, RD, 1);

/* QBAR: the same four rounds as Q, in descending key order. */
#define QBAR(n) \
	get_round_keys(4*n+3); \
	qop(RA, RD, 1);        \
	\
	get_round_keys(4*n+2); \
	qop(RB, RA, 3);        \
	\
	get_round_keys(4*n+1); \
	qop(RC, RB, 2);        \
	\
	get_round_keys(4*n+0); \
	qop(RD, RC, 1);

/* shuffle: reorder the rotation-key bytes in RKR with a byte mask. */
#define shuffle(mask) \
	vpshufb		mask(%rip),            RKR, RKR;

/*
 * preload_rkr: load the n-th 16-byte group of rotation keys into RKR and
 * apply do_mask(mask) (shuffle or dummy) to put the bytes into the order
 * in which get_round_keys will consume them.
 */
#define preload_rkr(n, do_mask, mask) \
	vbroadcastss	.L16_mask(%rip),          RKR;      \
	/* add 16-bit rotation to key rotations (mod 32) */ \
	vpxor		(kr+n*16)(CTX),        RKR, RKR;    \
	do_mask(mask);

/*
 * transpose_4x4: transpose the 4x4 matrix of 32-bit words held in x0..x3.
 * t0, t1 and t2 are clobbered; x3 doubles as the fourth temporary.
 */
#define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
	vpunpckldq		x1, x0, t0; \
	vpunpckhdq		x1, x0, t2; \
	vpunpckldq		x3, x2, t1; \
	vpunpckhdq		x3, x2, x3; \
	\
	vpunpcklqdq		t1, t0, x0; \
	vpunpckhqdq		t1, t0, x1; \
	vpunpcklqdq		x3, t2, x2; \
	vpunpckhqdq		x3, t2, x3;

/* inpack_blocks: byte-shuffle each register with rmask, then transpose. */
#define inpack_blocks(x0, x1, x2, x3, t0, t1, t2, rmask) \
	vpshufb rmask, x0,	x0; \
	vpshufb rmask, x1,	x1; \
	vpshufb rmask, x2,	x2; \
	vpshufb rmask, x3,	x3; \
	\
	transpose_4x4(x0, x1, x2, x3, t0, t1, t2)

/* outunpack_blocks: inverse of inpack_blocks (transpose, then shuffle). */
#define outunpack_blocks(x0, x1, x2, x3, t0, t1, t2, rmask) \
	transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
	\
	vpshufb rmask,		x0, x0;       \
	vpshufb rmask,		x1, x1;       \
	vpshufb rmask,		x2, x2;       \
	vpshufb rmask,		x3, x3;

.section	.rodata.cst16, "aM", @progbits, 16
.align 16
/* Byte swap within each 32-bit word. */
.Lbswap_mask:
	.byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
/* Byte swap of the whole 128-bit value (no user visible in this file). */
.Lbswap128_mask:
	.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
/*
 * Rotation-key byte orders, named after the sequence of Q/QBAR quad-rounds
 * that consumes the 16 bytes.
 */
.Lrkr_enc_Q_Q_QBAR_QBAR:
	.byte 0, 1, 2, 3, 4, 5, 6, 7, 11, 10, 9, 8, 15, 14, 13, 12
.Lrkr_enc_QBAR_QBAR_QBAR_QBAR:
	.byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
.Lrkr_dec_Q_Q_Q_Q:
	.byte 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3
.Lrkr_dec_Q_Q_QBAR_QBAR:
	.byte 12, 13, 14, 15, 8, 9, 10, 11, 7, 6, 5, 4, 3, 2, 1, 0
.Lrkr_dec_QBAR_QBAR_QBAR_QBAR:
	.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0

/* 16 in every byte once broadcast; XORed onto the rotation keys. */
.section	.rodata.cst4.L16_mask, "aM", @progbits, 4
.align 4
.L16_mask:
	.byte 16, 16, 16, 16

/* The constant 32 (loaded into R32). */
.section	.rodata.cst4.L32_mask, "aM", @progbits, 4
.align 4
.L32_mask:
	.byte 32, 0, 0, 0

/* 0x1f in the lowest byte (loaded into R1ST). */
.section	.rodata.cst4.first_mask, "aM", @progbits, 4
.align 4
.Lfirst_mask:
	.byte 0x1f, 0, 0, 0
.text

/*
 * __cast6_enc_blk8: encrypt eight blocks held in registers.
 *
 * Runs 12 quad-rounds: Q(0)..Q(5) followed by QBAR(6)..QBAR(11).  The
 * rotation keys are reloaded (16 bytes) before every fourth quad-round.
 *
 * Clobbers: %rax, %rcx, %rdx, %rsi, %rdi, %r8-%r10, %xmm8-%xmm15, flags.
 * %rbx (RGI4) and %r15 (CTX) are used but saved and restored here.
 */
.align 8
SYM_FUNC_START_LOCAL(__cast6_enc_blk8)
	/* input:
	 *	%rdi: ctx
	 *	RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: blocks
	 * output:
	 *	RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks
	 */

	pushq %r15;
	pushq %rbx;

	/* %rdi is about to be reused as RID1, so keep ctx in CTX. */
	movq %rdi, CTX;

	/* RKM and RKRF are free until the first round; use them as temps. */
	vmovdqa .Lbswap_mask(%rip), RKM;
	vmovd .Lfirst_mask(%rip), R1ST;
	vmovd .L32_mask(%rip), R32;

	inpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
	inpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);

	preload_rkr(0, dummy, none);
	Q(0);
	Q(1);
	Q(2);
	Q(3);
	preload_rkr(1, shuffle, .Lrkr_enc_Q_Q_QBAR_QBAR);
	Q(4);
	Q(5);
	QBAR(6);
	QBAR(7);
	preload_rkr(2, shuffle, .Lrkr_enc_QBAR_QBAR_QBAR_QBAR);
	QBAR(8);
	QBAR(9);
	QBAR(10);
	QBAR(11);

	popq %rbx;
	popq %r15;

	/* RKM was overwritten by the round keys; reload the byte-swap mask. */
	vmovdqa .Lbswap_mask(%rip), RKM;

	outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
	outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);

	RET;
SYM_FUNC_END(__cast6_enc_blk8)

/*
 * __cast6_dec_blk8: decrypt eight blocks held in registers.
 *
 * Walks the key schedule backwards: Q(11)..Q(6) followed by
 * QBAR(5)..QBAR(0), with the rotation-key groups loaded in the order 2, 1, 0.
 *
 * Clobbers: %rax, %rcx, %rdx, %rsi, %rdi, %r8-%r10, %xmm8-%xmm15, flags.
 * %rbx (RGI4) and %r15 (CTX) are used but saved and restored here.
 */
.align 8
SYM_FUNC_START_LOCAL(__cast6_dec_blk8)
	/* input:
	 *	%rdi: ctx
	 *	RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks
	 * output:
	 *	RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: decrypted blocks
	 */

	pushq %r15;
	pushq %rbx;

	/* %rdi is about to be reused as RID1, so keep ctx in CTX. */
	movq %rdi, CTX;

	/* RKM and RKRF are free until the first round; use them as temps. */
	vmovdqa .Lbswap_mask(%rip), RKM;
	vmovd .Lfirst_mask(%rip), R1ST;
	vmovd .L32_mask(%rip), R32;

	inpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
	inpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);

	preload_rkr(2, shuffle, .Lrkr_dec_Q_Q_Q_Q);
	Q(11);
	Q(10);
	Q(9);
	Q(8);
	preload_rkr(1, shuffle, .Lrkr_dec_Q_Q_QBAR_QBAR);
	Q(7);
	Q(6);
	QBAR(5);
	QBAR(4);
	preload_rkr(0, shuffle, .Lrkr_dec_QBAR_QBAR_QBAR_QBAR);
	QBAR(3);
	QBAR(2);
	QBAR(1);
	QBAR(0);

	popq %rbx;
	popq %r15;

	/* RKM was overwritten by the round keys; reload the byte-swap mask. */
	vmovdqa .Lbswap_mask(%rip), RKM;
	outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
	outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);

	RET;
SYM_FUNC_END(__cast6_dec_blk8)

/*
 * cast6_ecb_enc_8way: ECB-encrypt eight blocks from src to dst.
 *
 * dst is parked in %r11 across the call because %rsi is used as scratch
 * (RID2) by the round macros.  load_8way/store_8way are not defined in
 * this file (presumably they come from glue_helper-asm-avx.S).
 */
SYM_FUNC_START(cast6_ecb_enc_8way)
	/* input:
	 *	%rdi: ctx
	 *	%rsi: dst
	 *	%rdx: src
	 */
	FRAME_BEGIN
	pushq %r15;

	movq %rdi, CTX;
	movq %rsi, %r11;

	load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);

	call __cast6_enc_blk8;

	store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);

	popq %r15;
	FRAME_END
	RET;
SYM_FUNC_END(cast6_ecb_enc_8way)

/*
 * cast6_ecb_dec_8way: ECB-decrypt eight blocks from src to dst.
 *
 * dst is parked in %r11 across the call because %rsi is used as scratch
 * (RID2) by the round macros.
 */
SYM_FUNC_START(cast6_ecb_dec_8way)
	/* input:
	 *	%rdi: ctx
	 *	%rsi: dst
	 *	%rdx: src
	 */
	FRAME_BEGIN
	pushq %r15;

	movq %rdi, CTX;
	movq %rsi, %r11;

	load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);

	call __cast6_dec_blk8;

	store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);

	popq %r15;
	FRAME_END
	RET;
SYM_FUNC_END(cast6_ecb_dec_8way)

/*
 * cast6_cbc_dec_8way: CBC-decrypt eight blocks from src to dst.
 *
 * src is needed again after the block decryption (it is passed to
 * store_cbc_8way), and %rdx is used as scratch (RGI1) by the round macros,
 * so src is kept in the callee-saved %r12; dst is kept in %r11.
 */
SYM_FUNC_START(cast6_cbc_dec_8way)
	/* input:
	 *	%rdi: ctx
	 *	%rsi: dst
	 *	%rdx: src
	 */
	FRAME_BEGIN
	pushq %r12;
	pushq %r15;

	movq %rdi, CTX;
	movq %rsi, %r11;
	movq %rdx, %r12;

	load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);

	call __cast6_dec_blk8;

	store_cbc_8way(%r12, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);

	popq %r15;
	popq %r12;
	FRAME_END
	RET;
SYM_FUNC_END(cast6_cbc_dec_8way)