/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Camellia Cipher Algorithm (x86_64)
 *
 * Copyright (C) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
 */

/*
 * Syntax: GNU as, AT&T operand order (src, dst), run through the C
 * preprocessor.  Only C-style comments are used so that cpp never
 * misreads a comment as a directive.
 *
 * Entry points (arguments arrive in %rdi, %rsi, %rdx, %rcx):
 *	__camellia_enc_blk	- encrypt one 16-byte block
 *	camellia_dec_blk	- decrypt one 16-byte block
 *	__camellia_enc_blk_2way	- encrypt two consecutive 16-byte blocks
 *	camellia_dec_blk_2way	- decrypt two consecutive 16-byte blocks
 *
 * None of the routines below issues a call instruction.
 */

#include <linux/linkage.h>

.file "camellia-x86_64-asm_64.S"
.text

/*
 * Lookup tables defined outside this file.  They are indexed by one byte
 * and read as 8-byte entries (see xor2ror16).
 */
.extern camellia_sp10011110;
.extern camellia_sp22000222;
.extern camellia_sp03303033;
.extern camellia_sp00444404;
.extern camellia_sp02220222;
.extern camellia_sp30333033;
.extern camellia_sp44044404;
.extern camellia_sp11101110;

/* Short aliases for the tables above. */
#define sp10011110 camellia_sp10011110
#define sp22000222 camellia_sp22000222
#define sp03303033 camellia_sp03303033
#define sp00444404 camellia_sp00444404
#define sp02220222 camellia_sp02220222
#define sp30333033 camellia_sp30333033
#define sp44044404 camellia_sp44044404
#define sp11101110 camellia_sp11101110

#define CAMELLIA_TABLE_BYTE_LEN 272

/* struct camellia_ctx: byte offsets of the fields used here */
#define key_table 0
#define key_length CAMELLIA_TABLE_BYTE_LEN

/*
 * Register macros.
 *
 * The macros further down build register names by token pasting
 * (RAB ## 0, RAB0 ## bl, RT1 ## d, ...), so they must always be handed
 * these macro names, never a raw %register.
 *
 * RIO and RT0 are both %rsi: the I/O pointer is only valid until the
 * first round macro runs.  The destination pointer is therefore parked
 * in RDST and copied back into RIO just before the output is written.
 */
#define CTX %rdi
#define RIO %rsi
#define RIOd %esi

/* Block state: lane 0 lives in RAB0/RCD0, lane 1 (2-way only) in RAB1/RCD1. */
#define RAB0 %rax
#define RCD0 %rcx
#define RAB1 %rbx
#define RCD1 %rdx

#define RAB0d %eax
#define RCD0d %ecx
#define RAB1d %ebx
#define RCD1d %edx

#define RAB0bl %al
#define RCD0bl %cl
#define RAB1bl %bl
#define RCD1bl %dl

#define RAB0bh %ah
#define RCD0bh %ch
#define RAB1bh %bh
#define RCD1bh %dh

/* Scratch registers.  RT1 is %r12 (callee-saved); callers park it in RR12. */
#define RT0 %rsi
#define RT1 %r12
#define RT2 %r8

#define RT0d %esi
#define RT1d %r12d
#define RT2d %r8d

#define RT2bl %r8b

/*
 * RXOR: copy of the "bool xor" argument in the encrypt entry points.
 *	 The decrypt entry points have no such argument and reuse it as
 *	 scratch (1-way) or as the save slot for %rbx (2-way).
 * RR12: save slot for %r12.
 * RDST: copy of the destination pointer.
 */
#define RXOR %r9
#define RR12 %r10
#define RDST %r11

#define RXORd %r9d
#define RXORbl %r9b

/*
 * xor2ror16(T0, T1, tmp1, tmp2, ab, dst)
 *
 *	tmp2 = byte 0 of ab;  tmp1 = byte 1 of ab;
 *	ab   = ror64(ab, 16);
 *	dst ^= T0[tmp2];  dst ^= T1[tmp1];	(8-byte table entries)
 *
 * ab must have a legacy high-byte register (%ah/%bh/%ch/%dh), which is
 * why the block state is kept in %rax/%rbx/%rcx/%rdx.  Clobbers tmp1,
 * tmp2 and the flags.
 */
#define xor2ror16(T0, T1, tmp1, tmp2, ab, dst) \
	movzbl ab ## bl,		tmp2 ## d; \
	movzbl ab ## bh,		tmp1 ## d; \
	rorq $16,			ab; \
	xorq T0(, tmp2, 8),		dst; \
	xorq T1(, tmp1, 8),		dst;

/**********************************************************************
  1-way camellia
 **********************************************************************/

/*
 * roundsm(ab, subkey, cd): one round on lane 0.
 *
 * Loads the 8-byte subkey at index 'subkey' of key_table into RT2, then
 * XORs eight table lookups (one per byte of ab ## 0) alternately into
 * cd ## 0 and RT2, and finally folds RT2 into cd ## 0.  The four 16-bit
 * rotations add up to 64, so ab ## 0 is unchanged on exit.
 * Clobbers RT0, RT1, RT2 and the flags.
 */
#define roundsm(ab, subkey, cd) \
	movq (key_table + ((subkey) * 2) * 4)(CTX),	RT2; \
	\
	xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 0, cd ## 0); \
	xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 0, RT2); \
	xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 0, cd ## 0); \
	xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 0, RT2); \
	\
	xorq RT2,					cd ## 0;

/*
 * fls(l, r, kl, kr): key-dependent mixing of both halves of lane 0.
 * With .lo/.hi the low/high 32 bits of a 64-bit value and kl/kr the
 * key_table entries at those indices, the steps are, in order:
 *
 *	l.hi ^= rol32(l.lo & kl.lo, 1);
 *	r.lo ^= r.hi | kr.hi;
 *	l.lo ^= l.hi | kl.hi;
 *	r.hi ^= rol32(r.lo & kr.lo, 1);
 *
 * The 32-bit loads zero the upper half of the scratch register, so each
 * shlq $32 leaves the low half clear.  Clobbers RT0, RT1, RT2, flags.
 */
#define fls(l, r, kl, kr) \
	movl (key_table + ((kl) * 2) * 4)(CTX),		RT0d; \
	andl l ## 0d,					RT0d; \
	roll $1,					RT0d; \
	shlq $32,					RT0; \
	xorq RT0,					l ## 0; \
	movq (key_table + ((kr) * 2) * 4)(CTX),		RT1; \
	orq r ## 0,					RT1; \
	shrq $32,					RT1; \
	xorq RT1,					r ## 0; \
	\
	movq (key_table + ((kl) * 2) * 4)(CTX),		RT2; \
	orq l ## 0,					RT2; \
	shrq $32,					RT2; \
	xorq RT2,					l ## 0; \
	movl (key_table + ((kr) * 2) * 4)(CTX),		RT0d; \
	andl r ## 0d,					RT0d; \
	roll $1,					RT0d; \
	shlq $32,					RT0; \
	xorq RT0,					r ## 0;

/* Six rounds using subkeys i+2 .. i+7, alternating the roles of AB and CD. */
#define enc_rounds(i) \
	roundsm(RAB, i + 2, RCD); \
	roundsm(RCD, i + 3, RAB); \
	roundsm(RAB, i + 4, RCD); \
	roundsm(RCD, i + 5, RAB); \
	roundsm(RAB, i + 6, RCD); \
	roundsm(RCD, i + 7, RAB);

#define enc_fls(i) \
	fls(RAB, RCD, i + 0, i + 1);

/*
 * Load one 16-byte block from (RIO): each 8-byte half is byte-swapped
 * and has its 32-bit halves exchanged; key_table[0] is XORed into the
 * first half.
 */
#define enc_inpack() \
	movq (RIO),					RAB0; \
	bswapq						RAB0; \
	rolq $32,					RAB0; \
	movq 4*2(RIO),					RCD0; \
	bswapq						RCD0; \
	rorq $32,					RCD0; \
	xorq key_table(CTX),				RAB0;

/*
 * Store one block to (RIO) using 'op' (mov or xor).  key_table[max] is
 * XORed into RCD0 first.  Note the halves are written in swapped order:
 * RCD0 goes to offset 0, RAB0 to offset 8.
 */
#define enc_outunpack(op, max) \
	xorq key_table(CTX, max, 8),			RCD0; \
	rorq $32,					RCD0; \
	bswapq						RCD0; \
	op ## q RCD0,					(RIO); \
	rolq $32,					RAB0; \
	bswapq						RAB0; \
	op ## q RAB0,					4*2(RIO);

/* Same six rounds as enc_rounds, with the subkeys taken in reverse order. */
#define dec_rounds(i) \
	roundsm(RAB, i + 7, RCD); \
	roundsm(RCD, i + 6, RAB); \
	roundsm(RAB, i + 5, RCD); \
	roundsm(RCD, i + 4, RAB); \
	roundsm(RAB, i + 3, RCD); \
	roundsm(RCD, i + 2, RAB);

/* Same as enc_fls with the two subkeys exchanged. */
#define dec_fls(i) \
	fls(RAB, RCD, i + 1, i + 0);

/* Like enc_inpack, but XORs key_table[max] instead of key_table[0]. */
#define dec_inpack(max) \
	movq (RIO),					RAB0; \
	bswapq						RAB0; \
	rolq $32,					RAB0; \
	movq 4*2(RIO),					RCD0; \
	bswapq						RCD0; \
	rorq $32,					RCD0; \
	xorq key_table(CTX, max, 8),			RAB0;

/* Like enc_outunpack(mov, ...), but XORs key_table[0] into RCD0. */
#define dec_outunpack() \
	xorq key_table(CTX),				RCD0; \
	rorq $32,					RCD0; \
	bswapq						RCD0; \
	movq RCD0,					(RIO); \
	rolq $32,					RAB0; \
	bswapq						RAB0; \
	movq RAB0,					4*2(RIO);

SYM_FUNC_START(__camellia_enc_blk)
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 *	%rcx: bool xor
	 *
	 * Writes: %rax, %rcx, %rsi, %r8-%r11, flags.  %r12 is used as
	 * scratch and restored from RR12 on both exit paths.
	 */
	movq %r12, RR12;

	movq %rcx, RXOR;	/* %rcx is about to become RCD0 */
	movq %rsi, RDST;	/* %rsi doubles as RT0 during the rounds */
	movq %rdx, RIO;

	enc_inpack();

	enc_rounds(0);
	enc_fls(8);
	enc_rounds(8);
	enc_fls(16);
	enc_rounds(16);
	movl $24, RT1d; /* max */

	/*
	 * key_length == 16 stops after the rounds above.  Compared as a
	 * 32-bit value, the same width camellia_dec_blk uses for this field.
	 */
	cmpl $16, key_length(CTX);
	je .L__enc_done;

	enc_fls(24);
	enc_rounds(24);
	movl $32, RT1d; /* max */

.L__enc_done:
	testb RXORbl, RXORbl;
	movq RDST, RIO;		/* mov leaves the flags from testb intact */

	jnz .L__enc_xor;

	enc_outunpack(mov, RT1);

	movq RR12, %r12;
	RET;

.L__enc_xor:
	enc_outunpack(xor, RT1);

	movq RR12, %r12;
	RET;
SYM_FUNC_END(__camellia_enc_blk)

SYM_FUNC_START(camellia_dec_blk)
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 *
	 * Writes: %rax, %rcx, %rsi, %r8-%r11, flags.  %r12 is used as
	 * scratch and restored from RR12 before returning.
	 */
	cmpl $16, key_length(CTX);
	movl $32, RT2d;
	movl $24, RXORd;	/* RXOR is plain scratch here: no xor argument */
	cmovel RXORd, RT2d; /* max */

	movq %r12, RR12;
	movq %rsi, RDST;	/* %rsi doubles as RT0 during the rounds */
	movq %rdx, RIO;

	dec_inpack(RT2);

	/* RT2 is overwritten by the rounds, so branch on max right away. */
	cmpb $24, RT2bl;
	je .L__dec_rounds16;

	dec_rounds(24);
	dec_fls(24);

.L__dec_rounds16:
	dec_rounds(16);
	dec_fls(16);
	dec_rounds(8);
	dec_fls(8);
	dec_rounds(0);

	movq RDST, RIO;

	dec_outunpack();

	movq RR12, %r12;
	RET;
SYM_FUNC_END(camellia_dec_blk)

/**********************************************************************
  2-way camellia
 **********************************************************************/

/*
 * roundsm2(ab, subkey, cd): roundsm applied to lane 0 and lane 1.
 *
 * The subkey is XORed into cd ## 1 up front so that RT2 is free to act
 * as the lane-0 accumulator; lane 1 then XORs its eight lookups straight
 * into cd ## 1.  The fold of RT2 into cd ## 0 is interleaved with the
 * lane-1 lookups.  Clobbers RT0, RT1, RT2 and the flags.
 */
#define roundsm2(ab, subkey, cd) \
	movq (key_table + ((subkey) * 2) * 4)(CTX),	RT2; \
	xorq RT2,					cd ## 1; \
	\
	xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 0, cd ## 0); \
	xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 0, RT2); \
	xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 0, cd ## 0); \
	xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 0, RT2); \
	\
		xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 1, cd ## 1); \
		xorq RT2,					cd ## 0; \
		xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 1, cd ## 1); \
		xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 1, cd ## 1); \
		xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 1, cd ## 1);

/*
 * fls2(l, r, kl, kr): the four steps of fls applied to both lanes.
 * Order: lane 0 steps 1-2, lane 1 steps 1-2, lane 0 steps 3-4, lane 1
 * steps 3-4 (lane-1 groups are indented), rotating through RT0/RT1/RT2
 * as scratch.  Clobbers RT0, RT1, RT2 and the flags.
 */
#define fls2(l, r, kl, kr) \
	movl (key_table + ((kl) * 2) * 4)(CTX),		RT0d; \
	andl l ## 0d,					RT0d; \
	roll $1,					RT0d; \
	shlq $32,					RT0; \
	xorq RT0,					l ## 0; \
	movq (key_table + ((kr) * 2) * 4)(CTX),		RT1; \
	orq r ## 0,					RT1; \
	shrq $32,					RT1; \
	xorq RT1,					r ## 0; \
	\
		movl (key_table + ((kl) * 2) * 4)(CTX),		RT2d; \
		andl l ## 1d,					RT2d; \
		roll $1,					RT2d; \
		shlq $32,					RT2; \
		xorq RT2,					l ## 1; \
		movq (key_table + ((kr) * 2) * 4)(CTX),		RT0; \
		orq r ## 1,					RT0; \
		shrq $32,					RT0; \
		xorq RT0,					r ## 1; \
	\
	movq (key_table + ((kl) * 2) * 4)(CTX),		RT1; \
	orq l ## 0,					RT1; \
	shrq $32,					RT1; \
	xorq RT1,					l ## 0; \
	movl (key_table + ((kr) * 2) * 4)(CTX),		RT2d; \
	andl r ## 0d,					RT2d; \
	roll $1,					RT2d; \
	shlq $32,					RT2; \
	xorq RT2,					r ## 0; \
	\
		movq (key_table + ((kl) * 2) * 4)(CTX),		RT0; \
		orq l ## 1,					RT0; \
		shrq $32,					RT0; \
		xorq RT0,					l ## 1; \
		movl (key_table + ((kr) * 2) * 4)(CTX),		RT1d; \
		andl r ## 1d,					RT1d; \
		roll $1,					RT1d; \
		shlq $32,					RT1; \
		xorq RT1,					r ## 1;

/* Six 2-way rounds using subkeys i+2 .. i+7. */
#define enc_rounds2(i) \
	roundsm2(RAB, i + 2, RCD); \
	roundsm2(RCD, i + 3, RAB); \
	roundsm2(RAB, i + 4, RCD); \
	roundsm2(RCD, i + 5, RAB); \
	roundsm2(RAB, i + 6, RCD); \
	roundsm2(RCD, i + 7, RAB);

#define enc_fls2(i) \
	fls2(RAB, RCD, i + 0, i + 1);

/*
 * Load two consecutive 16-byte blocks from (RIO): lane 0 from offsets
 * 0 and 8, lane 1 from offsets 16 and 24.  key_table[0] is XORed into
 * the first half of each lane.
 */
#define enc_inpack2() \
	movq (RIO),					RAB0; \
	bswapq						RAB0; \
	rorq $32,					RAB0; \
	movq 4*2(RIO),					RCD0; \
	bswapq						RCD0; \
	rolq $32,					RCD0; \
	xorq key_table(CTX),				RAB0; \
	\
	movq 8*2(RIO),					RAB1; \
	bswapq						RAB1; \
	rorq $32,					RAB1; \
	movq 12*2(RIO),					RCD1; \
	bswapq						RCD1; \
	rolq $32,					RCD1; \
	xorq key_table(CTX),				RAB1;

/*
 * Store both lanes to (RIO) using 'op' (mov or xor); key_table[max] is
 * XORed into each RCD first.  Within each lane RCD is written before RAB.
 */
#define enc_outunpack2(op, max) \
	xorq key_table(CTX, max, 8),			RCD0; \
	rolq $32,					RCD0; \
	bswapq						RCD0; \
	op ## q RCD0,					(RIO); \
	rorq $32,					RAB0; \
	bswapq						RAB0; \
	op ## q RAB0,					4*2(RIO); \
	\
	xorq key_table(CTX, max, 8),			RCD1; \
	rolq $32,					RCD1; \
	bswapq						RCD1; \
	op ## q RCD1,					8*2(RIO); \
	rorq $32,					RAB1; \
	bswapq						RAB1; \
	op ## q RAB1,					12*2(RIO);

/* Same six rounds as enc_rounds2, with the subkeys in reverse order. */
#define dec_rounds2(i) \
	roundsm2(RAB, i + 7, RCD); \
	roundsm2(RCD, i + 6, RAB); \
	roundsm2(RAB, i + 5, RCD); \
	roundsm2(RCD, i + 4, RAB); \
	roundsm2(RAB, i + 3, RCD); \
	roundsm2(RCD, i + 2, RAB);

/* Same as enc_fls2 with the two subkeys exchanged. */
#define dec_fls2(i) \
	fls2(RAB, RCD, i + 1, i + 0);

/* Like enc_inpack2, but XORs key_table[max] instead of key_table[0]. */
#define dec_inpack2(max) \
	movq (RIO),					RAB0; \
	bswapq						RAB0; \
	rorq $32,					RAB0; \
	movq 4*2(RIO),					RCD0; \
	bswapq						RCD0; \
	rolq $32,					RCD0; \
	xorq key_table(CTX, max, 8),			RAB0; \
	\
	movq 8*2(RIO),					RAB1; \
	bswapq						RAB1; \
	rorq $32,					RAB1; \
	movq 12*2(RIO),					RCD1; \
	bswapq						RCD1; \
	rolq $32,					RCD1; \
	xorq key_table(CTX, max, 8),			RAB1;

/* Like enc_outunpack2(mov, ...), but XORs key_table[0] into each RCD. */
#define dec_outunpack2() \
	xorq key_table(CTX),				RCD0; \
	rolq $32,					RCD0; \
	bswapq						RCD0; \
	movq RCD0,					(RIO); \
	rorq $32,					RAB0; \
	bswapq						RAB0; \
	movq RAB0,					4*2(RIO); \
	\
	xorq key_table(CTX),				RCD1; \
	rolq $32,					RCD1; \
	bswapq						RCD1; \
	movq RCD1,					8*2(RIO); \
	rorq $32,					RAB1; \
	bswapq						RAB1; \
	movq RAB1,					12*2(RIO);

SYM_FUNC_START(__camellia_enc_blk_2way)
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 *	%rcx: bool xor
	 *
	 * Writes: %rax, %rcx, %rdx, %rsi, %r8-%r11, flags.  %rbx (RAB1) is
	 * saved on the stack and %r12 (RT1) in RR12; both are restored on
	 * both exit paths.
	 */
	pushq %rbx;

	movq %r12, RR12;
	movq %rcx, RXOR;	/* %rcx is about to become RCD0 */
	movq %rsi, RDST;	/* %rsi doubles as RT0 during the rounds */
	movq %rdx, RIO;		/* %rdx is about to become RCD1 */

	enc_inpack2();

	enc_rounds2(0);
	enc_fls2(8);
	enc_rounds2(8);
	enc_fls2(16);
	enc_rounds2(16);
	movl $24, RT2d; /* max */

	/*
	 * key_length == 16 stops after the rounds above.  Compared as a
	 * 32-bit value, the same width camellia_dec_blk_2way uses.
	 */
	cmpl $16, key_length(CTX);
	je .L__enc2_done;

	enc_fls2(24);
	enc_rounds2(24);
	movl $32, RT2d; /* max */

.L__enc2_done:
	testb RXORbl, RXORbl;
	movq RDST, RIO;		/* mov leaves the flags from testb intact */
	jnz .L__enc2_xor;

	enc_outunpack2(mov, RT2);

	movq RR12, %r12;
	popq %rbx;
	RET;

.L__enc2_xor:
	enc_outunpack2(xor, RT2);

	movq RR12, %r12;
	popq %rbx;
	RET;
SYM_FUNC_END(__camellia_enc_blk_2way)

SYM_FUNC_START(camellia_dec_blk_2way)
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 *
	 * Writes: %rax, %rcx, %rdx, %rsi, %r8-%r11, flags.  %rbx (RAB1) is
	 * parked in RXOR and %r12 (RT1) in RR12; both are restored before
	 * returning.
	 */
	cmpl $16, key_length(CTX);
	movl $32, RT2d;
	movl $24, RXORd;	/* scratch use; RXOR is reloaded just below */
	cmovel RXORd, RT2d; /* max */

	movq %rbx, RXOR;	/* no xor argument here, so RXOR can hold %rbx */
	movq %r12, RR12;
	movq %rsi, RDST;	/* %rsi doubles as RT0 during the rounds */
	movq %rdx, RIO;		/* %rdx is about to become RCD1 */

	dec_inpack2(RT2);

	/* RT2 is overwritten by the rounds, so branch on max right away. */
	cmpb $24, RT2bl;
	je .L__dec2_rounds16;

	dec_rounds2(24);
	dec_fls2(24);

.L__dec2_rounds16:
	dec_rounds2(16);
	dec_fls2(16);
	dec_rounds2(8);
	dec_fls2(8);
	dec_rounds2(0);

	movq RDST, RIO;

	dec_outunpack2();

	movq RR12, %r12;
	movq RXOR, %rbx;
	RET;
SYM_FUNC_END(camellia_dec_blk_2way)