/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Camellia Cipher Algorithm (x86_64)
 *
 * Copyright (C) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
 */

#include <linux/linkage.h>

.file "camellia-x86_64-asm_64.S"
.text

/*
 * Lookup tables defined outside this file.  The round macros index each
 * table with one byte of state and a scale of 8, i.e. they are read as
 * arrays of 8-byte entries.
 */
.extern camellia_sp10011110;
.extern camellia_sp22000222;
.extern camellia_sp03303033;
.extern camellia_sp00444404;
.extern camellia_sp02220222;
.extern camellia_sp30333033;
.extern camellia_sp44044404;
.extern camellia_sp11101110;

/* Short aliases for the tables above, used by the round macros. */
#define sp10011110 camellia_sp10011110
#define sp22000222 camellia_sp22000222
#define sp03303033 camellia_sp03303033
#define sp00444404 camellia_sp00444404
#define sp02220222 camellia_sp02220222
#define sp30333033 camellia_sp30333033
#define sp44044404 camellia_sp44044404
#define sp11101110 camellia_sp11101110

#define CAMELLIA_TABLE_BYTE_LEN 272

/* struct camellia_ctx: */
/* Byte offsets from CTX; key_length sits directly after the key table. */
#define key_table 0
#define key_length CAMELLIA_TABLE_BYTE_LEN

/* register macros */
/* CTX: context pointer (first argument).  RIO: current block pointer. */
#define CTX %rdi
#define RIO %rsi
#define RIOd %esi

/*
 * Block state.  The *_inpack macros load bytes 0..7 of a block into RABn
 * and bytes 8..15 into RCDn; suffix 0 is the first block, suffix 1 the
 * second block of the 2-way code paths.
 */
#define RAB0 %rax
#define RCD0 %rcx
#define RAB1 %rbx
#define RCD1 %rdx

/* 32-bit views of the state registers. */
#define RAB0d %eax
#define RCD0d %ecx
#define RAB1d %ebx
#define RCD1d %edx

/* Low-byte views (bits 0..7). */
#define RAB0bl %al
#define RCD0bl %cl
#define RAB1bl %bl
#define RCD1bl %dl

/* High-byte views (bits 8..15). */
#define RAB0bh %ah
#define RCD0bh %ch
#define RAB1bh %bh
#define RCD1bh %dh

/*
 * Scratch registers.
 *  - RT0 is %rsi, the same register as RIO: any macro that uses RT0 as
 *    scratch destroys the I/O pointer, so the functions keep the dst
 *    pointer in RDST and reload RIO from it before storing.
 *  - RT1 is %r12, which is callee-saved in the SysV AMD64 ABI; the
 *    functions park the caller's value in RR12 and restore it on exit.
 */
#define RT0 %rsi
#define RT1 %r12
#define RT2 %r8

#define RT0d %esi
#define RT1d %r12d
#define RT2d %r8d

#define RT2bl %r8b

/*
 * Save slots (all caller-saved registers):
 *   RXOR - "xor" flag in the encrypt functions; temporary / %rbx save
 *          slot in the decrypt functions
 *   RR12 - saved %r12
 *   RDST - saved dst pointer
 */
#define RXOR %r9
#define RR12 %r10
#define RDST %r11

#define RXORd %r9d
#define RXORbl %r9b

/*
 * xor2ror16(T0, T1, tmp1, tmp2, ab, dst)
 *
 *   dst ^= T0[bits 0..7 of ab]      (8-byte table entries)
 *   dst ^= T1[bits 8..15 of ab]
 *   ab   = ab rotated right by 16
 *
 * T0/T1 are addressed RIP-relative via leaq, because a RIP-relative
 * operand cannot also carry an index register.  `ab` must be one of the
 * RAB/RCD names above so that `ab ## bl` / `ab ## bh` paste into a valid
 * byte-register alias; tmp1/tmp2 must have a matching `d` alias.
 *
 * Clobbers: tmp1, tmp2, flags.
 */
#define xor2ror16(T0, T1, tmp1, tmp2, ab, dst) \
	leaq T0(%rip), tmp1; \
	movzbl ab ## bl, tmp2 ## d; \
	xorq (tmp1, tmp2, 8), dst; \
	leaq T1(%rip), tmp2; \
	movzbl ab ## bh, tmp1 ## d; \
	rorq $16, ab; \
	xorq (tmp2, tmp1, 8), dst;

/**********************************************************************
  1-way camellia
 **********************************************************************/

/*
 * roundsm(ab, subkey, cd): one round on block 0.
 *
 * Loads the 8-byte subkey at index `subkey` into RT2, then walks all
 * eight bytes of ab##0 (two bytes per xor2ror16, lowest first), xoring
 * the table entries alternately into cd##0 and RT2, and finally folds
 * RT2 into cd##0.  The four 16-bit rotations add up to 64, so ab##0 ends
 * with its original value.
 *
 * Clobbers: RT0 (= RIO), RT1, RT2, flags.
 */
#define roundsm(ab, subkey, cd) \
	movq (key_table + ((subkey) * 2) * 4)(CTX), RT2; \
	\
	xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 0, cd ## 0); \
	xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 0, RT2); \
	xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 0, cd ## 0); \
	xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 0, RT2); \
	\
	xorq RT2, cd ## 0;

/*
 * fls(l, r, kl, kr): key-dependent mixing of both halves of block 0
 * between round groups.  With lo/hi meaning bits 0..31 / 32..63:
 *
 *   l.hi ^= rol32(l.lo & kl.lo, 1)
 *   r.lo ^= (r.hi | kr.hi)
 *   l.lo ^= (l.hi | kl.hi)
 *   r.hi ^= rol32(r.lo & kr.lo, 1)
 *
 * NOTE(review): this has the shape of Camellia's FL applied to l and
 * FL^-1 applied to r - confirm against RFC 3713.
 *
 * Clobbers: RT0 (= RIO), RT1, RT2, flags.
 */
#define fls(l, r, kl, kr) \
	movl (key_table + ((kl) * 2) * 4)(CTX), RT0d; \
	andl l ## 0d, RT0d; \
	roll $1, RT0d; \
	shlq $32, RT0; \
	xorq RT0, l ## 0; \
	movq (key_table + ((kr) * 2) * 4)(CTX), RT1; \
	orq r ## 0, RT1; \
	shrq $32, RT1; \
	xorq RT1, r ## 0; \
	\
	movq (key_table + ((kl) * 2) * 4)(CTX), RT2; \
	orq l ## 0, RT2; \
	shrq $32, RT2; \
	xorq RT2, l ## 0; \
	movl (key_table + ((kr) * 2) * 4)(CTX), RT0d; \
	andl r ## 0d, RT0d; \
	roll $1, RT0d; \
	shlq $32, RT0; \
	xorq RT0, r ## 0;

/* Six encryption rounds using subkeys i+2 .. i+7, in ascending order. */
#define enc_rounds(i) \
	roundsm(RAB, i + 2, RCD); \
	roundsm(RCD, i + 3, RAB); \
	roundsm(RAB, i + 4, RCD); \
	roundsm(RCD, i + 5, RAB); \
	roundsm(RAB, i + 6, RCD); \
	roundsm(RCD, i + 7, RAB);

/* Encryption-side fls: kl = subkey i, kr = subkey i+1. */
#define enc_fls(i) \
	fls(RAB, RCD, i + 0, i + 1);

/*
 * Load one block from (RIO): byte-swap each 8-byte half, swap its two
 * 32-bit words (a rotation by 32), then xor subkey 0 into RAB0.
 */
#define enc_inpack() \
	movq (RIO), RAB0; \
	bswapq RAB0; \
	rolq $32, RAB0; \
	movq 4*2(RIO), RCD0; \
	bswapq RCD0; \
	rorq $32, RCD0; \
	xorq key_table(CTX), RAB0;

/*
 * Store one block to (RIO), halves exchanged: RCD0 goes to bytes 0..7,
 * RAB0 to bytes 8..15.  `max` is a register holding the index (8-byte
 * units) of the subkey xored into RCD0.  `op` is pasted with `q`: `mov`
 * overwrites the destination, `xor` xors the result into it.
 */
#define enc_outunpack(op, max) \
	xorq key_table(CTX, max, 8), RCD0; \
	rorq $32, RCD0; \
	bswapq RCD0; \
	op ## q RCD0, (RIO); \
	rolq $32, RAB0; \
	bswapq RAB0; \
	op ## q RAB0, 4*2(RIO);

/* Six decryption rounds: the same subkeys as enc_rounds, descending. */
#define dec_rounds(i) \
	roundsm(RAB, i + 7, RCD); \
	roundsm(RCD, i + 6, RAB); \
	roundsm(RAB, i + 5, RCD); \
	roundsm(RCD, i + 4, RAB); \
	roundsm(RAB, i + 3, RCD); \
	roundsm(RCD, i + 2, RAB);

/* Decryption-side fls: kl/kr exchanged relative to enc_fls. */
#define dec_fls(i) \
	fls(RAB, RCD, i + 1, i + 0);

/* As enc_inpack, but xors the subkey at index `max` into RAB0. */
#define dec_inpack(max) \
	movq (RIO), RAB0; \
	bswapq RAB0; \
	rolq $32, RAB0; \
	movq 4*2(RIO), RCD0; \
	bswapq RCD0; \
	rorq $32, RCD0; \
	xorq key_table(CTX, max, 8), RAB0;

/* As enc_outunpack(mov, ...), but xors subkey 0 into RCD0. */
#define dec_outunpack() \
	xorq key_table(CTX), RCD0; \
	rorq $32, RCD0; \
	bswapq RCD0; \
	movq RCD0, (RIO); \
	rolq $32, RAB0; \
	bswapq RAB0; \
	movq RAB0, 4*2(RIO);

/*
 * __camellia_enc_blk: encrypt one 16-byte block.
 *
 * Arguments are moved out of the way first because their registers are
 * reused: %rcx becomes RCD0, %rsi becomes RIO/RT0 and %rdx becomes RCD1.
 * %r12 (RT1) is callee-saved and is kept in RR12 for the duration.
 */
SYM_FUNC_START(__camellia_enc_blk)
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 *	%rcx: bool xor
	 */
	movq %r12, RR12;

	movq %rcx, RXOR;
	movq %rsi, RDST;
	movq %rdx, RIO;

	enc_inpack();

	enc_rounds(0);
	enc_fls(8);
	enc_rounds(8);
	enc_fls(16);
	enc_rounds(16);
	movl $24, RT1d; /* max */

	/* key_length == 16: stop here, the last subkey is at index 24 */
	cmpb $16, key_length(CTX);
	je .L__enc_done;

	enc_fls(24);
	enc_rounds(24);
	movl $32, RT1d; /* max */

.L__enc_done:
	testb RXORbl, RXORbl;
	movq RDST, RIO; /* movq leaves the flags from testb intact */

	jnz .L__enc_xor;

	enc_outunpack(mov, RT1);

	movq RR12, %r12;
	RET;

.L__enc_xor:
	enc_outunpack(xor, RT1);

	movq RR12, %r12;
	RET;
SYM_FUNC_END(__camellia_enc_blk)

/*
 * camellia_dec_blk: decrypt one 16-byte block.
 *
 * `max` (24 when key_length == 16, otherwise 32) is computed up front in
 * RT2.  It is only needed by dec_inpack and the compare that follows,
 * both of which run before the round macros overwrite RT2.  RXORd is
 * used here purely as a temporary for the cmov.
 */
SYM_FUNC_START(camellia_dec_blk)
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 */
	cmpl $16, key_length(CTX);
	movl $32, RT2d;
	movl $24, RXORd;
	cmovel RXORd, RT2d; /* max */

	movq %r12, RR12;
	movq %rsi, RDST;
	movq %rdx, RIO;

	dec_inpack(RT2);

	/* max == 24: skip the extra round group */
	cmpb $24, RT2bl;
	je .L__dec_rounds16;

	dec_rounds(24);
	dec_fls(24);

.L__dec_rounds16:
	dec_rounds(16);
	dec_fls(16);
	dec_rounds(8);
	dec_fls(8);
	dec_rounds(0);

	movq RDST, RIO;

	dec_outunpack();

	movq RR12, %r12;
	RET;
SYM_FUNC_END(camellia_dec_blk)

/**********************************************************************
  2-way camellia
 **********************************************************************/

/*
 * roundsm2(ab, subkey, cd): one round on two blocks at once.
 *
 * The subkey is loaded once into RT2 and xored straight into cd##1.
 * Block 0 is processed as in the 1-way round (table entries accumulate
 * alternately in cd##0 and RT2); block 1 accumulates every table entry
 * directly in cd##1.  The final `cd##0 ^= RT2` of block 0 is issued
 * between block 1's table lookups.  ab##0 and ab##1 each rotate by
 * 4 * 16 = 64 bits and so keep their original values.
 *
 * Clobbers: RT0 (= RIO), RT1, RT2, flags.
 */
#define roundsm2(ab, subkey, cd) \
	movq (key_table + ((subkey) * 2) * 4)(CTX), RT2; \
	xorq RT2, cd ## 1; \
	\
	xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 0, cd ## 0); \
	xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 0, RT2); \
	xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 0, cd ## 0); \
	xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 0, RT2); \
	\
		xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 1, cd ## 1); \
		xorq RT2, cd ## 0; \
		xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 1, cd ## 1); \
		xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 1, cd ## 1); \
		xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 1, cd ## 1);

/*
 * fls2(l, r, kl, kr): the fls mixing step applied to both blocks.
 * For each block n in {0, 1}, with lo/hi meaning bits 0..31 / 32..63:
 *
 *   l_n.hi ^= rol32(l_n.lo & kl.lo, 1)
 *   r_n.lo ^= (r_n.hi | kr.hi)
 *   l_n.lo ^= (l_n.hi | kl.hi)
 *   r_n.hi ^= rol32(r_n.lo & kr.lo, 1)
 *
 * The first two steps run for block 0 then block 1, followed by the last
 * two steps for block 0 then block 1 (block 1 code is extra-indented).
 *
 * Clobbers: RT0 (= RIO), RT1, RT2, flags.
 */
#define fls2(l, r, kl, kr) \
	movl (key_table + ((kl) * 2) * 4)(CTX), RT0d; \
	andl l ## 0d, RT0d; \
	roll $1, RT0d; \
	shlq $32, RT0; \
	xorq RT0, l ## 0; \
	movq (key_table + ((kr) * 2) * 4)(CTX), RT1; \
	orq r ## 0, RT1; \
	shrq $32, RT1; \
	xorq RT1, r ## 0; \
	\
		movl (key_table + ((kl) * 2) * 4)(CTX), RT2d; \
		andl l ## 1d, RT2d; \
		roll $1, RT2d; \
		shlq $32, RT2; \
		xorq RT2, l ## 1; \
		movq (key_table + ((kr) * 2) * 4)(CTX), RT0; \
		orq r ## 1, RT0; \
		shrq $32, RT0; \
		xorq RT0, r ## 1; \
	\
	movq (key_table + ((kl) * 2) * 4)(CTX), RT1; \
	orq l ## 0, RT1; \
	shrq $32, RT1; \
	xorq RT1, l ## 0; \
	movl (key_table + ((kr) * 2) * 4)(CTX), RT2d; \
	andl r ## 0d, RT2d; \
	roll $1, RT2d; \
	shlq $32, RT2; \
	xorq RT2, r ## 0; \
	\
		movq (key_table + ((kl) * 2) * 4)(CTX), RT0; \
		orq l ## 1, RT0; \
		shrq $32, RT0; \
		xorq RT0, l ## 1; \
		movl (key_table + ((kr) * 2) * 4)(CTX), RT1d; \
		andl r ## 1d, RT1d; \
		roll $1, RT1d; \
		shlq $32, RT1; \
		xorq RT1, r ## 1;

/* Six 2-way encryption rounds using subkeys i+2 .. i+7, ascending. */
#define enc_rounds2(i) \
	roundsm2(RAB, i + 2, RCD); \
	roundsm2(RCD, i + 3, RAB); \
	roundsm2(RAB, i + 4, RCD); \
	roundsm2(RCD, i + 5, RAB); \
	roundsm2(RAB, i + 6, RCD); \
	roundsm2(RCD, i + 7, RAB);

/* Encryption-side fls2: kl = subkey i, kr = subkey i+1. */
#define enc_fls2(i) \
	fls2(RAB, RCD, i + 0, i + 1);

/*
 * Load two consecutive 16-byte blocks from (RIO) into RAB0/RCD0 and
 * RAB1/RCD1.  Each 8-byte half is byte-swapped and has its 32-bit words
 * swapped; subkey 0 is xored into RAB0 and RAB1.
 */
#define enc_inpack2() \
	movq (RIO), RAB0; \
	bswapq RAB0; \
	rorq $32, RAB0; \
	movq 4*2(RIO), RCD0; \
	bswapq RCD0; \
	rolq $32, RCD0; \
	xorq key_table(CTX), RAB0; \
	\
	movq 8*2(RIO), RAB1; \
	bswapq RAB1; \
	rorq $32, RAB1; \
	movq 12*2(RIO), RCD1; \
	bswapq RCD1; \
	rolq $32, RCD1; \
	xorq key_table(CTX), RAB1;

/*
 * Store two blocks to (RIO) with the halves of each block exchanged
 * (RCDn first, then RABn).  `max` is a register holding the index
 * (8-byte units) of the subkey xored into RCD0/RCD1; `op` is `mov` to
 * overwrite the destination or `xor` to xor the result into it.
 */
#define enc_outunpack2(op, max) \
	xorq key_table(CTX, max, 8), RCD0; \
	rolq $32, RCD0; \
	bswapq RCD0; \
	op ## q RCD0, (RIO); \
	rorq $32, RAB0; \
	bswapq RAB0; \
	op ## q RAB0, 4*2(RIO); \
	\
	xorq key_table(CTX, max, 8), RCD1; \
	rolq $32, RCD1; \
	bswapq RCD1; \
	op ## q RCD1, 8*2(RIO); \
	rorq $32, RAB1; \
	bswapq RAB1; \
	op ## q RAB1, 12*2(RIO);

/* Six 2-way decryption rounds: the same subkeys, descending. */
#define dec_rounds2(i) \
	roundsm2(RAB, i + 7, RCD); \
	roundsm2(RCD, i + 6, RAB); \
	roundsm2(RAB, i + 5, RCD); \
	roundsm2(RCD, i + 4, RAB); \
	roundsm2(RAB, i + 3, RCD); \
	roundsm2(RCD, i + 2, RAB);

/* Decryption-side fls2: kl/kr exchanged relative to enc_fls2. */
#define dec_fls2(i) \
	fls2(RAB, RCD, i + 1, i + 0);

/* As enc_inpack2, but xors the subkey at index `max` into RAB0/RAB1. */
#define dec_inpack2(max) \
	movq (RIO), RAB0; \
	bswapq RAB0; \
	rorq $32, RAB0; \
	movq 4*2(RIO), RCD0; \
	bswapq RCD0; \
	rolq $32, RCD0; \
	xorq key_table(CTX, max, 8), RAB0; \
	\
	movq 8*2(RIO), RAB1; \
	bswapq RAB1; \
	rorq $32, RAB1; \
	movq 12*2(RIO), RCD1; \
	bswapq RCD1; \
	rolq $32, RCD1; \
	xorq key_table(CTX, max, 8), RAB1;

/* As enc_outunpack2(mov, ...), but xors subkey 0 into RCD0/RCD1. */
#define dec_outunpack2() \
	xorq key_table(CTX), RCD0; \
	rolq $32, RCD0; \
	bswapq RCD0; \
	movq RCD0, (RIO); \
	rorq $32, RAB0; \
	bswapq RAB0; \
	movq RAB0, 4*2(RIO); \
	\
	xorq key_table(CTX), RCD1; \
	rolq $32, RCD1; \
	bswapq RCD1; \
	movq RCD1, 8*2(RIO); \
	rorq $32, RAB1; \
	bswapq RAB1; \
	movq RAB1, 12*2(RIO);

/*
 * __camellia_enc_blk_2way: encrypt two consecutive 16-byte blocks.
 *
 * %rbx (RAB1) is callee-saved and is pushed on the stack here, because
 * RXOR is occupied by the "xor" flag; %r12 (RT1) is kept in RR12.
 * `max` lives in RT2 and is set only after the last round group, since
 * the round macros use RT2 as scratch.
 */
SYM_FUNC_START(__camellia_enc_blk_2way)
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 *	%rcx: bool xor
	 */
	pushq %rbx;

	movq %r12, RR12;
	movq %rcx, RXOR;
	movq %rsi, RDST;
	movq %rdx, RIO;

	enc_inpack2();

	enc_rounds2(0);
	enc_fls2(8);
	enc_rounds2(8);
	enc_fls2(16);
	enc_rounds2(16);
	movl $24, RT2d; /* max */

	/* key_length == 16: stop here, the last subkey is at index 24 */
	cmpb $16, key_length(CTX);
	je .L__enc2_done;

	enc_fls2(24);
	enc_rounds2(24);
	movl $32, RT2d; /* max */

.L__enc2_done:
	testb RXORbl, RXORbl;
	movq RDST, RIO; /* movq leaves the flags from testb intact */
	jnz .L__enc2_xor;

	enc_outunpack2(mov, RT2);

	movq RR12, %r12;
	popq %rbx;
	RET;

.L__enc2_xor:
	enc_outunpack2(xor, RT2);

	movq RR12, %r12;
	popq %rbx;
	RET;
SYM_FUNC_END(__camellia_enc_blk_2way)

/*
 * camellia_dec_blk_2way: decrypt two consecutive 16-byte blocks.
 *
 * RXOR is first used as a temporary for the cmov that selects `max`
 * (24 when key_length == 16, otherwise 32), then reused to hold the
 * caller's %rbx - there is no xor flag on this path, so no stack push
 * is needed.  `max` in RT2 is consumed by dec_inpack2 and the compare
 * that follows, before the round macros overwrite RT2.
 */
SYM_FUNC_START(camellia_dec_blk_2way)
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 */
	cmpl $16, key_length(CTX);
	movl $32, RT2d;
	movl $24, RXORd;
	cmovel RXORd, RT2d; /* max */

	movq %rbx, RXOR;
	movq %r12, RR12;
	movq %rsi, RDST;
	movq %rdx, RIO;

	dec_inpack2(RT2);

	/* max == 24: skip the extra round group */
	cmpb $24, RT2bl;
	je .L__dec2_rounds16;

	dec_rounds2(24);
	dec_fls2(24);

.L__dec2_rounds16:
	dec_rounds2(16);
	dec_fls2(16);
	dec_rounds2(8);
	dec_fls2(8);
	dec_rounds2(0);

	movq RDST, RIO;

	dec_outunpack2();

	movq RR12, %r12;
	movq RXOR, %rbx;
	RET;
SYM_FUNC_END(camellia_dec_blk_2way)