/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * aes-ce-core.S - AES in CBC/CTR/XTS mode using ARMv8 Crypto Extensions
 *
 * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.arch		armv8-a
	.fpu		crypto-neon-fp-armv8
	.align		3

	/*
	 * enc_round - one full AES encryption round on \state using round
	 * key \key: AESE (AddRoundKey + SubBytes + ShiftRows) followed by
	 * AESMC (MixColumns).
	 */
	.macro		enc_round, state, key
	aese.8		\state, \key
	aesmc.8		\state, \state
	.endm

	/*
	 * dec_round - one full AES decryption round on \state using round
	 * key \key: AESD followed by AESIMC (inverse MixColumns).
	 */
	.macro		dec_round, state, key
	aesd.8		\state, \key
	aesimc.8	\state, \state
	.endm

	/*
	 * enc_dround - two consecutive encryption rounds on the single block
	 * held in q0, using \key1 and then \key2.
	 */
	.macro		enc_dround, key1, key2
	enc_round	q0, \key1
	enc_round	q0, \key2
	.endm

	/*
	 * dec_dround - two consecutive decryption rounds on the single block
	 * held in q0, using \key1 and then \key2.
	 */
	.macro		dec_dround, key1, key2
	dec_round	q0, \key1
	dec_round	q0, \key2
	.endm

	/*
	 * enc_fround - final rounds of an encryption of q0: one full round
	 * with \key1, one round without MixColumns with \key2, and the
	 * closing xor with the last round key \key3.
	 */
	.macro		enc_fround, key1, key2, key3
	enc_round	q0, \key1
	aese.8		q0, \key2
	veor		q0, q0, \key3
	.endm

	/*
	 * dec_fround - final rounds of a decryption of q0: one full round
	 * with \key1, one round without inverse MixColumns with \key2, and
	 * the closing xor with the last round key \key3.
	 */
	.macro		dec_fround, key1, key2, key3
	dec_round	q0, \key1
	aesd.8		q0, \key2
	veor		q0, q0, \key3
	.endm
4762306a36Sopenharmony_ci 4862306a36Sopenharmony_ci .macro enc_dround_4x, key1, key2 4962306a36Sopenharmony_ci enc_round q0, \key1 5062306a36Sopenharmony_ci enc_round q1, \key1 5162306a36Sopenharmony_ci enc_round q2, \key1 5262306a36Sopenharmony_ci enc_round q3, \key1 5362306a36Sopenharmony_ci enc_round q0, \key2 5462306a36Sopenharmony_ci enc_round q1, \key2 5562306a36Sopenharmony_ci enc_round q2, \key2 5662306a36Sopenharmony_ci enc_round q3, \key2 5762306a36Sopenharmony_ci .endm 5862306a36Sopenharmony_ci 5962306a36Sopenharmony_ci .macro dec_dround_4x, key1, key2 6062306a36Sopenharmony_ci dec_round q0, \key1 6162306a36Sopenharmony_ci dec_round q1, \key1 6262306a36Sopenharmony_ci dec_round q2, \key1 6362306a36Sopenharmony_ci dec_round q3, \key1 6462306a36Sopenharmony_ci dec_round q0, \key2 6562306a36Sopenharmony_ci dec_round q1, \key2 6662306a36Sopenharmony_ci dec_round q2, \key2 6762306a36Sopenharmony_ci dec_round q3, \key2 6862306a36Sopenharmony_ci .endm 6962306a36Sopenharmony_ci 7062306a36Sopenharmony_ci .macro enc_fround_4x, key1, key2, key3 7162306a36Sopenharmony_ci enc_round q0, \key1 7262306a36Sopenharmony_ci enc_round q1, \key1 7362306a36Sopenharmony_ci enc_round q2, \key1 7462306a36Sopenharmony_ci enc_round q3, \key1 7562306a36Sopenharmony_ci aese.8 q0, \key2 7662306a36Sopenharmony_ci aese.8 q1, \key2 7762306a36Sopenharmony_ci aese.8 q2, \key2 7862306a36Sopenharmony_ci aese.8 q3, \key2 7962306a36Sopenharmony_ci veor q0, q0, \key3 8062306a36Sopenharmony_ci veor q1, q1, \key3 8162306a36Sopenharmony_ci veor q2, q2, \key3 8262306a36Sopenharmony_ci veor q3, q3, \key3 8362306a36Sopenharmony_ci .endm 8462306a36Sopenharmony_ci 8562306a36Sopenharmony_ci .macro dec_fround_4x, key1, key2, key3 8662306a36Sopenharmony_ci dec_round q0, \key1 8762306a36Sopenharmony_ci dec_round q1, \key1 8862306a36Sopenharmony_ci dec_round q2, \key1 8962306a36Sopenharmony_ci dec_round q3, \key1 9062306a36Sopenharmony_ci aesd.8 q0, \key2 9162306a36Sopenharmony_ci aesd.8 q1, \key2 
9262306a36Sopenharmony_ci aesd.8 q2, \key2 9362306a36Sopenharmony_ci aesd.8 q3, \key2 9462306a36Sopenharmony_ci veor q0, q0, \key3 9562306a36Sopenharmony_ci veor q1, q1, \key3 9662306a36Sopenharmony_ci veor q2, q2, \key3 9762306a36Sopenharmony_ci veor q3, q3, \key3 9862306a36Sopenharmony_ci .endm 9962306a36Sopenharmony_ci 10062306a36Sopenharmony_ci .macro do_block, dround, fround 10162306a36Sopenharmony_ci cmp r3, #12 @ which key size? 10262306a36Sopenharmony_ci vld1.32 {q10-q11}, [ip]! 10362306a36Sopenharmony_ci \dround q8, q9 10462306a36Sopenharmony_ci vld1.32 {q12-q13}, [ip]! 10562306a36Sopenharmony_ci \dround q10, q11 10662306a36Sopenharmony_ci vld1.32 {q10-q11}, [ip]! 10762306a36Sopenharmony_ci \dround q12, q13 10862306a36Sopenharmony_ci vld1.32 {q12-q13}, [ip]! 10962306a36Sopenharmony_ci \dround q10, q11 11062306a36Sopenharmony_ci blo 0f @ AES-128: 10 rounds 11162306a36Sopenharmony_ci vld1.32 {q10-q11}, [ip]! 11262306a36Sopenharmony_ci \dround q12, q13 11362306a36Sopenharmony_ci beq 1f @ AES-192: 12 rounds 11462306a36Sopenharmony_ci vld1.32 {q12-q13}, [ip] 11562306a36Sopenharmony_ci \dround q10, q11 11662306a36Sopenharmony_ci0: \fround q12, q13, q14 11762306a36Sopenharmony_ci bx lr 11862306a36Sopenharmony_ci 11962306a36Sopenharmony_ci1: \fround q10, q11, q14 12062306a36Sopenharmony_ci bx lr 12162306a36Sopenharmony_ci .endm 12262306a36Sopenharmony_ci 12362306a36Sopenharmony_ci /* 12462306a36Sopenharmony_ci * Internal, non-AAPCS compliant functions that implement the core AES 12562306a36Sopenharmony_ci * transforms. 
These should preserve all registers except q0 - q2 and ip 12662306a36Sopenharmony_ci * Arguments: 12762306a36Sopenharmony_ci * q0 : first in/output block 12862306a36Sopenharmony_ci * q1 : second in/output block (_4x version only) 12962306a36Sopenharmony_ci * q2 : third in/output block (_4x version only) 13062306a36Sopenharmony_ci * q3 : fourth in/output block (_4x version only) 13162306a36Sopenharmony_ci * q8 : first round key 13262306a36Sopenharmony_ci * q9 : secound round key 13362306a36Sopenharmony_ci * q14 : final round key 13462306a36Sopenharmony_ci * r2 : address of round key array 13562306a36Sopenharmony_ci * r3 : number of rounds 13662306a36Sopenharmony_ci */ 13762306a36Sopenharmony_ci .align 6 13862306a36Sopenharmony_ciaes_encrypt: 13962306a36Sopenharmony_ci add ip, r2, #32 @ 3rd round key 14062306a36Sopenharmony_ci.Laes_encrypt_tweak: 14162306a36Sopenharmony_ci do_block enc_dround, enc_fround 14262306a36Sopenharmony_ciENDPROC(aes_encrypt) 14362306a36Sopenharmony_ci 14462306a36Sopenharmony_ci .align 6 14562306a36Sopenharmony_ciaes_decrypt: 14662306a36Sopenharmony_ci add ip, r2, #32 @ 3rd round key 14762306a36Sopenharmony_ci do_block dec_dround, dec_fround 14862306a36Sopenharmony_ciENDPROC(aes_decrypt) 14962306a36Sopenharmony_ci 15062306a36Sopenharmony_ci .align 6 15162306a36Sopenharmony_ciaes_encrypt_4x: 15262306a36Sopenharmony_ci add ip, r2, #32 @ 3rd round key 15362306a36Sopenharmony_ci do_block enc_dround_4x, enc_fround_4x 15462306a36Sopenharmony_ciENDPROC(aes_encrypt_4x) 15562306a36Sopenharmony_ci 15662306a36Sopenharmony_ci .align 6 15762306a36Sopenharmony_ciaes_decrypt_4x: 15862306a36Sopenharmony_ci add ip, r2, #32 @ 3rd round key 15962306a36Sopenharmony_ci do_block dec_dround_4x, dec_fround_4x 16062306a36Sopenharmony_ciENDPROC(aes_decrypt_4x) 16162306a36Sopenharmony_ci 16262306a36Sopenharmony_ci .macro prepare_key, rk, rounds 16362306a36Sopenharmony_ci add ip, \rk, \rounds, lsl #4 16462306a36Sopenharmony_ci vld1.32 {q8-q9}, [\rk] @ load first 2 round 
keys 16562306a36Sopenharmony_ci vld1.32 {q14}, [ip] @ load last round key 16662306a36Sopenharmony_ci .endm 16762306a36Sopenharmony_ci 16862306a36Sopenharmony_ci /* 16962306a36Sopenharmony_ci * aes_ecb_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds, 17062306a36Sopenharmony_ci * int blocks) 17162306a36Sopenharmony_ci * aes_ecb_decrypt(u8 out[], u8 const in[], u32 const rk[], int rounds, 17262306a36Sopenharmony_ci * int blocks) 17362306a36Sopenharmony_ci */ 17462306a36Sopenharmony_ciENTRY(ce_aes_ecb_encrypt) 17562306a36Sopenharmony_ci push {r4, lr} 17662306a36Sopenharmony_ci ldr r4, [sp, #8] 17762306a36Sopenharmony_ci prepare_key r2, r3 17862306a36Sopenharmony_ci.Lecbencloop4x: 17962306a36Sopenharmony_ci subs r4, r4, #4 18062306a36Sopenharmony_ci bmi .Lecbenc1x 18162306a36Sopenharmony_ci vld1.8 {q0-q1}, [r1]! 18262306a36Sopenharmony_ci vld1.8 {q2-q3}, [r1]! 18362306a36Sopenharmony_ci bl aes_encrypt_4x 18462306a36Sopenharmony_ci vst1.8 {q0-q1}, [r0]! 18562306a36Sopenharmony_ci vst1.8 {q2-q3}, [r0]! 18662306a36Sopenharmony_ci b .Lecbencloop4x 18762306a36Sopenharmony_ci.Lecbenc1x: 18862306a36Sopenharmony_ci adds r4, r4, #4 18962306a36Sopenharmony_ci beq .Lecbencout 19062306a36Sopenharmony_ci.Lecbencloop: 19162306a36Sopenharmony_ci vld1.8 {q0}, [r1]! 19262306a36Sopenharmony_ci bl aes_encrypt 19362306a36Sopenharmony_ci vst1.8 {q0}, [r0]! 19462306a36Sopenharmony_ci subs r4, r4, #1 19562306a36Sopenharmony_ci bne .Lecbencloop 19662306a36Sopenharmony_ci.Lecbencout: 19762306a36Sopenharmony_ci pop {r4, pc} 19862306a36Sopenharmony_ciENDPROC(ce_aes_ecb_encrypt) 19962306a36Sopenharmony_ci 20062306a36Sopenharmony_ciENTRY(ce_aes_ecb_decrypt) 20162306a36Sopenharmony_ci push {r4, lr} 20262306a36Sopenharmony_ci ldr r4, [sp, #8] 20362306a36Sopenharmony_ci prepare_key r2, r3 20462306a36Sopenharmony_ci.Lecbdecloop4x: 20562306a36Sopenharmony_ci subs r4, r4, #4 20662306a36Sopenharmony_ci bmi .Lecbdec1x 20762306a36Sopenharmony_ci vld1.8 {q0-q1}, [r1]! 
20862306a36Sopenharmony_ci vld1.8 {q2-q3}, [r1]! 20962306a36Sopenharmony_ci bl aes_decrypt_4x 21062306a36Sopenharmony_ci vst1.8 {q0-q1}, [r0]! 21162306a36Sopenharmony_ci vst1.8 {q2-q3}, [r0]! 21262306a36Sopenharmony_ci b .Lecbdecloop4x 21362306a36Sopenharmony_ci.Lecbdec1x: 21462306a36Sopenharmony_ci adds r4, r4, #4 21562306a36Sopenharmony_ci beq .Lecbdecout 21662306a36Sopenharmony_ci.Lecbdecloop: 21762306a36Sopenharmony_ci vld1.8 {q0}, [r1]! 21862306a36Sopenharmony_ci bl aes_decrypt 21962306a36Sopenharmony_ci vst1.8 {q0}, [r0]! 22062306a36Sopenharmony_ci subs r4, r4, #1 22162306a36Sopenharmony_ci bne .Lecbdecloop 22262306a36Sopenharmony_ci.Lecbdecout: 22362306a36Sopenharmony_ci pop {r4, pc} 22462306a36Sopenharmony_ciENDPROC(ce_aes_ecb_decrypt) 22562306a36Sopenharmony_ci 22662306a36Sopenharmony_ci /* 22762306a36Sopenharmony_ci * aes_cbc_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds, 22862306a36Sopenharmony_ci * int blocks, u8 iv[]) 22962306a36Sopenharmony_ci * aes_cbc_decrypt(u8 out[], u8 const in[], u32 const rk[], int rounds, 23062306a36Sopenharmony_ci * int blocks, u8 iv[]) 23162306a36Sopenharmony_ci */ 23262306a36Sopenharmony_ciENTRY(ce_aes_cbc_encrypt) 23362306a36Sopenharmony_ci push {r4-r6, lr} 23462306a36Sopenharmony_ci ldrd r4, r5, [sp, #16] 23562306a36Sopenharmony_ci vld1.8 {q0}, [r5] 23662306a36Sopenharmony_ci prepare_key r2, r3 23762306a36Sopenharmony_ci.Lcbcencloop: 23862306a36Sopenharmony_ci vld1.8 {q1}, [r1]! @ get next pt block 23962306a36Sopenharmony_ci veor q0, q0, q1 @ ..and xor with iv 24062306a36Sopenharmony_ci bl aes_encrypt 24162306a36Sopenharmony_ci vst1.8 {q0}, [r0]! 
24262306a36Sopenharmony_ci subs r4, r4, #1 24362306a36Sopenharmony_ci bne .Lcbcencloop 24462306a36Sopenharmony_ci vst1.8 {q0}, [r5] 24562306a36Sopenharmony_ci pop {r4-r6, pc} 24662306a36Sopenharmony_ciENDPROC(ce_aes_cbc_encrypt) 24762306a36Sopenharmony_ci 24862306a36Sopenharmony_ciENTRY(ce_aes_cbc_decrypt) 24962306a36Sopenharmony_ci push {r4-r6, lr} 25062306a36Sopenharmony_ci ldrd r4, r5, [sp, #16] 25162306a36Sopenharmony_ci vld1.8 {q15}, [r5] @ keep iv in q15 25262306a36Sopenharmony_ci prepare_key r2, r3 25362306a36Sopenharmony_ci.Lcbcdecloop4x: 25462306a36Sopenharmony_ci subs r4, r4, #4 25562306a36Sopenharmony_ci bmi .Lcbcdec1x 25662306a36Sopenharmony_ci vld1.8 {q0-q1}, [r1]! 25762306a36Sopenharmony_ci vld1.8 {q2-q3}, [r1]! 25862306a36Sopenharmony_ci vmov q4, q0 25962306a36Sopenharmony_ci vmov q5, q1 26062306a36Sopenharmony_ci vmov q6, q2 26162306a36Sopenharmony_ci vmov q7, q3 26262306a36Sopenharmony_ci bl aes_decrypt_4x 26362306a36Sopenharmony_ci veor q0, q0, q15 26462306a36Sopenharmony_ci veor q1, q1, q4 26562306a36Sopenharmony_ci veor q2, q2, q5 26662306a36Sopenharmony_ci veor q3, q3, q6 26762306a36Sopenharmony_ci vmov q15, q7 26862306a36Sopenharmony_ci vst1.8 {q0-q1}, [r0]! 26962306a36Sopenharmony_ci vst1.8 {q2-q3}, [r0]! 27062306a36Sopenharmony_ci b .Lcbcdecloop4x 27162306a36Sopenharmony_ci.Lcbcdec1x: 27262306a36Sopenharmony_ci adds r4, r4, #4 27362306a36Sopenharmony_ci beq .Lcbcdecout 27462306a36Sopenharmony_ci vmov q6, q14 @ preserve last round key 27562306a36Sopenharmony_ci.Lcbcdecloop: 27662306a36Sopenharmony_ci vld1.8 {q0}, [r1]! @ get next ct block 27762306a36Sopenharmony_ci veor q14, q15, q6 @ combine prev ct with last key 27862306a36Sopenharmony_ci vmov q15, q0 27962306a36Sopenharmony_ci bl aes_decrypt 28062306a36Sopenharmony_ci vst1.8 {q0}, [r0]! 
28162306a36Sopenharmony_ci subs r4, r4, #1 28262306a36Sopenharmony_ci bne .Lcbcdecloop 28362306a36Sopenharmony_ci.Lcbcdecout: 28462306a36Sopenharmony_ci vst1.8 {q15}, [r5] @ keep iv in q15 28562306a36Sopenharmony_ci pop {r4-r6, pc} 28662306a36Sopenharmony_ciENDPROC(ce_aes_cbc_decrypt) 28762306a36Sopenharmony_ci 28862306a36Sopenharmony_ci 28962306a36Sopenharmony_ci /* 29062306a36Sopenharmony_ci * ce_aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[], 29162306a36Sopenharmony_ci * int rounds, int bytes, u8 const iv[]) 29262306a36Sopenharmony_ci * ce_aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[], 29362306a36Sopenharmony_ci * int rounds, int bytes, u8 const iv[]) 29462306a36Sopenharmony_ci */ 29562306a36Sopenharmony_ci 29662306a36Sopenharmony_ciENTRY(ce_aes_cbc_cts_encrypt) 29762306a36Sopenharmony_ci push {r4-r6, lr} 29862306a36Sopenharmony_ci ldrd r4, r5, [sp, #16] 29962306a36Sopenharmony_ci 30062306a36Sopenharmony_ci movw ip, :lower16:.Lcts_permute_table 30162306a36Sopenharmony_ci movt ip, :upper16:.Lcts_permute_table 30262306a36Sopenharmony_ci sub r4, r4, #16 30362306a36Sopenharmony_ci add lr, ip, #32 30462306a36Sopenharmony_ci add ip, ip, r4 30562306a36Sopenharmony_ci sub lr, lr, r4 30662306a36Sopenharmony_ci vld1.8 {q5}, [ip] 30762306a36Sopenharmony_ci vld1.8 {q6}, [lr] 30862306a36Sopenharmony_ci 30962306a36Sopenharmony_ci add ip, r1, r4 31062306a36Sopenharmony_ci vld1.8 {q0}, [r1] @ overlapping loads 31162306a36Sopenharmony_ci vld1.8 {q3}, [ip] 31262306a36Sopenharmony_ci 31362306a36Sopenharmony_ci vld1.8 {q1}, [r5] @ get iv 31462306a36Sopenharmony_ci prepare_key r2, r3 31562306a36Sopenharmony_ci 31662306a36Sopenharmony_ci veor q0, q0, q1 @ xor with iv 31762306a36Sopenharmony_ci bl aes_encrypt 31862306a36Sopenharmony_ci 31962306a36Sopenharmony_ci vtbl.8 d4, {d0-d1}, d10 32062306a36Sopenharmony_ci vtbl.8 d5, {d0-d1}, d11 32162306a36Sopenharmony_ci vtbl.8 d2, {d6-d7}, d12 32262306a36Sopenharmony_ci vtbl.8 d3, {d6-d7}, d13 
32362306a36Sopenharmony_ci 32462306a36Sopenharmony_ci veor q0, q0, q1 32562306a36Sopenharmony_ci bl aes_encrypt 32662306a36Sopenharmony_ci 32762306a36Sopenharmony_ci add r4, r0, r4 32862306a36Sopenharmony_ci vst1.8 {q2}, [r4] @ overlapping stores 32962306a36Sopenharmony_ci vst1.8 {q0}, [r0] 33062306a36Sopenharmony_ci 33162306a36Sopenharmony_ci pop {r4-r6, pc} 33262306a36Sopenharmony_ciENDPROC(ce_aes_cbc_cts_encrypt) 33362306a36Sopenharmony_ci 33462306a36Sopenharmony_ciENTRY(ce_aes_cbc_cts_decrypt) 33562306a36Sopenharmony_ci push {r4-r6, lr} 33662306a36Sopenharmony_ci ldrd r4, r5, [sp, #16] 33762306a36Sopenharmony_ci 33862306a36Sopenharmony_ci movw ip, :lower16:.Lcts_permute_table 33962306a36Sopenharmony_ci movt ip, :upper16:.Lcts_permute_table 34062306a36Sopenharmony_ci sub r4, r4, #16 34162306a36Sopenharmony_ci add lr, ip, #32 34262306a36Sopenharmony_ci add ip, ip, r4 34362306a36Sopenharmony_ci sub lr, lr, r4 34462306a36Sopenharmony_ci vld1.8 {q5}, [ip] 34562306a36Sopenharmony_ci vld1.8 {q6}, [lr] 34662306a36Sopenharmony_ci 34762306a36Sopenharmony_ci add ip, r1, r4 34862306a36Sopenharmony_ci vld1.8 {q0}, [r1] @ overlapping loads 34962306a36Sopenharmony_ci vld1.8 {q1}, [ip] 35062306a36Sopenharmony_ci 35162306a36Sopenharmony_ci vld1.8 {q3}, [r5] @ get iv 35262306a36Sopenharmony_ci prepare_key r2, r3 35362306a36Sopenharmony_ci 35462306a36Sopenharmony_ci bl aes_decrypt 35562306a36Sopenharmony_ci 35662306a36Sopenharmony_ci vtbl.8 d4, {d0-d1}, d10 35762306a36Sopenharmony_ci vtbl.8 d5, {d0-d1}, d11 35862306a36Sopenharmony_ci vtbx.8 d0, {d2-d3}, d12 35962306a36Sopenharmony_ci vtbx.8 d1, {d2-d3}, d13 36062306a36Sopenharmony_ci 36162306a36Sopenharmony_ci veor q1, q1, q2 36262306a36Sopenharmony_ci bl aes_decrypt 36362306a36Sopenharmony_ci veor q0, q0, q3 @ xor with iv 36462306a36Sopenharmony_ci 36562306a36Sopenharmony_ci add r4, r0, r4 36662306a36Sopenharmony_ci vst1.8 {q1}, [r4] @ overlapping stores 36762306a36Sopenharmony_ci vst1.8 {q0}, [r0] 36862306a36Sopenharmony_ci 
36962306a36Sopenharmony_ci pop {r4-r6, pc} 37062306a36Sopenharmony_ciENDPROC(ce_aes_cbc_cts_decrypt) 37162306a36Sopenharmony_ci 37262306a36Sopenharmony_ci 37362306a36Sopenharmony_ci /* 37462306a36Sopenharmony_ci * aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds, 37562306a36Sopenharmony_ci * int blocks, u8 ctr[]) 37662306a36Sopenharmony_ci */ 37762306a36Sopenharmony_ciENTRY(ce_aes_ctr_encrypt) 37862306a36Sopenharmony_ci push {r4-r6, lr} 37962306a36Sopenharmony_ci ldrd r4, r5, [sp, #16] 38062306a36Sopenharmony_ci vld1.8 {q7}, [r5] @ load ctr 38162306a36Sopenharmony_ci prepare_key r2, r3 38262306a36Sopenharmony_ci vmov r6, s31 @ keep swabbed ctr in r6 38362306a36Sopenharmony_ci rev r6, r6 38462306a36Sopenharmony_ci cmn r6, r4 @ 32 bit overflow? 38562306a36Sopenharmony_ci bcs .Lctrloop 38662306a36Sopenharmony_ci.Lctrloop4x: 38762306a36Sopenharmony_ci subs r4, r4, #4 38862306a36Sopenharmony_ci bmi .Lctr1x 38962306a36Sopenharmony_ci 39062306a36Sopenharmony_ci /* 39162306a36Sopenharmony_ci * NOTE: the sequence below has been carefully tweaked to avoid 39262306a36Sopenharmony_ci * a silicon erratum that exists in Cortex-A57 (#1742098) and 39362306a36Sopenharmony_ci * Cortex-A72 (#1655431) cores, where AESE/AESMC instruction pairs 39462306a36Sopenharmony_ci * may produce an incorrect result if they take their input from a 39562306a36Sopenharmony_ci * register of which a single 32-bit lane has been updated the last 39662306a36Sopenharmony_ci * time it was modified. To work around this, the lanes of registers 39762306a36Sopenharmony_ci * q0-q3 below are not manipulated individually, and the different 39862306a36Sopenharmony_ci * counter values are prepared by successive manipulations of q7. 
39962306a36Sopenharmony_ci */ 40062306a36Sopenharmony_ci add ip, r6, #1 40162306a36Sopenharmony_ci vmov q0, q7 40262306a36Sopenharmony_ci rev ip, ip 40362306a36Sopenharmony_ci add lr, r6, #2 40462306a36Sopenharmony_ci vmov s31, ip @ set lane 3 of q1 via q7 40562306a36Sopenharmony_ci add ip, r6, #3 40662306a36Sopenharmony_ci rev lr, lr 40762306a36Sopenharmony_ci vmov q1, q7 40862306a36Sopenharmony_ci vmov s31, lr @ set lane 3 of q2 via q7 40962306a36Sopenharmony_ci rev ip, ip 41062306a36Sopenharmony_ci vmov q2, q7 41162306a36Sopenharmony_ci vmov s31, ip @ set lane 3 of q3 via q7 41262306a36Sopenharmony_ci add r6, r6, #4 41362306a36Sopenharmony_ci vmov q3, q7 41462306a36Sopenharmony_ci 41562306a36Sopenharmony_ci vld1.8 {q4-q5}, [r1]! 41662306a36Sopenharmony_ci vld1.8 {q6}, [r1]! 41762306a36Sopenharmony_ci vld1.8 {q15}, [r1]! 41862306a36Sopenharmony_ci bl aes_encrypt_4x 41962306a36Sopenharmony_ci veor q0, q0, q4 42062306a36Sopenharmony_ci veor q1, q1, q5 42162306a36Sopenharmony_ci veor q2, q2, q6 42262306a36Sopenharmony_ci veor q3, q3, q15 42362306a36Sopenharmony_ci rev ip, r6 42462306a36Sopenharmony_ci vst1.8 {q0-q1}, [r0]! 42562306a36Sopenharmony_ci vst1.8 {q2-q3}, [r0]! 42662306a36Sopenharmony_ci vmov s31, ip 42762306a36Sopenharmony_ci b .Lctrloop4x 42862306a36Sopenharmony_ci.Lctr1x: 42962306a36Sopenharmony_ci adds r4, r4, #4 43062306a36Sopenharmony_ci beq .Lctrout 43162306a36Sopenharmony_ci.Lctrloop: 43262306a36Sopenharmony_ci vmov q0, q7 43362306a36Sopenharmony_ci bl aes_encrypt 43462306a36Sopenharmony_ci 43562306a36Sopenharmony_ci adds r6, r6, #1 @ increment BE ctr 43662306a36Sopenharmony_ci rev ip, r6 43762306a36Sopenharmony_ci vmov s31, ip 43862306a36Sopenharmony_ci bcs .Lctrcarry 43962306a36Sopenharmony_ci 44062306a36Sopenharmony_ci.Lctrcarrydone: 44162306a36Sopenharmony_ci subs r4, r4, #1 44262306a36Sopenharmony_ci bmi .Lctrtailblock @ blocks < 0 means tail block 44362306a36Sopenharmony_ci vld1.8 {q3}, [r1]! 
44462306a36Sopenharmony_ci veor q3, q0, q3 44562306a36Sopenharmony_ci vst1.8 {q3}, [r0]! 44662306a36Sopenharmony_ci bne .Lctrloop 44762306a36Sopenharmony_ci 44862306a36Sopenharmony_ci.Lctrout: 44962306a36Sopenharmony_ci vst1.8 {q7}, [r5] @ return next CTR value 45062306a36Sopenharmony_ci pop {r4-r6, pc} 45162306a36Sopenharmony_ci 45262306a36Sopenharmony_ci.Lctrtailblock: 45362306a36Sopenharmony_ci vst1.8 {q0}, [r0, :64] @ return the key stream 45462306a36Sopenharmony_ci b .Lctrout 45562306a36Sopenharmony_ci 45662306a36Sopenharmony_ci.Lctrcarry: 45762306a36Sopenharmony_ci .irp sreg, s30, s29, s28 45862306a36Sopenharmony_ci vmov ip, \sreg @ load next word of ctr 45962306a36Sopenharmony_ci rev ip, ip @ ... to handle the carry 46062306a36Sopenharmony_ci adds ip, ip, #1 46162306a36Sopenharmony_ci rev ip, ip 46262306a36Sopenharmony_ci vmov \sreg, ip 46362306a36Sopenharmony_ci bcc .Lctrcarrydone 46462306a36Sopenharmony_ci .endr 46562306a36Sopenharmony_ci b .Lctrcarrydone 46662306a36Sopenharmony_ciENDPROC(ce_aes_ctr_encrypt) 46762306a36Sopenharmony_ci 46862306a36Sopenharmony_ci /* 46962306a36Sopenharmony_ci * aes_xts_encrypt(u8 out[], u8 const in[], u32 const rk1[], int rounds, 47062306a36Sopenharmony_ci * int bytes, u8 iv[], u32 const rk2[], int first) 47162306a36Sopenharmony_ci * aes_xts_decrypt(u8 out[], u8 const in[], u32 const rk1[], int rounds, 47262306a36Sopenharmony_ci * int bytes, u8 iv[], u32 const rk2[], int first) 47362306a36Sopenharmony_ci */ 47462306a36Sopenharmony_ci 47562306a36Sopenharmony_ci .macro next_tweak, out, in, const, tmp 47662306a36Sopenharmony_ci vshr.s64 \tmp, \in, #63 47762306a36Sopenharmony_ci vand \tmp, \tmp, \const 47862306a36Sopenharmony_ci vadd.u64 \out, \in, \in 47962306a36Sopenharmony_ci vext.8 \tmp, \tmp, \tmp, #8 48062306a36Sopenharmony_ci veor \out, \out, \tmp 48162306a36Sopenharmony_ci .endm 48262306a36Sopenharmony_ci 48362306a36Sopenharmony_cice_aes_xts_init: 48462306a36Sopenharmony_ci vmov.i32 d30, #0x87 @ compose tweak mask vector 
48562306a36Sopenharmony_ci vmovl.u32 q15, d30 48662306a36Sopenharmony_ci vshr.u64 d30, d31, #7 48762306a36Sopenharmony_ci 48862306a36Sopenharmony_ci ldrd r4, r5, [sp, #16] @ load args 48962306a36Sopenharmony_ci ldr r6, [sp, #28] 49062306a36Sopenharmony_ci vld1.8 {q0}, [r5] @ load iv 49162306a36Sopenharmony_ci teq r6, #1 @ start of a block? 49262306a36Sopenharmony_ci bxne lr 49362306a36Sopenharmony_ci 49462306a36Sopenharmony_ci @ Encrypt the IV in q0 with the second AES key. This should only 49562306a36Sopenharmony_ci @ be done at the start of a block. 49662306a36Sopenharmony_ci ldr r6, [sp, #24] @ load AES key 2 49762306a36Sopenharmony_ci prepare_key r6, r3 49862306a36Sopenharmony_ci add ip, r6, #32 @ 3rd round key of key 2 49962306a36Sopenharmony_ci b .Laes_encrypt_tweak @ tail call 50062306a36Sopenharmony_ciENDPROC(ce_aes_xts_init) 50162306a36Sopenharmony_ci 50262306a36Sopenharmony_ciENTRY(ce_aes_xts_encrypt) 50362306a36Sopenharmony_ci push {r4-r6, lr} 50462306a36Sopenharmony_ci 50562306a36Sopenharmony_ci bl ce_aes_xts_init @ run shared prologue 50662306a36Sopenharmony_ci prepare_key r2, r3 50762306a36Sopenharmony_ci vmov q4, q0 50862306a36Sopenharmony_ci 50962306a36Sopenharmony_ci teq r6, #0 @ start of a block? 51062306a36Sopenharmony_ci bne .Lxtsenc4x 51162306a36Sopenharmony_ci 51262306a36Sopenharmony_ci.Lxtsencloop4x: 51362306a36Sopenharmony_ci next_tweak q4, q4, q15, q10 51462306a36Sopenharmony_ci.Lxtsenc4x: 51562306a36Sopenharmony_ci subs r4, r4, #64 51662306a36Sopenharmony_ci bmi .Lxtsenc1x 51762306a36Sopenharmony_ci vld1.8 {q0-q1}, [r1]! @ get 4 pt blocks 51862306a36Sopenharmony_ci vld1.8 {q2-q3}, [r1]! 
51962306a36Sopenharmony_ci next_tweak q5, q4, q15, q10 52062306a36Sopenharmony_ci veor q0, q0, q4 52162306a36Sopenharmony_ci next_tweak q6, q5, q15, q10 52262306a36Sopenharmony_ci veor q1, q1, q5 52362306a36Sopenharmony_ci next_tweak q7, q6, q15, q10 52462306a36Sopenharmony_ci veor q2, q2, q6 52562306a36Sopenharmony_ci veor q3, q3, q7 52662306a36Sopenharmony_ci bl aes_encrypt_4x 52762306a36Sopenharmony_ci veor q0, q0, q4 52862306a36Sopenharmony_ci veor q1, q1, q5 52962306a36Sopenharmony_ci veor q2, q2, q6 53062306a36Sopenharmony_ci veor q3, q3, q7 53162306a36Sopenharmony_ci vst1.8 {q0-q1}, [r0]! @ write 4 ct blocks 53262306a36Sopenharmony_ci vst1.8 {q2-q3}, [r0]! 53362306a36Sopenharmony_ci vmov q4, q7 53462306a36Sopenharmony_ci teq r4, #0 53562306a36Sopenharmony_ci beq .Lxtsencret 53662306a36Sopenharmony_ci b .Lxtsencloop4x 53762306a36Sopenharmony_ci.Lxtsenc1x: 53862306a36Sopenharmony_ci adds r4, r4, #64 53962306a36Sopenharmony_ci beq .Lxtsencout 54062306a36Sopenharmony_ci subs r4, r4, #16 54162306a36Sopenharmony_ci bmi .LxtsencctsNx 54262306a36Sopenharmony_ci.Lxtsencloop: 54362306a36Sopenharmony_ci vld1.8 {q0}, [r1]! 54462306a36Sopenharmony_ci.Lxtsencctsout: 54562306a36Sopenharmony_ci veor q0, q0, q4 54662306a36Sopenharmony_ci bl aes_encrypt 54762306a36Sopenharmony_ci veor q0, q0, q4 54862306a36Sopenharmony_ci teq r4, #0 54962306a36Sopenharmony_ci beq .Lxtsencout 55062306a36Sopenharmony_ci subs r4, r4, #16 55162306a36Sopenharmony_ci next_tweak q4, q4, q15, q6 55262306a36Sopenharmony_ci bmi .Lxtsenccts 55362306a36Sopenharmony_ci vst1.8 {q0}, [r0]! 
55462306a36Sopenharmony_ci b .Lxtsencloop 55562306a36Sopenharmony_ci.Lxtsencout: 55662306a36Sopenharmony_ci vst1.8 {q0}, [r0] 55762306a36Sopenharmony_ci.Lxtsencret: 55862306a36Sopenharmony_ci vst1.8 {q4}, [r5] 55962306a36Sopenharmony_ci pop {r4-r6, pc} 56062306a36Sopenharmony_ci 56162306a36Sopenharmony_ci.LxtsencctsNx: 56262306a36Sopenharmony_ci vmov q0, q3 56362306a36Sopenharmony_ci sub r0, r0, #16 56462306a36Sopenharmony_ci.Lxtsenccts: 56562306a36Sopenharmony_ci movw ip, :lower16:.Lcts_permute_table 56662306a36Sopenharmony_ci movt ip, :upper16:.Lcts_permute_table 56762306a36Sopenharmony_ci 56862306a36Sopenharmony_ci add r1, r1, r4 @ rewind input pointer 56962306a36Sopenharmony_ci add r4, r4, #16 @ # bytes in final block 57062306a36Sopenharmony_ci add lr, ip, #32 57162306a36Sopenharmony_ci add ip, ip, r4 57262306a36Sopenharmony_ci sub lr, lr, r4 57362306a36Sopenharmony_ci add r4, r0, r4 @ output address of final block 57462306a36Sopenharmony_ci 57562306a36Sopenharmony_ci vld1.8 {q1}, [r1] @ load final partial block 57662306a36Sopenharmony_ci vld1.8 {q2}, [ip] 57762306a36Sopenharmony_ci vld1.8 {q3}, [lr] 57862306a36Sopenharmony_ci 57962306a36Sopenharmony_ci vtbl.8 d4, {d0-d1}, d4 58062306a36Sopenharmony_ci vtbl.8 d5, {d0-d1}, d5 58162306a36Sopenharmony_ci vtbx.8 d0, {d2-d3}, d6 58262306a36Sopenharmony_ci vtbx.8 d1, {d2-d3}, d7 58362306a36Sopenharmony_ci 58462306a36Sopenharmony_ci vst1.8 {q2}, [r4] @ overlapping stores 58562306a36Sopenharmony_ci mov r4, #0 58662306a36Sopenharmony_ci b .Lxtsencctsout 58762306a36Sopenharmony_ciENDPROC(ce_aes_xts_encrypt) 58862306a36Sopenharmony_ci 58962306a36Sopenharmony_ci 59062306a36Sopenharmony_ciENTRY(ce_aes_xts_decrypt) 59162306a36Sopenharmony_ci push {r4-r6, lr} 59262306a36Sopenharmony_ci 59362306a36Sopenharmony_ci bl ce_aes_xts_init @ run shared prologue 59462306a36Sopenharmony_ci prepare_key r2, r3 59562306a36Sopenharmony_ci vmov q4, q0 59662306a36Sopenharmony_ci 59762306a36Sopenharmony_ci /* subtract 16 bytes if we are doing 
CTS */ 59862306a36Sopenharmony_ci tst r4, #0xf 59962306a36Sopenharmony_ci subne r4, r4, #0x10 60062306a36Sopenharmony_ci 60162306a36Sopenharmony_ci teq r6, #0 @ start of a block? 60262306a36Sopenharmony_ci bne .Lxtsdec4x 60362306a36Sopenharmony_ci 60462306a36Sopenharmony_ci.Lxtsdecloop4x: 60562306a36Sopenharmony_ci next_tweak q4, q4, q15, q10 60662306a36Sopenharmony_ci.Lxtsdec4x: 60762306a36Sopenharmony_ci subs r4, r4, #64 60862306a36Sopenharmony_ci bmi .Lxtsdec1x 60962306a36Sopenharmony_ci vld1.8 {q0-q1}, [r1]! @ get 4 ct blocks 61062306a36Sopenharmony_ci vld1.8 {q2-q3}, [r1]! 61162306a36Sopenharmony_ci next_tweak q5, q4, q15, q10 61262306a36Sopenharmony_ci veor q0, q0, q4 61362306a36Sopenharmony_ci next_tweak q6, q5, q15, q10 61462306a36Sopenharmony_ci veor q1, q1, q5 61562306a36Sopenharmony_ci next_tweak q7, q6, q15, q10 61662306a36Sopenharmony_ci veor q2, q2, q6 61762306a36Sopenharmony_ci veor q3, q3, q7 61862306a36Sopenharmony_ci bl aes_decrypt_4x 61962306a36Sopenharmony_ci veor q0, q0, q4 62062306a36Sopenharmony_ci veor q1, q1, q5 62162306a36Sopenharmony_ci veor q2, q2, q6 62262306a36Sopenharmony_ci veor q3, q3, q7 62362306a36Sopenharmony_ci vst1.8 {q0-q1}, [r0]! @ write 4 pt blocks 62462306a36Sopenharmony_ci vst1.8 {q2-q3}, [r0]! 62562306a36Sopenharmony_ci vmov q4, q7 62662306a36Sopenharmony_ci teq r4, #0 62762306a36Sopenharmony_ci beq .Lxtsdecout 62862306a36Sopenharmony_ci b .Lxtsdecloop4x 62962306a36Sopenharmony_ci.Lxtsdec1x: 63062306a36Sopenharmony_ci adds r4, r4, #64 63162306a36Sopenharmony_ci beq .Lxtsdecout 63262306a36Sopenharmony_ci subs r4, r4, #16 63362306a36Sopenharmony_ci.Lxtsdecloop: 63462306a36Sopenharmony_ci vld1.8 {q0}, [r1]! 63562306a36Sopenharmony_ci bmi .Lxtsdeccts 63662306a36Sopenharmony_ci.Lxtsdecctsout: 63762306a36Sopenharmony_ci veor q0, q0, q4 63862306a36Sopenharmony_ci bl aes_decrypt 63962306a36Sopenharmony_ci veor q0, q0, q4 64062306a36Sopenharmony_ci vst1.8 {q0}, [r0]! 
64162306a36Sopenharmony_ci teq r4, #0 64262306a36Sopenharmony_ci beq .Lxtsdecout 64362306a36Sopenharmony_ci subs r4, r4, #16 64462306a36Sopenharmony_ci next_tweak q4, q4, q15, q6 64562306a36Sopenharmony_ci b .Lxtsdecloop 64662306a36Sopenharmony_ci.Lxtsdecout: 64762306a36Sopenharmony_ci vst1.8 {q4}, [r5] 64862306a36Sopenharmony_ci pop {r4-r6, pc} 64962306a36Sopenharmony_ci 65062306a36Sopenharmony_ci.Lxtsdeccts: 65162306a36Sopenharmony_ci movw ip, :lower16:.Lcts_permute_table 65262306a36Sopenharmony_ci movt ip, :upper16:.Lcts_permute_table 65362306a36Sopenharmony_ci 65462306a36Sopenharmony_ci add r1, r1, r4 @ rewind input pointer 65562306a36Sopenharmony_ci add r4, r4, #16 @ # bytes in final block 65662306a36Sopenharmony_ci add lr, ip, #32 65762306a36Sopenharmony_ci add ip, ip, r4 65862306a36Sopenharmony_ci sub lr, lr, r4 65962306a36Sopenharmony_ci add r4, r0, r4 @ output address of final block 66062306a36Sopenharmony_ci 66162306a36Sopenharmony_ci next_tweak q5, q4, q15, q6 66262306a36Sopenharmony_ci 66362306a36Sopenharmony_ci vld1.8 {q1}, [r1] @ load final partial block 66462306a36Sopenharmony_ci vld1.8 {q2}, [ip] 66562306a36Sopenharmony_ci vld1.8 {q3}, [lr] 66662306a36Sopenharmony_ci 66762306a36Sopenharmony_ci veor q0, q0, q5 66862306a36Sopenharmony_ci bl aes_decrypt 66962306a36Sopenharmony_ci veor q0, q0, q5 67062306a36Sopenharmony_ci 67162306a36Sopenharmony_ci vtbl.8 d4, {d0-d1}, d4 67262306a36Sopenharmony_ci vtbl.8 d5, {d0-d1}, d5 67362306a36Sopenharmony_ci vtbx.8 d0, {d2-d3}, d6 67462306a36Sopenharmony_ci vtbx.8 d1, {d2-d3}, d7 67562306a36Sopenharmony_ci 67662306a36Sopenharmony_ci vst1.8 {q2}, [r4] @ overlapping stores 67762306a36Sopenharmony_ci mov r4, #0 67862306a36Sopenharmony_ci b .Lxtsdecctsout 67962306a36Sopenharmony_ciENDPROC(ce_aes_xts_decrypt) 68062306a36Sopenharmony_ci 68162306a36Sopenharmony_ci /* 68262306a36Sopenharmony_ci * u32 ce_aes_sub(u32 input) - use the aese instruction to perform the 68362306a36Sopenharmony_ci * AES sbox substitution on each 
byte in 68462306a36Sopenharmony_ci * 'input' 68562306a36Sopenharmony_ci */ 68662306a36Sopenharmony_ciENTRY(ce_aes_sub) 68762306a36Sopenharmony_ci vdup.32 q1, r0 68862306a36Sopenharmony_ci veor q0, q0, q0 68962306a36Sopenharmony_ci aese.8 q0, q1 69062306a36Sopenharmony_ci vmov r0, s0 69162306a36Sopenharmony_ci bx lr 69262306a36Sopenharmony_ciENDPROC(ce_aes_sub) 69362306a36Sopenharmony_ci 69462306a36Sopenharmony_ci /* 69562306a36Sopenharmony_ci * void ce_aes_invert(u8 *dst, u8 *src) - perform the Inverse MixColumns 69662306a36Sopenharmony_ci * operation on round key *src 69762306a36Sopenharmony_ci */ 69862306a36Sopenharmony_ciENTRY(ce_aes_invert) 69962306a36Sopenharmony_ci vld1.32 {q0}, [r1] 70062306a36Sopenharmony_ci aesimc.8 q0, q0 70162306a36Sopenharmony_ci vst1.32 {q0}, [r0] 70262306a36Sopenharmony_ci bx lr 70362306a36Sopenharmony_ciENDPROC(ce_aes_invert) 70462306a36Sopenharmony_ci 70562306a36Sopenharmony_ci .section ".rodata", "a" 70662306a36Sopenharmony_ci .align 6 70762306a36Sopenharmony_ci.Lcts_permute_table: 70862306a36Sopenharmony_ci .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff 70962306a36Sopenharmony_ci .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff 71062306a36Sopenharmony_ci .byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 71162306a36Sopenharmony_ci .byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf 71262306a36Sopenharmony_ci .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff 71362306a36Sopenharmony_ci .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff 714