/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Bit sliced AES using NEON instructions
 *
 * Copyright (C) 2017 Linaro Ltd.
 * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
 */

/*
 * The algorithm implemented here is described in detail by the paper
 * 'Faster and Timing-Attack Resistant AES-GCM' by Emilia Kaesper and
 * Peter Schwabe (https://eprint.iacr.org/2009/129.pdf)
 *
 * This implementation is based primarily on the OpenSSL implementation
 * for 32-bit ARM written by Andy Polyakov <appro@openssl.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.fpu		neon

/*
 * Core register aliases shared by every routine in this file:
 *   rounds - loop counter for the AES rounds (lives in ip)
 *   bskey  - pointer through which round key material is loaded (lives in r4)
 */
	rounds		.req	ip
	bskey		.req	r4

/*
 * Names for the low and high 64-bit halves of each q register. Macros such
 * as __tbl and __ldr build these names by appending 'l' or 'h' to the name
 * of the q register they were handed.
 */
	q0l		.req	d0
	q0h		.req	d1
	q1l		.req	d2
	q1h		.req	d3
	q2l		.req	d4
	q2h		.req	d5
	q3l		.req	d6
	q3h		.req	d7
	q4l		.req	d8
	q4h		.req	d9
	q5l		.req	d10
	q5h		.req	d11
	q6l		.req	d12
	q6h		.req	d13
	q7l		.req	d14
	q7h		.req	d15
	q8l		.req	d16
	q8h		.req	d17
	q9l		.req	d18
	q9h		.req	d19
	q10l		.req	d20
	q10h		.req	d21
	q11l		.req	d22
	q11h		.req	d23
	q12l		.req	d24
	q12h		.req	d25
	q13l		.req	d26
	q13h		.req	d27
	q14l		.req	d28
	q14h		.req	d29
	q15l		.req	d30
	q15h		.req	d31

/*
 * __tbl - 16-byte table lookup assembled from two 8-byte vtbl.8 operations
 *
 *   out - q register that receives the result
 *   tbl - q register used as the 16-byte lookup table
 *   in  - q register holding the byte indices
 *   tmp - scratch q register; mandatory when out and tbl name the same
 *         register (assembly fails with an .error if it is left out)
 *
 * When out == tbl, the first vtbl.8 overwrites the low half of the table,
 * so the table is copied into tmp up front and the high half lookup reads
 * from that copy instead.
 */
	.macro		__tbl, out, tbl, in, tmp
	.ifc		\out, \tbl
	.ifb		\tmp
	.error		__tbl needs temp register if out == tbl
	.endif
	vmov		\tmp, \out
	.endif
	vtbl.8		\out\()l, {\tbl}, \in\()l
	.ifc		\out, \tbl
	vtbl.8		\out\()h, {\tmp}, \in\()h
	.else
	vtbl.8		\out\()h, {\tbl}, \in\()h
	.endif
	.endm

/*
 * __ldr - load the 16 bytes found at label sym into q register out, using
 * one vldr per 64-bit half (low half from sym, high half from sym + 8).
 */
	.macro		__ldr, out, sym
	vldr		\out\()l, \sym
	vldr		\out\()h, \sym + 8
	.endm

/*
 * in_bs_ch - XOR-only mixing of the eight bit planes b0-b7, done in place.
 * Invoked by sbox ahead of inv_gf256.
 * NOTE(review): judging by the name this is the input change of basis of
 * the S-box circuit from the paper cited at the top - confirm.
 */
	.macro		in_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7
	veor		\b2, \b2, \b1
	veor		\b5, \b5, \b6
	veor		\b3, \b3, \b0
	veor		\b6, \b6, \b2
	veor		\b5, \b5, \b0
	veor		\b6, \b6, \b3
	veor		\b3, \b3, \b7
	veor		\b7, \b7, \b5
	veor		\b3, \b3, \b4
	veor		\b4, \b4, \b5
	veor		\b2, \b2, \b7
	veor		\b3, \b3, \b1
	veor		\b1, \b1, \b5
	.endm

/*
 * out_bs_ch - XOR-only mixing of the eight bit planes, done in place.
 * Invoked by sbox after inv_gf256 (presumably the matching output change
 * of basis - inferred from the name).
 */
	.macro		out_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7
	veor		\b0, \b0, \b6
	veor		\b1, \b1, \b4
	veor		\b4, \b4, \b6
	veor		\b2, \b2, \b0
	veor		\b6, \b6, \b1
	veor		\b1, \b1, \b5
	veor		\b5, \b5, \b3
	veor		\b3, \b3, \b7
	veor		\b7, \b7, \b5
	veor		\b2, \b2, \b5
	veor		\b4, \b4, \b7
	.endm

/*
 * inv_in_bs_ch - counterpart of in_bs_ch used by inv_sbox. Note that the
 * formal parameters are deliberately declared in a permuted order
 * (b6, b1, b2, b4, b7, b0, b3, b5), so positional arguments do not map to
 * identically numbered names.
 */
	.macro		inv_in_bs_ch, b6, b1, b2, b4, b7, b0, b3, b5
	veor		\b1, \b1, \b7
	veor		\b4, \b4, \b7
	veor		\b7, \b7, \b5
	veor		\b1, \b1, \b3
	veor		\b2, \b2, \b5
	veor		\b3, \b3, \b7
	veor		\b6, \b6, \b1
	veor		\b2, \b2, \b0
	veor		\b5, \b5, \b3
	veor		\b4, \b4, \b6
	veor		\b0, \b0, \b6
	veor		\b1, \b1, \b4
	.endm

/*
 * inv_out_bs_ch - counterpart of out_bs_ch used by inv_sbox. As with
 * inv_in_bs_ch, the formal parameters are declared in a permuted order
 * (b6, b5, b0, b3, b7, b1, b4, b2).
 */
	.macro		inv_out_bs_ch, b6, b5, b0, b3, b7, b1, b4, b2
	veor		\b1, \b1, \b5
	veor		\b2, \b2, \b7
	veor		\b3, \b3, \b1
	veor		\b4, \b4, \b5
	veor		\b7, \b7, \b5
	veor		\b3, \b3, \b4
	veor		\b5, \b5, \b0
	veor		\b3, \b3, \b7
	veor		\b6, \b6, \b2
	veor		\b2, \b2, \b1
	veor		\b6, \b6, \b3
	veor		\b3, \b3, \b0
	veor		\b5, \b5, \b6
	.endm

/*
 * mul_gf4 - bit-sliced multiply of the pair (x0, x1) by the pair (y0, y1);
 * per the macro name, a multiplication in GF(2^2).
 *
 *   x0, x1 - multiplicand, overwritten with the product
 *   y0, y1 - multiplier, only read
 *   t0, t1 - scratch, clobbered
 */
	.macro		mul_gf4, x0, x1, y0, y1, t0, t1
	veor		\t0, \y0, \y1
	vand		\t0, \t0, \x0
	veor		\x0, \x0, \x1
	vand		\t1, \x1, \y0
	vand		\x0, \x0, \y1
	veor		\x1, \t1, \t0
	veor		\x0, \x0, \t1
	.endm

/*
 * mul_gf4_n_gf4 - two interleaved AND/XOR product networks: (x0, x1) is
 * combined with (y0, y1) and (x2, x3) with (y2, y3).
 * NOTE(review): the name suggests the first is a GF(2^2) multiply that
 * also folds in a constant ("n") while the second is a plain multiply -
 * confirm against the paper.
 *
 *   x0-x3  - overwritten with the results
 *   y0-y3  - only read
 *   t0, t1 - scratch, clobbered
 */
	.macro		mul_gf4_n_gf4, x0, x1, y0, y1, t0, x2, x3, y2, y3, t1
	veor		\t0, \y0, \y1
	veor		\t1, \y2, \y3
	vand		\t0, \t0, \x0
	vand		\t1, \t1, \x2
	veor		\x0, \x0, \x1
	veor		\x2, \x2, \x3
	vand		\x1, \x1, \y0
	vand		\x3, \x3, \y2
	vand		\x0, \x0, \y1
	vand		\x2, \x2, \y3
	veor		\x1, \x1, \x0
	veor		\x2, \x2, \x3
	veor		\x0, \x0, \t0
	veor		\x3, \x3, \t1
	.endm

/*
 * mul_gf16_2 - multiply both 4-plane groups (x0-x3) and (x4-x7) by the same
 * 4-plane value (y0-y3); per the name, two GF(2^4) multiplications.
 *
 *   x0-x7  - overwritten with the two products
 *   y0-y3  - multiplier; y0/y1 are temporarily XORed with y2/y3 and XORed
 *            back again before the last mul_gf4, so they leave unchanged
 *   t0-t3  - scratch, clobbered
 */
	.macro		mul_gf16_2, x0, x1, x2, x3, x4, x5, x6, x7, \
			y0, y1, y2, y3, t0, t1, t2, t3
	veor		\t0, \x0, \x2
	veor		\t1, \x1, \x3
	mul_gf4		\x0, \x1, \y0, \y1, \t2, \t3
	veor		\y0, \y0, \y2			// y0/y1 ^= y2/y3 ...
	veor		\y1, \y1, \y3
	mul_gf4_n_gf4	\t0, \t1, \y0, \y1, \t3, \x2, \x3, \y2, \y3, \t2
	veor		\x0, \x0, \t0
	veor		\x2, \x2, \t0
	veor		\x1, \x1, \t1
	veor		\x3, \x3, \t1
	veor		\t0, \x4, \x6
	veor		\t1, \x5, \x7
	mul_gf4_n_gf4	\t0, \t1, \y0, \y1, \t3, \x6, \x7, \y2, \y3, \t2
	veor		\y0, \y0, \y2			// ... and restored here
	veor		\y1, \y1, \y3
	mul_gf4		\x4, \x5, \y0, \y1, \t2, \t3
	veor		\x4, \x4, \t0
	veor		\x6, \x6, \t0
	veor		\x5, \x5, \t1
	veor		\x7, \x7, \t1
	.endm

/*
 * inv_gf256 - non-linear core shared by sbox and inv_sbox; per the name,
 * the bit-sliced inversion in GF(2^8).
 *
 *   x0-x7  - the eight bit planes, transformed in place
 *   t0-t3,
 *   s0-s3  - scratch, all clobbered
 *
 * The first part derives a 4-plane value in (s3, s2, s1, t1) from the
 * inputs; mul_gf16_2 then multiplies both halves of x by it.
 */
	.macro		inv_gf256, x0, x1, x2, x3, x4, x5, x6, x7, \
			t0, t1, t2, t3, s0, s1, s2, s3
	veor		\t3, \x4, \x6
	veor		\t0, \x5, \x7
	veor		\t1, \x1, \x3
	veor		\s1, \x7, \x6
	veor		\s0, \x0, \x2
	veor		\s3, \t3, \t0
	vorr		\t2, \t0, \t1
	vand		\s2, \t3, \s0
	vorr		\t3, \t3, \s0
	veor		\s0, \s0, \t1
	vand		\t0, \t0, \t1
	veor		\t1, \x3, \x2
	vand		\s3, \s3, \s0
	vand		\s1, \s1, \t1
	veor		\t1, \x4, \x5
	veor		\s0, \x1, \x0
	veor		\t3, \t3, \s1
	veor		\t2, \t2, \s1
	vand		\s1, \t1, \s0
	vorr		\t1, \t1, \s0
	veor		\t3, \t3, \s3
	veor		\t0, \t0, \s1
	veor		\t2, \t2, \s2
	veor		\t1, \t1, \s3
	veor		\t0, \t0, \s2
	vand		\s0, \x7, \x3
	veor		\t1, \t1, \s2
	vand		\s1, \x6, \x2
	vand		\s2, \x5, \x1
	vorr		\s3, \x4, \x0
	veor		\t3, \t3, \s0
	veor		\t1, \t1, \s2
	veor		\s0, \t0, \s3
	veor		\t2, \t2, \s1
	vand		\s2, \t3, \t1
	veor		\s1, \t2, \s2
	veor		\s3, \s0, \s2
	vbsl		\s1, \t1, \s0
	vmvn		\t0, \s0
	vbsl		\s0, \s1, \s3
	vbsl		\t0, \s1, \s3
	vbsl		\s3, \t3, \t2
	veor		\t3, \t3, \t2
	vand		\s2, \s0, \s3
	veor		\t1, \t1, \t0
	veor		\s2, \s2, \t3
	mul_gf16_2	\x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \
			\s3, \s2, \s1, \t1, \s0, \t0, \t2, \t3
	.endm

/*
 * sbox - bit-sliced S-box on the eight planes b0-b7: in_bs_ch, inv_gf256
 * and out_bs_ch chained together, each fed the planes in a different
 * register order. t0-t3 and s0-s3 are scratch for inv_gf256.
 *
 * The results are not left in b0..b7 order: the encryption code in this
 * file continues with the planes ordered b0, b1, b4, b6, b3, b7, b2, b5.
 */
	.macro		sbox, b0, b1, b2, b3, b4, b5, b6, b7, \
			t0, t1, t2, t3, s0, s1, s2, s3
	in_bs_ch	\b0, \b1, \b2, \b3, \b4, \b5, \b6, \b7
	inv_gf256	\b6, \b5, \b0, \b3, \b7, \b1, \b4, \b2, \
			\t0, \t1, \t2, \t3, \s0, \s1, \s2, \s3
	out_bs_ch	\b7, \b1, \b4, \b2, \b6, \b5, \b0, \b3
	.endm

/*
 * inv_sbox - decryption counterpart of sbox: inv_in_bs_ch, inv_gf256 and
 * inv_out_bs_ch chained together. t0-t3 and s0-s3 are scratch.
 *
 * The decryption code in this file continues with the planes ordered
 * b0, b1, b6, b4, b2, b7, b3, b5.
 */
	.macro		inv_sbox, b0, b1, b2, b3, b4, b5, b6, b7, \
			t0, t1, t2, t3, s0, s1, s2, s3
	inv_in_bs_ch	\b0, \b1, \b2, \b3, \b4, \b5, \b6, \b7
	inv_gf256	\b5, \b1, \b2, \b6, \b3, \b7, \b0, \b4, \
			\t0, \t1, \t2, \t3, \s0, \s1, \s2, \s3
	inv_out_bs_ch	\b3, \b7, \b0, \b4, \b5, \b1, \b2, \b6
	.endm

/*
 * shift_rows - XOR the next bit-sliced round key into the state, then
 * permute the bytes of every plane through the table in mask.
 *
 *   x0-x7 - state planes; each is replaced by the permuted (plane ^ key)
 *   t0-t3 - scratch, receive the key material
 *   mask  - q register holding the byte permutation for __tbl
 *
 * Eight 16-byte vectors are read through bskey with post-increment, so
 * bskey advances by 128 bytes. The :256 qualifier asserts that bskey is
 * 32-byte aligned. Loads, XORs and lookups are interleaved; t0-t3 are
 * reused for the second half only after their first use is complete.
 */
	.macro		shift_rows, x0, x1, x2, x3, x4, x5, x6, x7, \
			t0, t1, t2, t3, mask
	vld1.8		{\t0-\t1}, [bskey, :256]!
	veor		\t0, \t0, \x0
	vld1.8		{\t2-\t3}, [bskey, :256]!
	veor		\t1, \t1, \x1
	__tbl		\x0, \t0, \mask
	veor		\t2, \t2, \x2
	__tbl		\x1, \t1, \mask
	vld1.8		{\t0-\t1}, [bskey, :256]!
	veor		\t3, \t3, \x3
	__tbl		\x2, \t2, \mask
	__tbl		\x3, \t3, \mask
	vld1.8		{\t2-\t3}, [bskey, :256]!
	veor		\t0, \t0, \x4
	veor		\t1, \t1, \x5
	__tbl		\x4, \t0, \mask
	veor		\t2, \t2, \x6
	__tbl		\x5, \t1, \mask
	veor		\t3, \t3, \x7
	__tbl		\x6, \t2, \mask
	__tbl		\x7, \t3, \mask
	.endm

/*
 * inv_shift_rows - permute the bytes of every state plane in place through
 * the table in mask. No key material is touched here.
 *
 * Because each lookup uses the plane itself as the table (out == tbl),
 * __tbl needs a temporary; t0-t3 serve as those temporaries and are
 * clobbered.
 */
	.macro		inv_shift_rows, x0, x1, x2, x3, x4, x5, x6, x7, \
			t0, t1, t2, t3, mask
	__tbl		\x0, \x0, \mask, \t0
	__tbl		\x1, \x1, \mask, \t1
	__tbl		\x2, \x2, \mask, \t2
	__tbl		\x3, \x3, \mask, \t3
	__tbl		\x4, \x4, \mask, \t0
	__tbl		\x5, \x5, \mask, \t1
	__tbl		\x6, \x6, \mask, \t2
	__tbl		\x7, \x7, \mask, \t3
	.endm

/*
 * mix_cols - column mixing step on the eight bit-sliced state planes.
 *
 *   x0-x7 - state planes, overwritten with the result
 *   t0-t7 - scratch, clobbered
 *   inv   - leave blank for encryption. When non-blank (as passed by
 *           inv_mix_cols) the same eight values are computed, but the
 *           results that the blank variant leaves in x2/x3 and in x4/x6
 *           are delivered swapped (x2 <-> x3, x4 <-> x6).
 *
 * vext.8 with the same register as both sources is a byte rotation of the
 * 16-byte vector: #12 rotates by 12 bytes, #8 by 8 bytes. Apart from the
 * final vmov only veor/vext are used, and nothing here modifies the
 * condition flags, which callers rely on.
 */
	.macro		mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \
			t0, t1, t2, t3, t4, t5, t6, t7, inv
	vext.8		\t0, \x0, \x0, #12
	vext.8		\t1, \x1, \x1, #12
	veor		\x0, \x0, \t0
	vext.8		\t2, \x2, \x2, #12
	veor		\x1, \x1, \t1
	vext.8		\t3, \x3, \x3, #12
	veor		\x2, \x2, \t2
	vext.8		\t4, \x4, \x4, #12
	veor		\x3, \x3, \t3
	vext.8		\t5, \x5, \x5, #12
	veor		\x4, \x4, \t4
	vext.8		\t6, \x6, \x6, #12
	veor		\x5, \x5, \t5
	vext.8		\t7, \x7, \x7, #12
	veor		\x6, \x6, \t6
	veor		\t1, \t1, \x0
	veor.8		\x7, \x7, \t7
	vext.8		\x0, \x0, \x0, #8
	veor		\t2, \t2, \x1
	veor		\t0, \t0, \x7
	veor		\t1, \t1, \x7
	vext.8		\x1, \x1, \x1, #8
	veor		\t5, \t5, \x4
	veor		\x0, \x0, \t0
	veor		\t6, \t6, \x5
	veor		\x1, \x1, \t1
	vext.8		\t0, \x4, \x4, #8
	veor		\t4, \t4, \x3
	vext.8		\t1, \x5, \x5, #8
	veor		\t7, \t7, \x6
	vext.8		\x4, \x3, \x3, #8
	veor		\t3, \t3, \x2
	vext.8		\x5, \x7, \x7, #8
	veor		\t4, \t4, \x7
	vext.8		\x3, \x6, \x6, #8
	veor		\t3, \t3, \x7
	vext.8		\x6, \x2, \x2, #8
	veor		\x7, \t1, \t5
	.ifb		\inv
	veor		\x2, \t0, \t4
	veor		\x4, \x4, \t3
	veor		\x5, \x5, \t7
	veor		\x3, \x3, \t6
	veor		\x6, \x6, \t2
	.else
	veor		\t3, \t3, \x4			// value the blank variant puts in x4
	veor		\x5, \x5, \t7
	veor		\x2, \x3, \t6			// value the blank variant puts in x3
	veor		\x3, \t0, \t4			// value the blank variant puts in x2
	veor		\x4, \x6, \t2			// value the blank variant puts in x6
	vmov		\x6, \t3
	.endif
	.endm

/*
 * inv_mix_cols - decryption round step: XOR a bit-sliced round key into
 * the state, then apply the inverse column mixing.
 *
 *   x0-x7 - state planes, overwritten with the result
 *   t0-t7 - scratch, clobbered
 *
 * Key handling: eight 16-byte vectors are read through bskey. Three of the
 * four loads post-increment (3 * 32 = 96 bytes) and the explicit
 * subtraction of 224 follows, so bskey ends up 128 bytes below where it
 * started, i.e. the key schedule is walked backwards. The subtraction is
 * a plain sub, so the condition flags of the caller stay intact.
 *
 * Mixing: each plane is first combined with 8-byte-rotated copies of
 * itself and of other planes, after which the forward mix_cols is reused
 * with its inv argument set.
 */
	.macro		inv_mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \
			t0, t1, t2, t3, t4, t5, t6, t7
	vld1.8		{\t0-\t1}, [bskey, :256]!
	veor		\x0, \x0, \t0
	vld1.8		{\t2-\t3}, [bskey, :256]!
	veor		\x1, \x1, \t1
	vld1.8		{\t4-\t5}, [bskey, :256]!
	veor		\x2, \x2, \t2
	vld1.8		{\t6-\t7}, [bskey, :256]
	sub		bskey, bskey, #224		// net effect: bskey -= 128
	veor		\x3, \x3, \t3
	veor		\x4, \x4, \t4
	veor		\x5, \x5, \t5
	veor		\x6, \x6, \t6
	veor		\x7, \x7, \t7
	vext.8		\t0, \x0, \x0, #8
	vext.8		\t6, \x6, \x6, #8
	vext.8		\t7, \x7, \x7, #8
	veor		\t0, \t0, \x0
	vext.8		\t1, \x1, \x1, #8
	veor		\t6, \t6, \x6
	vext.8		\t2, \x2, \x2, #8
	veor		\t7, \t7, \x7
	vext.8		\t3, \x3, \x3, #8
	veor		\t1, \t1, \x1
	vext.8		\t4, \x4, \x4, #8
	veor		\t2, \t2, \x2
	vext.8		\t5, \x5, \x5, #8
	veor		\t3, \t3, \x3
	veor		\t4, \t4, \x4
	veor		\t5, \t5, \x5
	veor		\x0, \x0, \t6
	veor		\x1, \x1, \t6
	veor		\x2, \x2, \t0
	veor		\x4, \x4, \t2
	veor		\x3, \x3, \t1
	veor		\x1, \x1, \t7
	veor		\x2, \x2, \t7
	veor		\x4, \x4, \t6
	veor		\x5, \x5, \t3
	veor		\x3, \x3, \t6
	veor		\x6, \x6, \t4
	veor		\x4, \x4, \t7
	veor		\x5, \x5, \t7
	veor		\x7, \x7, \t5
	mix_cols	\x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \
			\t0, \t1, \t2, \t3, \t4, \t5, \t6, \t7, 1
	.endm
/*
 * swapmove_2x - two independent "swap-move" steps run side by side.
 *
 * For each pair (a, b), with 64-bit lane shifts:
 *
 *	t  = ((b >> n) ^ a) & mask
 *	a ^= t
 *	b ^= t << n
 *
 * which exchanges the bits of a selected by mask with the bits of b
 * selected by (mask << n).
 *
 *   a0, b0 - first pair, updated in place
 *   a1, b1 - second pair, updated in place
 *   n      - shift distance (immediate)
 *   mask   - q register with the bit mask, only read
 *   t0, t1 - scratch, clobbered
 */
	.macro		swapmove_2x, a0, b0, a1, b1, n, mask, t0, t1
	vshr.u64	\t0, \b0, #\n
	vshr.u64	\t1, \b1, #\n
	veor		\t0, \t0, \a0
	veor		\t1, \t1, \a1
	vand		\t0, \t0, \mask
	vand		\t1, \t1, \mask
	veor		\a0, \a0, \t0
	vshl.s64	\t0, \t0, #\n
	veor		\a1, \a1, \t1
	vshl.s64	\t1, \t1, #\n
	veor		\b0, \b0, \t0
	veor		\b1, \b1, \t1
	.endm

/*
 * bitslice - transpose bits between the eight registers x0-x7 using three
 * swap-move passes: distance 1 with mask 0x55, distance 2 with mask 0x33
 * and distance 4 with mask 0x0f. The same macro is used on entry to and on
 * exit from the cipher routines below.
 *
 * Note that the formal parameters are declared in descending order
 * (x7 first), so the first positional argument becomes x7.
 *
 *   x0-x7 - registers transposed in place
 *   t0-t3 - scratch: t0 and t1 hold the masks, t2 and t3 are temporaries
 */
	.macro		bitslice, x7, x6, x5, x4, x3, x2, x1, x0, t0, t1, t2, t3
	vmov.i8		\t0, #0x55
	vmov.i8		\t1, #0x33
	swapmove_2x	\x0, \x1, \x2, \x3, 1, \t0, \t2, \t3
	swapmove_2x	\x4, \x5, \x6, \x7, 1, \t0, \t2, \t3
	vmov.i8		\t0, #0x0f			// 0x55 no longer needed
	swapmove_2x	\x0, \x2, \x1, \x3, 2, \t1, \t2, \t3
	swapmove_2x	\x4, \x6, \x5, \x7, 2, \t1, \t2, \t3
	swapmove_2x	\x0, \x4, \x1, \x5, 4, \t0, \t2, \t3
	swapmove_2x	\x2, \x6, \x3, \x7, 4, \t0, \t2, \t3
	.endm

	/* byte permutation applied to each round key in aesbs_convert_key */
	.align		4
M0:	.quad		0x02060a0e03070b0f, 0x0004080c0105090d

	/*
	 * void aesbs_convert_key(u8 out[], u32 const rk[], int rounds)
	 *
	 * Convert an expanded AES key schedule into the layout consumed by
	 * aesbs_encrypt8 and aesbs_decrypt8.
	 *
	 * In:  r0 = out, r1 = rk, r2 = rounds
	 *
	 * Layout written to out:
	 *   - 16 bytes: round 0 key, copied unchanged
	 *   - (rounds - 1) * 128 bytes: one entry per inner round. The round
	 *     key is permuted through M0 and every bit position is expanded
	 *     with vtst.8 into a 16-byte vector of 0x00/0xff bytes. The
	 *     vectors for bits 0, 1, 5 and 6 are inverted; these are the set
	 *     bits of 0x63 (compare the handling of the last round key).
	 *   - 16 bytes: last round key XORed with 0x63 in every byte
	 *
	 * out is accessed with :128/:256 alignment qualifiers, so it has to
	 * be suitably aligned. Uses q0-q15, r0-r2 and the flags.
	 */
ENTRY(aesbs_convert_key)
	vld1.32		{q7}, [r1]!		// load round 0 key
	vld1.32		{q15}, [r1]!		// load round 1 key

	vmov.i8		q8,  #0x01		// bit masks
	vmov.i8		q9,  #0x02
	vmov.i8		q10, #0x04
	vmov.i8		q11, #0x08
	vmov.i8		q12, #0x10
	vmov.i8		q13, #0x20
	__ldr		q14, M0

	sub		r2, r2, #1		// rounds - 1 bit-sliced entries
	vst1.8		{q7}, [r0, :128]!	// save round 0 key

.Lkey_loop:
	__tbl		q7, q15, q14		// q7 = current round key, permuted
	vmov.i8		q6, #0x40		// remaining two masks are rebuilt on
	vmov.i8		q15, #0x80		// every pass: q6/q15 get overwritten

	vtst.8		q0, q7, q8
	vtst.8		q1, q7, q9
	vtst.8		q2, q7, q10
	vtst.8		q3, q7, q11
	vtst.8		q4, q7, q12
	vtst.8		q5, q7, q13
	vtst.8		q6, q7, q6
	vtst.8		q7, q7, q15
	vld1.32		{q15}, [r1]!		// load next round key
	vmvn		q0, q0
	vmvn		q1, q1
	vmvn		q5, q5
	vmvn		q6, q6

	subs		r2, r2, #1
	vst1.8		{q0-q1}, [r0, :256]!
	vst1.8		{q2-q3}, [r0, :256]!
	vst1.8		{q4-q5}, [r0, :256]!
	vst1.8		{q6-q7}, [r0, :256]!
	bne		.Lkey_loop		// vst1.8 leaves the flags alone

	vmov.i8		q7, #0x63		// compose .L63
	veor		q15, q15, q7
	vst1.8		{q15}, [r0, :128]
	bx		lr
ENDPROC(aesbs_convert_key)

	/* input byte permutation for aesbs_encrypt8 */
	.align		4
M0SR:	.quad		0x0a0e02060f03070b, 0x0004080c05090d01

	/*
	 * aesbs_encrypt8 - encrypt eight blocks in parallel (internal helper,
	 * register based interface)
	 *
	 * In:   q0-q7  = eight input blocks
	 *       bskey  = key schedule as produced by aesbs_convert_key
	 *       rounds = number of rounds
	 * Out:  the eight results, in the order q0, q1, q4, q6, q3, q7, q2, q5
	 * Uses: q8-q15 as scratch; bskey and rounds are modified, flags too
	 *
	 * The round 0 key is applied on plain blocks together with the M0SR
	 * byte permutation, then the state is bit-sliced. The loop is entered
	 * at the S-box. rounds is decremented once before the loop and once
	 * per S-box pass, and the flags of that subs select the path:
	 *   borrow (bcc taken) - that was the final S-box, leave the loop
	 *   zero   (beq taken) - one S-box left, use the SRM0 permutation
	 *   otherwise          - regular round, use the SR permutation
	 */
aesbs_encrypt8:
	vld1.8		{q9}, [bskey, :128]!	// round 0 key
	__ldr		q8, M0SR

	veor		q10, q0, q9		// xor with round0 key
	veor		q11, q1, q9
	__tbl		q0, q10, q8
	veor		q12, q2, q9
	__tbl		q1, q11, q8
	veor		q13, q3, q9
	__tbl		q2, q12, q8
	veor		q14, q4, q9
	__tbl		q3, q13, q8
	veor		q15, q5, q9
	__tbl		q4, q14, q8
	veor		q10, q6, q9
	__tbl		q5, q15, q8
	veor		q11, q7, q9
	__tbl		q6, q10, q8
	__tbl		q7, q11, q8

	bitslice	q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11

	sub		rounds, rounds, #1
	b		.Lenc_sbox

	/* byte permutations used by shift_rows: SR in regular rounds, SRM0 last */
	.align		5
SR:	.quad		0x0504070600030201, 0x0f0e0d0c0a09080b
SRM0:	.quad		0x0304090e00050a0f, 0x01060b0c0207080d

.Lenc_last:
	__ldr		q12, SRM0
.Lenc_loop:
	shift_rows	q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11, q12
.Lenc_sbox:
	sbox		q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11, q12, \
			q13, q14, q15
	subs		rounds, rounds, #1
	bcc		.Lenc_done

	// the S-box leaves the planes in a permuted register order
	mix_cols	q0, q1, q4, q6, q3, q7, q2, q5, q8, q9, q10, q11, q12, \
			q13, q14, q15

	beq		.Lenc_last		// flags still from the subs above
	__ldr		q12, SR
	b		.Lenc_loop

.Lenc_done:
	vld1.8		{q12}, [bskey, :128]	// last round key

	bitslice	q0, q1, q4, q6, q3, q7, q2, q5, q8, q9, q10, q11

	veor		q0, q0, q12
	veor		q1, q1, q12
	veor		q4, q4, q12
	veor		q6, q6, q12
	veor		q3, q3, q12
	veor		q7, q7, q12
	veor		q2, q2, q12
	veor		q5, q5, q12
	bx		lr
ENDPROC(aesbs_encrypt8)

	/* input byte permutation for aesbs_decrypt8 */
	.align		4
M0ISR:	.quad		0x0a0e0206070b0f03, 0x0004080c0d010509

	/*
	 * aesbs_decrypt8 - decrypt eight blocks in parallel (internal helper,
	 * register based interface)
	 *
	 * In:   q0-q7  = eight input blocks
	 *       bskey  = key schedule as produced by aesbs_convert_key
	 *       rounds = number of rounds
	 * Out:  the eight results, in the order q0, q1, q6, q4, q2, q7, q3, q5
	 * Uses: q8-q15 as scratch; bskey and rounds are modified, flags too
	 *
	 * The key schedule is consumed back to front. bskey is first moved to
	 * offset rounds * 128 - 112, which is where aesbs_convert_key stored
	 * the final 16-byte key, then stepped back 128 bytes onto the last
	 * bit-sliced entry. Each inv_mix_cols moves it a further 128 bytes
	 * back, so after the loop adding 112 lands on the 16-byte round 0 key
	 * at the start of the schedule.
	 *
	 * Loop control mirrors aesbs_encrypt8.
	 */
aesbs_decrypt8:
	add		bskey, bskey, rounds, lsl #7
	sub		bskey, bskey, #112
	vld1.8		{q9}, [bskey, :128]	// round 0 key
	sub		bskey, bskey, #128
	__ldr		q8, M0ISR

	veor		q10, q0, q9		// xor with round0 key
	veor		q11, q1, q9
	__tbl		q0, q10, q8
	veor		q12, q2, q9
	__tbl		q1, q11, q8
	veor		q13, q3, q9
	__tbl		q2, q12, q8
	veor		q14, q4, q9
	__tbl		q3, q13, q8
	veor		q15, q5, q9
	__tbl		q4, q14, q8
	veor		q10, q6, q9
	__tbl		q5, q15, q8
	veor		q11, q7, q9
	__tbl		q6, q10, q8
	__tbl		q7, q11, q8

	bitslice	q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11

	sub		rounds, rounds, #1
	b		.Ldec_sbox

	/* byte permutations for inv_shift_rows: ISR regular rounds, ISRM0 last */
	.align		5
ISR:	.quad		0x0504070602010003, 0x0f0e0d0c080b0a09
ISRM0:	.quad		0x01040b0e0205080f, 0x0306090c00070a0d

.Ldec_last:
	__ldr		q12, ISRM0
.Ldec_loop:
	inv_shift_rows	q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11, q12
.Ldec_sbox:
	inv_sbox	q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11, q12, \
			q13, q14, q15
	subs		rounds, rounds, #1
	bcc		.Ldec_done

	// the inverse S-box leaves the planes in a permuted register order
	inv_mix_cols	q0, q1, q6, q4, q2, q7, q3, q5, q8, q9, q10, q11, q12, \
			q13, q14, q15

	beq		.Ldec_last		// flags still from the subs above
	__ldr		q12, ISR
	b		.Ldec_loop

.Ldec_done:
	add		bskey, bskey, #112
	vld1.8		{q12}, [bskey, :128]	// last round key

	bitslice	q0, q1, q6, q4, q2, q7, q3, q5, q8, q9, q10, q11

	veor		q0, q0, q12
	veor		q1, q1, q12
	veor		q6, q6, q12
	veor		q4, q4, q12
	veor		q2, q2, q12
	veor		q7, q7, q12
	veor		q3, q3, q12
	veor		q5, q5, q12
	bx		lr
ENDPROC(aesbs_decrypt8)

	/*
	 * aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		     int blocks)
	 * aesbs_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		     int blocks)
	 *
	 * __ecb_crypt is the shared body of both functions.
	 *
	 *   do8   - 8-block routine to call (aesbs_encrypt8 or aesbs_decrypt8)
	 *   o0-o7 - registers in which do8 returns blocks 0-7
	 *
	 * Register use: r0 = out, r1 = in, r2 = rk, r3 = rounds, r5 = blocks
	 * still to do (fetched from the stack), r4 = bskey.
	 *
	 * Blocks are handled eight at a time. When fewer than eight remain,
	 * a computed goto jumps (blocks & 7) * 4 bytes ahead of label 0 (and
	 * later label 1), so that only the LAST (blocks & 7) loads/stores of
	 * each run are executed. This depends on every vld1.8/vst1.8 in those
	 * runs being exactly one 4-byte instruction: do not insert or remove
	 * instructions there.
	 */
	.macro		__ecb_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
	push		{r4-r6, lr}
	ldr		r5, [sp, #16]		// number of blocks

99:	adr		ip, 0f
	and		lr, r5, #7
	cmp		r5, #8
	sub		ip, ip, lr, lsl #2
	movlt		pc, ip			// computed goto if blocks < 8

	vld1.8		{q0}, [r1]!
	vld1.8		{q1}, [r1]!
	vld1.8		{q2}, [r1]!
	vld1.8		{q3}, [r1]!
	vld1.8		{q4}, [r1]!
	vld1.8		{q5}, [r1]!
	vld1.8		{q6}, [r1]!
	vld1.8		{q7}, [r1]!

0:	mov		bskey, r2		// do8 modifies both, so they are
	mov		rounds, r3		// set up afresh for every batch
	bl		\do8

	adr		ip, 1f
	and		lr, r5, #7
	cmp		r5, #8
	sub		ip, ip, lr, lsl #2
	movlt		pc, ip			// computed goto if blocks < 8

	vst1.8		{\o0}, [r0]!
	vst1.8		{\o1}, [r0]!
	vst1.8		{\o2}, [r0]!
	vst1.8		{\o3}, [r0]!
	vst1.8		{\o4}, [r0]!
	vst1.8		{\o5}, [r0]!
	vst1.8		{\o6}, [r0]!
	vst1.8		{\o7}, [r0]!

1:	subs		r5, r5, #8
	bgt		99b

	pop		{r4-r6, pc}
	.endm

	.align		4
ENTRY(aesbs_ecb_encrypt)
	__ecb_crypt	aesbs_encrypt8, q0, q1, q4, q6, q3, q7, q2, q5
ENDPROC(aesbs_ecb_encrypt)

	.align		4
ENTRY(aesbs_ecb_decrypt)
	__ecb_crypt	aesbs_decrypt8, q0, q1, q6, q4, q2, q7, q3, q5
ENDPROC(aesbs_ecb_decrypt)

	/*
	 * aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[],
	 *		     int rounds, int blocks, u8 iv[])
	 */
	.align		4
ENTRY(aesbs_cbc_decrypt)
	mov		ip, sp
	push		{r4-r6, lr}
	ldm		ip, {r5-r6}		// load args 4-5

	// Same computed-goto scheme as in __ecb_crypt. The input is read
	// through lr so that r1 still points at the first block of the
	// batch when the input blocks are re-read further down.
99:	adr		ip, 0f
	and		lr, r5, #7
	cmp		r5, #8
	sub		ip, ip, lr, lsl #2
	mov		lr, r1
	movlt		pc, ip			// computed goto if blocks < 8

	vld1.8		{q0}, [lr]!
	vld1.8		{q1}, [lr]!
	vld1.8		{q2}, [lr]!
	vld1.8		{q3}, [lr]!
	vld1.8		{q4}, [lr]!
	vld1.8		{q5}, [lr]!
	vld1.8		{q6}, [lr]!
	vld1.8		{q7}, [lr]

0:	mov		bskey, r2
	mov		rounds, r3
	bl		aesbs_decrypt8

	// q8 = iv; q9-q15 default to the iv as well and are overwritten
	// below with as many input blocks as the batch actually has
	vld1.8		{q8}, [r6]
	vmov		q9, q8
	vmov		q10, q8
	vmov		q11, q8
	vmov		q12, q8
	vmov		q13, q8
	vmov		q14, q8
	vmov		q15, q8

	adr		ip, 1f
	and		lr, r5, #7
	cmp		r5, #8
	sub		ip, ip, lr, lsl #2
	movlt		pc, ip			// computed goto if blocks < 8

	vld1.8		{q9}, [r1]!
	vld1.8		{q10}, [r1]!
	vld1.8		{q11}, [r1]!
	vld1.8		{q12}, [r1]!
	vld1.8		{q13}, [r1]!
	vld1.8		{q14}, [r1]!
	vld1.8		{q15}, [r1]!
	W(nop)					// eighth 4-byte slot of the jump range

	// flags are still those of the cmp above; here every block
	// accounts for two instructions, hence the shift by 3
1:	adr		ip, 2f
	sub		ip, ip, lr, lsl #3
	movlt		pc, ip			// computed goto if blocks < 8

	veor		q0, q0, q8
	vst1.8		{q0}, [r0]!
	veor		q1, q1, q9
	vst1.8		{q1}, [r0]!
	veor		q6, q6, q10
	vst1.8		{q6}, [r0]!
	veor		q4, q4, q11
	vst1.8		{q4}, [r0]!
	veor		q2, q2, q12
	vst1.8		{q2}, [r0]!
	veor		q7, q7, q13
	vst1.8		{q7}, [r0]!
	veor		q3, q3, q14
	vst1.8		{q3}, [r0]!
	veor		q5, q5, q15
	vld1.8		{q8}, [r1]!		// load next round's iv
2:	vst1.8		{q5}, [r0]!
75262306a36Sopenharmony_ci 75362306a36Sopenharmony_ci subs r5, r5, #8 75462306a36Sopenharmony_ci vst1.8 {q8}, [r6] // store next round's iv 75562306a36Sopenharmony_ci bgt 99b 75662306a36Sopenharmony_ci 75762306a36Sopenharmony_ci pop {r4-r6, pc} 75862306a36Sopenharmony_ciENDPROC(aesbs_cbc_decrypt) 75962306a36Sopenharmony_ci 76062306a36Sopenharmony_ci .macro next_ctr, q 76162306a36Sopenharmony_ci vmov \q\()h, r9, r10 76262306a36Sopenharmony_ci adds r10, r10, #1 76362306a36Sopenharmony_ci adcs r9, r9, #0 76462306a36Sopenharmony_ci vmov \q\()l, r7, r8 76562306a36Sopenharmony_ci adcs r8, r8, #0 76662306a36Sopenharmony_ci adc r7, r7, #0 76762306a36Sopenharmony_ci vrev32.8 \q, \q 76862306a36Sopenharmony_ci .endm 76962306a36Sopenharmony_ci 77062306a36Sopenharmony_ci /* 77162306a36Sopenharmony_ci * aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], 77262306a36Sopenharmony_ci * int rounds, int bytes, u8 ctr[]) 77362306a36Sopenharmony_ci */ 77462306a36Sopenharmony_ciENTRY(aesbs_ctr_encrypt) 77562306a36Sopenharmony_ci mov ip, sp 77662306a36Sopenharmony_ci push {r4-r10, lr} 77762306a36Sopenharmony_ci 77862306a36Sopenharmony_ci ldm ip, {r5, r6} // load args 4-5 77962306a36Sopenharmony_ci vld1.8 {q0}, [r6] // load counter 78062306a36Sopenharmony_ci vrev32.8 q1, q0 78162306a36Sopenharmony_ci vmov r9, r10, d3 78262306a36Sopenharmony_ci vmov r7, r8, d2 78362306a36Sopenharmony_ci 78462306a36Sopenharmony_ci adds r10, r10, #1 78562306a36Sopenharmony_ci adcs r9, r9, #0 78662306a36Sopenharmony_ci adcs r8, r8, #0 78762306a36Sopenharmony_ci adc r7, r7, #0 78862306a36Sopenharmony_ci 78962306a36Sopenharmony_ci99: vmov q1, q0 79062306a36Sopenharmony_ci sub lr, r5, #1 79162306a36Sopenharmony_ci vmov q2, q0 79262306a36Sopenharmony_ci adr ip, 0f 79362306a36Sopenharmony_ci vmov q3, q0 79462306a36Sopenharmony_ci and lr, lr, #112 79562306a36Sopenharmony_ci vmov q4, q0 79662306a36Sopenharmony_ci cmp r5, #112 79762306a36Sopenharmony_ci vmov q5, q0 79862306a36Sopenharmony_ci sub ip, ip, lr, lsl 
#1 79962306a36Sopenharmony_ci vmov q6, q0 80062306a36Sopenharmony_ci add ip, ip, lr, lsr #2 80162306a36Sopenharmony_ci vmov q7, q0 80262306a36Sopenharmony_ci movle pc, ip // computed goto if bytes < 112 80362306a36Sopenharmony_ci 80462306a36Sopenharmony_ci next_ctr q1 80562306a36Sopenharmony_ci next_ctr q2 80662306a36Sopenharmony_ci next_ctr q3 80762306a36Sopenharmony_ci next_ctr q4 80862306a36Sopenharmony_ci next_ctr q5 80962306a36Sopenharmony_ci next_ctr q6 81062306a36Sopenharmony_ci next_ctr q7 81162306a36Sopenharmony_ci 81262306a36Sopenharmony_ci0: mov bskey, r2 81362306a36Sopenharmony_ci mov rounds, r3 81462306a36Sopenharmony_ci bl aesbs_encrypt8 81562306a36Sopenharmony_ci 81662306a36Sopenharmony_ci adr ip, 1f 81762306a36Sopenharmony_ci sub lr, r5, #1 81862306a36Sopenharmony_ci cmp r5, #128 81962306a36Sopenharmony_ci bic lr, lr, #15 82062306a36Sopenharmony_ci ands r4, r5, #15 // preserves C flag 82162306a36Sopenharmony_ci teqcs r5, r5 // set Z flag if not last iteration 82262306a36Sopenharmony_ci sub ip, ip, lr, lsr #2 82362306a36Sopenharmony_ci rsb r4, r4, #16 82462306a36Sopenharmony_ci movcc pc, ip // computed goto if bytes < 128 82562306a36Sopenharmony_ci 82662306a36Sopenharmony_ci vld1.8 {q8}, [r1]! 82762306a36Sopenharmony_ci vld1.8 {q9}, [r1]! 82862306a36Sopenharmony_ci vld1.8 {q10}, [r1]! 82962306a36Sopenharmony_ci vld1.8 {q11}, [r1]! 83062306a36Sopenharmony_ci vld1.8 {q12}, [r1]! 83162306a36Sopenharmony_ci vld1.8 {q13}, [r1]! 83262306a36Sopenharmony_ci vld1.8 {q14}, [r1]! 83362306a36Sopenharmony_ci1: subne r1, r1, r4 83462306a36Sopenharmony_ci vld1.8 {q15}, [r1]! 
83562306a36Sopenharmony_ci 83662306a36Sopenharmony_ci add ip, ip, #2f - 1b 83762306a36Sopenharmony_ci 83862306a36Sopenharmony_ci veor q0, q0, q8 83962306a36Sopenharmony_ci veor q1, q1, q9 84062306a36Sopenharmony_ci veor q4, q4, q10 84162306a36Sopenharmony_ci veor q6, q6, q11 84262306a36Sopenharmony_ci veor q3, q3, q12 84362306a36Sopenharmony_ci veor q7, q7, q13 84462306a36Sopenharmony_ci veor q2, q2, q14 84562306a36Sopenharmony_ci bne 3f 84662306a36Sopenharmony_ci veor q5, q5, q15 84762306a36Sopenharmony_ci 84862306a36Sopenharmony_ci movcc pc, ip // computed goto if bytes < 128 84962306a36Sopenharmony_ci 85062306a36Sopenharmony_ci vst1.8 {q0}, [r0]! 85162306a36Sopenharmony_ci vst1.8 {q1}, [r0]! 85262306a36Sopenharmony_ci vst1.8 {q4}, [r0]! 85362306a36Sopenharmony_ci vst1.8 {q6}, [r0]! 85462306a36Sopenharmony_ci vst1.8 {q3}, [r0]! 85562306a36Sopenharmony_ci vst1.8 {q7}, [r0]! 85662306a36Sopenharmony_ci vst1.8 {q2}, [r0]! 85762306a36Sopenharmony_ci2: subne r0, r0, r4 85862306a36Sopenharmony_ci vst1.8 {q5}, [r0]! 
85962306a36Sopenharmony_ci 86062306a36Sopenharmony_ci next_ctr q0 86162306a36Sopenharmony_ci 86262306a36Sopenharmony_ci subs r5, r5, #128 86362306a36Sopenharmony_ci bgt 99b 86462306a36Sopenharmony_ci 86562306a36Sopenharmony_ci vst1.8 {q0}, [r6] 86662306a36Sopenharmony_ci pop {r4-r10, pc} 86762306a36Sopenharmony_ci 86862306a36Sopenharmony_ci3: adr lr, .Lpermute_table + 16 86962306a36Sopenharmony_ci cmp r5, #16 // Z flag remains cleared 87062306a36Sopenharmony_ci sub lr, lr, r4 87162306a36Sopenharmony_ci vld1.8 {q8-q9}, [lr] 87262306a36Sopenharmony_ci vtbl.8 d16, {q5}, d16 87362306a36Sopenharmony_ci vtbl.8 d17, {q5}, d17 87462306a36Sopenharmony_ci veor q5, q8, q15 87562306a36Sopenharmony_ci bcc 4f // have to reload prev if R5 < 16 87662306a36Sopenharmony_ci vtbx.8 d10, {q2}, d18 87762306a36Sopenharmony_ci vtbx.8 d11, {q2}, d19 87862306a36Sopenharmony_ci mov pc, ip // branch back to VST sequence 87962306a36Sopenharmony_ci 88062306a36Sopenharmony_ci4: sub r0, r0, r4 88162306a36Sopenharmony_ci vshr.s8 q9, q9, #7 // create mask for VBIF 88262306a36Sopenharmony_ci vld1.8 {q8}, [r0] // reload 88362306a36Sopenharmony_ci vbif q5, q8, q9 88462306a36Sopenharmony_ci vst1.8 {q5}, [r0] 88562306a36Sopenharmony_ci pop {r4-r10, pc} 88662306a36Sopenharmony_ciENDPROC(aesbs_ctr_encrypt) 88762306a36Sopenharmony_ci 88862306a36Sopenharmony_ci .align 6 88962306a36Sopenharmony_ci.Lpermute_table: 89062306a36Sopenharmony_ci .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff 89162306a36Sopenharmony_ci .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff 89262306a36Sopenharmony_ci .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 89362306a36Sopenharmony_ci .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f 89462306a36Sopenharmony_ci .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff 89562306a36Sopenharmony_ci .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff 89662306a36Sopenharmony_ci 89762306a36Sopenharmony_ci .macro next_tweak, out, in, const, tmp 89862306a36Sopenharmony_ci 
vshr.s64 \tmp, \in, #63 89962306a36Sopenharmony_ci vand \tmp, \tmp, \const 90062306a36Sopenharmony_ci vadd.u64 \out, \in, \in 90162306a36Sopenharmony_ci vext.8 \tmp, \tmp, \tmp, #8 90262306a36Sopenharmony_ci veor \out, \out, \tmp 90362306a36Sopenharmony_ci .endm 90462306a36Sopenharmony_ci 90562306a36Sopenharmony_ci /* 90662306a36Sopenharmony_ci * aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, 90762306a36Sopenharmony_ci * int blocks, u8 iv[], int reorder_last_tweak) 90862306a36Sopenharmony_ci * aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, 90962306a36Sopenharmony_ci * int blocks, u8 iv[], int reorder_last_tweak) 91062306a36Sopenharmony_ci */ 91162306a36Sopenharmony_ci .align 6 91262306a36Sopenharmony_ci__xts_prepare8: 91362306a36Sopenharmony_ci vld1.8 {q14}, [r7] // load iv 91462306a36Sopenharmony_ci vmov.i32 d30, #0x87 // compose tweak mask vector 91562306a36Sopenharmony_ci vmovl.u32 q15, d30 91662306a36Sopenharmony_ci vshr.u64 d30, d31, #7 91762306a36Sopenharmony_ci vmov q12, q14 91862306a36Sopenharmony_ci 91962306a36Sopenharmony_ci adr ip, 0f 92062306a36Sopenharmony_ci and r4, r6, #7 92162306a36Sopenharmony_ci cmp r6, #8 92262306a36Sopenharmony_ci sub ip, ip, r4, lsl #5 92362306a36Sopenharmony_ci mov r4, sp 92462306a36Sopenharmony_ci movlt pc, ip // computed goto if blocks < 8 92562306a36Sopenharmony_ci 92662306a36Sopenharmony_ci vld1.8 {q0}, [r1]! 92762306a36Sopenharmony_ci next_tweak q12, q14, q15, q13 92862306a36Sopenharmony_ci veor q0, q0, q14 92962306a36Sopenharmony_ci vst1.8 {q14}, [r4, :128]! 93062306a36Sopenharmony_ci 93162306a36Sopenharmony_ci vld1.8 {q1}, [r1]! 93262306a36Sopenharmony_ci next_tweak q14, q12, q15, q13 93362306a36Sopenharmony_ci veor q1, q1, q12 93462306a36Sopenharmony_ci vst1.8 {q12}, [r4, :128]! 93562306a36Sopenharmony_ci 93662306a36Sopenharmony_ci vld1.8 {q2}, [r1]! 
93762306a36Sopenharmony_ci next_tweak q12, q14, q15, q13 93862306a36Sopenharmony_ci veor q2, q2, q14 93962306a36Sopenharmony_ci vst1.8 {q14}, [r4, :128]! 94062306a36Sopenharmony_ci 94162306a36Sopenharmony_ci vld1.8 {q3}, [r1]! 94262306a36Sopenharmony_ci next_tweak q14, q12, q15, q13 94362306a36Sopenharmony_ci veor q3, q3, q12 94462306a36Sopenharmony_ci vst1.8 {q12}, [r4, :128]! 94562306a36Sopenharmony_ci 94662306a36Sopenharmony_ci vld1.8 {q4}, [r1]! 94762306a36Sopenharmony_ci next_tweak q12, q14, q15, q13 94862306a36Sopenharmony_ci veor q4, q4, q14 94962306a36Sopenharmony_ci vst1.8 {q14}, [r4, :128]! 95062306a36Sopenharmony_ci 95162306a36Sopenharmony_ci vld1.8 {q5}, [r1]! 95262306a36Sopenharmony_ci next_tweak q14, q12, q15, q13 95362306a36Sopenharmony_ci veor q5, q5, q12 95462306a36Sopenharmony_ci vst1.8 {q12}, [r4, :128]! 95562306a36Sopenharmony_ci 95662306a36Sopenharmony_ci vld1.8 {q6}, [r1]! 95762306a36Sopenharmony_ci next_tweak q12, q14, q15, q13 95862306a36Sopenharmony_ci veor q6, q6, q14 95962306a36Sopenharmony_ci vst1.8 {q14}, [r4, :128]! 96062306a36Sopenharmony_ci 96162306a36Sopenharmony_ci vld1.8 {q7}, [r1]! 
96262306a36Sopenharmony_ci next_tweak q14, q12, q15, q13 96362306a36Sopenharmony_ciTHUMB( itt le ) 96462306a36Sopenharmony_ci W(cmple) r8, #0 96562306a36Sopenharmony_ci ble 1f 96662306a36Sopenharmony_ci0: veor q7, q7, q12 96762306a36Sopenharmony_ci vst1.8 {q12}, [r4, :128] 96862306a36Sopenharmony_ci 96962306a36Sopenharmony_ci vst1.8 {q14}, [r7] // store next iv 97062306a36Sopenharmony_ci bx lr 97162306a36Sopenharmony_ci 97262306a36Sopenharmony_ci1: vswp q12, q14 97362306a36Sopenharmony_ci b 0b 97462306a36Sopenharmony_ciENDPROC(__xts_prepare8) 97562306a36Sopenharmony_ci 97662306a36Sopenharmony_ci .macro __xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7 97762306a36Sopenharmony_ci push {r4-r8, lr} 97862306a36Sopenharmony_ci mov r5, sp // preserve sp 97962306a36Sopenharmony_ci ldrd r6, r7, [sp, #24] // get blocks and iv args 98062306a36Sopenharmony_ci rsb r8, ip, #1 98162306a36Sopenharmony_ci sub ip, sp, #128 // make room for 8x tweak 98262306a36Sopenharmony_ci bic ip, ip, #0xf // align sp to 16 bytes 98362306a36Sopenharmony_ci mov sp, ip 98462306a36Sopenharmony_ci 98562306a36Sopenharmony_ci99: bl __xts_prepare8 98662306a36Sopenharmony_ci 98762306a36Sopenharmony_ci mov bskey, r2 98862306a36Sopenharmony_ci mov rounds, r3 98962306a36Sopenharmony_ci bl \do8 99062306a36Sopenharmony_ci 99162306a36Sopenharmony_ci adr ip, 0f 99262306a36Sopenharmony_ci and lr, r6, #7 99362306a36Sopenharmony_ci cmp r6, #8 99462306a36Sopenharmony_ci sub ip, ip, lr, lsl #2 99562306a36Sopenharmony_ci mov r4, sp 99662306a36Sopenharmony_ci movlt pc, ip // computed goto if blocks < 8 99762306a36Sopenharmony_ci 99862306a36Sopenharmony_ci vld1.8 {q8}, [r4, :128]! 99962306a36Sopenharmony_ci vld1.8 {q9}, [r4, :128]! 100062306a36Sopenharmony_ci vld1.8 {q10}, [r4, :128]! 100162306a36Sopenharmony_ci vld1.8 {q11}, [r4, :128]! 100262306a36Sopenharmony_ci vld1.8 {q12}, [r4, :128]! 100362306a36Sopenharmony_ci vld1.8 {q13}, [r4, :128]! 100462306a36Sopenharmony_ci vld1.8 {q14}, [r4, :128]! 
100562306a36Sopenharmony_ci vld1.8 {q15}, [r4, :128] 100662306a36Sopenharmony_ci 100762306a36Sopenharmony_ci0: adr ip, 1f 100862306a36Sopenharmony_ci sub ip, ip, lr, lsl #3 100962306a36Sopenharmony_ci movlt pc, ip // computed goto if blocks < 8 101062306a36Sopenharmony_ci 101162306a36Sopenharmony_ci veor \o0, \o0, q8 101262306a36Sopenharmony_ci vst1.8 {\o0}, [r0]! 101362306a36Sopenharmony_ci veor \o1, \o1, q9 101462306a36Sopenharmony_ci vst1.8 {\o1}, [r0]! 101562306a36Sopenharmony_ci veor \o2, \o2, q10 101662306a36Sopenharmony_ci vst1.8 {\o2}, [r0]! 101762306a36Sopenharmony_ci veor \o3, \o3, q11 101862306a36Sopenharmony_ci vst1.8 {\o3}, [r0]! 101962306a36Sopenharmony_ci veor \o4, \o4, q12 102062306a36Sopenharmony_ci vst1.8 {\o4}, [r0]! 102162306a36Sopenharmony_ci veor \o5, \o5, q13 102262306a36Sopenharmony_ci vst1.8 {\o5}, [r0]! 102362306a36Sopenharmony_ci veor \o6, \o6, q14 102462306a36Sopenharmony_ci vst1.8 {\o6}, [r0]! 102562306a36Sopenharmony_ci veor \o7, \o7, q15 102662306a36Sopenharmony_ci vst1.8 {\o7}, [r0]! 102762306a36Sopenharmony_ci 102862306a36Sopenharmony_ci1: subs r6, r6, #8 102962306a36Sopenharmony_ci bgt 99b 103062306a36Sopenharmony_ci 103162306a36Sopenharmony_ci mov sp, r5 103262306a36Sopenharmony_ci pop {r4-r8, pc} 103362306a36Sopenharmony_ci .endm 103462306a36Sopenharmony_ci 103562306a36Sopenharmony_ciENTRY(aesbs_xts_encrypt) 103662306a36Sopenharmony_ci mov ip, #0 // never reorder final tweak 103762306a36Sopenharmony_ci __xts_crypt aesbs_encrypt8, q0, q1, q4, q6, q3, q7, q2, q5 103862306a36Sopenharmony_ciENDPROC(aesbs_xts_encrypt) 103962306a36Sopenharmony_ci 104062306a36Sopenharmony_ciENTRY(aesbs_xts_decrypt) 104162306a36Sopenharmony_ci ldr ip, [sp, #8] // reorder final tweak? 104262306a36Sopenharmony_ci __xts_crypt aesbs_decrypt8, q0, q1, q6, q4, q2, q7, q3, q5 104362306a36Sopenharmony_ciENDPROC(aesbs_xts_decrypt) 1044