162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Copyright (C) 2019 Linaro, Ltd. <ard.biesheuvel@linaro.org> 462306a36Sopenharmony_ci */ 562306a36Sopenharmony_ci 662306a36Sopenharmony_ci#ifdef CONFIG_ARM64 762306a36Sopenharmony_ci#include <asm/neon-intrinsics.h> 862306a36Sopenharmony_ci 962306a36Sopenharmony_ci#define AES_ROUND "aese %0.16b, %1.16b \n\t aesmc %0.16b, %0.16b" 1062306a36Sopenharmony_ci#else 1162306a36Sopenharmony_ci#include <arm_neon.h> 1262306a36Sopenharmony_ci 1362306a36Sopenharmony_ci#define AES_ROUND "aese.8 %q0, %q1 \n\t aesmc.8 %q0, %q0" 1462306a36Sopenharmony_ci#endif 1562306a36Sopenharmony_ci 1662306a36Sopenharmony_ci#define AEGIS_BLOCK_SIZE 16 1762306a36Sopenharmony_ci 1862306a36Sopenharmony_ci#include <stddef.h> 1962306a36Sopenharmony_ci#include "aegis-neon.h" 2062306a36Sopenharmony_ci 2162306a36Sopenharmony_ciextern int aegis128_have_aes_insn; 2262306a36Sopenharmony_ci 2362306a36Sopenharmony_civoid *memcpy(void *dest, const void *src, size_t n); 2462306a36Sopenharmony_ci 2562306a36Sopenharmony_cistruct aegis128_state { 2662306a36Sopenharmony_ci uint8x16_t v[5]; 2762306a36Sopenharmony_ci}; 2862306a36Sopenharmony_ci 2962306a36Sopenharmony_ciextern const uint8_t crypto_aes_sbox[]; 3062306a36Sopenharmony_ci 3162306a36Sopenharmony_cistatic struct aegis128_state aegis128_load_state_neon(const void *state) 3262306a36Sopenharmony_ci{ 3362306a36Sopenharmony_ci return (struct aegis128_state){ { 3462306a36Sopenharmony_ci vld1q_u8(state), 3562306a36Sopenharmony_ci vld1q_u8(state + 16), 3662306a36Sopenharmony_ci vld1q_u8(state + 32), 3762306a36Sopenharmony_ci vld1q_u8(state + 48), 3862306a36Sopenharmony_ci vld1q_u8(state + 64) 3962306a36Sopenharmony_ci } }; 4062306a36Sopenharmony_ci} 4162306a36Sopenharmony_ci 4262306a36Sopenharmony_cistatic void aegis128_save_state_neon(struct aegis128_state st, void *state) 4362306a36Sopenharmony_ci{ 4462306a36Sopenharmony_ci vst1q_u8(state, st.v[0]); 4562306a36Sopenharmony_ci vst1q_u8(state + 16, st.v[1]); 4662306a36Sopenharmony_ci vst1q_u8(state + 32, st.v[2]); 4762306a36Sopenharmony_ci vst1q_u8(state + 48, st.v[3]); 4862306a36Sopenharmony_ci vst1q_u8(state + 64, st.v[4]); 4962306a36Sopenharmony_ci} 5062306a36Sopenharmony_ci 5162306a36Sopenharmony_cistatic inline __attribute__((always_inline)) 5262306a36Sopenharmony_ciuint8x16_t aegis_aes_round(uint8x16_t w) 5362306a36Sopenharmony_ci{ 5462306a36Sopenharmony_ci uint8x16_t z = {}; 5562306a36Sopenharmony_ci 5662306a36Sopenharmony_ci#ifdef CONFIG_ARM64 5762306a36Sopenharmony_ci if (!__builtin_expect(aegis128_have_aes_insn, 1)) { 5862306a36Sopenharmony_ci static const uint8_t shift_rows[] = { 5962306a36Sopenharmony_ci 0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3, 6062306a36Sopenharmony_ci 0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb, 6162306a36Sopenharmony_ci }; 6262306a36Sopenharmony_ci static const uint8_t ror32by8[] = { 6362306a36Sopenharmony_ci 0x1, 0x2, 0x3, 0x0, 0x5, 0x6, 0x7, 0x4, 6462306a36Sopenharmony_ci 0x9, 0xa, 0xb, 0x8, 0xd, 0xe, 0xf, 0xc, 6562306a36Sopenharmony_ci }; 6662306a36Sopenharmony_ci uint8x16_t v; 6762306a36Sopenharmony_ci 6862306a36Sopenharmony_ci // shift rows 6962306a36Sopenharmony_ci w = vqtbl1q_u8(w, vld1q_u8(shift_rows)); 7062306a36Sopenharmony_ci 7162306a36Sopenharmony_ci // sub bytes 7262306a36Sopenharmony_ci#ifndef CONFIG_CC_IS_GCC 7362306a36Sopenharmony_ci v = vqtbl4q_u8(vld1q_u8_x4(crypto_aes_sbox), w); 7462306a36Sopenharmony_ci v = vqtbx4q_u8(v, vld1q_u8_x4(crypto_aes_sbox + 0x40), w - 0x40); 7562306a36Sopenharmony_ci v = vqtbx4q_u8(v, vld1q_u8_x4(crypto_aes_sbox + 0x80), w - 0x80); 7662306a36Sopenharmony_ci v = vqtbx4q_u8(v, vld1q_u8_x4(crypto_aes_sbox + 0xc0), w - 0xc0); 7762306a36Sopenharmony_ci#else 7862306a36Sopenharmony_ci asm("tbl %0.16b, {v16.16b-v19.16b}, %1.16b" : "=w"(v) : "w"(w)); 7962306a36Sopenharmony_ci w -= 0x40; 8062306a36Sopenharmony_ci asm("tbx %0.16b, {v20.16b-v23.16b}, %1.16b" : "+w"(v) : "w"(w)); 8162306a36Sopenharmony_ci w -= 0x40; 8262306a36Sopenharmony_ci asm("tbx %0.16b, {v24.16b-v27.16b}, %1.16b" : "+w"(v) : "w"(w)); 8362306a36Sopenharmony_ci w -= 0x40; 8462306a36Sopenharmony_ci asm("tbx %0.16b, {v28.16b-v31.16b}, %1.16b" : "+w"(v) : "w"(w)); 8562306a36Sopenharmony_ci#endif 8662306a36Sopenharmony_ci 8762306a36Sopenharmony_ci // mix columns 8862306a36Sopenharmony_ci w = (v << 1) ^ (uint8x16_t)(((int8x16_t)v >> 7) & 0x1b); 8962306a36Sopenharmony_ci w ^= (uint8x16_t)vrev32q_u16((uint16x8_t)v); 9062306a36Sopenharmony_ci w ^= vqtbl1q_u8(v ^ w, vld1q_u8(ror32by8)); 9162306a36Sopenharmony_ci 9262306a36Sopenharmony_ci return w; 9362306a36Sopenharmony_ci } 9462306a36Sopenharmony_ci#endif 9562306a36Sopenharmony_ci 9662306a36Sopenharmony_ci /* 9762306a36Sopenharmony_ci * We use inline asm here instead of the vaeseq_u8/vaesmcq_u8 intrinsics 9862306a36Sopenharmony_ci * to force the compiler to issue the aese/aesmc instructions in pairs. 9962306a36Sopenharmony_ci * This is much faster on many cores, where the instruction pair can 10062306a36Sopenharmony_ci * execute in a single cycle. 10162306a36Sopenharmony_ci */ 10262306a36Sopenharmony_ci asm(AES_ROUND : "+w"(w) : "w"(z)); 10362306a36Sopenharmony_ci return w; 10462306a36Sopenharmony_ci} 10562306a36Sopenharmony_ci 10662306a36Sopenharmony_cistatic inline __attribute__((always_inline)) 10762306a36Sopenharmony_cistruct aegis128_state aegis128_update_neon(struct aegis128_state st, 10862306a36Sopenharmony_ci uint8x16_t m) 10962306a36Sopenharmony_ci{ 11062306a36Sopenharmony_ci m ^= aegis_aes_round(st.v[4]); 11162306a36Sopenharmony_ci st.v[4] ^= aegis_aes_round(st.v[3]); 11262306a36Sopenharmony_ci st.v[3] ^= aegis_aes_round(st.v[2]); 11362306a36Sopenharmony_ci st.v[2] ^= aegis_aes_round(st.v[1]); 11462306a36Sopenharmony_ci st.v[1] ^= aegis_aes_round(st.v[0]); 11562306a36Sopenharmony_ci st.v[0] ^= m; 11662306a36Sopenharmony_ci 11762306a36Sopenharmony_ci return st; 11862306a36Sopenharmony_ci} 11962306a36Sopenharmony_ci 12062306a36Sopenharmony_cistatic inline __attribute__((always_inline)) 12162306a36Sopenharmony_civoid preload_sbox(void) 12262306a36Sopenharmony_ci{ 12362306a36Sopenharmony_ci if (!IS_ENABLED(CONFIG_ARM64) || 12462306a36Sopenharmony_ci !IS_ENABLED(CONFIG_CC_IS_GCC) || 12562306a36Sopenharmony_ci __builtin_expect(aegis128_have_aes_insn, 1)) 12662306a36Sopenharmony_ci return; 12762306a36Sopenharmony_ci 12862306a36Sopenharmony_ci asm("ld1 {v16.16b-v19.16b}, [%0], #64 \n\t" 12962306a36Sopenharmony_ci "ld1 {v20.16b-v23.16b}, [%0], #64 \n\t" 13062306a36Sopenharmony_ci "ld1 {v24.16b-v27.16b}, [%0], #64 \n\t" 13162306a36Sopenharmony_ci "ld1 {v28.16b-v31.16b}, [%0] \n\t" 13262306a36Sopenharmony_ci :: "r"(crypto_aes_sbox)); 13362306a36Sopenharmony_ci} 13462306a36Sopenharmony_ci 13562306a36Sopenharmony_civoid crypto_aegis128_init_neon(void *state, const void *key, const void *iv) 13662306a36Sopenharmony_ci{ 13762306a36Sopenharmony_ci static const uint8_t const0[] = { 13862306a36Sopenharmony_ci 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d, 13962306a36Sopenharmony_ci 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62, 14062306a36Sopenharmony_ci }; 14162306a36Sopenharmony_ci static const uint8_t const1[] = { 14262306a36Sopenharmony_ci 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1, 14362306a36Sopenharmony_ci 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd, 14462306a36Sopenharmony_ci }; 14562306a36Sopenharmony_ci uint8x16_t k = vld1q_u8(key); 14662306a36Sopenharmony_ci uint8x16_t kiv = k ^ vld1q_u8(iv); 14762306a36Sopenharmony_ci struct aegis128_state st = {{ 14862306a36Sopenharmony_ci kiv, 14962306a36Sopenharmony_ci vld1q_u8(const1), 15062306a36Sopenharmony_ci vld1q_u8(const0), 15162306a36Sopenharmony_ci k ^ vld1q_u8(const0), 15262306a36Sopenharmony_ci k ^ vld1q_u8(const1), 15362306a36Sopenharmony_ci }}; 15462306a36Sopenharmony_ci int i; 15562306a36Sopenharmony_ci 15662306a36Sopenharmony_ci preload_sbox(); 15762306a36Sopenharmony_ci 15862306a36Sopenharmony_ci for (i = 0; i < 5; i++) { 15962306a36Sopenharmony_ci st = aegis128_update_neon(st, k); 16062306a36Sopenharmony_ci st = aegis128_update_neon(st, kiv); 16162306a36Sopenharmony_ci } 16262306a36Sopenharmony_ci aegis128_save_state_neon(st, state); 16362306a36Sopenharmony_ci} 16462306a36Sopenharmony_ci 16562306a36Sopenharmony_civoid crypto_aegis128_update_neon(void *state, const void *msg) 16662306a36Sopenharmony_ci{ 16762306a36Sopenharmony_ci struct aegis128_state st = aegis128_load_state_neon(state); 16862306a36Sopenharmony_ci 16962306a36Sopenharmony_ci preload_sbox(); 17062306a36Sopenharmony_ci 17162306a36Sopenharmony_ci st = aegis128_update_neon(st, vld1q_u8(msg)); 17262306a36Sopenharmony_ci 17362306a36Sopenharmony_ci aegis128_save_state_neon(st, state); 17462306a36Sopenharmony_ci} 17562306a36Sopenharmony_ci 17662306a36Sopenharmony_ci#ifdef CONFIG_ARM 17762306a36Sopenharmony_ci/* 17862306a36Sopenharmony_ci * AArch32 does not provide these intrinsics natively because it does not 17962306a36Sopenharmony_ci * implement the underlying instructions. AArch32 only provides 64-bit 18062306a36Sopenharmony_ci * wide vtbl.8/vtbx.8 instruction, so use those instead. 18162306a36Sopenharmony_ci */ 18262306a36Sopenharmony_cistatic uint8x16_t vqtbl1q_u8(uint8x16_t a, uint8x16_t b) 18362306a36Sopenharmony_ci{ 18462306a36Sopenharmony_ci union { 18562306a36Sopenharmony_ci uint8x16_t val; 18662306a36Sopenharmony_ci uint8x8x2_t pair; 18762306a36Sopenharmony_ci } __a = { a }; 18862306a36Sopenharmony_ci 18962306a36Sopenharmony_ci return vcombine_u8(vtbl2_u8(__a.pair, vget_low_u8(b)), 19062306a36Sopenharmony_ci vtbl2_u8(__a.pair, vget_high_u8(b))); 19162306a36Sopenharmony_ci} 19262306a36Sopenharmony_ci 19362306a36Sopenharmony_cistatic uint8x16_t vqtbx1q_u8(uint8x16_t v, uint8x16_t a, uint8x16_t b) 19462306a36Sopenharmony_ci{ 19562306a36Sopenharmony_ci union { 19662306a36Sopenharmony_ci uint8x16_t val; 19762306a36Sopenharmony_ci uint8x8x2_t pair; 19862306a36Sopenharmony_ci } __a = { a }; 19962306a36Sopenharmony_ci 20062306a36Sopenharmony_ci return vcombine_u8(vtbx2_u8(vget_low_u8(v), __a.pair, vget_low_u8(b)), 20162306a36Sopenharmony_ci vtbx2_u8(vget_high_u8(v), __a.pair, vget_high_u8(b))); 20262306a36Sopenharmony_ci} 20362306a36Sopenharmony_ci 20462306a36Sopenharmony_cistatic int8_t vminvq_s8(int8x16_t v) 20562306a36Sopenharmony_ci{ 20662306a36Sopenharmony_ci int8x8_t s = vpmin_s8(vget_low_s8(v), vget_high_s8(v)); 20762306a36Sopenharmony_ci 20862306a36Sopenharmony_ci s = vpmin_s8(s, s); 20962306a36Sopenharmony_ci s = vpmin_s8(s, s); 21062306a36Sopenharmony_ci s = vpmin_s8(s, s); 21162306a36Sopenharmony_ci 21262306a36Sopenharmony_ci return vget_lane_s8(s, 0); 21362306a36Sopenharmony_ci} 21462306a36Sopenharmony_ci#endif 21562306a36Sopenharmony_ci 21662306a36Sopenharmony_cistatic const uint8_t permute[] __aligned(64) = { 21762306a36Sopenharmony_ci -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 21862306a36Sopenharmony_ci 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 21962306a36Sopenharmony_ci -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 22062306a36Sopenharmony_ci}; 22162306a36Sopenharmony_ci 22262306a36Sopenharmony_civoid crypto_aegis128_encrypt_chunk_neon(void *state, void *dst, const void *src, 22362306a36Sopenharmony_ci unsigned int size) 22462306a36Sopenharmony_ci{ 22562306a36Sopenharmony_ci struct aegis128_state st = aegis128_load_state_neon(state); 22662306a36Sopenharmony_ci const int short_input = size < AEGIS_BLOCK_SIZE; 22762306a36Sopenharmony_ci uint8x16_t msg; 22862306a36Sopenharmony_ci 22962306a36Sopenharmony_ci preload_sbox(); 23062306a36Sopenharmony_ci 23162306a36Sopenharmony_ci while (size >= AEGIS_BLOCK_SIZE) { 23262306a36Sopenharmony_ci uint8x16_t s = st.v[1] ^ (st.v[2] & st.v[3]) ^ st.v[4]; 23362306a36Sopenharmony_ci 23462306a36Sopenharmony_ci msg = vld1q_u8(src); 23562306a36Sopenharmony_ci st = aegis128_update_neon(st, msg); 23662306a36Sopenharmony_ci msg ^= s; 23762306a36Sopenharmony_ci vst1q_u8(dst, msg); 23862306a36Sopenharmony_ci 23962306a36Sopenharmony_ci size -= AEGIS_BLOCK_SIZE; 24062306a36Sopenharmony_ci src += AEGIS_BLOCK_SIZE; 24162306a36Sopenharmony_ci dst += AEGIS_BLOCK_SIZE; 24262306a36Sopenharmony_ci } 24362306a36Sopenharmony_ci 24462306a36Sopenharmony_ci if (size > 0) { 24562306a36Sopenharmony_ci uint8x16_t s = st.v[1] ^ (st.v[2] & st.v[3]) ^ st.v[4]; 24662306a36Sopenharmony_ci uint8_t buf[AEGIS_BLOCK_SIZE]; 24762306a36Sopenharmony_ci const void *in = src; 24862306a36Sopenharmony_ci void *out = dst; 24962306a36Sopenharmony_ci uint8x16_t m; 25062306a36Sopenharmony_ci 25162306a36Sopenharmony_ci if (__builtin_expect(short_input, 0)) 25262306a36Sopenharmony_ci in = out = memcpy(buf + AEGIS_BLOCK_SIZE - size, src, size); 25362306a36Sopenharmony_ci 25462306a36Sopenharmony_ci m = vqtbl1q_u8(vld1q_u8(in + size - AEGIS_BLOCK_SIZE), 25562306a36Sopenharmony_ci vld1q_u8(permute + 32 - size)); 25662306a36Sopenharmony_ci 25762306a36Sopenharmony_ci st = aegis128_update_neon(st, m); 25862306a36Sopenharmony_ci 25962306a36Sopenharmony_ci vst1q_u8(out + size - AEGIS_BLOCK_SIZE, 26062306a36Sopenharmony_ci vqtbl1q_u8(m ^ s, vld1q_u8(permute + size))); 26162306a36Sopenharmony_ci 26262306a36Sopenharmony_ci if (__builtin_expect(short_input, 0)) 26362306a36Sopenharmony_ci memcpy(dst, out, size); 26462306a36Sopenharmony_ci else 26562306a36Sopenharmony_ci vst1q_u8(out - AEGIS_BLOCK_SIZE, msg); 26662306a36Sopenharmony_ci } 26762306a36Sopenharmony_ci 26862306a36Sopenharmony_ci aegis128_save_state_neon(st, state); 26962306a36Sopenharmony_ci} 27062306a36Sopenharmony_ci 27162306a36Sopenharmony_civoid crypto_aegis128_decrypt_chunk_neon(void *state, void *dst, const void *src, 27262306a36Sopenharmony_ci unsigned int size) 27362306a36Sopenharmony_ci{ 27462306a36Sopenharmony_ci struct aegis128_state st = aegis128_load_state_neon(state); 27562306a36Sopenharmony_ci const int short_input = size < AEGIS_BLOCK_SIZE; 27662306a36Sopenharmony_ci uint8x16_t msg; 27762306a36Sopenharmony_ci 27862306a36Sopenharmony_ci preload_sbox(); 27962306a36Sopenharmony_ci 28062306a36Sopenharmony_ci while (size >= AEGIS_BLOCK_SIZE) { 28162306a36Sopenharmony_ci msg = vld1q_u8(src) ^ st.v[1] ^ (st.v[2] & st.v[3]) ^ st.v[4]; 28262306a36Sopenharmony_ci st = aegis128_update_neon(st, msg); 28362306a36Sopenharmony_ci vst1q_u8(dst, msg); 28462306a36Sopenharmony_ci 28562306a36Sopenharmony_ci size -= AEGIS_BLOCK_SIZE; 28662306a36Sopenharmony_ci src += AEGIS_BLOCK_SIZE; 28762306a36Sopenharmony_ci dst += AEGIS_BLOCK_SIZE; 28862306a36Sopenharmony_ci } 28962306a36Sopenharmony_ci 29062306a36Sopenharmony_ci if (size > 0) { 29162306a36Sopenharmony_ci uint8x16_t s = st.v[1] ^ (st.v[2] & st.v[3]) ^ st.v[4]; 29262306a36Sopenharmony_ci uint8_t buf[AEGIS_BLOCK_SIZE]; 29362306a36Sopenharmony_ci const void *in = src; 29462306a36Sopenharmony_ci void *out = dst; 29562306a36Sopenharmony_ci uint8x16_t m; 29662306a36Sopenharmony_ci 29762306a36Sopenharmony_ci if (__builtin_expect(short_input, 0)) 29862306a36Sopenharmony_ci in = out = memcpy(buf + AEGIS_BLOCK_SIZE - size, src, size); 29962306a36Sopenharmony_ci 30062306a36Sopenharmony_ci m = s ^ vqtbx1q_u8(s, vld1q_u8(in + size - AEGIS_BLOCK_SIZE), 30162306a36Sopenharmony_ci vld1q_u8(permute + 32 - size)); 30262306a36Sopenharmony_ci 30362306a36Sopenharmony_ci st = aegis128_update_neon(st, m); 30462306a36Sopenharmony_ci 30562306a36Sopenharmony_ci vst1q_u8(out + size - AEGIS_BLOCK_SIZE, 30662306a36Sopenharmony_ci vqtbl1q_u8(m, vld1q_u8(permute + size))); 30762306a36Sopenharmony_ci 30862306a36Sopenharmony_ci if (__builtin_expect(short_input, 0)) 30962306a36Sopenharmony_ci memcpy(dst, out, size); 31062306a36Sopenharmony_ci else 31162306a36Sopenharmony_ci vst1q_u8(out - AEGIS_BLOCK_SIZE, msg); 31262306a36Sopenharmony_ci } 31362306a36Sopenharmony_ci 31462306a36Sopenharmony_ci aegis128_save_state_neon(st, state); 31562306a36Sopenharmony_ci} 31662306a36Sopenharmony_ci 31762306a36Sopenharmony_ciint crypto_aegis128_final_neon(void *state, void *tag_xor, 31862306a36Sopenharmony_ci unsigned int assoclen, 31962306a36Sopenharmony_ci unsigned int cryptlen, 32062306a36Sopenharmony_ci unsigned int authsize) 32162306a36Sopenharmony_ci{ 32262306a36Sopenharmony_ci struct aegis128_state st = aegis128_load_state_neon(state); 32362306a36Sopenharmony_ci uint8x16_t v; 32462306a36Sopenharmony_ci int i; 32562306a36Sopenharmony_ci 32662306a36Sopenharmony_ci preload_sbox(); 32762306a36Sopenharmony_ci 32862306a36Sopenharmony_ci v = st.v[3] ^ (uint8x16_t)vcombine_u64(vmov_n_u64(8ULL * assoclen), 32962306a36Sopenharmony_ci vmov_n_u64(8ULL * cryptlen)); 33062306a36Sopenharmony_ci 33162306a36Sopenharmony_ci for (i = 0; i < 7; i++) 33262306a36Sopenharmony_ci st = aegis128_update_neon(st, v); 33362306a36Sopenharmony_ci 33462306a36Sopenharmony_ci v = st.v[0] ^ st.v[1] ^ st.v[2] ^ st.v[3] ^ st.v[4]; 33562306a36Sopenharmony_ci 33662306a36Sopenharmony_ci if (authsize > 0) { 33762306a36Sopenharmony_ci v = vqtbl1q_u8(~vceqq_u8(v, vld1q_u8(tag_xor)), 33862306a36Sopenharmony_ci vld1q_u8(permute + authsize)); 33962306a36Sopenharmony_ci 34062306a36Sopenharmony_ci return vminvq_s8((int8x16_t)v); 34162306a36Sopenharmony_ci } 34262306a36Sopenharmony_ci 34362306a36Sopenharmony_ci vst1q_u8(tag_xor, v); 34462306a36Sopenharmony_ci return 0; 34562306a36Sopenharmony_ci} 346