/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * aesce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.arch	armv8-a+crypto

	/*
	 * void ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
	 *			     u32 *macp, u8 const rk[], u32 rounds);
	 *
	 * Folds 'abytes' bytes of 'in' into the running MAC held in 'mac'.
	 *
	 * Register use on entry:
	 *	x0 = mac, x1 = in, w2 = abytes, x3 = macp, x4 = rk, w5 = rounds
	 *
	 * *macp is read on entry and written on exit.  It holds the number
	 * of bytes of an incomplete block that have already been XORed into
	 * mac[] (0 when no partial block is pending).  When it is non-zero,
	 * input is first consumed byte by byte to complete that block before
	 * whole blocks are processed.
	 *
	 * The round loop is unrolled three rounds deep.  'rounds' is compared
	 * against 12 to pick the entry point into the unrolled loop
	 * (presumably rounds is 10, 12 or 14 -- confirm against the caller).
	 *
	 * Scratch registers: w6/x6, w7, w8, v0, v1, v3-v5.
	 */
SYM_FUNC_START(ce_aes_ccm_auth_data)
	ldr	w8, [x3]			/* leftover from prev round? */
	ld1	{v0.16b}, [x0]			/* load mac */
	cbz	w8, 1f
	sub	w8, w8, #16			/* w8 = -(bytes still missing) */
	eor	v1.16b, v1.16b, v1.16b		/* clear the staging block */
0:	ldrb	w7, [x1], #1			/* get 1 byte of input */
	subs	w2, w2, #1
	add	w8, w8, #1
	ins	v1.b[0], w7
	ext	v1.16b, v1.16b, v1.16b, #1	/* rotate in the input bytes */
	beq	8f				/* out of input? */
	cbnz	w8, 0b
	eor	v0.16b, v0.16b, v1.16b		/* block complete: fold into mac */
1:	ld1	{v3.4s}, [x4]			/* load first round key */
	prfm	pldl1strm, [x1]
	cmp	w5, #12				/* which key size? */
	add	x6, x4, #16
	sub	w7, w5, #2			/* modified # of rounds */
	bmi	2f				/* rounds < 12: full 3-round entry */
	bne	5f				/* rounds > 12: enter at last round */
	mov	v5.16b, v3.16b			/* rounds == 12: enter at 2nd round */
	b	4f
2:	mov	v4.16b, v3.16b
	ld1	{v5.4s}, [x6], #16		/* load 2nd round key */
3:	aese	v0.16b, v4.16b
	aesmc	v0.16b, v0.16b
4:	ld1	{v3.4s}, [x6], #16		/* load next round key */
	aese	v0.16b, v5.16b
	aesmc	v0.16b, v0.16b
5:	ld1	{v4.4s}, [x6], #16		/* load next round key */
	subs	w7, w7, #3
	aese	v0.16b, v3.16b
	aesmc	v0.16b, v0.16b
	ld1	{v5.4s}, [x6], #16		/* load next round key */
	bpl	3b
	aese	v0.16b, v4.16b
	subs	w2, w2, #16			/* last data? */
	eor	v0.16b, v0.16b, v5.16b		/* final round */
	bmi	6f
	ld1	{v1.16b}, [x1], #16		/* load next input block */
	eor	v0.16b, v0.16b, v1.16b		/* xor with mac */
	bne	1b
6:	st1	{v0.16b}, [x0]			/* store mac */
	beq	10f				/* input ended on a block boundary */
	adds	w2, w2, #16			/* w2 = size of the partial tail */
	beq	10f
	mov	w8, w2				/* report tail size via *macp */
7:	ldrb	w7, [x1], #1
	umov	w6, v0.b[0]
	eor	w6, w6, w7
	strb	w6, [x0], #1			/* xor tail byte into stored mac */
	subs	w2, w2, #1
	beq	10f
	ext	v0.16b, v0.16b, v0.16b, #1	/* rotate out the mac bytes */
	b	7b
	/* input ran out while completing a previously pending block */
8:	cbz	w8, 91f				/* block just became complete */
	mov	w7, w8
	add	w8, w8, #16			/* w8 = bytes now pending */
9:	ext	v1.16b, v1.16b, v1.16b, #1	/* rotate staged bytes into place */
	adds	w7, w7, #1
	bne	9b
91:	eor	v0.16b, v0.16b, v1.16b
	st1	{v0.16b}, [x0]
10:	str	w8, [x3]			/* write back pending byte count */
	ret
SYM_FUNC_END(ce_aes_ccm_auth_data)

	/*
	 * void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u8 const rk[],
	 * 			 u32 rounds);
	 *
	 * Runs the cipher rounds over both the MAC block and the counter
	 * block loaded from 'ctr', XORs the two results and stores the
	 * outcome back to 'mac'.
	 *
	 * Register use on entry:
	 *	x0 = mac, x1 = ctr, x2 = rk, w3 = rounds
	 *
	 * x2 and w3 are modified.  Scratch vectors: v0, v1, v3-v5.
	 */
SYM_FUNC_START(ce_aes_ccm_final)
	ld1	{v3.4s}, [x2], #16		/* load first round key */
	ld1	{v0.16b}, [x0]			/* load mac */
	cmp	w3, #12				/* which key size? */
	sub	w3, w3, #2			/* modified # of rounds */
	ld1	{v1.16b}, [x1]			/* load 1st ctriv */
	bmi	0f				/* rounds < 12: full 3-round entry */
	bne	3f				/* rounds > 12: enter at last round */
	mov	v5.16b, v3.16b			/* rounds == 12: enter at 2nd round */
	b	2f
0:	mov	v4.16b, v3.16b
1:	ld1	{v5.4s}, [x2], #16		/* load next round key */
	aese	v0.16b, v4.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v4.16b
	aesmc	v1.16b, v1.16b
2:	ld1	{v3.4s}, [x2], #16		/* load next round key */
	aese	v0.16b, v5.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v5.16b
	aesmc	v1.16b, v1.16b
3:	ld1	{v4.4s}, [x2], #16		/* load next round key */
	subs	w3, w3, #3
	aese	v0.16b, v3.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v3.16b
	aesmc	v1.16b, v1.16b
	bpl	1b
	aese	v0.16b, v4.16b
	aese	v1.16b, v4.16b
	/* final round key cancels out */
	eor	v0.16b, v0.16b, v1.16b		/* en-/decrypt the mac */
	st1	{v0.16b}, [x0]			/* store result */
	ret
SYM_FUNC_END(ce_aes_ccm_final)

	/*
	 * aes_ccm_do_crypt - shared body of ce_aes_ccm_encrypt/_decrypt
	 *
	 * \enc == 1 emits the encrypt variant, any other value the decrypt
	 * variant.  The two differ only in whether the MAC is updated with
	 * the input (encrypt) or with the output (decrypt) of the counter
	 * XOR.
	 *
	 * Register use on entry (see the prototypes below the macro):
	 *	x0 = out, x1 = in, w2 = cbytes, x3 = rk, w4 = rounds,
	 *	x5 = mac, x6 = ctr
	 *
	 * Per block, the MAC state (v0) and the incremented counter block
	 * (v1) go through the cipher rounds together, two-way interleaved.
	 * The last round key (v5) is folded in together with the data XORs
	 * rather than applied to v0 and v1 separately.
	 *
	 * The low 64 bits of the counter live byte-swapped in x8 and are
	 * incremented there; the upper 8 bytes are reloaded from ctr[] for
	 * every block.
	 *
	 * When cbytes is not a multiple of 16, the trailing bytes are handled
	 * one at a time at label 7.  That path reuses w6 and w7 as scratch
	 * and advances x5, and it returns without writing the counter back
	 * to ctr[].
	 *
	 * Scratch registers: w6/w7 (tail only), x8, x9, x10, v0-v5.
	 */
	.macro	aes_ccm_do_crypt,enc
	ldr	x8, [x6, #8]			/* load lower ctr */
	ld1	{v0.16b}, [x5]			/* load mac */
CPU_LE(	rev	x8, x8			)	/* keep swabbed ctr in reg */
0:	/* outer loop */
	ld1	{v1.8b}, [x6]			/* load upper ctr */
	prfm	pldl1strm, [x1]
	add	x8, x8, #1
	rev	x9, x8
	cmp	w4, #12				/* which key size? */
	sub	w7, w4, #2			/* get modified # of rounds */
	ins	v1.d[1], x9			/* no carry in lower ctr */
	ld1	{v3.4s}, [x3]			/* load first round key */
	add	x10, x3, #16
	bmi	1f				/* rounds < 12: full 3-round entry */
	bne	4f				/* rounds > 12: enter at last round */
	mov	v5.16b, v3.16b			/* rounds == 12: enter at 2nd round */
	b	3f
1:	mov	v4.16b, v3.16b
	ld1	{v5.4s}, [x10], #16		/* load 2nd round key */
2:	/* inner loop: 3 rounds, 2x interleaved */
	aese	v0.16b, v4.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v4.16b
	aesmc	v1.16b, v1.16b
3:	ld1	{v3.4s}, [x10], #16		/* load next round key */
	aese	v0.16b, v5.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v5.16b
	aesmc	v1.16b, v1.16b
4:	ld1	{v4.4s}, [x10], #16		/* load next round key */
	subs	w7, w7, #3
	aese	v0.16b, v3.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v3.16b
	aesmc	v1.16b, v1.16b
	ld1	{v5.4s}, [x10], #16		/* load next round key */
	bpl	2b
	aese	v0.16b, v4.16b
	aese	v1.16b, v4.16b
	subs	w2, w2, #16
	bmi	6f				/* partial block? */
	ld1	{v2.16b}, [x1], #16		/* load next input block */
	.if	\enc == 1
	eor	v2.16b, v2.16b, v5.16b		/* final round enc+mac */
	eor	v1.16b, v1.16b, v2.16b		/* xor with crypted ctr */
	.else
	eor	v2.16b, v2.16b, v1.16b		/* xor with crypted ctr */
	eor	v1.16b, v2.16b, v5.16b		/* final round enc */
	.endif
	eor	v0.16b, v0.16b, v2.16b		/* xor mac with pt ^ rk[last] */
	st1	{v1.16b}, [x0], #16		/* write output block */
	bne	0b
CPU_LE(	rev	x8, x8			)
	st1	{v0.16b}, [x5]			/* store mac */
	str	x8, [x6, #8]			/* store lsb end of ctr (BE) */
5:	ret

6:	eor	v0.16b, v0.16b, v5.16b		/* final round mac */
	eor	v1.16b, v1.16b, v5.16b		/* final round enc */
	st1	{v0.16b}, [x5]			/* store mac */
	add	w2, w2, #16			/* process partial tail block */
7:	ldrb	w9, [x1], #1			/* get 1 byte of input */
	umov	w6, v1.b[0]			/* get top crypted ctr byte */
	umov	w7, v0.b[0]			/* get top mac byte */
	.if	\enc == 1
	eor	w7, w7, w9
	eor	w9, w9, w6
	.else
	eor	w9, w9, w6
	eor	w7, w7, w9
	.endif
	strb	w9, [x0], #1			/* store out byte */
	strb	w7, [x5], #1			/* store mac byte */
	subs	w2, w2, #1
	beq	5b
	ext	v0.16b, v0.16b, v0.16b, #1	/* shift out mac byte */
	ext	v1.16b, v1.16b, v1.16b, #1	/* shift out ctr byte */
	b	7b
	.endm

	/*
	 * void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
	 * 			   u8 const rk[], u32 rounds, u8 mac[],
	 * 			   u8 ctr[]);
	 * void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
	 * 			   u8 const rk[], u32 rounds, u8 mac[],
	 * 			   u8 ctr[]);
	 */
SYM_FUNC_START(ce_aes_ccm_encrypt)
	aes_ccm_do_crypt	1
SYM_FUNC_END(ce_aes_ccm_encrypt)

SYM_FUNC_START(ce_aes_ccm_decrypt)
	aes_ccm_do_crypt	0
SYM_FUNC_END(ce_aes_ccm_decrypt)