/* SPDX-License-Identifier: GPL-2.0-or-later */
#
# Accelerated poly1305 implementation for ppc64le.
#
# Copyright 2023- IBM Corp. All rights reserved
#
#===================================================================================
# Written by Danny Tsen <dtsen@us.ibm.com>
#
# Poly1305 - this version mainly using vector/VSX/Scalar
#  - 26 bits limbs
#  - Handle multiple 64 byte blocks.
#
# Block size 16 bytes
# key = (r, s)
# clamp r &= 0x0FFFFFFC0FFFFFFC 0x0FFFFFFC0FFFFFFF
# p = 2^130 - 5
# a += m
# a = (r * a) % p
# a += s
#
# Improve performance by breaking down the polynomial to the sum of products with
#     h4 = m1 * r⁴ + m2 * r³ + m3 * r² + m4 * r
#
#  07/22/21 - this revision is based on the above sum of products.  Setup r^4, r^3, r^2, r and s3, s2, s1, s0
#             to 9 vectors for multiplications.
#
# setup r^4, r^3, r^2, r vectors
#    vs    [r^1, r^3, r^2, r^4]
#    vs0 = [r0,.....]
#    vs1 = [r1,.....]
#    vs2 = [r2,.....]
#    vs3 = [r3,.....]
#    vs4 = [r4,.....]
#    vs5 = [r1*5,...]
#    vs6 = [r2*5,...]
#    vs7 = [r3*5,...]
#    vs8 = [r4*5,...]
#
#  Each word in a vector consists of a member of a "r/s" in [a * r/s].
#
#     r0, r4*5, r3*5, r2*5, r1*5;
#     r1, r0,   r4*5, r3*5, r2*5;
#     r2, r1,   r0,   r4*5, r3*5;
#     r3, r2,   r1,   r0,   r4*5;
#     r4, r3,   r2,   r1,   r0  ;
#
#
# poly1305_p10le_4blocks( uint8_t *k, uint8_t *m, uint32_t mlen)
#  k = state area: h is read from offsets 0/8/16 and r from offsets 24/32
#  r3 = k
#  r4 = m    (base address used for the message loads)
#  r5 = mlen (compared against 64 and decremented by 64 per 4-block group)
#
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/asm-compat.h>
#include <linux/linkage.h>

.machine "any"

.text

#
# Register save/restore helpers.
#
# SAVE_GPR / RESTORE_GPR move one 64-bit GPR to/from OFFSET(FRAME).
# The VRS and VSX variants use indexed loads/stores and therefore load
# OFFSET into r16 first: r16 is clobbered by every one of them.
#
.macro SAVE_GPR GPR OFFSET FRAME
	std	\GPR,\OFFSET(\FRAME)
.endm

.macro SAVE_VRS VRS OFFSET FRAME
	li	16, \OFFSET
	stvx	\VRS, 16, \FRAME
.endm

.macro SAVE_VSX VSX OFFSET FRAME
	li	16, \OFFSET
	stxvx	\VSX, 16, \FRAME
.endm

.macro RESTORE_GPR GPR OFFSET FRAME
	ld	\GPR,\OFFSET(\FRAME)
.endm

.macro RESTORE_VRS VRS OFFSET FRAME
	li	16, \OFFSET
	lvx	\VRS, 16, \FRAME
.endm

.macro RESTORE_VSX VSX OFFSET FRAME
	li	16, \OFFSET
	lxvx	\VSX, 16, \FRAME
.endm

#
# SAVE_REGS
#
# Prologue: stores LR at 16(r1), allocates a 752-byte frame and saves
#   r14..r31    at frame offsets 112..248
#   v20..v31    at frame offsets 256..432  (r9 + 0..176)
#   vs14..vs31  at frame offsets 448..720  (r9 + 192..464)
# Clobbers r0, r9 (left pointing at r1 + 256) and r16.  r16 is stored
# before the vector saves start using it as the index register.
#
.macro SAVE_REGS
	mflr 0
	std 0, 16(1)
	stdu 1,-752(1)

	SAVE_GPR 14, 112, 1
	SAVE_GPR 15, 120, 1
	SAVE_GPR 16, 128, 1
	SAVE_GPR 17, 136, 1
	SAVE_GPR 18, 144, 1
	SAVE_GPR 19, 152, 1
	SAVE_GPR 20, 160, 1
	SAVE_GPR 21, 168, 1
	SAVE_GPR 22, 176, 1
	SAVE_GPR 23, 184, 1
	SAVE_GPR 24, 192, 1
	SAVE_GPR 25, 200, 1
	SAVE_GPR 26, 208, 1
	SAVE_GPR 27, 216, 1
	SAVE_GPR 28, 224, 1
	SAVE_GPR 29, 232, 1
	SAVE_GPR 30, 240, 1
	SAVE_GPR 31, 248, 1

	addi	9, 1, 256
	SAVE_VRS 20, 0, 9
	SAVE_VRS 21, 16, 9
	SAVE_VRS 22, 32, 9
	SAVE_VRS 23, 48, 9
	SAVE_VRS 24, 64, 9
	SAVE_VRS 25, 80, 9
	SAVE_VRS 26, 96, 9
	SAVE_VRS 27, 112, 9
	SAVE_VRS 28, 128, 9
	SAVE_VRS 29, 144, 9
	SAVE_VRS 30, 160, 9
	SAVE_VRS 31, 176, 9

	SAVE_VSX 14, 192, 9
	SAVE_VSX 15, 208, 9
	SAVE_VSX 16, 224, 9
	SAVE_VSX 17, 240, 9
	SAVE_VSX 18, 256, 9
	SAVE_VSX 19, 272, 9
	SAVE_VSX 20, 288, 9
	SAVE_VSX 21, 304, 9
	SAVE_VSX 22, 320, 9
	SAVE_VSX 23, 336, 9
	SAVE_VSX 24, 352, 9
	SAVE_VSX 25, 368, 9
	SAVE_VSX 26, 384, 9
	SAVE_VSX 27, 400, 9
	SAVE_VSX 28, 416, 9
	SAVE_VSX 29, 432, 9
	SAVE_VSX 30, 448, 9
	SAVE_VSX 31, 464, 9
.endm # SAVE_REGS

#
# RESTORE_REGS
#
# Epilogue matching SAVE_REGS.  The vector registers are reloaded first
# because those helpers use r16 as scratch; the GPR reloads that follow
# put the saved r16 back.  Finally the frame is popped and LR is reloaded
# from 16(r1).  Clobbers r0 and r9.
#
.macro RESTORE_REGS
	addi	9, 1, 256
	RESTORE_VRS 20, 0, 9
	RESTORE_VRS 21, 16, 9
	RESTORE_VRS 22, 32, 9
	RESTORE_VRS 23, 48, 9
	RESTORE_VRS 24, 64, 9
	RESTORE_VRS 25, 80, 9
	RESTORE_VRS 26, 96, 9
	RESTORE_VRS 27, 112, 9
	RESTORE_VRS 28, 128, 9
	RESTORE_VRS 29, 144, 9
	RESTORE_VRS 30, 160, 9
	RESTORE_VRS 31, 176, 9

	RESTORE_VSX 14, 192, 9
	RESTORE_VSX 15, 208, 9
	RESTORE_VSX 16, 224, 9
	RESTORE_VSX 17, 240, 9
	RESTORE_VSX 18, 256, 9
	RESTORE_VSX 19, 272, 9
	RESTORE_VSX 20, 288, 9
	RESTORE_VSX 21, 304, 9
	RESTORE_VSX 22, 320, 9
	RESTORE_VSX 23, 336, 9
	RESTORE_VSX 24, 352, 9
	RESTORE_VSX 25, 368, 9
	RESTORE_VSX 26, 384, 9
	RESTORE_VSX 27, 400, 9
	RESTORE_VSX 28, 416, 9
	RESTORE_VSX 29, 432, 9
	RESTORE_VSX 30, 448, 9
	RESTORE_VSX 31, 464, 9

	RESTORE_GPR 14, 112, 1
	RESTORE_GPR 15, 120, 1
	RESTORE_GPR 16, 128, 1
	RESTORE_GPR 17, 136, 1
	RESTORE_GPR 18, 144, 1
	RESTORE_GPR 19, 152, 1
	RESTORE_GPR 20, 160, 1
	RESTORE_GPR 21, 168, 1
	RESTORE_GPR 22, 176, 1
	RESTORE_GPR 23, 184, 1
	RESTORE_GPR 24, 192, 1
	RESTORE_GPR 25, 200, 1
	RESTORE_GPR 26, 208, 1
	RESTORE_GPR 27, 216, 1
	RESTORE_GPR 28, 224, 1
	RESTORE_GPR 29, 232, 1
	RESTORE_GPR 30, 240, 1
	RESTORE_GPR 31, 248, 1

	addi    1, 1, 752
	ld 0, 16(1)
	mtlr 0
.endm # RESTORE_REGS

#
# p[0] = a0*r0 + a1*r4*5 + a2*r3*5 + a3*r2*5 + a4*r1*5;
# p[1] = a0*r1 + a1*r0   + a2*r4*5 + a3*r3*5 + a4*r2*5;
# p[2] = a0*r2 + a1*r1   + a2*r0   + a3*r4*5 + a4*r3*5;
# p[3] = a0*r3 + a1*r2   + a2*r1   + a3*r0   + a4*r4*5;
# p[4] = a0*r4 + a1*r3   + a2*r2   + a3*r1   + a4*r0  ;
#
#    [r^2, r^3, r^1, r^4]
#    [m3,  m2,  m4,  m1]
#
# multiply odd and even words
#
# mul_odd
#
# Multiplies the odd-numbered 32-bit words of each operand (vmulouw) and
# sums the 64-bit products per limb, one p[i] row at a time.
#
#   in:   v4..v8   = a0..a4
#         v26..v30 = r0..r4
#         v0..v3   = r1*5, r2*5, r3*5, r4*5
#   out:  v14..v18 = x0..x4 (overwritten, not accumulated)
#   temp: v10..v13
#
.macro mul_odd
	vmulouw	14, 4, 26
	vmulouw	10, 5, 3
	vmulouw	11, 6, 2
	vmulouw	12, 7, 1
	vmulouw	13, 8, 0
	vmulouw	15, 4, 27
	vaddudm	14, 14, 10
	vaddudm	14, 14, 11
	vmulouw	10, 5, 26
	vmulouw	11, 6, 3
	vaddudm	14, 14, 12
	vaddudm	14, 14, 13	# x0
	vaddudm	15, 15, 10
	vaddudm	15, 15, 11
	vmulouw	12, 7, 2
	vmulouw	13, 8, 1
	vaddudm	15, 15, 12
	vaddudm	15, 15, 13	# x1
	vmulouw	16, 4, 28
	vmulouw	10, 5, 27
	vmulouw	11, 6, 26
	vaddudm	16, 16, 10
	vaddudm	16, 16, 11
	vmulouw	12, 7, 3
	vmulouw	13, 8, 2
	vaddudm	16, 16, 12
	vaddudm	16, 16, 13	# x2
	vmulouw	17, 4, 29
	vmulouw	10, 5, 28
	vmulouw	11, 6, 27
	vaddudm	17, 17, 10
	vaddudm	17, 17, 11
	vmulouw	12, 7, 26
	vmulouw	13, 8, 3
	vaddudm	17, 17, 12
	vaddudm	17, 17, 13	# x3
	vmulouw	18, 4, 30
	vmulouw	10, 5, 29
	vmulouw	11, 6, 28
	vaddudm	18, 18, 10
	vaddudm	18, 18, 11
	vmulouw	12, 7, 27
	vmulouw	13, 8, 26
	vaddudm	18, 18, 12
	vaddudm	18, 18, 13	# x4
.endm

#
# mul_even
#
# Same products as mul_odd but on the even-numbered words (vmuleuw).
# The results are ADDED to v14..v18, so v14..v18 must already hold valid
# sums: every use in this file runs mul_odd immediately before mul_even.
#
#   in:     v4..v8, v26..v30, v0..v3 as for mul_odd
#   in/out: v14..v18 = x0..x4
#   temp:   v9..v13
#
.macro mul_even
	vmuleuw	9, 4, 26
	vmuleuw	10, 5, 3
	vmuleuw	11, 6, 2
	vmuleuw	12, 7, 1
	vmuleuw	13, 8, 0
	vaddudm	14, 14, 9
	vaddudm	14, 14, 10
	vaddudm	14, 14, 11
	vaddudm	14, 14, 12
	vaddudm	14, 14, 13	# x0

	vmuleuw	9, 4, 27
	vmuleuw	10, 5, 26
	vmuleuw	11, 6, 3
	vmuleuw	12, 7, 2
	vmuleuw	13, 8, 1
	vaddudm	15, 15, 9
	vaddudm	15, 15, 10
	vaddudm	15, 15, 11
	vaddudm	15, 15, 12
	vaddudm	15, 15, 13	# x1

	vmuleuw	9, 4, 28
	vmuleuw	10, 5, 27
	vmuleuw	11, 6, 26
	vmuleuw	12, 7, 3
	vmuleuw	13, 8, 2
	vaddudm	16, 16, 9
	vaddudm	16, 16, 10
	vaddudm	16, 16, 11
	vaddudm	16, 16, 12
	vaddudm	16, 16, 13	# x2

	vmuleuw	9, 4, 29
	vmuleuw	10, 5, 28
	vmuleuw	11, 6, 27
	vmuleuw	12, 7, 26
	vmuleuw	13, 8, 3
	vaddudm	17, 17, 9
	vaddudm	17, 17, 10
	vaddudm	17, 17, 11
	vaddudm	17, 17, 12
	vaddudm	17, 17, 13	# x3

	vmuleuw	9, 4, 30
	vmuleuw	10, 5, 29
	vmuleuw	11, 6, 28
	vmuleuw	12, 7, 27
	vmuleuw	13, 8, 26
	vaddudm	18, 18, 9
	vaddudm	18, 18, 10
	vaddudm	18, 18, 11
	vaddudm	18, 18, 12
	vaddudm	18, 18, 13	# x4
.endm

#
# poly1305_setup_r
#
# setup r^4, r^3, r^2, r vectors
#    [r, r^3, r^2, r^4]
#    vs0 = [r0,...]
#    vs1 = [r1,...]
#    vs2 = [r2,...]
#    vs3 = [r3,...]
#    vs4 = [r4,...]
#    vs5 = [r1*5,...]
#    vs6 = [r2*5,...]
#    vs7 = [r3*5,...]
#    vs8 = [r4*5,...]
#
#    r0, r4*5, r3*5, r2*5, r1*5;
#    r1, r0,   r4*5, r3*5, r2*5;
#    r2, r1,   r0,   r4*5, r3*5;
#    r3, r2,   r1,   r0,   r4*5;
#    r4, r3,   r2,   r1,   r0  ;
#
# Expects r0..r4 in v26..v30 and r1*5..r4*5 in v0..v3, with v25 and v31
# loaded, which is the state do_poly1305_init leaves behind.
# Calls do_mul twice with bl, so LR is clobbered.
# On exit:
#   vs0..vs4 = copies of v26..v30 and vs5..vs8 = copies of v0..v3 (limb*5)
#              as they stood after the second do_mul
#   v26..v30 = rebuilt from words 3 and 2 of each limb (vspltw + vmrgow)
#   v0..v3   = 5 * v27..v30 of the rebuilt vectors
# Clobbers v4..v18, vs26..vs30 (scratch copy of r) and zeroes vs31.
#
.macro poly1305_setup_r

	# save r
	xxlor	26, 58, 58
	xxlor	27, 59, 59
	xxlor	28, 60, 60
	xxlor	29, 61, 61
	xxlor	30, 62, 62

	xxlxor	31, 31, 31

#    [r, r^3, r^2, r^4]
	# compute r^2
	vmr	4, 26
	vmr	5, 27
	vmr	6, 28
	vmr	7, 29
	vmr	8, 30
	bl	do_mul		# r^2 r^1
	xxpermdi 58, 58, 36, 0x3		# r0
	xxpermdi 59, 59, 37, 0x3		# r1
	xxpermdi 60, 60, 38, 0x3		# r2
	xxpermdi 61, 61, 39, 0x3		# r3
	xxpermdi 62, 62, 40, 0x3		# r4
	xxpermdi 36, 36, 36, 0x3
	xxpermdi 37, 37, 37, 0x3
	xxpermdi 38, 38, 38, 0x3
	xxpermdi 39, 39, 39, 0x3
	xxpermdi 40, 40, 40, 0x3
	# limb * 5 computed as (limb << 2) + limb
	vspltisb 13, 2
	vsld	9, 27, 13
	vsld	10, 28, 13
	vsld	11, 29, 13
	vsld	12, 30, 13
	vaddudm	0, 9, 27
	vaddudm	1, 10, 28
	vaddudm	2, 11, 29
	vaddudm	3, 12, 30

	bl	do_mul		# r^4 r^3
	vmrgow	26, 26, 4
	vmrgow	27, 27, 5
	vmrgow	28, 28, 6
	vmrgow	29, 29, 7
	vmrgow	30, 30, 8
	vspltisb 13, 2
	vsld	9, 27, 13
	vsld	10, 28, 13
	vsld	11, 29, 13
	vsld	12, 30, 13
	vaddudm	0, 9, 27
	vaddudm	1, 10, 28
	vaddudm	2, 11, 29
	vaddudm	3, 12, 30

	# r^2 r^4
	xxlor	0, 58, 58
	xxlor	1, 59, 59
	xxlor	2, 60, 60
	xxlor	3, 61, 61
	xxlor	4, 62, 62
	xxlor	5, 32, 32
	xxlor	6, 33, 33
	xxlor	7, 34, 34
	xxlor	8, 35, 35

	vspltw	9, 26, 3
	vspltw	10, 26, 2
	vmrgow	26, 10, 9
	vspltw	9, 27, 3
	vspltw	10, 27, 2
	vmrgow	27, 10, 9
	vspltw	9, 28, 3
	vspltw	10, 28, 2
	vmrgow	28, 10, 9
	vspltw	9, 29, 3
	vspltw	10, 29, 2
	vmrgow	29, 10, 9
	vspltw	9, 30, 3
	vspltw	10, 30, 2
	vmrgow	30, 10, 9

	# v13 still holds the splatted shift count 2 from above
	vsld	9, 27, 13
	vsld	10, 28, 13
	vsld	11, 29, 13
	vsld	12, 30, 13
	vaddudm	0, 9, 27
	vaddudm	1, 10, 28
	vaddudm	2, 11, 29
	vaddudm	3, 12, 30
.endm

#
# do_mul
#
# Local subroutine used by poly1305_setup_r: one limb multiplication on
# the odd words only (mul_odd), followed by a carry pass.
#
#   in:   v4..v8 = a0..a4, v26..v30 = r0..r4, v0..v3 = r1*5..r4*5
#         v25 = limb mask and v31 = shift count, as loaded by
#         do_poly1305_init
#   out:  v4..v8 = reduced limbs of the product
#   temp: v9..v18
#
SYM_FUNC_START_LOCAL(do_mul)
	mul_odd

	# do reduction ( h %= p )
	# carry reduction
	vspltisb 9, 2
	vsrd	10, 14, 31
	vsrd	11, 17, 31
	vand	7, 17, 25
	vand	4, 14, 25
	vaddudm	18, 18, 11
	vsrd	12, 18, 31
	vaddudm	15, 15, 10

	vsrd	11, 15, 31
	vand	8, 18, 25
	vand	5, 15, 25
	# carry out of the top limb wraps to limb 0 times 5: c + (c << 2)
	vaddudm	4, 4, 12
	vsld	10, 12, 9
	vaddudm	6, 16, 11

	vsrd	13, 6, 31
	vand	6, 6, 25
	vaddudm	4, 4, 10
	vsrd	10, 4, 31
	vaddudm	7, 7, 13

	vsrd	11, 7, 31
	vand	7, 7, 25
	vand	4, 4, 25
	vaddudm	5, 5, 10
	vaddudm	8, 8, 11
	blr
SYM_FUNC_END(do_mul)

#
# init key
#
# Loads the two rmask doublewords into r11/r12 and the cnum constants
# into v25, v31, v19, vs24 and vs25, then reads r from offsets 24 and 32
# of the area addressed by r3, masks it with rmask and splits it into
# five 26-bit limbs held in the low doubleword of v26..v30 (VSR 58..62).
# Finally sets v0..v3 = r1*5 .. r4*5.
# Clobbers r9..r12, r14..r18 and v4; the record-form and. also updates CR0.
#
.macro do_poly1305_init
	addis	10, 2, rmask@toc@ha
	addi	10, 10, rmask@toc@l

	ld	11, 0(10)
	ld	12, 8(10)

	li	14, 16
	li	15, 32
	addis	10, 2, cnum@toc@ha
	addi	10, 10, cnum@toc@l
	lvx	25, 0, 10	# v25 - mask
	lvx	31, 14, 10	# v31 = 1a
	lvx	19, 15, 10	# v19 = 1 << 24
	lxv	24, 48(10)	# vs24
	lxv	25, 64(10)	# vs25

	# initialize
	# load key from r3 to vectors
	ld	9, 24(3)
	ld	10, 32(3)
	and.	9, 9, 11
	and.	10, 10, 12

	# break 26 bits
	extrdi	14, 9, 26, 38		# r0 limb: bits 0..25 of the low dword
	extrdi	15, 9, 26, 12		# r1 limb: bits 26..51
	extrdi	16, 9, 12, 0		# top 12 bits of the low dword
	mtvsrdd	58, 0, 14
	insrdi	16, 10, 14, 38		# r2 limb: plus low 14 bits of the high dword
	mtvsrdd	59, 0, 15
	extrdi	17, 10, 26, 24		# r3 limb: bits 14..39 of the high dword
	mtvsrdd	60, 0, 16
	extrdi	18, 10, 24, 0		# r4 limb: top 24 bits of the high dword
	mtvsrdd	61, 0, 17
	mtvsrdd	62, 0, 18

	# r1 = r1 * 5, r2 = r2 * 5, r3 = r3 * 5, r4 = r4 * 5
	li	9, 5
	mtvsrdd	36, 0, 9
	vmulouw	0, 27, 4		# v0 = r1 * 5
	vmulouw	1, 28, 4		# v1 = r2 * 5
	vmulouw	2, 29, 4		# v2 = r3 * 5
	vmulouw	3, 30, 4		# v3 = r4 * 5
.endm

#
# poly1305_p10le_4blocks( uint8_t *k, uint32_t mlen, uint8_t
*m) 52362306a36Sopenharmony_ci# k = 32 bytes key 52462306a36Sopenharmony_ci# r3 = k (r, s) 52562306a36Sopenharmony_ci# r4 = mlen 52662306a36Sopenharmony_ci# r5 = m 52762306a36Sopenharmony_ci# 52862306a36Sopenharmony_ciSYM_FUNC_START(poly1305_p10le_4blocks) 52962306a36Sopenharmony_ci.align 5 53062306a36Sopenharmony_ci cmpdi 5, 64 53162306a36Sopenharmony_ci blt Out_no_poly1305 53262306a36Sopenharmony_ci 53362306a36Sopenharmony_ci SAVE_REGS 53462306a36Sopenharmony_ci 53562306a36Sopenharmony_ci do_poly1305_init 53662306a36Sopenharmony_ci 53762306a36Sopenharmony_ci li 21, 0 # counter to message 53862306a36Sopenharmony_ci 53962306a36Sopenharmony_ci poly1305_setup_r 54062306a36Sopenharmony_ci 54162306a36Sopenharmony_ci # load previous H state 54262306a36Sopenharmony_ci # break/convert r6 to 26 bits 54362306a36Sopenharmony_ci ld 9, 0(3) 54462306a36Sopenharmony_ci ld 10, 8(3) 54562306a36Sopenharmony_ci ld 19, 16(3) 54662306a36Sopenharmony_ci sldi 19, 19, 24 54762306a36Sopenharmony_ci mtvsrdd 41, 0, 19 54862306a36Sopenharmony_ci extrdi 14, 9, 26, 38 54962306a36Sopenharmony_ci extrdi 15, 9, 26, 12 55062306a36Sopenharmony_ci extrdi 16, 9, 12, 0 55162306a36Sopenharmony_ci mtvsrdd 36, 0, 14 55262306a36Sopenharmony_ci insrdi 16, 10, 14, 38 55362306a36Sopenharmony_ci mtvsrdd 37, 0, 15 55462306a36Sopenharmony_ci extrdi 17, 10, 26, 24 55562306a36Sopenharmony_ci mtvsrdd 38, 0, 16 55662306a36Sopenharmony_ci extrdi 18, 10, 24, 0 55762306a36Sopenharmony_ci mtvsrdd 39, 0, 17 55862306a36Sopenharmony_ci mtvsrdd 40, 0, 18 55962306a36Sopenharmony_ci vor 8, 8, 9 56062306a36Sopenharmony_ci 56162306a36Sopenharmony_ci # input m1 m2 56262306a36Sopenharmony_ci add 20, 4, 21 56362306a36Sopenharmony_ci xxlor 49, 24, 24 56462306a36Sopenharmony_ci xxlor 50, 25, 25 56562306a36Sopenharmony_ci lxvw4x 43, 0, 20 56662306a36Sopenharmony_ci addi 17, 20, 16 56762306a36Sopenharmony_ci lxvw4x 44, 0, 17 56862306a36Sopenharmony_ci vperm 14, 11, 12, 17 56962306a36Sopenharmony_ci vperm 15, 11, 12, 18 
57062306a36Sopenharmony_ci vand 9, 14, 25 # a0 57162306a36Sopenharmony_ci vsrd 10, 14, 31 # >> 26 57262306a36Sopenharmony_ci vsrd 11, 10, 31 # 12 bits left 57362306a36Sopenharmony_ci vand 10, 10, 25 # a1 57462306a36Sopenharmony_ci vspltisb 13, 12 57562306a36Sopenharmony_ci vand 16, 15, 25 57662306a36Sopenharmony_ci vsld 12, 16, 13 57762306a36Sopenharmony_ci vor 11, 11, 12 57862306a36Sopenharmony_ci vand 11, 11, 25 # a2 57962306a36Sopenharmony_ci vspltisb 13, 14 58062306a36Sopenharmony_ci vsrd 12, 15, 13 # >> 14 58162306a36Sopenharmony_ci vsrd 13, 12, 31 # >> 26, a4 58262306a36Sopenharmony_ci vand 12, 12, 25 # a3 58362306a36Sopenharmony_ci 58462306a36Sopenharmony_ci vaddudm 20, 4, 9 58562306a36Sopenharmony_ci vaddudm 21, 5, 10 58662306a36Sopenharmony_ci vaddudm 22, 6, 11 58762306a36Sopenharmony_ci vaddudm 23, 7, 12 58862306a36Sopenharmony_ci vaddudm 24, 8, 13 58962306a36Sopenharmony_ci 59062306a36Sopenharmony_ci # m3 m4 59162306a36Sopenharmony_ci addi 17, 17, 16 59262306a36Sopenharmony_ci lxvw4x 43, 0, 17 59362306a36Sopenharmony_ci addi 17, 17, 16 59462306a36Sopenharmony_ci lxvw4x 44, 0, 17 59562306a36Sopenharmony_ci vperm 14, 11, 12, 17 59662306a36Sopenharmony_ci vperm 15, 11, 12, 18 59762306a36Sopenharmony_ci vand 9, 14, 25 # a0 59862306a36Sopenharmony_ci vsrd 10, 14, 31 # >> 26 59962306a36Sopenharmony_ci vsrd 11, 10, 31 # 12 bits left 60062306a36Sopenharmony_ci vand 10, 10, 25 # a1 60162306a36Sopenharmony_ci vspltisb 13, 12 60262306a36Sopenharmony_ci vand 16, 15, 25 60362306a36Sopenharmony_ci vsld 12, 16, 13 60462306a36Sopenharmony_ci vspltisb 13, 14 60562306a36Sopenharmony_ci vor 11, 11, 12 60662306a36Sopenharmony_ci vand 11, 11, 25 # a2 60762306a36Sopenharmony_ci vsrd 12, 15, 13 # >> 14 60862306a36Sopenharmony_ci vsrd 13, 12, 31 # >> 26, a4 60962306a36Sopenharmony_ci vand 12, 12, 25 # a3 61062306a36Sopenharmony_ci 61162306a36Sopenharmony_ci # Smash 4 message blocks into 5 vectors of [m4, m2, m3, m1] 61262306a36Sopenharmony_ci vmrgow 4, 9, 20 
61362306a36Sopenharmony_ci vmrgow 5, 10, 21 61462306a36Sopenharmony_ci vmrgow 6, 11, 22 61562306a36Sopenharmony_ci vmrgow 7, 12, 23 61662306a36Sopenharmony_ci vmrgow 8, 13, 24 61762306a36Sopenharmony_ci vaddudm 8, 8, 19 61862306a36Sopenharmony_ci 61962306a36Sopenharmony_ci addi 5, 5, -64 # len -= 64 62062306a36Sopenharmony_ci addi 21, 21, 64 # offset += 64 62162306a36Sopenharmony_ci 62262306a36Sopenharmony_ci li 9, 64 62362306a36Sopenharmony_ci divdu 31, 5, 9 62462306a36Sopenharmony_ci 62562306a36Sopenharmony_ci cmpdi 31, 0 62662306a36Sopenharmony_ci ble Skip_block_loop 62762306a36Sopenharmony_ci 62862306a36Sopenharmony_ci mtctr 31 62962306a36Sopenharmony_ci 63062306a36Sopenharmony_ci# h4 = m1 * r⁴ + m2 * r³ + m3 * r² + m4 * r 63162306a36Sopenharmony_ci# Rewrite the polynominal sum of product as follows, 63262306a36Sopenharmony_ci# h1 = (h0 + m1) * r^2, h2 = (h0 + m2) * r^2 63362306a36Sopenharmony_ci# h3 = (h1 + m3) * r^2, h4 = (h2 + m4) * r^2 --> (h0 + m1) r*4 + (h3 + m3) r^2, (h0 + m2) r^4 + (h0 + m4) r^2 63462306a36Sopenharmony_ci# .... 
Repeat 63562306a36Sopenharmony_ci# h5 = (h3 + m5) * r^2, h6 = (h4 + m6) * r^2 --> 63662306a36Sopenharmony_ci# h7 = (h5 + m7) * r^2, h8 = (h6 + m8) * r^1 --> m5 * r^4 + m6 * r^3 + m7 * r^2 + m8 * r 63762306a36Sopenharmony_ci# 63862306a36Sopenharmony_ciloop_4blocks: 63962306a36Sopenharmony_ci 64062306a36Sopenharmony_ci # Multiply odd words and even words 64162306a36Sopenharmony_ci mul_odd 64262306a36Sopenharmony_ci mul_even 64362306a36Sopenharmony_ci # carry reduction 64462306a36Sopenharmony_ci vspltisb 9, 2 64562306a36Sopenharmony_ci vsrd 10, 14, 31 64662306a36Sopenharmony_ci vsrd 11, 17, 31 64762306a36Sopenharmony_ci vand 7, 17, 25 64862306a36Sopenharmony_ci vand 4, 14, 25 64962306a36Sopenharmony_ci vaddudm 18, 18, 11 65062306a36Sopenharmony_ci vsrd 12, 18, 31 65162306a36Sopenharmony_ci vaddudm 15, 15, 10 65262306a36Sopenharmony_ci 65362306a36Sopenharmony_ci vsrd 11, 15, 31 65462306a36Sopenharmony_ci vand 8, 18, 25 65562306a36Sopenharmony_ci vand 5, 15, 25 65662306a36Sopenharmony_ci vaddudm 4, 4, 12 65762306a36Sopenharmony_ci vsld 10, 12, 9 65862306a36Sopenharmony_ci vaddudm 6, 16, 11 65962306a36Sopenharmony_ci 66062306a36Sopenharmony_ci vsrd 13, 6, 31 66162306a36Sopenharmony_ci vand 6, 6, 25 66262306a36Sopenharmony_ci vaddudm 4, 4, 10 66362306a36Sopenharmony_ci vsrd 10, 4, 31 66462306a36Sopenharmony_ci vaddudm 7, 7, 13 66562306a36Sopenharmony_ci 66662306a36Sopenharmony_ci vsrd 11, 7, 31 66762306a36Sopenharmony_ci vand 7, 7, 25 66862306a36Sopenharmony_ci vand 4, 4, 25 66962306a36Sopenharmony_ci vaddudm 5, 5, 10 67062306a36Sopenharmony_ci vaddudm 8, 8, 11 67162306a36Sopenharmony_ci 67262306a36Sopenharmony_ci # input m1 m2 m3 m4 67362306a36Sopenharmony_ci add 20, 4, 21 67462306a36Sopenharmony_ci xxlor 49, 24, 24 67562306a36Sopenharmony_ci xxlor 50, 25, 25 67662306a36Sopenharmony_ci lxvw4x 43, 0, 20 67762306a36Sopenharmony_ci addi 17, 20, 16 67862306a36Sopenharmony_ci lxvw4x 44, 0, 17 67962306a36Sopenharmony_ci vperm 14, 11, 12, 17 68062306a36Sopenharmony_ci vperm 15, 
11, 12, 18 68162306a36Sopenharmony_ci addi 17, 17, 16 68262306a36Sopenharmony_ci lxvw4x 43, 0, 17 68362306a36Sopenharmony_ci addi 17, 17, 16 68462306a36Sopenharmony_ci lxvw4x 44, 0, 17 68562306a36Sopenharmony_ci vperm 17, 11, 12, 17 68662306a36Sopenharmony_ci vperm 18, 11, 12, 18 68762306a36Sopenharmony_ci 68862306a36Sopenharmony_ci vand 20, 14, 25 # a0 68962306a36Sopenharmony_ci vand 9, 17, 25 # a0 69062306a36Sopenharmony_ci vsrd 21, 14, 31 # >> 26 69162306a36Sopenharmony_ci vsrd 22, 21, 31 # 12 bits left 69262306a36Sopenharmony_ci vsrd 10, 17, 31 # >> 26 69362306a36Sopenharmony_ci vsrd 11, 10, 31 # 12 bits left 69462306a36Sopenharmony_ci 69562306a36Sopenharmony_ci vand 21, 21, 25 # a1 69662306a36Sopenharmony_ci vand 10, 10, 25 # a1 69762306a36Sopenharmony_ci 69862306a36Sopenharmony_ci vspltisb 13, 12 69962306a36Sopenharmony_ci vand 16, 15, 25 70062306a36Sopenharmony_ci vsld 23, 16, 13 70162306a36Sopenharmony_ci vor 22, 22, 23 70262306a36Sopenharmony_ci vand 22, 22, 25 # a2 70362306a36Sopenharmony_ci vand 16, 18, 25 70462306a36Sopenharmony_ci vsld 12, 16, 13 70562306a36Sopenharmony_ci vor 11, 11, 12 70662306a36Sopenharmony_ci vand 11, 11, 25 # a2 70762306a36Sopenharmony_ci vspltisb 13, 14 70862306a36Sopenharmony_ci vsrd 23, 15, 13 # >> 14 70962306a36Sopenharmony_ci vsrd 24, 23, 31 # >> 26, a4 71062306a36Sopenharmony_ci vand 23, 23, 25 # a3 71162306a36Sopenharmony_ci vsrd 12, 18, 13 # >> 14 71262306a36Sopenharmony_ci vsrd 13, 12, 31 # >> 26, a4 71362306a36Sopenharmony_ci vand 12, 12, 25 # a3 71462306a36Sopenharmony_ci 71562306a36Sopenharmony_ci vaddudm 4, 4, 20 71662306a36Sopenharmony_ci vaddudm 5, 5, 21 71762306a36Sopenharmony_ci vaddudm 6, 6, 22 71862306a36Sopenharmony_ci vaddudm 7, 7, 23 71962306a36Sopenharmony_ci vaddudm 8, 8, 24 72062306a36Sopenharmony_ci 72162306a36Sopenharmony_ci # Smash 4 message blocks into 5 vectors of [m4, m2, m3, m1] 72262306a36Sopenharmony_ci vmrgow 4, 9, 4 72362306a36Sopenharmony_ci vmrgow 5, 10, 5 72462306a36Sopenharmony_ci vmrgow 6, 
11, 6 72562306a36Sopenharmony_ci vmrgow 7, 12, 7 72662306a36Sopenharmony_ci vmrgow 8, 13, 8 72762306a36Sopenharmony_ci vaddudm 8, 8, 19 72862306a36Sopenharmony_ci 72962306a36Sopenharmony_ci addi 5, 5, -64 # len -= 64 73062306a36Sopenharmony_ci addi 21, 21, 64 # offset += 64 73162306a36Sopenharmony_ci 73262306a36Sopenharmony_ci bdnz loop_4blocks 73362306a36Sopenharmony_ci 73462306a36Sopenharmony_ciSkip_block_loop: 73562306a36Sopenharmony_ci xxlor 58, 0, 0 73662306a36Sopenharmony_ci xxlor 59, 1, 1 73762306a36Sopenharmony_ci xxlor 60, 2, 2 73862306a36Sopenharmony_ci xxlor 61, 3, 3 73962306a36Sopenharmony_ci xxlor 62, 4, 4 74062306a36Sopenharmony_ci xxlor 32, 5, 5 74162306a36Sopenharmony_ci xxlor 33, 6, 6 74262306a36Sopenharmony_ci xxlor 34, 7, 7 74362306a36Sopenharmony_ci xxlor 35, 8, 8 74462306a36Sopenharmony_ci 74562306a36Sopenharmony_ci # Multiply odd words and even words 74662306a36Sopenharmony_ci mul_odd 74762306a36Sopenharmony_ci mul_even 74862306a36Sopenharmony_ci 74962306a36Sopenharmony_ci # Sum the products. 
75062306a36Sopenharmony_ci xxpermdi 41, 31, 46, 0 75162306a36Sopenharmony_ci xxpermdi 42, 31, 47, 0 75262306a36Sopenharmony_ci vaddudm 4, 14, 9 75362306a36Sopenharmony_ci xxpermdi 36, 31, 36, 3 75462306a36Sopenharmony_ci vaddudm 5, 15, 10 75562306a36Sopenharmony_ci xxpermdi 37, 31, 37, 3 75662306a36Sopenharmony_ci xxpermdi 43, 31, 48, 0 75762306a36Sopenharmony_ci vaddudm 6, 16, 11 75862306a36Sopenharmony_ci xxpermdi 38, 31, 38, 3 75962306a36Sopenharmony_ci xxpermdi 44, 31, 49, 0 76062306a36Sopenharmony_ci vaddudm 7, 17, 12 76162306a36Sopenharmony_ci xxpermdi 39, 31, 39, 3 76262306a36Sopenharmony_ci xxpermdi 45, 31, 50, 0 76362306a36Sopenharmony_ci vaddudm 8, 18, 13 76462306a36Sopenharmony_ci xxpermdi 40, 31, 40, 3 76562306a36Sopenharmony_ci 76662306a36Sopenharmony_ci # carry reduction 76762306a36Sopenharmony_ci vspltisb 9, 2 76862306a36Sopenharmony_ci vsrd 10, 4, 31 76962306a36Sopenharmony_ci vsrd 11, 7, 31 77062306a36Sopenharmony_ci vand 7, 7, 25 77162306a36Sopenharmony_ci vand 4, 4, 25 77262306a36Sopenharmony_ci vaddudm 8, 8, 11 77362306a36Sopenharmony_ci vsrd 12, 8, 31 77462306a36Sopenharmony_ci vaddudm 5, 5, 10 77562306a36Sopenharmony_ci 77662306a36Sopenharmony_ci vsrd 11, 5, 31 77762306a36Sopenharmony_ci vand 8, 8, 25 77862306a36Sopenharmony_ci vand 5, 5, 25 77962306a36Sopenharmony_ci vaddudm 4, 4, 12 78062306a36Sopenharmony_ci vsld 10, 12, 9 78162306a36Sopenharmony_ci vaddudm 6, 6, 11 78262306a36Sopenharmony_ci 78362306a36Sopenharmony_ci vsrd 13, 6, 31 78462306a36Sopenharmony_ci vand 6, 6, 25 78562306a36Sopenharmony_ci vaddudm 4, 4, 10 78662306a36Sopenharmony_ci vsrd 10, 4, 31 78762306a36Sopenharmony_ci vaddudm 7, 7, 13 78862306a36Sopenharmony_ci 78962306a36Sopenharmony_ci vsrd 11, 7, 31 79062306a36Sopenharmony_ci vand 7, 7, 25 79162306a36Sopenharmony_ci vand 4, 4, 25 79262306a36Sopenharmony_ci vaddudm 5, 5, 10 79362306a36Sopenharmony_ci vsrd 10, 5, 31 79462306a36Sopenharmony_ci vand 5, 5, 25 79562306a36Sopenharmony_ci vaddudm 6, 6, 10 
79662306a36Sopenharmony_ci vaddudm 8, 8, 11 79762306a36Sopenharmony_ci 79862306a36Sopenharmony_ci b do_final_update 79962306a36Sopenharmony_ci 80062306a36Sopenharmony_cido_final_update: 80162306a36Sopenharmony_ci # combine 26 bit limbs 80262306a36Sopenharmony_ci # v4, v5, v6, v7 and v8 are 26 bit vectors 80362306a36Sopenharmony_ci vsld 5, 5, 31 80462306a36Sopenharmony_ci vor 20, 4, 5 80562306a36Sopenharmony_ci vspltisb 11, 12 80662306a36Sopenharmony_ci vsrd 12, 6, 11 80762306a36Sopenharmony_ci vsld 6, 6, 31 80862306a36Sopenharmony_ci vsld 6, 6, 31 80962306a36Sopenharmony_ci vor 20, 20, 6 81062306a36Sopenharmony_ci vspltisb 11, 14 81162306a36Sopenharmony_ci vsld 7, 7, 11 81262306a36Sopenharmony_ci vor 21, 7, 12 81362306a36Sopenharmony_ci mfvsrld 16, 40 # save last 2 bytes 81462306a36Sopenharmony_ci vsld 8, 8, 11 81562306a36Sopenharmony_ci vsld 8, 8, 31 81662306a36Sopenharmony_ci vor 21, 21, 8 81762306a36Sopenharmony_ci mfvsrld 17, 52 81862306a36Sopenharmony_ci mfvsrld 19, 53 81962306a36Sopenharmony_ci srdi 16, 16, 24 82062306a36Sopenharmony_ci 82162306a36Sopenharmony_ci std 17, 0(3) 82262306a36Sopenharmony_ci std 19, 8(3) 82362306a36Sopenharmony_ci stw 16, 16(3) 82462306a36Sopenharmony_ci 82562306a36Sopenharmony_ciOut_loop: 82662306a36Sopenharmony_ci li 3, 0 82762306a36Sopenharmony_ci 82862306a36Sopenharmony_ci RESTORE_REGS 82962306a36Sopenharmony_ci 83062306a36Sopenharmony_ci blr 83162306a36Sopenharmony_ci 83262306a36Sopenharmony_ciOut_no_poly1305: 83362306a36Sopenharmony_ci li 3, 0 83462306a36Sopenharmony_ci blr 83562306a36Sopenharmony_ciSYM_FUNC_END(poly1305_p10le_4blocks) 83662306a36Sopenharmony_ci 83762306a36Sopenharmony_ci# 83862306a36Sopenharmony_ci# ======================================================================= 83962306a36Sopenharmony_ci# The following functions implement 64 x 64 bits multiplication poly1305. 
#
# Poly1305_init_64
#   Load the key r, clamp it and set up the multiplier vectors that
#   Poly1305_mult consumes.
#
# Input:  r3 = state; the two 64-bit halves of r are read from
#              offsets 24 and 32 of the state
#         r2 = TOC pointer (used to address rmask)
# Output: v0 = (r0, s1), v1 = (r1, r0), v2 = (s1, 0), v3 = (r0, 0)
#         with s1 = r1 + (r1 >> 2).  The clamp clears the low two bits
#         of r1, so this equals (r1 >> 2) * 5.
# Clobbers: r9, r10, r11, r12, r19, r21, r25
#
SYM_FUNC_START_LOCAL(Poly1305_init_64)
	# clamp masks as stored at rmask (little-endian doublewords):
	#   r11 = 0x0FFFFFFC0FFFFFFF  (applied to r0)
	#   r12 = 0x0FFFFFFC0FFFFFFC  (applied to r1)
	addis	10, 2, rmask@toc@ha
	addi	10, 10, rmask@toc@l
	ld	11, 0(10)
	ld	12, 8(10)

	# load key from r3 and clamp it
	ld	9, 24(3)
	ld	10, 32(3)
	and	9, 9, 11	# r0 = key.lo & clamp mask
	and	10, 10, 12	# r1 = key.hi & clamp mask

	srdi	21, 10, 2
	add	19, 21, 10	# s1: r19 = r1 + (r1 >> 2) = (r1 >> 2) * 5

	# setup r and s
	li	25, 0
	mtvsrdd	32+0, 9, 19	# v0 = (r0, s1)
	mtvsrdd	32+1, 10, 9	# v1 = (r1, r0)
	mtvsrdd	32+2, 19, 25	# v2 = (s1, 0)
	mtvsrdd	32+3, 9, 25	# v3 = (r0, 0)

	blr
SYM_FUNC_END(Poly1305_init_64)

#
# Poly1305_mult
#   Schoolbook h * r with the 2^130 wrap folded in through s1.
#
# Input:  v6 = (h0, h1), v8 = (h2, 0)
#         v0 = (r0, s1), v1 = (r1, r0), v2 = (s1, 0), v3 = (r0, 0)
#         v9 = 0 (zero addend, must be cleared by the caller)
#
# Output: v7  = d0 = h0 * r0 + h1 * s1
#         v10 = d1 = h0 * r1 + h1 * r0 + h2 * s1
#         v11 = d2 = h2 * r0
#
SYM_FUNC_START_LOCAL(Poly1305_mult)
	# d0 = h0 * r0 + h1 * s1
	vmsumudm	7, 6, 0, 9		# h0 * r0 + h1 * s1

	# d1 = h0 * r1 + h1 * r0 + h2 * s1
	vmsumudm	11, 6, 1, 9		# h0 * r1 + h1 * r0
	vmsumudm	10, 8, 2, 11		# d1 += h2 * s1

	# d2 = h2 * r0 (v11 is reused; its partial d1 sum is already in v10)
	vmsumudm	11, 8, 3, 9		# d2 = h2 * r0
	blr
SYM_FUNC_END(Poly1305_mult)

#
# Carry_reduction
#   Collapse the three 128-bit products into h0, h1, h2 and partially
#   reduce modulo p = 2^130 - 5:
#     h1 += d0.hi, h2 += d1.hi (+ carry)
#     h0 += (h2 >> 2) * 5, carries ripple through h1 into h2
#     h2 keeps its low two bits plus the final carry
#
# Input:  v7, v10, v11
# Output: r27 = h0, r28 = h1, r29 = h2
# Clobbers: r20, r21, r22, r23, CR0, XER[CA]
#
SYM_FUNC_START_LOCAL(Carry_reduction)
	mfvsrld	27, 32+7	# d0.lo
	mfvsrld	28, 32+10	# d1.lo
	mfvsrld	29, 32+11	# d2.lo
	mfvsrd	20, 32+7	# d0.hi
	mfvsrd	21, 32+10	# d1.hi

	addc	28, 28, 20
	adde	29, 29, 21
	srdi	22, 29, 0x2
	sldi	23, 22, 0x2
	add	23, 23, 22	# (h2 >> 2) * 5
	addc	27, 27, 23	# h0
	addze	28, 28		# h1
	andi.	29, 29, 0x3	# h2 (andi. leaves XER[CA] untouched)
	addze	29, 29		# h2 += carry out of h1, previously dropped
	blr
SYM_FUNC_END(Carry_reduction)

#
# poly1305 multiplication
# h *= r, h %= p
#	d0 = h0 * r0 + h1 * s1
#	d1 = h0 * r1 + h1 * r0 + h2 * s1
#	d2 = h2 * r0
#
#
# unsigned int poly1305_64s(unsigned char *state, const byte *src, size_t len, highbit)
#   - no highbit if final leftover block (highbit = 0)
#
# Input:  r3 = state (h at offsets 0, 8, 16; key r at offsets 24, 32)
#         r4 = src, r5 = len in bytes, r6 = highbit
# Output: r3 = 0; the updated h is written back to the state
#
# Only whole 16-byte blocks are consumed; a trailing partial block is
# ignored.  A len below 16 (including zero and negative values) returns
# immediately.  The old test against zero let len 1..15 through, which
# loaded CTR with 0 and made bdnz wrap around into a runaway loop.
#
SYM_FUNC_START(poly1305_64s)
	cmpdi	5, 16
	blt	Out_no_poly1305_64

	mflr	0
	std	0, 16(1)
	stdu	1,-400(1)

	SAVE_GPR 14, 112, 1
	SAVE_GPR 15, 120, 1
	SAVE_GPR 16, 128, 1
	SAVE_GPR 17, 136, 1
	SAVE_GPR 18, 144, 1
	SAVE_GPR 19, 152, 1
	SAVE_GPR 20, 160, 1
	SAVE_GPR 21, 168, 1
	SAVE_GPR 22, 176, 1
	SAVE_GPR 23, 184, 1
	SAVE_GPR 24, 192, 1
	SAVE_GPR 25, 200, 1
	SAVE_GPR 26, 208, 1
	SAVE_GPR 27, 216, 1
	SAVE_GPR 28, 224, 1
	SAVE_GPR 29, 232, 1
	SAVE_GPR 30, 240, 1
	SAVE_GPR 31, 248, 1

	# Init poly1305
	bl	Poly1305_init_64

	mr	11, 4		# r11 = running input pointer

	# load h
	# h0, h1 are doublewords, h2 is kept as a 32-bit word
	ld	27, 0(3)
	ld	28, 8(3)
	lwz	29, 16(3)

	srdi	31, 5, 4	# number of 16-byte blocks
	mtctr	31

	mr	24, 6		# highbit

	vxor	9, 9, 9		# zero addend for Poly1305_mult (loop invariant)

Loop_block_64:
	ld	20, 0(11)
	ld	21, 8(11)
	addi	11, 11, 16

	# h += m, plus highbit at 2^128
	addc	27, 27, 20
	adde	28, 28, 21
	adde	29, 29, 24

	li	22, 0		# r22 is clobbered by Carry_reduction, so reload
	mtvsrdd	32+6, 27, 28	# h0, h1
	mtvsrdd	32+8, 29, 22	# h2

	bl	Poly1305_mult

	bl	Carry_reduction

	bdnz	Loop_block_64

	std	27, 0(3)
	std	28, 8(3)
	stw	29, 16(3)

	li	3, 0

	RESTORE_GPR 14, 112, 1
	RESTORE_GPR 15, 120, 1
	RESTORE_GPR 16, 128, 1
	RESTORE_GPR 17, 136, 1
	RESTORE_GPR 18, 144, 1
	RESTORE_GPR 19, 152, 1
	RESTORE_GPR 20, 160, 1
	RESTORE_GPR 21, 168, 1
	RESTORE_GPR 22, 176, 1
	RESTORE_GPR 23, 184, 1
	RESTORE_GPR 24, 192, 1
	RESTORE_GPR 25, 200, 1
	RESTORE_GPR 26, 208, 1
	RESTORE_GPR 27, 216, 1
	RESTORE_GPR 28, 224, 1
	RESTORE_GPR 29, 232, 1
	RESTORE_GPR 30, 240, 1
	RESTORE_GPR 31, 248, 1

	addi	1, 1, 400
	ld	0, 16(1)
	mtlr	0

	blr

Out_no_poly1305_64:
	li	3, 0
	blr
SYM_FUNC_END(poly1305_64s)

#
# poly1305_emit_64
#   Final reduction and tag generation.
#
# Input: r3 = h, r4 = s, r5 = mac
#   mac = (h mod p) + s, truncated to 128 bits
#
# g = h + 5 is computed first; when g reaches 2^130 then h >= p and the
# low 128 bits of g equal those of h - p, so g replaces h.
#
SYM_FUNC_START(poly1305_emit_64)
	ld	10, 0(3)	# h0
	ld	11, 8(3)	# h1
	lwz	12, 16(3)	# h2, 32-bit to match the stw/lwz in poly1305_64s

	# compare modulus
	# g = h + 5 + (-p)
	addic	6, 10, 5
	addze	7, 11
	addze	8, 12
	srdi	9, 8, 2		# bits 130 and up of g: overflow?
	cmpdi	9, 0
	beq	Skip_h64	# g < 2^130, so h < p: keep h
	mr	10, 6
	mr	11, 7

Skip_h64:
	# mac = h + s; only the low 128 bits are emitted
	ld	6, 0(4)
	ld	7, 8(4)
	addc	10, 10, 6
	adde	11, 11, 7

	std	10, 0(5)
	std	11, 8(5)
	blr
SYM_FUNC_END(poly1305_emit_64)

# Constant pool.
#   rmask: clamp mask for r (16 bytes), read by Poly1305_init_64.
#   cnum:  constants for code outside this section; the values look like a
#          26-bit limb mask, a shift count of 26, a 2^24 word and two
#          byte-permute patterns -- NOTE(review): confirm against the
#          vector code that loads them.
SYM_DATA_START_LOCAL(RMASK)
.align 5
rmask:
.byte	0xff, 0xff, 0xff, 0x0f, 0xfc, 0xff, 0xff, 0x0f, 0xfc, 0xff, 0xff, 0x0f, 0xfc, 0xff, 0xff, 0x0f
cnum:
.long	0x03ffffff, 0x00000000, 0x03ffffff, 0x00000000
.long	0x1a, 0x00, 0x1a, 0x00
.long	0x01000000, 0x01000000, 0x01000000, 0x01000000
.long	0x00010203, 0x04050607, 0x10111213, 0x14151617
.long	0x08090a0b, 0x0c0d0e0f, 0x18191a1b, 0x1c1d1e1f
SYM_DATA_END(RMASK)