#! /usr/bin/env perl
# Copyright 2018-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# X25519 lower-level primitives for PPC64.
#
# July 2018.
#
# Base 2^64 is faster than base 2^51 on pre-POWER8, most notably ~15%
# faster on PPC970/G5. POWER8 on the other hand seems to trip on own
# shoelaces when handling longer carry chains. As base 2^51 has just
# single-carry pairs, it's 25% faster than base 2^64. Since PPC970 is
# pretty old, base 2^64 implementation is not engaged. Comparison to
# compiler-generated code is complicated by the fact that not all
# compilers support 128-bit integers. When compiler doesn't, like xlc,
# this module delivers more than 2x improvement, and when it does,
# from 12% to 30% improvement was measured...

# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# Look for ppc-xlate.pl next to this script first, then in ../../perlasm/
# relative to this script's directory; give up if neither file exists.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Everything printed on STDOUT from here on is piped through ppc-xlate.pl,
# which is handed $flavour and the output file name as its arguments.
open OUT,"| \"$^X\" $xlate $flavour \"$output\""
    or die "can't call $xlate: $!";
*STDOUT=*OUT;

# Register names shared by both implementations: r1 is used as the stack
# pointer, r3 as the result pointer and r4/r5 as the operand pointers.
my $sp = "r1";
my ($rp,$ap,$bp) = map("r$_",3..5);

####################################################### base 2^64
# The condition is constant false, so none of the x25519_fe64_* code in this
# block is ever appended to $code.
if (0) {
# r6..r12 hold $bi,$a0..$a3,$t0,$t1; r22..r31 hold $t2,$t3,$acc0..$acc7.
my ($bi,$a0,$a1,$a2,$a3,$t0,$t1, $t2,$t3,
    $acc0,$acc1,$acc2,$acc3,$acc4,$acc5,$acc6,$acc7) =
    map("r$_",(6..12,22..31));
my $zero = "r0";
my $FRAME = 16*8;

$code.=<<___;
.text

.globl	x25519_fe64_mul
.type	x25519_fe64_mul,\@function
.align	5
x25519_fe64_mul:
	stdu	$sp,-$FRAME($sp)
	std	r22,`$FRAME-8*10`($sp)
	std	r23,`$FRAME-8*9`($sp)
	std	r24,`$FRAME-8*8`($sp)
	std	r25,`$FRAME-8*7`($sp)
	std	r26,`$FRAME-8*6`($sp)
	std	r27,`$FRAME-8*5`($sp)
	std	r28,`$FRAME-8*4`($sp)
	std	r29,`$FRAME-8*3`($sp)
	std	r30,`$FRAME-8*2`($sp)
	std	r31,`$FRAME-8*1`($sp)

	ld	$bi,0($bp)
	ld	$a0,0($ap)
	xor	$zero,$zero,$zero
	ld	$a1,8($ap)
	ld	$a2,16($ap)
	ld	$a3,24($ap)

	mulld	$acc0,$a0,$bi		# a[0]*b[0]
	mulhdu	$t0,$a0,$bi
	mulld	$acc1,$a1,$bi		# a[1]*b[0]
	mulhdu	$t1,$a1,$bi
	mulld	$acc2,$a2,$bi		# a[2]*b[0]
	mulhdu	$t2,$a2,$bi
	mulld	$acc3,$a3,$bi		# a[3]*b[0]
	mulhdu	$t3,$a3,$bi
___
# One pass per remaining word b[1..3]; @acc is shifted by one register after
# each pass so that @acc[1] always names the lowest word still being updated.
for(my @acc=($acc0,$acc1,$acc2,$acc3,$acc4,$acc5,$acc6,$acc7),
    my $i=1; $i<4; shift(@acc), $i++) {
my $acc4 = $i==1? $zero : @acc[4];

$code.=<<___;
	ld	$bi,`8*$i`($bp)
	addc	@acc[1],@acc[1],$t0	# accumulate high parts
	mulld	$t0,$a0,$bi
	adde	@acc[2],@acc[2],$t1
	mulld	$t1,$a1,$bi
	adde	@acc[3],@acc[3],$t2
	mulld	$t2,$a2,$bi
	adde	@acc[4],$acc4,$t3
	mulld	$t3,$a3,$bi
	addc	@acc[1],@acc[1],$t0	# accumulate low parts
	mulhdu	$t0,$a0,$bi
	adde	@acc[2],@acc[2],$t1
	mulhdu	$t1,$a1,$bi
	adde	@acc[3],@acc[3],$t2
	mulhdu	$t2,$a2,$bi
	adde	@acc[4],@acc[4],$t3
	mulhdu	$t3,$a3,$bi
	adde	@acc[5],$zero,$zero
___
}
$code.=<<___;
	li	$bi,38
	addc	$acc4,$acc4,$t0
	mulld	$t0,$acc4,$bi
	adde	$acc5,$acc5,$t1
	mulld	$t1,$acc5,$bi
	adde	$acc6,$acc6,$t2
	mulld	$t2,$acc6,$bi
	adde	$acc7,$acc7,$t3
	mulld	$t3,$acc7,$bi

	addc	$acc0,$acc0,$t0
	mulhdu	$t0,$acc4,$bi
	adde	$acc1,$acc1,$t1
	mulhdu	$t1,$acc5,$bi
	adde	$acc2,$acc2,$t2
	mulhdu	$t2,$acc6,$bi
	adde	$acc3,$acc3,$t3
	mulhdu	$t3,$acc7,$bi
	adde	$acc4,$zero,$zero

	addc	$acc1,$acc1,$t0
	adde	$acc2,$acc2,$t1
	adde	$acc3,$acc3,$t2
	adde	$acc4,$acc4,$t3

	mulld	$acc4,$acc4,$bi

	addc	$acc0,$acc0,$acc4
	addze	$acc1,$acc1
	addze	$acc2,$acc2
	addze	$acc3,$acc3

	subfe	$acc4,$acc4,$acc4	# carry -> ~mask
	std	$acc1,8($rp)
	andc	$acc4,$bi,$acc4
	std	$acc2,16($rp)
	add	$acc0,$acc0,$acc4
	std	$acc3,24($rp)
	std	$acc0,0($rp)

	ld	r22,`$FRAME-8*10`($sp)
	ld	r23,`$FRAME-8*9`($sp)
	ld	r24,`$FRAME-8*8`($sp)
	ld	r25,`$FRAME-8*7`($sp)
	ld	r26,`$FRAME-8*6`($sp)
	ld	r27,`$FRAME-8*5`($sp)
	ld	r28,`$FRAME-8*4`($sp)
	ld	r29,`$FRAME-8*3`($sp)
	ld	r30,`$FRAME-8*2`($sp)
	ld	r31,`$FRAME-8*1`($sp)
	addi	$sp,$sp,$FRAME
	blr
	.long	0
	.byte	0,12,4,0,0x80,10,3,0
	.long	0
.size	x25519_fe64_mul,.-x25519_fe64_mul

.globl	x25519_fe64_sqr
.type	x25519_fe64_sqr,\@function
.align	5
x25519_fe64_sqr:
	stdu	$sp,-$FRAME($sp)
	std	r22,`$FRAME-8*10`($sp)
	std	r23,`$FRAME-8*9`($sp)
	std	r24,`$FRAME-8*8`($sp)
	std	r25,`$FRAME-8*7`($sp)
	std	r26,`$FRAME-8*6`($sp)
	std	r27,`$FRAME-8*5`($sp)
	std	r28,`$FRAME-8*4`($sp)
	std	r29,`$FRAME-8*3`($sp)
	std	r30,`$FRAME-8*2`($sp)
	std	r31,`$FRAME-8*1`($sp)

	ld	$a0,0($ap)
	xor	$zero,$zero,$zero
	ld	$a1,8($ap)
	ld	$a2,16($ap)
	ld	$a3,24($ap)

	################################
	#  |  |  |  |  |  |a1*a0|  |
	#  |  |  |  |  |a2*a0|  |  |
	#  |  |a3*a2|a3*a0|  |  |  |
	#  |  |  |  |a2*a1|  |  |  |
	#  |  |  |a3*a1|  |  |  |  |
	# *|  |  |  |  |  |  |  | 2|
	# +|a3*a3|a2*a2|a1*a1|a0*a0|
	#  |--+--+--+--+--+--+--+--|
	#  |A7|A6|A5|A4|A3|A2|A1|A0|, where Ax is $accx, i.e. follow $accx
	#
	#  "can't overflow" below mark carrying into high part of
	#  multiplication result, which can't overflow, because it
	#  can never be all ones.

	mulld	$acc1,$a1,$a0		# a[1]*a[0]
	mulhdu	$t1,$a1,$a0
	mulld	$acc2,$a2,$a0		# a[2]*a[0]
	mulhdu	$t2,$a2,$a0
	mulld	$acc3,$a3,$a0		# a[3]*a[0]
	mulhdu	$acc4,$a3,$a0

	addc	$acc2,$acc2,$t1		# accumulate high parts of multiplication
	mulld	$t0,$a2,$a1		# a[2]*a[1]
	mulhdu	$t1,$a2,$a1
	adde	$acc3,$acc3,$t2
	mulld	$t2,$a3,$a1		# a[3]*a[1]
	mulhdu	$t3,$a3,$a1
	addze	$acc4,$acc4		# can't overflow

	mulld	$acc5,$a3,$a2		# a[3]*a[2]
	mulhdu	$acc6,$a3,$a2

	addc	$t1,$t1,$t2		# accumulate high parts of multiplication
	mulld	$acc0,$a0,$a0		# a[0]*a[0]
	addze	$t2,$t3			# can't overflow

	addc	$acc3,$acc3,$t0		# accumulate low parts of multiplication
	mulhdu	$a0,$a0,$a0
	adde	$acc4,$acc4,$t1
	mulld	$t1,$a1,$a1		# a[1]*a[1]
	adde	$acc5,$acc5,$t2
	mulhdu	$a1,$a1,$a1
	addze	$acc6,$acc6		# can't overflow

	addc	$acc1,$acc1,$acc1	# acc[1-6]*=2
	mulld	$t2,$a2,$a2		# a[2]*a[2]
	adde	$acc2,$acc2,$acc2
	mulhdu	$a2,$a2,$a2
	adde	$acc3,$acc3,$acc3
	mulld	$t3,$a3,$a3		# a[3]*a[3]
	adde	$acc4,$acc4,$acc4
	mulhdu	$a3,$a3,$a3
	adde	$acc5,$acc5,$acc5
	adde	$acc6,$acc6,$acc6
	addze	$acc7,$zero

	addc	$acc1,$acc1,$a0		# +a[i]*a[i]
	li	$bi,38
	adde	$acc2,$acc2,$t1
	adde	$acc3,$acc3,$a1
	adde	$acc4,$acc4,$t2
	adde	$acc5,$acc5,$a2
	adde	$acc6,$acc6,$t3
	adde	$acc7,$acc7,$a3

	mulld	$t0,$acc4,$bi
	mulld	$t1,$acc5,$bi
	mulld	$t2,$acc6,$bi
	mulld	$t3,$acc7,$bi

	addc	$acc0,$acc0,$t0
	mulhdu	$t0,$acc4,$bi
	adde	$acc1,$acc1,$t1
	mulhdu	$t1,$acc5,$bi
	adde	$acc2,$acc2,$t2
	mulhdu	$t2,$acc6,$bi
	adde	$acc3,$acc3,$t3
	mulhdu	$t3,$acc7,$bi
	addze	$acc4,$zero

	addc	$acc1,$acc1,$t0
	adde	$acc2,$acc2,$t1
	adde	$acc3,$acc3,$t2
	adde	$acc4,$acc4,$t3

	mulld	$acc4,$acc4,$bi

	addc	$acc0,$acc0,$acc4
	addze	$acc1,$acc1
	addze	$acc2,$acc2
	addze	$acc3,$acc3

	subfe	$acc4,$acc4,$acc4	# carry -> ~mask
	std	$acc1,8($rp)
	andc	$acc4,$bi,$acc4
	std	$acc2,16($rp)
	add	$acc0,$acc0,$acc4
	std	$acc3,24($rp)
	std	$acc0,0($rp)

	ld	r22,`$FRAME-8*10`($sp)
	ld	r23,`$FRAME-8*9`($sp)
	ld	r24,`$FRAME-8*8`($sp)
	ld	r25,`$FRAME-8*7`($sp)
	ld	r26,`$FRAME-8*6`($sp)
	ld	r27,`$FRAME-8*5`($sp)
	ld	r28,`$FRAME-8*4`($sp)
	ld	r29,`$FRAME-8*3`($sp)
	ld	r30,`$FRAME-8*2`($sp)
	ld	r31,`$FRAME-8*1`($sp)
	addi	$sp,$sp,$FRAME
	blr
	.long	0
	.byte	0,12,4,0,0x80,10,2,0
	.long	0
.size	x25519_fe64_sqr,.-x25519_fe64_sqr

.globl	x25519_fe64_mul121666
.type	x25519_fe64_mul121666,\@function
.align	5
x25519_fe64_mul121666:
	lis	$bi,`65536>>16`
	ori	$bi,$bi,`121666-65536`

	ld	$t0,0($ap)
	ld	$t1,8($ap)
	ld	$bp,16($ap)
	ld	$ap,24($ap)

	mulld	$a0,$t0,$bi
	mulhdu	$t0,$t0,$bi
	mulld	$a1,$t1,$bi
	mulhdu	$t1,$t1,$bi
	mulld	$a2,$bp,$bi
	mulhdu	$bp,$bp,$bi
	mulld	$a3,$ap,$bi
	mulhdu	$ap,$ap,$bi

	addc	$a1,$a1,$t0
	adde	$a2,$a2,$t1
	adde	$a3,$a3,$bp
	addze	$ap, $ap

	mulli	$ap,$ap,38

	addc	$a0,$a0,$ap
	addze	$a1,$a1
	addze	$a2,$a2
	addze	$a3,$a3

	subfe	$t1,$t1,$t1		# carry -> ~mask
	std	$a1,8($rp)
	andc	$t0,$t0,$t1		# NOTE(review): t0 still holds the high half of a[0]*121666 here; sibling routines mask the constant 38 - confirm
	std	$a2,16($rp)
	add	$a0,$a0,$t0
	std	$a3,24($rp)
	std	$a0,0($rp)

	blr
	.long	0
	.byte	0,12,0x14,0,0,0,2,0
	.long	0
.size	x25519_fe64_mul121666,.-x25519_fe64_mul121666

.globl	x25519_fe64_add
.type	x25519_fe64_add,\@function
.align	5
x25519_fe64_add:
	ld	$a0,0($ap)
	ld	$t0,0($bp)
	ld	$a1,8($ap)
	ld	$t1,8($bp)
	ld	$a2,16($ap)
	ld	$bi,16($bp)
	ld	$a3,24($ap)
	ld	$bp,24($bp)

	addc	$a0,$a0,$t0
	adde	$a1,$a1,$t1
	adde	$a2,$a2,$bi
	adde	$a3,$a3,$bp

	li	$t0,38
	subfe	$t1,$t1,$t1		# carry -> ~mask
	andc	$t1,$t0,$t1

	addc	$a0,$a0,$t1
	addze	$a1,$a1
	addze	$a2,$a2
	addze	$a3,$a3

	subfe	$t1,$t1,$t1		# carry -> ~mask
	std	$a1,8($rp)
	andc	$t0,$t0,$t1
	std	$a2,16($rp)
	add	$a0,$a0,$t0
	std	$a3,24($rp)
	std	$a0,0($rp)

	blr
	.long	0
	.byte	0,12,0x14,0,0,0,3,0
	.long	0
.size	x25519_fe64_add,.-x25519_fe64_add

.globl	x25519_fe64_sub
.type	x25519_fe64_sub,\@function
.align	5
x25519_fe64_sub:
	ld	$a0,0($ap)
	ld	$t0,0($bp)
	ld	$a1,8($ap)
	ld	$t1,8($bp)
	ld	$a2,16($ap)
	ld	$bi,16($bp)
	ld	$a3,24($ap)
	ld	$bp,24($bp)

	subfc	$a0,$t0,$a0
	subfe	$a1,$t1,$a1
	subfe	$a2,$bi,$a2
	subfe	$a3,$bp,$a3

	li	$t0,38
	subfe	$t1,$t1,$t1		# borrow -> mask
	xor	$zero,$zero,$zero
	and	$t1,$t0,$t1

	subfc	$a0,$t1,$a0
	subfe	$a1,$zero,$a1
	subfe	$a2,$zero,$a2
	subfe	$a3,$zero,$a3

	subfe	$t1,$t1,$t1		# borrow -> mask
	std	$a1,8($rp)
	and	$t0,$t0,$t1
	std	$a2,16($rp)
	subf	$a0,$t0,$a0
	std	$a3,24($rp)
	std	$a0,0($rp)

	blr
	.long	0
	.byte	0,12,0x14,0,0,0,3,0
	.long	0
.size	x25519_fe64_sub,.-x25519_fe64_sub

.globl	x25519_fe64_tobytes
.type	x25519_fe64_tobytes,\@function
.align	5
x25519_fe64_tobytes:
	ld	$a3,24($ap)
	ld	$a0,0($ap)
	ld	$a1,8($ap)
	ld	$a2,16($ap)

	sradi	$t0,$a3,63		# most significant bit -> mask
	li	$t1,19
	and	$t0,$t0,$t1
	sldi	$a3,$a3,1
	add	$t0,$t0,$t1		# compare to modulus in the same go
	srdi	$a3,$a3,1		# most significant bit cleared

	addc	$a0,$a0,$t0
	addze	$a1,$a1
	addze	$a2,$a2
	addze	$a3,$a3

	xor	$zero,$zero,$zero
	sradi	$t0,$a3,63		# most significant bit -> mask
	sldi	$a3,$a3,1
	andc	$t0,$t1,$t0
	srdi	$a3,$a3,1		# most significant bit cleared

	subi	$rp,$rp,1
	subfc	$a0,$t0,$a0
	subfe	$a1,$zero,$a1
	subfe	$a2,$zero,$a2
	subfe	$a3,$zero,$a3

___
# Emit eight byte stores per 64-bit word, lowest byte first; $rp was
# pre-decremented above so each stbu advances it by one before storing.
for (my @a=($a0,$a1,$a2,$a3), my $i=0; $i<4; shift(@a), $i++) {
$code.=<<___;
	srdi	$t0,@a[0],8
	stbu	@a[0],1($rp)
	srdi	@a[0],@a[0],16
	stbu	$t0,1($rp)
	srdi	$t0,@a[0],8
	stbu	@a[0],1($rp)
	srdi	@a[0],@a[0],16
	stbu	$t0,1($rp)
	srdi	$t0,@a[0],8
	stbu	@a[0],1($rp)
	srdi	@a[0],@a[0],16
	stbu	$t0,1($rp)
	srdi	$t0,@a[0],8
	stbu	@a[0],1($rp)
	stbu	$t0,1($rp)
___
}
$code.=<<___;
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,2,0
	.long	0
.size	x25519_fe64_tobytes,.-x25519_fe64_tobytes
___
}
####################################################### base 2^51
{
# r6..r12 hold $bi,$a0..$a4,$t0; r21..r31 hold $t1 and the five lo/hi
# accumulator pairs $h0lo..$h4hi.
my ($bi,$a0,$a1,$a2,$a3,$a4,$t0, $t1,
    $h0lo,$h0hi,$h1lo,$h1hi,$h2lo,$h2hi,$h3lo,$h3hi,$h4lo,$h4hi) =
    map("r$_",(6..12,21..31));
my $mask = "r0";
my $FRAME = 18*8;

$code.=<<___;
.text

.globl	x25519_fe51_mul
.type	x25519_fe51_mul,\@function
.align	5
x25519_fe51_mul:
	stdu	$sp,-$FRAME($sp)
	std	r21,`$FRAME-8*11`($sp)
	std	r22,`$FRAME-8*10`($sp)
	std	r23,`$FRAME-8*9`($sp)
	std	r24,`$FRAME-8*8`($sp)
	std	r25,`$FRAME-8*7`($sp)
	std	r26,`$FRAME-8*6`($sp)
	std	r27,`$FRAME-8*5`($sp)
	std	r28,`$FRAME-8*4`($sp)
	std	r29,`$FRAME-8*3`($sp)
	std	r30,`$FRAME-8*2`($sp)
	std	r31,`$FRAME-8*1`($sp)

	ld	$bi,0($bp)
	ld	$a0,0($ap)
	ld	$a1,8($ap)
	ld	$a2,16($ap)
	ld	$a3,24($ap)
	ld	$a4,32($ap)

	mulld	$h0lo,$a0,$bi		# a[0]*b[0]
	mulhdu	$h0hi,$a0,$bi

	mulld	$h1lo,$a1,$bi		# a[1]*b[0]
	mulhdu	$h1hi,$a1,$bi

	mulld	$h4lo,$a4,$bi		# a[4]*b[0]
	mulhdu	$h4hi,$a4,$bi
	ld	$ap,8($bp)
	mulli	$a4,$a4,19

	mulld	$h2lo,$a2,$bi		# a[2]*b[0]
	mulhdu	$h2hi,$a2,$bi

	mulld	$h3lo,$a3,$bi		# a[3]*b[0]
	mulhdu	$h3hi,$a3,$bi
___
# Passes for b[1..3]. $ap and $bi swap names on every pass so the next b[i]
# can be loaded while the current one is in use, and @a is rotated by one
# position after each pass.
for(my @a=($a0,$a1,$a2,$a3,$a4),
    my $i=1; $i<4; $i++) {
	($ap,$bi) = ($bi,$ap);
$code.=<<___;
	mulld	$t0,@a[4],$bi
	mulhdu	$t1,@a[4],$bi
	addc	$h0lo,$h0lo,$t0
	adde	$h0hi,$h0hi,$t1

	mulld	$t0,@a[0],$bi
	mulhdu	$t1,@a[0],$bi
	addc	$h1lo,$h1lo,$t0
	adde	$h1hi,$h1hi,$t1

	mulld	$t0,@a[3],$bi
	mulhdu	$t1,@a[3],$bi
	ld	$ap,`8*($i+1)`($bp)
	mulli	@a[3],@a[3],19
	addc	$h4lo,$h4lo,$t0
	adde	$h4hi,$h4hi,$t1

	mulld	$t0,@a[1],$bi
	mulhdu	$t1,@a[1],$bi
	addc	$h2lo,$h2lo,$t0
	adde	$h2hi,$h2hi,$t1

	mulld	$t0,@a[2],$bi
	mulhdu	$t1,@a[2],$bi
	addc	$h3lo,$h3lo,$t0
	adde	$h3hi,$h3hi,$t1
___
	unshift(@a,pop(@a));
}
	($ap,$bi) = ($bi,$ap);
$code.=<<___;
	mulld	$t0,$a1,$bi
	mulhdu	$t1,$a1,$bi
	addc	$h0lo,$h0lo,$t0
	adde	$h0hi,$h0hi,$t1

	mulld	$t0,$a2,$bi
	mulhdu	$t1,$a2,$bi
	addc	$h1lo,$h1lo,$t0
	adde	$h1hi,$h1hi,$t1

	mulld	$t0,$a3,$bi
	mulhdu	$t1,$a3,$bi
	addc	$h2lo,$h2lo,$t0
	adde	$h2hi,$h2hi,$t1

	mulld	$t0,$a4,$bi
	mulhdu	$t1,$a4,$bi
	addc	$h3lo,$h3lo,$t0
	adde	$h3hi,$h3hi,$t1

	mulld	$t0,$a0,$bi
	mulhdu	$t1,$a0,$bi
	addc	$h4lo,$h4lo,$t0
	adde	$h4hi,$h4hi,$t1

.Lfe51_reduce:
	li	$mask,-1
	srdi	$mask,$mask,13		# 0x7ffffffffffff

	srdi	$t0,$h2lo,51
	and	$a2,$h2lo,$mask
	insrdi	$t0,$h2hi,51,0		# h2>>51
	 srdi	$t1,$h0lo,51
	 and	$a0,$h0lo,$mask
	 insrdi	$t1,$h0hi,51,0		# h0>>51
	addc	$h3lo,$h3lo,$t0
	addze	$h3hi,$h3hi
	 addc	$h1lo,$h1lo,$t1
	 addze	$h1hi,$h1hi

	srdi	$t0,$h3lo,51
	and	$a3,$h3lo,$mask
	insrdi	$t0,$h3hi,51,0		# h3>>51
	 srdi	$t1,$h1lo,51
	 and	$a1,$h1lo,$mask
	 insrdi	$t1,$h1hi,51,0		# h1>>51
	addc	$h4lo,$h4lo,$t0
	addze	$h4hi,$h4hi
	 add	$a2,$a2,$t1

	srdi	$t0,$h4lo,51
	and	$a4,$h4lo,$mask
	insrdi	$t0,$h4hi,51,0
	mulli	$t0,$t0,19		# (h4 >> 51) * 19

	add	$a0,$a0,$t0

	srdi	$t1,$a2,51
	and	$a2,$a2,$mask
	add	$a3,$a3,$t1

	srdi	$t0,$a0,51
	and	$a0,$a0,$mask
	add	$a1,$a1,$t0

	std	$a2,16($rp)
	std	$a3,24($rp)
	std	$a4,32($rp)
	std	$a0,0($rp)
	std	$a1,8($rp)

	ld	r21,`$FRAME-8*11`($sp)
	ld	r22,`$FRAME-8*10`($sp)
	ld	r23,`$FRAME-8*9`($sp)
	ld	r24,`$FRAME-8*8`($sp)
	ld	r25,`$FRAME-8*7`($sp)
	ld	r26,`$FRAME-8*6`($sp)
	ld	r27,`$FRAME-8*5`($sp)
	ld	r28,`$FRAME-8*4`($sp)
	ld	r29,`$FRAME-8*3`($sp)
	ld	r30,`$FRAME-8*2`($sp)
	ld	r31,`$FRAME-8*1`($sp)
	addi	$sp,$sp,$FRAME
	blr
	.long	0
	.byte	0,12,4,0,0x80,11,3,0
	.long	0
.size	x25519_fe51_mul,.-x25519_fe51_mul
___
{
# Private copies of the register names: the squaring code below re-binds
# $a3, $a4 and $t1 between heredocs, and the copies keep that local.
my ($a0,$a1,$a2,$a3,$a4,$t0,$t1) = ($a0,$a1,$a2,$a3,$a4,$t0,$t1);
$code.=<<___;
.globl	x25519_fe51_sqr
.type	x25519_fe51_sqr,\@function
.align	5
x25519_fe51_sqr:
	stdu	$sp,-$FRAME($sp)
	std	r21,`$FRAME-8*11`($sp)
	std	r22,`$FRAME-8*10`($sp)
	std	r23,`$FRAME-8*9`($sp)
	std	r24,`$FRAME-8*8`($sp)
	std	r25,`$FRAME-8*7`($sp)
	std	r26,`$FRAME-8*6`($sp)
	std	r27,`$FRAME-8*5`($sp)
	std	r28,`$FRAME-8*4`($sp)
	std	r29,`$FRAME-8*3`($sp)
	std	r30,`$FRAME-8*2`($sp)
	std	r31,`$FRAME-8*1`($sp)

	ld	$a0,0($ap)
	ld	$a1,8($ap)
	ld	$a2,16($ap)
	ld	$a3,24($ap)
	ld	$a4,32($ap)

	add	$bi,$a0,$a0		# a[0]*2
	mulli	$t1,$a4,19		# a[4]*19

	mulld	$h0lo,$a0,$a0
	mulhdu	$h0hi,$a0,$a0
	mulld	$h1lo,$a1,$bi
	mulhdu	$h1hi,$a1,$bi
	mulld	$h2lo,$a2,$bi
	mulhdu	$h2hi,$a2,$bi
	mulld	$h3lo,$a3,$bi
	mulhdu	$h3hi,$a3,$bi
	mulld	$h4lo,$a4,$bi
	mulhdu	$h4hi,$a4,$bi
	add	$bi,$a1,$a1		# a[1]*2
___
	# From here on $a4 names the register holding a[4]*19 and $t1 the one
	# holding the original a[4].
	($a4,$t1) = ($t1,$a4);
$code.=<<___;
	mulld	$t0,$t1,$a4
	mulhdu	$t1,$t1,$a4
	addc	$h3lo,$h3lo,$t0
	adde	$h3hi,$h3hi,$t1

	mulli	$bp,$a3,19		# a[3]*19

	mulld	$t0,$a1,$a1
	mulhdu	$t1,$a1,$a1
	addc	$h2lo,$h2lo,$t0
	adde	$h2hi,$h2hi,$t1
	mulld	$t0,$a2,$bi
	mulhdu	$t1,$a2,$bi
	addc	$h3lo,$h3lo,$t0
	adde	$h3hi,$h3hi,$t1
	mulld	$t0,$a3,$bi
	mulhdu	$t1,$a3,$bi
	addc	$h4lo,$h4lo,$t0
	adde	$h4hi,$h4hi,$t1
	mulld	$t0,$a4,$bi
	mulhdu	$t1,$a4,$bi
	add	$bi,$a3,$a3		# a[3]*2
	addc	$h0lo,$h0lo,$t0
	adde	$h0hi,$h0hi,$t1
___
	# $a3 now names the register holding a[3]*19 (computed into $bp above)
	# and $t1 the one holding the original a[3].
	($a3,$t1) = ($bp,$a3);
$code.=<<___;
	mulld	$t0,$t1,$a3
	mulhdu	$t1,$t1,$a3
	addc	$h1lo,$h1lo,$t0
	adde	$h1hi,$h1hi,$t1
	mulld	$t0,$bi,$a4
	mulhdu	$t1,$bi,$a4
	add	$bi,$a2,$a2		# a[2]*2
	addc	$h2lo,$h2lo,$t0
	adde	$h2hi,$h2hi,$t1

	mulld	$t0,$a2,$a2
	mulhdu	$t1,$a2,$a2
	addc	$h4lo,$h4lo,$t0
	adde	$h4hi,$h4hi,$t1
	mulld	$t0,$a3,$bi
	mulhdu	$t1,$a3,$bi
	addc	$h0lo,$h0lo,$t0
	adde	$h0hi,$h0hi,$t1
	mulld	$t0,$a4,$bi
	mulhdu	$t1,$a4,$bi
	addc	$h1lo,$h1lo,$t0
	adde	$h1hi,$h1hi,$t1

	b	.Lfe51_reduce
	.long	0
	.byte	0,12,4,0,0x80,11,2,0
	.long	0
.size	x25519_fe51_sqr,.-x25519_fe51_sqr
___
}
$code.=<<___;
.globl	x25519_fe51_mul121666
.type	x25519_fe51_mul121666,\@function
.align	5
x25519_fe51_mul121666:
	stdu	$sp,-$FRAME($sp)
	std	r21,`$FRAME-8*11`($sp)
	std	r22,`$FRAME-8*10`($sp)
	std	r23,`$FRAME-8*9`($sp)
	std	r24,`$FRAME-8*8`($sp)
	std	r25,`$FRAME-8*7`($sp)
	std	r26,`$FRAME-8*6`($sp)
	std	r27,`$FRAME-8*5`($sp)
	std	r28,`$FRAME-8*4`($sp)
	std	r29,`$FRAME-8*3`($sp)
	std	r30,`$FRAME-8*2`($sp)
	std	r31,`$FRAME-8*1`($sp)

	lis	$bi,`65536>>16`
	ori	$bi,$bi,`121666-65536`
	ld	$a0,0($ap)
	ld	$a1,8($ap)
	ld	$a2,16($ap)
	ld	$a3,24($ap)
	ld	$a4,32($ap)

	mulld	$h0lo,$a0,$bi		# a[0]*121666
	mulhdu	$h0hi,$a0,$bi
	mulld	$h1lo,$a1,$bi		# a[1]*121666
	mulhdu	$h1hi,$a1,$bi
	mulld	$h2lo,$a2,$bi		# a[2]*121666
	mulhdu	$h2hi,$a2,$bi
	mulld	$h3lo,$a3,$bi		# a[3]*121666
	mulhdu	$h3hi,$a3,$bi
	mulld	$h4lo,$a4,$bi		# a[4]*121666
	mulhdu	$h4hi,$a4,$bi

	b	.Lfe51_reduce
	.long	0
	.byte	0,12,4,0,0x80,11,2,0
	.long	0
.size	x25519_fe51_mul121666,.-x25519_fe51_mul121666
___
}

# Post-process and emit the generated text.  Every `...` span left in $code
# (the frame offsets and the two halves of the 121666 constant are written
# that way) is replaced by the result of evaluating its contents as Perl.
# The text is then printed to STDOUT, which the top of this file redirects
# into the ppc-xlate.pl pipe; close is checked so that a failure of that
# pipe is reported instead of being silently ignored.
$code =~ s/\`([^\`]*)\`/eval $1/gem;
print $code;
close STDOUT or die "error closing STDOUT: $!";