#! /usr/bin/env perl
# Copyright 2016-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# ECP_NISTZ256 module for PPC64.
#
# August 2016.
#
# Original ECP_NISTZ256 submission targeting x86_64 is detailed in
# http://eprint.iacr.org/2013/816.
#
#			with/without -DECP_NISTZ256_ASM
# POWER7		+260-530%
# POWER8		+220-340%

# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# Locate the ppc-xlate.pl translator, first next to this script, then in
# ../../perlasm relative to it.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Everything printed to STDOUT from here on is piped through the translator.
open OUT,"| \"$^X\" $xlate $flavour \"$output\""
    or die "can't call $xlate: $!";
*STDOUT=*OUT;

my $sp="r1";

{
# Register allocation: $rp=r3, $ap=r4, $bp=r5, $bi=r6, $acc0-$acc3=r7-r10,
# $poly1=r11, $poly3=r12, $acc4=r22, $acc5=r23, $a0-$a3=r24-r27,
# $t0-$t3=r28-r31.
my ($rp,$ap,$bp,$bi,$acc0,$acc1,$acc2,$acc3,$poly1,$poly3,
    $acc4,$acc5,$a0,$a1,$a2,$a3,$t0,$t1,$t2,$t3) =
    map("r$_",(3..12,22..31));

my ($acc6,$acc7)=($bp,$bi);	# used in __ecp_nistz256_sqr_mont

$code.=<<___;
.machine	"any"
.text
___
########################################################################
# Convert ecp_nistz256_table.c to layout expected by ecp_nistz_gather_w7
#
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
# The handle is declared on its own line: a "my" inside the first open()
# would not be visible to the fallback open() in the same statement.
my $table;
open($table,'<',"ecp_nistz256_table.c") or
open($table,'<',"${dir}../ecp_nistz256_table.c") or
die "failed to open ecp_nistz256_table.c:",$!;

use integer;

# Collect every TOBN(hi,lo) pair as two 32-bit words, low word first.
# The file is read line by line instead of being slurped into a list.
while (my $row = <$table>) {
	while ($row =~ /TOBN\(\s*(0x[0-9a-f]+),\s*(0x[0-9a-f]+)\s*\)/g) {
		push @arr,hex($2),hex($1);
	}
}
close $table;

# See ecp_nistz256_table.c for explanation for why it's 64*16*37.
# 64*16*37-1 is because $#arr returns last valid index of @arr, not
# amount of elements.
die "insane number of elements" if ($#arr != 64*16*37-1);

$code.=<<___;
.type	ecp_nistz256_precomputed,\@object
.globl	ecp_nistz256_precomputed
.align	12
ecp_nistz256_precomputed:
___
########################################################################
# this conversion smashes P256_POINT_AFFINE by individual bytes with
# 64 byte interval, similar to
#	1111222233334444
#	1234123412341234
for(1..37) {
	# one sub-table: 64 entries of 16 32-bit words each
	@tbl = splice(@arr,0,64*16);
	for($i=0;$i<64;$i++) {
		undef @line;
		for($j=0;$j<64;$j++) {
			# byte $i of entry $j: word $i/4, byte lane $i%4
			push @line,($tbl[$j*16+$i/4]>>(($i%4)*8))&0xff;
		}
		$code.=".byte\t";
		$code.=join(',',map { sprintf "0x%02x",$_} @line);
		$code.="\n";
	}
}

$code.=<<___;
.size	ecp_nistz256_precomputed,.-ecp_nistz256_precomputed
.asciz	"ECP_NISTZ256 for PPC64, CRYPTOGAMS by <appro\@openssl.org>"

# void	ecp_nistz256_mul_mont(BN_ULONG x0[4],const BN_ULONG x1[4],
#					     const BN_ULONG x2[4]);
# Wrapper: saves r22-r31, loads a[0-3] and b[0], builds the two modulus
# words kept in registers and calls __ecp_nistz256_mul_mont.
.globl	ecp_nistz256_mul_mont
.align	5
ecp_nistz256_mul_mont:
	stdu	$sp,-128($sp)
	mflr	r0
	std	r22,48($sp)
	std	r23,56($sp)
	std	r24,64($sp)
	std	r25,72($sp)
	std	r26,80($sp)
	std	r27,88($sp)
	std	r28,96($sp)
	std	r29,104($sp)
	std	r30,112($sp)
	std	r31,120($sp)

	ld	$a0,0($ap)
	ld	$bi,0($bp)
	ld	$a1,8($ap)
	ld	$a2,16($ap)
	ld	$a3,24($ap)

	li	$poly1,-1
	srdi	$poly1,$poly1,32	# 0x00000000ffffffff
	li	$poly3,1
	orc	$poly3,$poly3,$poly1	# 0xffffffff00000001

	bl	__ecp_nistz256_mul_mont

	mtlr	r0
	ld	r22,48($sp)
	ld	r23,56($sp)
	ld	r24,64($sp)
	ld	r25,72($sp)
	ld	r26,80($sp)
	ld	r27,88($sp)
	ld	r28,96($sp)
	ld	r29,104($sp)
	ld	r30,112($sp)
	ld	r31,120($sp)
	addi	$sp,$sp,128
	blr
	.long	0
	.byte	0,12,4,0,0x80,10,3,0
	.long	0
.size	ecp_nistz256_mul_mont,.-ecp_nistz256_mul_mont

# void	ecp_nistz256_sqr_mont(BN_ULONG x0[4],const BN_ULONG x1[4]);
# Wrapper: saves r22-r31, loads a[0-3], builds the two modulus words kept
# in registers and calls __ecp_nistz256_sqr_mont.
.globl	ecp_nistz256_sqr_mont
.align	4
ecp_nistz256_sqr_mont:
	stdu	$sp,-128($sp)
	mflr	r0
	std	r22,48($sp)
	std	r23,56($sp)
	std	r24,64($sp)
	std	r25,72($sp)
	std	r26,80($sp)
	std	r27,88($sp)
	std	r28,96($sp)
	std	r29,104($sp)
	std	r30,112($sp)
	std	r31,120($sp)

	ld	$a0,0($ap)
	ld	$a1,8($ap)
	ld	$a2,16($ap)
	ld	$a3,24($ap)

	li	$poly1,-1
	srdi	$poly1,$poly1,32	# 0x00000000ffffffff
	li	$poly3,1
	orc	$poly3,$poly3,$poly1	# 0xffffffff00000001

	bl	__ecp_nistz256_sqr_mont

	mtlr	r0
	ld	r22,48($sp)
	ld	r23,56($sp)
	ld	r24,64($sp)
	ld	r25,72($sp)
	ld	r26,80($sp)
	ld	r27,88($sp)
	ld	r28,96($sp)
	ld	r29,104($sp)
	ld	r30,112($sp)
	ld	r31,120($sp)
	addi	$sp,$sp,128
	blr
	.long	0
	.byte	0,12,4,0,0x80,10,2,0
	.long	0
.size	ecp_nistz256_sqr_mont,.-ecp_nistz256_sqr_mont

# void	ecp_nistz256_add(BN_ULONG x0[4],const BN_ULONG x1[4],
#					const BN_ULONG x2[4]);
# Wrapper: saves r28-r31, loads both operands into the acc and t
# registers and calls __ecp_nistz256_add.
.globl	ecp_nistz256_add
.align	4
ecp_nistz256_add:
	stdu	$sp,-128($sp)
	mflr	r0
	std	r28,96($sp)
	std	r29,104($sp)
	std	r30,112($sp)
	std	r31,120($sp)

	ld	$acc0,0($ap)
	ld	$t0,  0($bp)
	ld	$acc1,8($ap)
	ld	$t1,  8($bp)
	ld	$acc2,16($ap)
	ld	$t2,  16($bp)
	ld	$acc3,24($ap)
	ld	$t3,  24($bp)

	li	$poly1,-1
	srdi	$poly1,$poly1,32	# 0x00000000ffffffff
	li	$poly3,1
	orc	$poly3,$poly3,$poly1	# 0xffffffff00000001

	bl	__ecp_nistz256_add

	mtlr	r0
	ld	r28,96($sp)
	ld	r29,104($sp)
	ld	r30,112($sp)
	ld	r31,120($sp)
	addi	$sp,$sp,128
	blr
	.long	0
	.byte	0,12,4,0,0x80,4,3,0
	.long	0
.size	ecp_nistz256_add,.-ecp_nistz256_add

# void	ecp_nistz256_div_by_2(BN_ULONG x0[4],const BN_ULONG x1[4]);
# Wrapper: saves r28-r31, loads the operand into the acc registers and
# calls __ecp_nistz256_div_by_2.
.globl	ecp_nistz256_div_by_2
.align	4
ecp_nistz256_div_by_2:
	stdu	$sp,-128($sp)
	mflr	r0
	std	r28,96($sp)
	std	r29,104($sp)
	std	r30,112($sp)
	std	r31,120($sp)

	ld	$acc0,0($ap)
	ld	$acc1,8($ap)
	ld	$acc2,16($ap)
	ld	$acc3,24($ap)

	li	$poly1,-1
	srdi	$poly1,$poly1,32	# 0x00000000ffffffff
	li	$poly3,1
	orc	$poly3,$poly3,$poly1	# 0xffffffff00000001

	bl	__ecp_nistz256_div_by_2

	mtlr	r0
	ld	r28,96($sp)
	ld	r29,104($sp)
	ld	r30,112($sp)
	ld	r31,120($sp)
	addi	$sp,$sp,128
	blr
	.long	0
	.byte	0,12,4,0,0x80,4,2,0
	.long	0
.size	ecp_nistz256_div_by_2,.-ecp_nistz256_div_by_2

# void	ecp_nistz256_mul_by_2(BN_ULONG x0[4],const BN_ULONG x1[4]);
# Wrapper: loads the operand, copies it into the t registers and calls
# __ecp_nistz256_add, i.e. computes a+a.
.globl	ecp_nistz256_mul_by_2
.align	4
ecp_nistz256_mul_by_2:
	stdu	$sp,-128($sp)
	mflr	r0
	std	r28,96($sp)
	std	r29,104($sp)
	std	r30,112($sp)
	std	r31,120($sp)

	ld	$acc0,0($ap)
	ld	$acc1,8($ap)
	ld	$acc2,16($ap)
	ld	$acc3,24($ap)

	mr	$t0,$acc0
	mr	$t1,$acc1
	mr	$t2,$acc2
	mr	$t3,$acc3

	li	$poly1,-1
	srdi	$poly1,$poly1,32	# 0x00000000ffffffff
	li	$poly3,1
	orc	$poly3,$poly3,$poly1	# 0xffffffff00000001

	bl	__ecp_nistz256_add	# ret = a+a	// 2*a

	mtlr	r0
	ld	r28,96($sp)
	ld	r29,104($sp)
	ld	r30,112($sp)
	ld	r31,120($sp)
	addi	$sp,$sp,128
	blr
	.long	0
	.byte	0,12,4,0,0x80,4,2,0
	.long	0
.size	ecp_nistz256_mul_by_2,.-ecp_nistz256_mul_by_2

# void	ecp_nistz256_mul_by_3(BN_ULONG x0[4],const BN_ULONG x1[4]);
# Wrapper: keeps a copy of the operand in the stack frame (offsets 64-88),
# then calls __ecp_nistz256_add twice: a+a, then (a+a)+a.
.globl	ecp_nistz256_mul_by_3
.align	4
ecp_nistz256_mul_by_3:
	stdu	$sp,-128($sp)
	mflr	r0
	std	r28,96($sp)
	std	r29,104($sp)
	std	r30,112($sp)
	std	r31,120($sp)

	ld	$acc0,0($ap)
	ld	$acc1,8($ap)
	ld	$acc2,16($ap)
	ld	$acc3,24($ap)

	mr	$t0,$acc0
	std	$acc0,64($sp)
	mr	$t1,$acc1
	std	$acc1,72($sp)
	mr	$t2,$acc2
	std	$acc2,80($sp)
	mr	$t3,$acc3
	std	$acc3,88($sp)

	li	$poly1,-1
	srdi	$poly1,$poly1,32	# 0x00000000ffffffff
	li	$poly3,1
	orc	$poly3,$poly3,$poly1	# 0xffffffff00000001

	bl	__ecp_nistz256_add	# ret = a+a	// 2*a

	ld	$t0,64($sp)
	ld	$t1,72($sp)
	ld	$t2,80($sp)
	ld	$t3,88($sp)

	bl	__ecp_nistz256_add	# ret += a	// 2*a+a=3*a

	mtlr	r0
	ld	r28,96($sp)
	ld	r29,104($sp)
	ld	r30,112($sp)
	ld	r31,120($sp)
	addi	$sp,$sp,128
	blr
	.long	0
	.byte	0,12,4,0,0x80,4,2,0
	.long	0
.size	ecp_nistz256_mul_by_3,.-ecp_nistz256_mul_by_3

# void	ecp_nistz256_sub(BN_ULONG x0[4],const BN_ULONG x1[4],
#				        const BN_ULONG x2[4]);
# Wrapper: loads the first operand into the acc registers and calls
# __ecp_nistz256_sub_from, which reads the second operand through bp.
.globl	ecp_nistz256_sub
.align	4
ecp_nistz256_sub:
	stdu	$sp,-128($sp)
	mflr	r0
	std	r28,96($sp)
	std	r29,104($sp)
	std	r30,112($sp)
	std	r31,120($sp)

	ld	$acc0,0($ap)
	ld	$acc1,8($ap)
	ld	$acc2,16($ap)
	ld	$acc3,24($ap)

	li	$poly1,-1
	srdi	$poly1,$poly1,32	# 0x00000000ffffffff
	li	$poly3,1
	orc	$poly3,$poly3,$poly1	# 0xffffffff00000001

	bl	__ecp_nistz256_sub_from

	mtlr	r0
	ld	r28,96($sp)
	ld	r29,104($sp)
	ld	r30,112($sp)
	ld	r31,120($sp)
	addi	$sp,$sp,128
	blr
	.long	0
	.byte	0,12,4,0,0x80,4,3,0
	.long	0
.size	ecp_nistz256_sub,.-ecp_nistz256_sub

# void	ecp_nistz256_neg(BN_ULONG x0[4],const BN_ULONG x1[4]);
# Wrapper: zeroes the acc registers, points bp at the operand and calls
# __ecp_nistz256_sub_from, i.e. computes 0-a.
.globl	ecp_nistz256_neg
.align	4
ecp_nistz256_neg:
	stdu	$sp,-128($sp)
	mflr	r0
	std	r28,96($sp)
	std	r29,104($sp)
	std	r30,112($sp)
	std	r31,120($sp)

	mr	$bp,$ap
	li	$acc0,0
	li	$acc1,0
	li	$acc2,0
	li	$acc3,0

	li	$poly1,-1
	srdi	$poly1,$poly1,32	# 0x00000000ffffffff
	li	$poly3,1
	orc	$poly3,$poly3,$poly1	# 0xffffffff00000001

	bl	__ecp_nistz256_sub_from

	mtlr	r0
	ld	r28,96($sp)
	ld	r29,104($sp)
	ld	r30,112($sp)
	ld	r31,120($sp)
	addi	$sp,$sp,128
	blr
	.long	0
	.byte	0,12,4,0,0x80,4,2,0
	.long	0
.size	ecp_nistz256_neg,.-ecp_nistz256_neg

# note that __ecp_nistz256_mul_mont expects a[0-3] input pre-loaded
# to $a0-$a3 and b[0] - to $bi
.type	__ecp_nistz256_mul_mont,\@function
.align	4
__ecp_nistz256_mul_mont:
	mulld	$acc0,$a0,$bi		# a[0]*b[0]
	mulhdu	$t0,$a0,$bi

	mulld	$acc1,$a1,$bi		# a[1]*b[0]
	mulhdu	$t1,$a1,$bi

	mulld	$acc2,$a2,$bi		# a[2]*b[0]
	mulhdu	$t2,$a2,$bi

	mulld	$acc3,$a3,$bi		# a[3]*b[0]
	mulhdu	$t3,$a3,$bi
	ld	$bi,8($bp)		# b[1]

	addc	$acc1,$acc1,$t0		# accumulate high parts of multiplication
	 sldi	$t0,$acc0,32
	adde	$acc2,$acc2,$t1
	 srdi	$t1,$acc0,32
	adde	$acc3,$acc3,$t2
	addze	$acc4,$t3
	li	$acc5,0
___
# Three iterations, each emitting one reduction step followed by the
# multiplication by the next word of b; the fourth reduction is emitted
# after the loop.
for($i=1;$i<4;$i++) {
	################################################################
	# Reduction iteration is normally performed by accumulating
	# result of multiplication of modulus by "magic" digit [and
	# omitting least significant word, which is guaranteed to
	# be 0], but thanks to special form of modulus and "magic"
	# digit being equal to least significant word, it can be
	# performed with additions and subtractions alone. Indeed:
	#
	#            ffff0001.00000000.0000ffff.ffffffff
	# *                                     abcdefgh
	# + xxxxxxxx.xxxxxxxx.xxxxxxxx.xxxxxxxx.abcdefgh
	#
	# Now observing that ff..ff*x = (2^n-1)*x = 2^n*x-x, we
	# rewrite above as:
	#
	#   xxxxxxxx.xxxxxxxx.xxxxxxxx.xxxxxxxx.abcdefgh
	# + abcdefgh.abcdefgh.0000abcd.efgh0000.00000000
	# - 0000abcd.efgh0000.00000000.00000000.abcdefgh
	#
	# or marking redundant operations:
	#
	#   xxxxxxxx.xxxxxxxx.xxxxxxxx.xxxxxxxx.--------
	# + abcdefgh.abcdefgh.0000abcd.efgh0000.--------
	# - 0000abcd.efgh0000.--------.--------.--------

$code.=<<___;
	subfc	$t2,$t0,$acc0		# "*0xffff0001"
	subfe	$t3,$t1,$acc0
	addc	$acc0,$acc1,$t0		# +=acc[0]<<96 and omit acc[0]
	adde	$acc1,$acc2,$t1
	adde	$acc2,$acc3,$t2		# +=acc[0]*0xffff0001
	adde	$acc3,$acc4,$t3
	addze	$acc4,$acc5

	mulld	$t0,$a0,$bi		# lo(a[0]*b[i])
	mulld	$t1,$a1,$bi		# lo(a[1]*b[i])
	mulld	$t2,$a2,$bi		# lo(a[2]*b[i])
	mulld	$t3,$a3,$bi		# lo(a[3]*b[i])
	addc	$acc0,$acc0,$t0		# accumulate low parts of multiplication
	mulhdu	$t0,$a0,$bi		# hi(a[0]*b[i])
	adde	$acc1,$acc1,$t1
	mulhdu	$t1,$a1,$bi		# hi(a[1]*b[i])
	adde	$acc2,$acc2,$t2
	mulhdu	$t2,$a2,$bi		# hi(a[2]*b[i])
	adde	$acc3,$acc3,$t3
	mulhdu	$t3,$a3,$bi		# hi(a[3]*b[i])
	addze	$acc4,$acc4
___
# the next word of b is only loaded while there is one left
$code.=<<___	if ($i<3);
	ld	$bi,8*($i+1)($bp)	# b[$i+1]
___
$code.=<<___;
	addc	$acc1,$acc1,$t0		# accumulate high parts of multiplication
	 sldi	$t0,$acc0,32
	adde	$acc2,$acc2,$t1
	 srdi	$t1,$acc0,32
	adde	$acc3,$acc3,$t2
	adde	$acc4,$acc4,$t3
	li	$acc5,0
	addze	$acc5,$acc5
___
}
$code.=<<___;
	# last reduction
	subfc	$t2,$t0,$acc0		# "*0xffff0001"
	subfe	$t3,$t1,$acc0
	addc	$acc0,$acc1,$t0		# +=acc[0]<<96 and omit acc[0]
	adde	$acc1,$acc2,$t1
	adde	$acc2,$acc3,$t2		# +=acc[0]*0xffff0001
	adde	$acc3,$acc4,$t3
	addze	$acc4,$acc5

	li	$t2,0
	addic	$acc0,$acc0,1		# ret -= modulus
	subfe	$acc1,$poly1,$acc1
	subfe	$acc2,$t2,$acc2
	subfe	$acc3,$poly3,$acc3
	subfe	$acc4,$t2,$acc4

	addc	$acc0,$acc0,$acc4	# ret += modulus if borrow
	and	$t1,$poly1,$acc4
	and	$t3,$poly3,$acc4
	adde	$acc1,$acc1,$t1
	addze	$acc2,$acc2
	adde	$acc3,$acc3,$t3

	std	$acc0,0($rp)
	std	$acc1,8($rp)
	std	$acc2,16($rp)
	std	$acc3,24($rp)

	blr
	.long	0
	.byte	0,12,0x14,0,0,0,1,0
	.long	0
.size	__ecp_nistz256_mul_mont,.-__ecp_nistz256_mul_mont

# note that __ecp_nistz256_sqr_mont expects a[0-3] input pre-loaded
# to $a0-$a3
.type	__ecp_nistz256_sqr_mont,\@function
.align	4
__ecp_nistz256_sqr_mont:
	################################################################
	#  |  |  |  |  |  |a1*a0|  |
	#  |  |  |  |  |a2*a0|  |  |
	#  |  |a3*a2|a3*a0|  |  |  |
	#  |  |  |  |a2*a1|  |  |  |
	#  |  |  |a3*a1|  |  |  |  |
	# *|  |  |  |  |  |  |  | 2|
	# +|a3*a3|a2*a2|a1*a1|a0*a0|
	#  |--+--+--+--+--+--+--+--|
	#  |A7|A6|A5|A4|A3|A2|A1|A0|, where Ax is $accx, i.e. follow $accx
	#
	#  "can't overflow" below mark carrying into high part of
	#  multiplication result, which can't overflow, because it
	#  can never be all ones.

	mulld	$acc1,$a1,$a0		# a[1]*a[0]
	mulhdu	$t1,$a1,$a0
	mulld	$acc2,$a2,$a0		# a[2]*a[0]
	mulhdu	$t2,$a2,$a0
	mulld	$acc3,$a3,$a0		# a[3]*a[0]
	mulhdu	$acc4,$a3,$a0

	addc	$acc2,$acc2,$t1		# accumulate high parts of multiplication
	 mulld	$t0,$a2,$a1		# a[2]*a[1]
	 mulhdu	$t1,$a2,$a1
	adde	$acc3,$acc3,$t2
	 mulld	$t2,$a3,$a1		# a[3]*a[1]
	 mulhdu	$t3,$a3,$a1
	addze	$acc4,$acc4		# can't overflow

	mulld	$acc5,$a3,$a2		# a[3]*a[2]
	mulhdu	$acc6,$a3,$a2

	addc	$t1,$t1,$t2		# accumulate high parts of multiplication
	addze	$t2,$t3			# can't overflow

	addc	$acc3,$acc3,$t0		# accumulate low parts of multiplication
	adde	$acc4,$acc4,$t1
	adde	$acc5,$acc5,$t2
	addze	$acc6,$acc6		# can't overflow

	addc	$acc1,$acc1,$acc1	# acc[1-6]*=2
	adde	$acc2,$acc2,$acc2
	adde	$acc3,$acc3,$acc3
	adde	$acc4,$acc4,$acc4
	adde	$acc5,$acc5,$acc5
	adde	$acc6,$acc6,$acc6
	li	$acc7,0
	addze	$acc7,$acc7

	mulld	$acc0,$a0,$a0		# a[0]*a[0]
	mulhdu	$a0,$a0,$a0
	mulld	$t1,$a1,$a1		# a[1]*a[1]
	mulhdu	$a1,$a1,$a1
	mulld	$t2,$a2,$a2		# a[2]*a[2]
	mulhdu	$a2,$a2,$a2
	mulld	$t3,$a3,$a3		# a[3]*a[3]
	mulhdu	$a3,$a3,$a3
	addc	$acc1,$acc1,$a0		# +a[i]*a[i]
	 sldi	$t0,$acc0,32
	adde	$acc2,$acc2,$t1
	 srdi	$t1,$acc0,32
	adde	$acc3,$acc3,$a1
	adde	$acc4,$acc4,$t2
	adde	$acc5,$acc5,$a2
	adde	$acc6,$acc6,$t3
	adde	$acc7,$acc7,$a3
___
for($i=0;$i<3;$i++) {			# reductions, see commentary in
					# multiplication for details
$code.=<<___;
	subfc	$t2,$t0,$acc0		# "*0xffff0001"
	subfe	$t3,$t1,$acc0
	addc	$acc0,$acc1,$t0		# +=acc[0]<<96 and omit acc[0]
	 sldi	$t0,$acc0,32
	adde	$acc1,$acc2,$t1
	 srdi	$t1,$acc0,32
	adde	$acc2,$acc3,$t2		# +=acc[0]*0xffff0001
	addze	$acc3,$t3		# can't overflow
___
}
$code.=<<___;
	subfc	$t2,$t0,$acc0		# "*0xffff0001"
	subfe	$t3,$t1,$acc0
	addc	$acc0,$acc1,$t0		# +=acc[0]<<96 and omit acc[0]
	adde	$acc1,$acc2,$t1
	adde	$acc2,$acc3,$t2		# +=acc[0]*0xffff0001
	addze	$acc3,$t3		# can't overflow

	addc	$acc0,$acc0,$acc4	# accumulate upper half
	adde	$acc1,$acc1,$acc5
	adde	$acc2,$acc2,$acc6
	adde	$acc3,$acc3,$acc7
	li	$t2,0
	addze	$acc4,$t2

	addic	$acc0,$acc0,1		# ret -= modulus
	subfe	$acc1,$poly1,$acc1
	subfe	$acc2,$t2,$acc2
	subfe	$acc3,$poly3,$acc3
	subfe	$acc4,$t2,$acc4

	addc	$acc0,$acc0,$acc4	# ret += modulus if borrow
	and	$t1,$poly1,$acc4
	and	$t3,$poly3,$acc4
	adde	$acc1,$acc1,$t1
	addze	$acc2,$acc2
	adde	$acc3,$acc3,$t3

	std	$acc0,0($rp)
	std	$acc1,8($rp)
	std	$acc2,16($rp)
	std	$acc3,24($rp)

	blr
	.long	0
	.byte	0,12,0x14,0,0,0,1,0
	.long	0
.size	__ecp_nistz256_sqr_mont,.-__ecp_nistz256_sqr_mont

# Note that __ecp_nistz256_add expects both input vectors pre-loaded to
# $acc0-$acc3 and $t0-$t3. This is done because it's used in multiple
# contexts, e.g. in multiplication by 2 and 3...
# --------------------------------------------------------------------
# Modular helpers shared by the point routines. Common conventions,
# as used by the code below:
#   - 256-bit values are four 64-bit limbs, least significant first
#   - the running value lives in $acc0-$acc3
#   - $poly1 and $poly3 hold modulus limbs 1 and 3; limb 0 is all
#     ones and limb 2 is zero, so those two are handled with
#     immediates and carry-only instructions
#   - every helper stores its result to 0..24($rp) and also leaves it
#     in $acc0-$acc3 for the caller to chain on
# --------------------------------------------------------------------

# __ecp_nistz256_add: ret = a + b, followed by one conditional
# subtraction of the modulus.
#   in:       a in $acc0-$acc3, b in $t0-$t3
#   clobbers: $t0-$t3
# NOTE(review): a single subtraction fully reduces the sum only when
# both inputs are already below the modulus - confirm at call sites.
.type	__ecp_nistz256_add,\@function
.align	4
__ecp_nistz256_add:
	addc	$acc0,$acc0,$t0		# ret = a+b
	adde	$acc1,$acc1,$t1
	adde	$acc2,$acc2,$t2
	li	$t2,0
	adde	$acc3,$acc3,$t3
	addze	$t0,$t2			# t0 = carry out of the 256-bit sum

	# if a+b >= modulus, subtract modulus
	#
	# But since comparison implies subtraction, we subtract
	# modulus and then add it back if subtraction borrowed.

	subic	$acc0,$acc0,-1		# limb 0 of modulus is all ones
	subfe	$acc1,$poly1,$acc1
	subfe	$acc2,$t2,$acc2		# limb 2 of modulus is zero
	subfe	$acc3,$poly3,$acc3
	subfe	$t0,$t2,$t0		# t0 = borrow ? -1 : 0

	addc	$acc0,$acc0,$t0		# ret += modulus & t0
	and	$t1,$poly1,$t0
	and	$t3,$poly3,$t0
	adde	$acc1,$acc1,$t1
	addze	$acc2,$acc2
	adde	$acc3,$acc3,$t3

	std	$acc0,0($rp)
	std	$acc1,8($rp)
	std	$acc2,16($rp)
	std	$acc3,24($rp)

	blr
	.long	0
	.byte	0,12,0x14,0,0,0,3,0
	.long	0
.size	__ecp_nistz256_add,.-__ecp_nistz256_add

# __ecp_nistz256_sub_from: ret = a - b, adding the modulus back if the
# subtraction borrowed.
#   in:       a in $acc0-$acc3, b read from 0..24($bp)
#   clobbers: $t0-$t3
.type	__ecp_nistz256_sub_from,\@function
.align	4
__ecp_nistz256_sub_from:
	ld	$t0,0($bp)
	ld	$t1,8($bp)
	ld	$t2,16($bp)
	ld	$t3,24($bp)
	subfc	$acc0,$t0,$acc0		# ret = a-b
	subfe	$acc1,$t1,$acc1
	subfe	$acc2,$t2,$acc2
	subfe	$acc3,$t3,$acc3
	subfe	$t0,$t0,$t0		# t0 = borrow ? -1 : 0

	# if a-b borrowed, add modulus

	addc	$acc0,$acc0,$t0		# ret += modulus & t0
	and	$t1,$poly1,$t0
	and	$t3,$poly3,$t0
	adde	$acc1,$acc1,$t1
	addze	$acc2,$acc2
	adde	$acc3,$acc3,$t3

	std	$acc0,0($rp)
	std	$acc1,8($rp)
	std	$acc2,16($rp)
	std	$acc3,24($rp)

	blr
	.long	0
	.byte	0,12,0x14,0,0,0,3,0
	.long	0
.size	__ecp_nistz256_sub_from,.-__ecp_nistz256_sub_from

# __ecp_nistz256_sub_morf: same as __ecp_nistz256_sub_from with the
# operands swapped, i.e. ret = b - a.
#   in:       a in $acc0-$acc3, b read from 0..24($bp)
#   clobbers: $t0-$t3
.type	__ecp_nistz256_sub_morf,\@function
.align	4
__ecp_nistz256_sub_morf:
	ld	$t0,0($bp)
	ld	$t1,8($bp)
	ld	$t2,16($bp)
	ld	$t3,24($bp)
	subfc	$acc0,$acc0,$t0 	# ret = b-a
	subfe	$acc1,$acc1,$t1
	subfe	$acc2,$acc2,$t2
	subfe	$acc3,$acc3,$t3
	subfe	$t0,$t0,$t0		# t0 = borrow ? -1 : 0

	# if b-a borrowed, add modulus

	addc	$acc0,$acc0,$t0		# ret += modulus & t0
	and	$t1,$poly1,$t0
	and	$t3,$poly3,$t0
	adde	$acc1,$acc1,$t1
	addze	$acc2,$acc2
	adde	$acc3,$acc3,$t3

	std	$acc0,0($rp)
	std	$acc1,8($rp)
	std	$acc2,16($rp)
	std	$acc3,24($rp)

	blr
	.long	0
	.byte	0,12,0x14,0,0,0,3,0
	.long	0
.size	__ecp_nistz256_sub_morf,.-__ecp_nistz256_sub_morf

# __ecp_nistz256_div_by_2: ret = a / 2 modulo the modulus.
# The modulus is added unconditionally, then taken back off again when
# a was even, and the resulting 257-bit value is shifted right by one.
#   in:       a in $acc0-$acc3
#   clobbers: $t0-$t3 and $ap (used as the 257th bit), so callers must
#             not expect $ap to survive this call
.type	__ecp_nistz256_div_by_2,\@function
.align	4
__ecp_nistz256_div_by_2:
	andi.	$t0,$acc0,1
	addic	$acc0,$acc0,-1		# a += modulus
	neg	$t0,$t0
	adde	$acc1,$acc1,$poly1
	not	$t0,$t0			# t0 = a was even ? -1 : 0
	addze	$acc2,$acc2
	li	$t2,0
	adde	$acc3,$acc3,$poly3
	and	$t1,$poly1,$t0
	addze	$ap,$t2			# ap = carry
	and	$t3,$poly3,$t0

	subfc	$acc0,$t0,$acc0		# a -= modulus if a was even
	subfe	$acc1,$t1,$acc1
	subfe	$acc2,$t2,$acc2
	subfe	$acc3,$t3,$acc3
	subfe	$ap,  $t2,$ap

	srdi	$acc0,$acc0,1		# 257-bit right shift by one bit
	sldi	$t0,$acc1,63
	srdi	$acc1,$acc1,1
	sldi	$t1,$acc2,63
	srdi	$acc2,$acc2,1
	sldi	$t2,$acc3,63
	srdi	$acc3,$acc3,1
	sldi	$t3,$ap,63
	or	$acc0,$acc0,$t0
	or	$acc1,$acc1,$t1
	or	$acc2,$acc2,$t2
	or	$acc3,$acc3,$t3

	std	$acc0,0($rp)
	std	$acc1,8($rp)
	std	$acc2,16($rp)
	std	$acc3,24($rp)

	blr
	.long	0
	.byte	0,12,0x14,0,0,0,1,0
	.long	0
.size	__ecp_nistz256_div_by_2,.-__ecp_nistz256_div_by_2
___
########################################################################
# following subroutines
# are "literal" implementation of those found in
# ecp_nistz256.c
#
########################################################################
# void ecp_nistz256_point_double(P256_POINT *out,const P256_POINT *inp);
#
# Point layout as read by the code below: x at offset 0, y at 32 and
# z at 64, each a 256-bit value.
#
# Conventions visible in the code:
#   - $rp/$ap are repurposed as the per-call destination and as scratch
#     by the helpers, so the real arguments are parked in
#     $rp_real/$ap_real (r20/r21) for the whole function.
#   - Each helper leaves its result in $acc0-$acc3 as well as storing it
#     to $rp. The "forward load" instructions preload $a0-$a3/$bi for
#     the next multiplication or squaring before the current helper is
#     called.
#   - The return address is kept in r0 from mflr to mtlr and is never
#     written to the frame. The add/sub/div_by_2 helpers in this file do
#     not write r0; NOTE(review): presumably the same holds for
#     __ecp_nistz256_mul_mont and __ecp_nistz256_sqr_mont - confirm.
#   - .Ldouble_shortcut is also the target of the branch from
#     .Ladd_double in ecp_nistz256_point_add, which arrives with a frame
#     of this function's size, r20-r31 already saved and r0, $poly1 and
#     $poly3 already set up.
if (1) {
# Frame: 64 bytes below the temporaries (presumably the ABI linkage
# area - TODO confirm), 4 temporaries of 32 bytes, and 12 slots at the
# top of the frame for r20-r31.
my $FRAME=64+32*4+12*8;
my ($S,$M,$Zsqr,$tmp0)=map(64+32*$_,(0..3));
# above map() describes stack layout with 4 temporary
# 256-bit vectors on top.
my ($rp_real,$ap_real) = map("r$_",(20,21));

$code.=<<___;
.globl	ecp_nistz256_point_double
.align	5
ecp_nistz256_point_double:
	stdu	$sp,-$FRAME($sp)
	mflr	r0
	std	r20,$FRAME-8*12($sp)
	std	r21,$FRAME-8*11($sp)
	std	r22,$FRAME-8*10($sp)
	std	r23,$FRAME-8*9($sp)
	std	r24,$FRAME-8*8($sp)
	std	r25,$FRAME-8*7($sp)
	std	r26,$FRAME-8*6($sp)
	std	r27,$FRAME-8*5($sp)
	std	r28,$FRAME-8*4($sp)
	std	r29,$FRAME-8*3($sp)
	std	r30,$FRAME-8*2($sp)
	std	r31,$FRAME-8*1($sp)

	li	$poly1,-1
	srdi	$poly1,$poly1,32	# 0x00000000ffffffff
	li	$poly3,1
	orc	$poly3,$poly3,$poly1	# 0xffffffff00000001
.Ldouble_shortcut:
	ld	$acc0,32($ap)
	ld	$acc1,40($ap)
	ld	$acc2,48($ap)
	ld	$acc3,56($ap)
	mr	$t0,$acc0
	mr	$t1,$acc1
	mr	$t2,$acc2
	mr	$t3,$acc3
	ld	$a0,64($ap)		# forward load for p256_sqr_mont
	ld	$a1,72($ap)
	ld	$a2,80($ap)
	ld	$a3,88($ap)
	mr	$rp_real,$rp
	mr	$ap_real,$ap
	addi	$rp,$sp,$S
	bl	__ecp_nistz256_add	# p256_mul_by_2(S, in_y);

	addi	$rp,$sp,$Zsqr
	bl	__ecp_nistz256_sqr_mont	# p256_sqr_mont(Zsqr, in_z);

	ld	$t0,0($ap_real)
	ld	$t1,8($ap_real)
	ld	$t2,16($ap_real)
	ld	$t3,24($ap_real)
	mr	$a0,$acc0		# put Zsqr aside for p256_sub
	mr	$a1,$acc1
	mr	$a2,$acc2
	mr	$a3,$acc3
	addi	$rp,$sp,$M
	bl	__ecp_nistz256_add	# p256_add(M, Zsqr, in_x);

	addi	$bp,$ap_real,0
	mr	$acc0,$a0		# restore Zsqr
	mr	$acc1,$a1
	mr	$acc2,$a2
	mr	$acc3,$a3
	ld	$a0,$S+0($sp)		# forward load for p256_sqr_mont
	ld	$a1,$S+8($sp)
	ld	$a2,$S+16($sp)
	ld	$a3,$S+24($sp)
	addi	$rp,$sp,$Zsqr
	bl	__ecp_nistz256_sub_morf	# p256_sub(Zsqr, in_x, Zsqr);

	addi	$rp,$sp,$S
	bl	__ecp_nistz256_sqr_mont	# p256_sqr_mont(S, S);

	ld	$bi,32($ap_real)
	ld	$a0,64($ap_real)
	ld	$a1,72($ap_real)
	ld	$a2,80($ap_real)
	ld	$a3,88($ap_real)
	addi	$bp,$ap_real,32
	addi	$rp,$sp,$tmp0
	bl	__ecp_nistz256_mul_mont	# p256_mul_mont(tmp0, in_z, in_y);

	mr	$t0,$acc0
	mr	$t1,$acc1
	mr	$t2,$acc2
	mr	$t3,$acc3
	ld	$a0,$S+0($sp)		# forward load for p256_sqr_mont
	ld	$a1,$S+8($sp)
	ld	$a2,$S+16($sp)
	ld	$a3,$S+24($sp)
	addi	$rp,$rp_real,64
	bl	__ecp_nistz256_add	# p256_mul_by_2(res_z, tmp0);

	addi	$rp,$sp,$tmp0
	bl	__ecp_nistz256_sqr_mont	# p256_sqr_mont(tmp0, S);

	ld	$bi,$Zsqr($sp)		# forward load for p256_mul_mont
	ld	$a0,$M+0($sp)
	ld	$a1,$M+8($sp)
	ld	$a2,$M+16($sp)
	ld	$a3,$M+24($sp)
	addi	$rp,$rp_real,32
	bl	__ecp_nistz256_div_by_2	# p256_div_by_2(res_y, tmp0);

	addi	$bp,$sp,$Zsqr
	addi	$rp,$sp,$M
	bl	__ecp_nistz256_mul_mont	# p256_mul_mont(M, M, Zsqr);

	mr	$t0,$acc0		# duplicate M
	mr	$t1,$acc1
	mr	$t2,$acc2
	mr	$t3,$acc3
	mr	$a0,$acc0		# put M aside
	mr	$a1,$acc1
	mr	$a2,$acc2
	mr	$a3,$acc3
	addi	$rp,$sp,$M
	bl	__ecp_nistz256_add	# M+M, left in the accumulator
	mr	$t0,$a0			# restore M
	mr	$t1,$a1
	mr	$t2,$a2
	mr	$t3,$a3
	ld	$bi,0($ap_real)		# forward load for p256_mul_mont
	ld	$a0,$S+0($sp)
	ld	$a1,$S+8($sp)
	ld	$a2,$S+16($sp)
	ld	$a3,$S+24($sp)
	bl	__ecp_nistz256_add	# p256_mul_by_3(M, M);

	addi	$bp,$ap_real,0
	addi	$rp,$sp,$S
	bl	__ecp_nistz256_mul_mont	# p256_mul_mont(S, S, in_x);

	mr	$t0,$acc0
	mr	$t1,$acc1
	mr	$t2,$acc2
	mr	$t3,$acc3
	ld	$a0,$M+0($sp)		# forward load for p256_sqr_mont
	ld	$a1,$M+8($sp)
	ld	$a2,$M+16($sp)
	ld	$a3,$M+24($sp)
	addi	$rp,$sp,$tmp0
	bl	__ecp_nistz256_add	# p256_mul_by_2(tmp0, S);

	addi	$rp,$rp_real,0
	bl	__ecp_nistz256_sqr_mont	# p256_sqr_mont(res_x, M);

	addi	$bp,$sp,$tmp0
	bl	__ecp_nistz256_sub_from	# p256_sub(res_x, res_x, tmp0);

	addi	$bp,$sp,$S
	addi	$rp,$sp,$S
	bl	__ecp_nistz256_sub_morf	# p256_sub(S, S, res_x);

	ld	$bi,$M($sp)
	mr	$a0,$acc0		# copy S
	mr	$a1,$acc1
	mr	$a2,$acc2
	mr	$a3,$acc3
	addi	$bp,$sp,$M
	bl	__ecp_nistz256_mul_mont	# p256_mul_mont(S, S, M);

	addi	$bp,$rp_real,32
	addi	$rp,$rp_real,32
	bl	__ecp_nistz256_sub_from	# p256_sub(res_y, S, res_y);

	mtlr	r0
	ld	r20,$FRAME-8*12($sp)
	ld	r21,$FRAME-8*11($sp)
	ld	r22,$FRAME-8*10($sp)
	ld	r23,$FRAME-8*9($sp)
	ld	r24,$FRAME-8*8($sp)
	ld	r25,$FRAME-8*7($sp)
	ld	r26,$FRAME-8*6($sp)
	ld	r27,$FRAME-8*5($sp)
	ld	r28,$FRAME-8*4($sp)
	ld	r29,$FRAME-8*3($sp)
	ld	r30,$FRAME-8*2($sp)
	ld	r31,$FRAME-8*1($sp)
	addi	$sp,$sp,$FRAME
	blr
	.long	0
	.byte	0,12,4,0,0x80,12,2,0
	.long	0
.size	ecp_nistz256_point_double,.-ecp_nistz256_point_double
___
}

########################################################################
# void ecp_nistz256_point_add(P256_POINT *out,const P256_POINT *in1,
#			      const P256_POINT *in2);
if (1) {
1042e1051a39Sopenharmony_cimy $FRAME = 64 + 32*12 + 16*8; 1043e1051a39Sopenharmony_cimy ($res_x,$res_y,$res_z, 1044e1051a39Sopenharmony_ci $H,$Hsqr,$R,$Rsqr,$Hcub, 1045e1051a39Sopenharmony_ci $U1,$U2,$S1,$S2)=map(64+32*$_,(0..11)); 1046e1051a39Sopenharmony_cimy ($Z1sqr, $Z2sqr) = ($Hsqr, $Rsqr); 1047e1051a39Sopenharmony_ci# above map() describes stack layout with 12 temporary 1048e1051a39Sopenharmony_ci# 256-bit vectors on top. 1049e1051a39Sopenharmony_cimy ($rp_real,$ap_real,$bp_real,$in1infty,$in2infty,$temp)=map("r$_",(16..21)); 1050e1051a39Sopenharmony_ci 1051e1051a39Sopenharmony_ci$code.=<<___; 1052e1051a39Sopenharmony_ci.globl ecp_nistz256_point_add 1053e1051a39Sopenharmony_ci.align 5 1054e1051a39Sopenharmony_ciecp_nistz256_point_add: 1055e1051a39Sopenharmony_ci stdu $sp,-$FRAME($sp) 1056e1051a39Sopenharmony_ci mflr r0 1057e1051a39Sopenharmony_ci std r16,$FRAME-8*16($sp) 1058e1051a39Sopenharmony_ci std r17,$FRAME-8*15($sp) 1059e1051a39Sopenharmony_ci std r18,$FRAME-8*14($sp) 1060e1051a39Sopenharmony_ci std r19,$FRAME-8*13($sp) 1061e1051a39Sopenharmony_ci std r20,$FRAME-8*12($sp) 1062e1051a39Sopenharmony_ci std r21,$FRAME-8*11($sp) 1063e1051a39Sopenharmony_ci std r22,$FRAME-8*10($sp) 1064e1051a39Sopenharmony_ci std r23,$FRAME-8*9($sp) 1065e1051a39Sopenharmony_ci std r24,$FRAME-8*8($sp) 1066e1051a39Sopenharmony_ci std r25,$FRAME-8*7($sp) 1067e1051a39Sopenharmony_ci std r26,$FRAME-8*6($sp) 1068e1051a39Sopenharmony_ci std r27,$FRAME-8*5($sp) 1069e1051a39Sopenharmony_ci std r28,$FRAME-8*4($sp) 1070e1051a39Sopenharmony_ci std r29,$FRAME-8*3($sp) 1071e1051a39Sopenharmony_ci std r30,$FRAME-8*2($sp) 1072e1051a39Sopenharmony_ci std r31,$FRAME-8*1($sp) 1073e1051a39Sopenharmony_ci 1074e1051a39Sopenharmony_ci li $poly1,-1 1075e1051a39Sopenharmony_ci srdi $poly1,$poly1,32 # 0x00000000ffffffff 1076e1051a39Sopenharmony_ci li $poly3,1 1077e1051a39Sopenharmony_ci orc $poly3,$poly3,$poly1 # 0xffffffff00000001 1078e1051a39Sopenharmony_ci 1079e1051a39Sopenharmony_ci ld 
$a0,64($bp) # in2_z 1080e1051a39Sopenharmony_ci ld $a1,72($bp) 1081e1051a39Sopenharmony_ci ld $a2,80($bp) 1082e1051a39Sopenharmony_ci ld $a3,88($bp) 1083e1051a39Sopenharmony_ci mr $rp_real,$rp 1084e1051a39Sopenharmony_ci mr $ap_real,$ap 1085e1051a39Sopenharmony_ci mr $bp_real,$bp 1086e1051a39Sopenharmony_ci or $t0,$a0,$a1 1087e1051a39Sopenharmony_ci or $t2,$a2,$a3 1088e1051a39Sopenharmony_ci or $in2infty,$t0,$t2 1089e1051a39Sopenharmony_ci neg $t0,$in2infty 1090e1051a39Sopenharmony_ci or $in2infty,$in2infty,$t0 1091e1051a39Sopenharmony_ci sradi $in2infty,$in2infty,63 # !in2infty 1092e1051a39Sopenharmony_ci addi $rp,$sp,$Z2sqr 1093e1051a39Sopenharmony_ci bl __ecp_nistz256_sqr_mont # p256_sqr_mont(Z2sqr, in2_z); 1094e1051a39Sopenharmony_ci 1095e1051a39Sopenharmony_ci ld $a0,64($ap_real) # in1_z 1096e1051a39Sopenharmony_ci ld $a1,72($ap_real) 1097e1051a39Sopenharmony_ci ld $a2,80($ap_real) 1098e1051a39Sopenharmony_ci ld $a3,88($ap_real) 1099e1051a39Sopenharmony_ci or $t0,$a0,$a1 1100e1051a39Sopenharmony_ci or $t2,$a2,$a3 1101e1051a39Sopenharmony_ci or $in1infty,$t0,$t2 1102e1051a39Sopenharmony_ci neg $t0,$in1infty 1103e1051a39Sopenharmony_ci or $in1infty,$in1infty,$t0 1104e1051a39Sopenharmony_ci sradi $in1infty,$in1infty,63 # !in1infty 1105e1051a39Sopenharmony_ci addi $rp,$sp,$Z1sqr 1106e1051a39Sopenharmony_ci bl __ecp_nistz256_sqr_mont # p256_sqr_mont(Z1sqr, in1_z); 1107e1051a39Sopenharmony_ci 1108e1051a39Sopenharmony_ci ld $bi,64($bp_real) 1109e1051a39Sopenharmony_ci ld $a0,$Z2sqr+0($sp) 1110e1051a39Sopenharmony_ci ld $a1,$Z2sqr+8($sp) 1111e1051a39Sopenharmony_ci ld $a2,$Z2sqr+16($sp) 1112e1051a39Sopenharmony_ci ld $a3,$Z2sqr+24($sp) 1113e1051a39Sopenharmony_ci addi $bp,$bp_real,64 1114e1051a39Sopenharmony_ci addi $rp,$sp,$S1 1115e1051a39Sopenharmony_ci bl __ecp_nistz256_mul_mont # p256_mul_mont(S1, Z2sqr, in2_z); 1116e1051a39Sopenharmony_ci 1117e1051a39Sopenharmony_ci ld $bi,64($ap_real) 1118e1051a39Sopenharmony_ci ld $a0,$Z1sqr+0($sp) 1119e1051a39Sopenharmony_ci 
ld $a1,$Z1sqr+8($sp) 1120e1051a39Sopenharmony_ci ld $a2,$Z1sqr+16($sp) 1121e1051a39Sopenharmony_ci ld $a3,$Z1sqr+24($sp) 1122e1051a39Sopenharmony_ci addi $bp,$ap_real,64 1123e1051a39Sopenharmony_ci addi $rp,$sp,$S2 1124e1051a39Sopenharmony_ci bl __ecp_nistz256_mul_mont # p256_mul_mont(S2, Z1sqr, in1_z); 1125e1051a39Sopenharmony_ci 1126e1051a39Sopenharmony_ci ld $bi,32($ap_real) 1127e1051a39Sopenharmony_ci ld $a0,$S1+0($sp) 1128e1051a39Sopenharmony_ci ld $a1,$S1+8($sp) 1129e1051a39Sopenharmony_ci ld $a2,$S1+16($sp) 1130e1051a39Sopenharmony_ci ld $a3,$S1+24($sp) 1131e1051a39Sopenharmony_ci addi $bp,$ap_real,32 1132e1051a39Sopenharmony_ci addi $rp,$sp,$S1 1133e1051a39Sopenharmony_ci bl __ecp_nistz256_mul_mont # p256_mul_mont(S1, S1, in1_y); 1134e1051a39Sopenharmony_ci 1135e1051a39Sopenharmony_ci ld $bi,32($bp_real) 1136e1051a39Sopenharmony_ci ld $a0,$S2+0($sp) 1137e1051a39Sopenharmony_ci ld $a1,$S2+8($sp) 1138e1051a39Sopenharmony_ci ld $a2,$S2+16($sp) 1139e1051a39Sopenharmony_ci ld $a3,$S2+24($sp) 1140e1051a39Sopenharmony_ci addi $bp,$bp_real,32 1141e1051a39Sopenharmony_ci addi $rp,$sp,$S2 1142e1051a39Sopenharmony_ci bl __ecp_nistz256_mul_mont # p256_mul_mont(S2, S2, in2_y); 1143e1051a39Sopenharmony_ci 1144e1051a39Sopenharmony_ci addi $bp,$sp,$S1 1145e1051a39Sopenharmony_ci ld $bi,$Z2sqr($sp) # forward load for p256_mul_mont 1146e1051a39Sopenharmony_ci ld $a0,0($ap_real) 1147e1051a39Sopenharmony_ci ld $a1,8($ap_real) 1148e1051a39Sopenharmony_ci ld $a2,16($ap_real) 1149e1051a39Sopenharmony_ci ld $a3,24($ap_real) 1150e1051a39Sopenharmony_ci addi $rp,$sp,$R 1151e1051a39Sopenharmony_ci bl __ecp_nistz256_sub_from # p256_sub(R, S2, S1); 1152e1051a39Sopenharmony_ci 1153e1051a39Sopenharmony_ci or $acc0,$acc0,$acc1 # see if result is zero 1154e1051a39Sopenharmony_ci or $acc2,$acc2,$acc3 1155e1051a39Sopenharmony_ci or $temp,$acc0,$acc2 1156e1051a39Sopenharmony_ci 1157e1051a39Sopenharmony_ci addi $bp,$sp,$Z2sqr 1158e1051a39Sopenharmony_ci addi $rp,$sp,$U1 
1159e1051a39Sopenharmony_ci bl __ecp_nistz256_mul_mont # p256_mul_mont(U1, in1_x, Z2sqr); 1160e1051a39Sopenharmony_ci 1161e1051a39Sopenharmony_ci ld $bi,$Z1sqr($sp) 1162e1051a39Sopenharmony_ci ld $a0,0($bp_real) 1163e1051a39Sopenharmony_ci ld $a1,8($bp_real) 1164e1051a39Sopenharmony_ci ld $a2,16($bp_real) 1165e1051a39Sopenharmony_ci ld $a3,24($bp_real) 1166e1051a39Sopenharmony_ci addi $bp,$sp,$Z1sqr 1167e1051a39Sopenharmony_ci addi $rp,$sp,$U2 1168e1051a39Sopenharmony_ci bl __ecp_nistz256_mul_mont # p256_mul_mont(U2, in2_x, Z1sqr); 1169e1051a39Sopenharmony_ci 1170e1051a39Sopenharmony_ci addi $bp,$sp,$U1 1171e1051a39Sopenharmony_ci ld $a0,$R+0($sp) # forward load for p256_sqr_mont 1172e1051a39Sopenharmony_ci ld $a1,$R+8($sp) 1173e1051a39Sopenharmony_ci ld $a2,$R+16($sp) 1174e1051a39Sopenharmony_ci ld $a3,$R+24($sp) 1175e1051a39Sopenharmony_ci addi $rp,$sp,$H 1176e1051a39Sopenharmony_ci bl __ecp_nistz256_sub_from # p256_sub(H, U2, U1); 1177e1051a39Sopenharmony_ci 1178e1051a39Sopenharmony_ci or $acc0,$acc0,$acc1 # see if result is zero 1179e1051a39Sopenharmony_ci or $acc2,$acc2,$acc3 1180e1051a39Sopenharmony_ci or. $acc0,$acc0,$acc2 1181e1051a39Sopenharmony_ci bne .Ladd_proceed # is_equal(U1,U2)? 1182e1051a39Sopenharmony_ci 1183e1051a39Sopenharmony_ci and. $t0,$in1infty,$in2infty 1184e1051a39Sopenharmony_ci beq .Ladd_proceed # (in1infty || in2infty)? 1185e1051a39Sopenharmony_ci 1186e1051a39Sopenharmony_ci cmpldi $temp,0 1187e1051a39Sopenharmony_ci beq .Ladd_double # is_equal(S1,S2)? 
1188e1051a39Sopenharmony_ci 1189e1051a39Sopenharmony_ci xor $a0,$a0,$a0 1190e1051a39Sopenharmony_ci std $a0,0($rp_real) 1191e1051a39Sopenharmony_ci std $a0,8($rp_real) 1192e1051a39Sopenharmony_ci std $a0,16($rp_real) 1193e1051a39Sopenharmony_ci std $a0,24($rp_real) 1194e1051a39Sopenharmony_ci std $a0,32($rp_real) 1195e1051a39Sopenharmony_ci std $a0,40($rp_real) 1196e1051a39Sopenharmony_ci std $a0,48($rp_real) 1197e1051a39Sopenharmony_ci std $a0,56($rp_real) 1198e1051a39Sopenharmony_ci std $a0,64($rp_real) 1199e1051a39Sopenharmony_ci std $a0,72($rp_real) 1200e1051a39Sopenharmony_ci std $a0,80($rp_real) 1201e1051a39Sopenharmony_ci std $a0,88($rp_real) 1202e1051a39Sopenharmony_ci b .Ladd_done 1203e1051a39Sopenharmony_ci 1204e1051a39Sopenharmony_ci.align 4 1205e1051a39Sopenharmony_ci.Ladd_double: 1206e1051a39Sopenharmony_ci ld $bp,0($sp) # back-link 1207e1051a39Sopenharmony_ci mr $ap,$ap_real 1208e1051a39Sopenharmony_ci mr $rp,$rp_real 1209e1051a39Sopenharmony_ci ld r16,$FRAME-8*16($sp) 1210e1051a39Sopenharmony_ci ld r17,$FRAME-8*15($sp) 1211e1051a39Sopenharmony_ci ld r18,$FRAME-8*14($sp) 1212e1051a39Sopenharmony_ci ld r19,$FRAME-8*13($sp) 1213e1051a39Sopenharmony_ci stdu $bp,$FRAME-288($sp) # difference in stack frame sizes 1214e1051a39Sopenharmony_ci b .Ldouble_shortcut 1215e1051a39Sopenharmony_ci 1216e1051a39Sopenharmony_ci.align 4 1217e1051a39Sopenharmony_ci.Ladd_proceed: 1218e1051a39Sopenharmony_ci addi $rp,$sp,$Rsqr 1219e1051a39Sopenharmony_ci bl __ecp_nistz256_sqr_mont # p256_sqr_mont(Rsqr, R); 1220e1051a39Sopenharmony_ci 1221e1051a39Sopenharmony_ci ld $bi,64($ap_real) 1222e1051a39Sopenharmony_ci ld $a0,$H+0($sp) 1223e1051a39Sopenharmony_ci ld $a1,$H+8($sp) 1224e1051a39Sopenharmony_ci ld $a2,$H+16($sp) 1225e1051a39Sopenharmony_ci ld $a3,$H+24($sp) 1226e1051a39Sopenharmony_ci addi $bp,$ap_real,64 1227e1051a39Sopenharmony_ci addi $rp,$sp,$res_z 1228e1051a39Sopenharmony_ci bl __ecp_nistz256_mul_mont # p256_mul_mont(res_z, H, in1_z); 1229e1051a39Sopenharmony_ci 
1230e1051a39Sopenharmony_ci ld $a0,$H+0($sp) 1231e1051a39Sopenharmony_ci ld $a1,$H+8($sp) 1232e1051a39Sopenharmony_ci ld $a2,$H+16($sp) 1233e1051a39Sopenharmony_ci ld $a3,$H+24($sp) 1234e1051a39Sopenharmony_ci addi $rp,$sp,$Hsqr 1235e1051a39Sopenharmony_ci bl __ecp_nistz256_sqr_mont # p256_sqr_mont(Hsqr, H); 1236e1051a39Sopenharmony_ci 1237e1051a39Sopenharmony_ci ld $bi,64($bp_real) 1238e1051a39Sopenharmony_ci ld $a0,$res_z+0($sp) 1239e1051a39Sopenharmony_ci ld $a1,$res_z+8($sp) 1240e1051a39Sopenharmony_ci ld $a2,$res_z+16($sp) 1241e1051a39Sopenharmony_ci ld $a3,$res_z+24($sp) 1242e1051a39Sopenharmony_ci addi $bp,$bp_real,64 1243e1051a39Sopenharmony_ci addi $rp,$sp,$res_z 1244e1051a39Sopenharmony_ci bl __ecp_nistz256_mul_mont # p256_mul_mont(res_z, res_z, in2_z); 1245e1051a39Sopenharmony_ci 1246e1051a39Sopenharmony_ci ld $bi,$H($sp) 1247e1051a39Sopenharmony_ci ld $a0,$Hsqr+0($sp) 1248e1051a39Sopenharmony_ci ld $a1,$Hsqr+8($sp) 1249e1051a39Sopenharmony_ci ld $a2,$Hsqr+16($sp) 1250e1051a39Sopenharmony_ci ld $a3,$Hsqr+24($sp) 1251e1051a39Sopenharmony_ci addi $bp,$sp,$H 1252e1051a39Sopenharmony_ci addi $rp,$sp,$Hcub 1253e1051a39Sopenharmony_ci bl __ecp_nistz256_mul_mont # p256_mul_mont(Hcub, Hsqr, H); 1254e1051a39Sopenharmony_ci 1255e1051a39Sopenharmony_ci ld $bi,$Hsqr($sp) 1256e1051a39Sopenharmony_ci ld $a0,$U1+0($sp) 1257e1051a39Sopenharmony_ci ld $a1,$U1+8($sp) 1258e1051a39Sopenharmony_ci ld $a2,$U1+16($sp) 1259e1051a39Sopenharmony_ci ld $a3,$U1+24($sp) 1260e1051a39Sopenharmony_ci addi $bp,$sp,$Hsqr 1261e1051a39Sopenharmony_ci addi $rp,$sp,$U2 1262e1051a39Sopenharmony_ci bl __ecp_nistz256_mul_mont # p256_mul_mont(U2, U1, Hsqr); 1263e1051a39Sopenharmony_ci 1264e1051a39Sopenharmony_ci mr $t0,$acc0 1265e1051a39Sopenharmony_ci mr $t1,$acc1 1266e1051a39Sopenharmony_ci mr $t2,$acc2 1267e1051a39Sopenharmony_ci mr $t3,$acc3 1268e1051a39Sopenharmony_ci addi $rp,$sp,$Hsqr 1269e1051a39Sopenharmony_ci bl __ecp_nistz256_add # p256_mul_by_2(Hsqr, U2); 1270e1051a39Sopenharmony_ci 
1271e1051a39Sopenharmony_ci addi $bp,$sp,$Rsqr 1272e1051a39Sopenharmony_ci addi $rp,$sp,$res_x 1273e1051a39Sopenharmony_ci bl __ecp_nistz256_sub_morf # p256_sub(res_x, Rsqr, Hsqr); 1274e1051a39Sopenharmony_ci 1275e1051a39Sopenharmony_ci addi $bp,$sp,$Hcub 1276e1051a39Sopenharmony_ci bl __ecp_nistz256_sub_from # p256_sub(res_x, res_x, Hcub); 1277e1051a39Sopenharmony_ci 1278e1051a39Sopenharmony_ci addi $bp,$sp,$U2 1279e1051a39Sopenharmony_ci ld $bi,$Hcub($sp) # forward load for p256_mul_mont 1280e1051a39Sopenharmony_ci ld $a0,$S1+0($sp) 1281e1051a39Sopenharmony_ci ld $a1,$S1+8($sp) 1282e1051a39Sopenharmony_ci ld $a2,$S1+16($sp) 1283e1051a39Sopenharmony_ci ld $a3,$S1+24($sp) 1284e1051a39Sopenharmony_ci addi $rp,$sp,$res_y 1285e1051a39Sopenharmony_ci bl __ecp_nistz256_sub_morf # p256_sub(res_y, U2, res_x); 1286e1051a39Sopenharmony_ci 1287e1051a39Sopenharmony_ci addi $bp,$sp,$Hcub 1288e1051a39Sopenharmony_ci addi $rp,$sp,$S2 1289e1051a39Sopenharmony_ci bl __ecp_nistz256_mul_mont # p256_mul_mont(S2, S1, Hcub); 1290e1051a39Sopenharmony_ci 1291e1051a39Sopenharmony_ci ld $bi,$R($sp) 1292e1051a39Sopenharmony_ci ld $a0,$res_y+0($sp) 1293e1051a39Sopenharmony_ci ld $a1,$res_y+8($sp) 1294e1051a39Sopenharmony_ci ld $a2,$res_y+16($sp) 1295e1051a39Sopenharmony_ci ld $a3,$res_y+24($sp) 1296e1051a39Sopenharmony_ci addi $bp,$sp,$R 1297e1051a39Sopenharmony_ci addi $rp,$sp,$res_y 1298e1051a39Sopenharmony_ci bl __ecp_nistz256_mul_mont # p256_mul_mont(res_y, res_y, R); 1299e1051a39Sopenharmony_ci 1300e1051a39Sopenharmony_ci addi $bp,$sp,$S2 1301e1051a39Sopenharmony_ci bl __ecp_nistz256_sub_from # p256_sub(res_y, res_y, S2); 1302e1051a39Sopenharmony_ci 1303e1051a39Sopenharmony_ci ld $t0,0($bp_real) # in2 1304e1051a39Sopenharmony_ci ld $t1,8($bp_real) 1305e1051a39Sopenharmony_ci ld $t2,16($bp_real) 1306e1051a39Sopenharmony_ci ld $t3,24($bp_real) 1307e1051a39Sopenharmony_ci ld $a0,$res_x+0($sp) # res 1308e1051a39Sopenharmony_ci ld $a1,$res_x+8($sp) 1309e1051a39Sopenharmony_ci ld 
$a2,$res_x+16($sp) 1310e1051a39Sopenharmony_ci ld $a3,$res_x+24($sp) 1311e1051a39Sopenharmony_ci___ 1312e1051a39Sopenharmony_cifor($i=0;$i<64;$i+=32) { # conditional moves 1313e1051a39Sopenharmony_ci$code.=<<___; 1314e1051a39Sopenharmony_ci ld $acc0,$i+0($ap_real) # in1 1315e1051a39Sopenharmony_ci ld $acc1,$i+8($ap_real) 1316e1051a39Sopenharmony_ci ld $acc2,$i+16($ap_real) 1317e1051a39Sopenharmony_ci ld $acc3,$i+24($ap_real) 1318e1051a39Sopenharmony_ci andc $t0,$t0,$in1infty 1319e1051a39Sopenharmony_ci andc $t1,$t1,$in1infty 1320e1051a39Sopenharmony_ci andc $t2,$t2,$in1infty 1321e1051a39Sopenharmony_ci andc $t3,$t3,$in1infty 1322e1051a39Sopenharmony_ci and $a0,$a0,$in1infty 1323e1051a39Sopenharmony_ci and $a1,$a1,$in1infty 1324e1051a39Sopenharmony_ci and $a2,$a2,$in1infty 1325e1051a39Sopenharmony_ci and $a3,$a3,$in1infty 1326e1051a39Sopenharmony_ci or $t0,$t0,$a0 1327e1051a39Sopenharmony_ci or $t1,$t1,$a1 1328e1051a39Sopenharmony_ci or $t2,$t2,$a2 1329e1051a39Sopenharmony_ci or $t3,$t3,$a3 1330e1051a39Sopenharmony_ci andc $acc0,$acc0,$in2infty 1331e1051a39Sopenharmony_ci andc $acc1,$acc1,$in2infty 1332e1051a39Sopenharmony_ci andc $acc2,$acc2,$in2infty 1333e1051a39Sopenharmony_ci andc $acc3,$acc3,$in2infty 1334e1051a39Sopenharmony_ci and $t0,$t0,$in2infty 1335e1051a39Sopenharmony_ci and $t1,$t1,$in2infty 1336e1051a39Sopenharmony_ci and $t2,$t2,$in2infty 1337e1051a39Sopenharmony_ci and $t3,$t3,$in2infty 1338e1051a39Sopenharmony_ci or $acc0,$acc0,$t0 1339e1051a39Sopenharmony_ci or $acc1,$acc1,$t1 1340e1051a39Sopenharmony_ci or $acc2,$acc2,$t2 1341e1051a39Sopenharmony_ci or $acc3,$acc3,$t3 1342e1051a39Sopenharmony_ci 1343e1051a39Sopenharmony_ci ld $t0,$i+32($bp_real) # in2 1344e1051a39Sopenharmony_ci ld $t1,$i+40($bp_real) 1345e1051a39Sopenharmony_ci ld $t2,$i+48($bp_real) 1346e1051a39Sopenharmony_ci ld $t3,$i+56($bp_real) 1347e1051a39Sopenharmony_ci ld $a0,$res_x+$i+32($sp) 1348e1051a39Sopenharmony_ci ld $a1,$res_x+$i+40($sp) 1349e1051a39Sopenharmony_ci ld 
$a2,$res_x+$i+48($sp) 1350e1051a39Sopenharmony_ci ld $a3,$res_x+$i+56($sp) 1351e1051a39Sopenharmony_ci std $acc0,$i+0($rp_real) 1352e1051a39Sopenharmony_ci std $acc1,$i+8($rp_real) 1353e1051a39Sopenharmony_ci std $acc2,$i+16($rp_real) 1354e1051a39Sopenharmony_ci std $acc3,$i+24($rp_real) 1355e1051a39Sopenharmony_ci___ 1356e1051a39Sopenharmony_ci} 1357e1051a39Sopenharmony_ci$code.=<<___; 1358e1051a39Sopenharmony_ci ld $acc0,$i+0($ap_real) # in1 1359e1051a39Sopenharmony_ci ld $acc1,$i+8($ap_real) 1360e1051a39Sopenharmony_ci ld $acc2,$i+16($ap_real) 1361e1051a39Sopenharmony_ci ld $acc3,$i+24($ap_real) 1362e1051a39Sopenharmony_ci andc $t0,$t0,$in1infty 1363e1051a39Sopenharmony_ci andc $t1,$t1,$in1infty 1364e1051a39Sopenharmony_ci andc $t2,$t2,$in1infty 1365e1051a39Sopenharmony_ci andc $t3,$t3,$in1infty 1366e1051a39Sopenharmony_ci and $a0,$a0,$in1infty 1367e1051a39Sopenharmony_ci and $a1,$a1,$in1infty 1368e1051a39Sopenharmony_ci and $a2,$a2,$in1infty 1369e1051a39Sopenharmony_ci and $a3,$a3,$in1infty 1370e1051a39Sopenharmony_ci or $t0,$t0,$a0 1371e1051a39Sopenharmony_ci or $t1,$t1,$a1 1372e1051a39Sopenharmony_ci or $t2,$t2,$a2 1373e1051a39Sopenharmony_ci or $t3,$t3,$a3 1374e1051a39Sopenharmony_ci andc $acc0,$acc0,$in2infty 1375e1051a39Sopenharmony_ci andc $acc1,$acc1,$in2infty 1376e1051a39Sopenharmony_ci andc $acc2,$acc2,$in2infty 1377e1051a39Sopenharmony_ci andc $acc3,$acc3,$in2infty 1378e1051a39Sopenharmony_ci and $t0,$t0,$in2infty 1379e1051a39Sopenharmony_ci and $t1,$t1,$in2infty 1380e1051a39Sopenharmony_ci and $t2,$t2,$in2infty 1381e1051a39Sopenharmony_ci and $t3,$t3,$in2infty 1382e1051a39Sopenharmony_ci or $acc0,$acc0,$t0 1383e1051a39Sopenharmony_ci or $acc1,$acc1,$t1 1384e1051a39Sopenharmony_ci or $acc2,$acc2,$t2 1385e1051a39Sopenharmony_ci or $acc3,$acc3,$t3 1386e1051a39Sopenharmony_ci std $acc0,$i+0($rp_real) 1387e1051a39Sopenharmony_ci std $acc1,$i+8($rp_real) 1388e1051a39Sopenharmony_ci std $acc2,$i+16($rp_real) 1389e1051a39Sopenharmony_ci std 
$acc3,$i+24($rp_real)

.Ladd_done:
	mtlr	r0
	ld	r16,$FRAME-8*16($sp)
	ld	r17,$FRAME-8*15($sp)
	ld	r18,$FRAME-8*14($sp)
	ld	r19,$FRAME-8*13($sp)
	ld	r20,$FRAME-8*12($sp)
	ld	r21,$FRAME-8*11($sp)
	ld	r22,$FRAME-8*10($sp)
	ld	r23,$FRAME-8*9($sp)
	ld	r24,$FRAME-8*8($sp)
	ld	r25,$FRAME-8*7($sp)
	ld	r26,$FRAME-8*6($sp)
	ld	r27,$FRAME-8*5($sp)
	ld	r28,$FRAME-8*4($sp)
	ld	r29,$FRAME-8*3($sp)
	ld	r30,$FRAME-8*2($sp)
	ld	r31,$FRAME-8*1($sp)
	addi	$sp,$sp,$FRAME
	blr
	.long	0
	.byte	0,12,4,0,0x80,16,3,0
	.long	0
.size	ecp_nistz256_point_add,.-ecp_nistz256_point_add
___
}

########################################################################
# void ecp_nistz256_point_add_affine(P256_POINT *out,const P256_POINT *in1,
#				      const P256_POINT_AFFINE *in2);
#
# Emits the assembly for mixed addition: in1 carries X, Y and Z (Z is read
# from byte offset 64), in2 carries only X and Y (byte offsets 0 and 32).
#
# Outline of the generated code:
#   1. Build two all-ones/all-zero masks. The first is all-ones when any
#      word of in1_z is non-zero, the second when any word of in2_x or
#      in2_y is non-zero (see the "!in1infty" / "!in2infty" notes below).
#   2. Run the sequence of __ecp_nistz256_* helper calls annotated inline
#      with their p256_* equivalents, leaving res_x, res_y and res_z in
#      the stack frame.
#   3. For each of the three 32-byte output coordinates, select without
#      branching:
#          second mask clear -> copy in1
#          first mask clear  -> copy in2 (with the Lone_mont constant
#                               standing in for the missing Z of in2)
#          otherwise         -> copy the computed result
#      and store the selection to out.
if (1) {
# Frame: 64 bytes at the bottom, ten 32-byte temporaries, then sixteen
# 8-byte slots in which r16-r31 are saved ($FRAME-8*16 .. $FRAME-8*1).
my $FRAME = 64 + 32*10 + 16*8;
my ($res_x,$res_y,$res_z,
    $U2,$S2,$H,$R,$Hsqr,$Hcub,$Rsqr)=map(64+32*$_,(0..9));
# Z1sqr shares the S2 slot: it is last read as an operand of the call
# that overwrites that slot with S2.
my $Z1sqr = $S2;
# above map() describes stack layout with 10 temporary
# 256-bit vectors on top.
# Copies of the three pointer arguments plus the two masks live in
# r16-r20 for the whole function. $temp (r21) is declared for symmetry
# with the register range but is not referenced in this function's body.
my ($rp_real,$ap_real,$bp_real,$in1infty,$in2infty,$temp)=map("r$_",(16..21));

$code.=<<___;
.globl	ecp_nistz256_point_add_affine
.align	5
ecp_nistz256_point_add_affine:
	stdu	$sp,-$FRAME($sp)
	mflr	r0
	std	r16,$FRAME-8*16($sp)
	std	r17,$FRAME-8*15($sp)
	std	r18,$FRAME-8*14($sp)
	std	r19,$FRAME-8*13($sp)
	std	r20,$FRAME-8*12($sp)
	std	r21,$FRAME-8*11($sp)
	std	r22,$FRAME-8*10($sp)
	std	r23,$FRAME-8*9($sp)
	std	r24,$FRAME-8*8($sp)
	std	r25,$FRAME-8*7($sp)
	std	r26,$FRAME-8*6($sp)
	std	r27,$FRAME-8*5($sp)
	std	r28,$FRAME-8*4($sp)
	std	r29,$FRAME-8*3($sp)
	std	r30,$FRAME-8*2($sp)
	std	r31,$FRAME-8*1($sp)

	li	$poly1,-1
	srdi	$poly1,$poly1,32	# 0x00000000ffffffff
	li	$poly3,1
	orc	$poly3,$poly3,$poly1	# 0xffffffff00000001

	mr	$rp_real,$rp
	mr	$ap_real,$ap
	mr	$bp_real,$bp

	ld	$a0,64($ap)		# in1_z
	ld	$a1,72($ap)
	ld	$a2,80($ap)
	ld	$a3,88($ap)
	or	$t0,$a0,$a1
	or	$t2,$a2,$a3
	or	$in1infty,$t0,$t2
	# x | -x has its top bit set exactly when x is non-zero, so the
	# arithmetic shift below yields all-ones or all-zeros
	neg	$t0,$in1infty
	or	$in1infty,$in1infty,$t0
	sradi	$in1infty,$in1infty,63	# !in1infty

	ld	$acc0,0($bp)		# in2_x
	ld	$acc1,8($bp)
	ld	$acc2,16($bp)
	ld	$acc3,24($bp)
	ld	$t0,32($bp)		# in2_y
	ld	$t1,40($bp)
	ld	$t2,48($bp)
	ld	$t3,56($bp)
	or	$acc0,$acc0,$acc1
	or	$acc2,$acc2,$acc3
	or	$acc0,$acc0,$acc2
	or	$t0,$t0,$t1
	or	$t2,$t2,$t3
	or	$t0,$t0,$t2
	or	$in2infty,$acc0,$t0
	neg	$t0,$in2infty
	or	$in2infty,$in2infty,$t0
	sradi	$in2infty,$in2infty,63	# !in2infty

	addi	$rp,$sp,$Z1sqr
	bl	__ecp_nistz256_sqr_mont	# p256_sqr_mont(Z1sqr, in1_z);

	# Z1sqr is taken from the accumulator registers rather than
	# reloaded from the frame
	mr	$a0,$acc0
	mr	$a1,$acc1
	mr	$a2,$acc2
	mr	$a3,$acc3
	ld	$bi,0($bp_real)
	addi	$bp,$bp_real,0
	addi	$rp,$sp,$U2
	bl	__ecp_nistz256_mul_mont	# p256_mul_mont(U2, Z1sqr, in2_x);

	addi	$bp,$ap_real,0
	ld	$bi,64($ap_real)	# forward load for p256_mul_mont
	ld	$a0,$Z1sqr+0($sp)
	ld	$a1,$Z1sqr+8($sp)
	ld	$a2,$Z1sqr+16($sp)
	ld	$a3,$Z1sqr+24($sp)
	addi	$rp,$sp,$H
	bl	__ecp_nistz256_sub_from	# p256_sub(H, U2, in1_x);

	addi	$bp,$ap_real,64
	addi	$rp,$sp,$S2
	bl	__ecp_nistz256_mul_mont	# p256_mul_mont(S2, Z1sqr, in1_z);

	ld	$bi,64($ap_real)
	ld	$a0,$H+0($sp)
	ld	$a1,$H+8($sp)
	ld	$a2,$H+16($sp)
	ld	$a3,$H+24($sp)
	addi	$bp,$ap_real,64
	addi	$rp,$sp,$res_z
	bl	__ecp_nistz256_mul_mont	# p256_mul_mont(res_z, H, in1_z);

	ld	$bi,32($bp_real)
	ld	$a0,$S2+0($sp)
	ld	$a1,$S2+8($sp)
	ld	$a2,$S2+16($sp)
	ld	$a3,$S2+24($sp)
	addi	$bp,$bp_real,32
	addi	$rp,$sp,$S2
	bl	__ecp_nistz256_mul_mont	# p256_mul_mont(S2, S2, in2_y);

	addi	$bp,$ap_real,32
	ld	$a0,$H+0($sp)		# forward load for p256_sqr_mont
	ld	$a1,$H+8($sp)
	ld	$a2,$H+16($sp)
	ld	$a3,$H+24($sp)
	addi	$rp,$sp,$R
	bl	__ecp_nistz256_sub_from	# p256_sub(R, S2, in1_y);

	addi	$rp,$sp,$Hsqr
	bl	__ecp_nistz256_sqr_mont	# p256_sqr_mont(Hsqr, H);

	ld	$a0,$R+0($sp)
	ld	$a1,$R+8($sp)
	ld	$a2,$R+16($sp)
	ld	$a3,$R+24($sp)
	addi	$rp,$sp,$Rsqr
	bl	__ecp_nistz256_sqr_mont	# p256_sqr_mont(Rsqr, R);

	ld	$bi,$H($sp)
	ld	$a0,$Hsqr+0($sp)
	ld	$a1,$Hsqr+8($sp)
	ld	$a2,$Hsqr+16($sp)
	ld	$a3,$Hsqr+24($sp)
	addi	$bp,$sp,$H
	addi	$rp,$sp,$Hcub
	bl	__ecp_nistz256_mul_mont	# p256_mul_mont(Hcub, Hsqr, H);

	ld	$bi,0($ap_real)
	ld	$a0,$Hsqr+0($sp)
	ld	$a1,$Hsqr+8($sp)
	ld	$a2,$Hsqr+16($sp)
	ld	$a3,$Hsqr+24($sp)
	addi	$bp,$ap_real,0
	addi	$rp,$sp,$U2
	bl	__ecp_nistz256_mul_mont	# p256_mul_mont(U2, in1_x, Hsqr);

	mr	$t0,$acc0
	mr	$t1,$acc1
	mr	$t2,$acc2
	mr	$t3,$acc3
	addi	$rp,$sp,$Hsqr
	bl	__ecp_nistz256_add	# p256_mul_by_2(Hsqr, U2);

	addi	$bp,$sp,$Rsqr
	addi	$rp,$sp,$res_x
	bl	__ecp_nistz256_sub_morf	# p256_sub(res_x, Rsqr, Hsqr);

	addi	$bp,$sp,$Hcub
	bl	__ecp_nistz256_sub_from	# p256_sub(res_x, res_x, Hcub);

	addi	$bp,$sp,$U2
	ld	$bi,32($ap_real)	# forward load for p256_mul_mont
	ld	$a0,$Hcub+0($sp)
	ld	$a1,$Hcub+8($sp)
	ld	$a2,$Hcub+16($sp)
	ld	$a3,$Hcub+24($sp)
	addi	$rp,$sp,$res_y
	bl	__ecp_nistz256_sub_morf	# p256_sub(res_y, U2, res_x);

	addi	$bp,$ap_real,32
	addi	$rp,$sp,$S2
	bl	__ecp_nistz256_mul_mont	# p256_mul_mont(S2, in1_y, Hcub);

	ld	$bi,$R($sp)
	ld	$a0,$res_y+0($sp)
	ld	$a1,$res_y+8($sp)
	ld	$a2,$res_y+16($sp)
	ld	$a3,$res_y+24($sp)
	addi	$bp,$sp,$R
	addi	$rp,$sp,$res_y
	bl	__ecp_nistz256_mul_mont	# p256_mul_mont(res_y, res_y, R);

	addi	$bp,$sp,$S2
	bl	__ecp_nistz256_sub_from	# p256_sub(res_y, res_y, S2);

	ld	$t0,0($bp_real)		# in2
	ld	$t1,8($bp_real)
	ld	$t2,16($bp_real)
	ld	$t3,24($bp_real)
	ld	$a0,$res_x+0($sp)	# res
	ld	$a1,$res_x+8($sp)
	ld	$a2,$res_x+16($sp)
	ld	$a3,$res_x+24($sp)
___
# The loop body is emitted twice, for the coordinates at byte offsets 0 and
# 32. Each pass selects and stores one coordinate and, in between, preloads
# the operands for the next one: the next in2 coordinate (or, on the second
# pass, the Lone_mont constant in place of a Z coordinate that in2 does not
# have) and the next 32 bytes of the computed result.
#
# $i is deliberately NOT a lexical loop variable: the heredoc after the loop
# relies on it still holding 64 so that it addresses the third coordinate.
for($i=0;$i<64;$i+=32) {		# conditional moves
$code.=<<___;
	ld	$acc0,$i+0($ap_real)	# in1
	ld	$acc1,$i+8($ap_real)
	ld	$acc2,$i+16($ap_real)
	ld	$acc3,$i+24($ap_real)
	andc	$t0,$t0,$in1infty
	andc	$t1,$t1,$in1infty
	andc	$t2,$t2,$in1infty
	andc	$t3,$t3,$in1infty
	and	$a0,$a0,$in1infty
	and	$a1,$a1,$in1infty
	and	$a2,$a2,$in1infty
	and	$a3,$a3,$in1infty
	or	$t0,$t0,$a0
	or	$t1,$t1,$a1
	or	$t2,$t2,$a2
	or	$t3,$t3,$a3
	andc	$acc0,$acc0,$in2infty
	andc	$acc1,$acc1,$in2infty
	andc	$acc2,$acc2,$in2infty
	andc	$acc3,$acc3,$in2infty
	and	$t0,$t0,$in2infty
	and	$t1,$t1,$in2infty
	and	$t2,$t2,$in2infty
	and	$t3,$t3,$in2infty
	or	$acc0,$acc0,$t0
	or	$acc1,$acc1,$t1
	or	$acc2,$acc2,$t2
	or	$acc3,$acc3,$t3
___
$code.=<<___	if ($i==0);
	ld	$t0,32($bp_real)	# in2
	ld	$t1,40($bp_real)
	ld	$t2,48($bp_real)
	ld	$t3,56($bp_real)
___
$code.=<<___	if ($i==32);
	li	$t0,1			# Lone_mont
	not	$t1,$poly1
	li	$t2,-1
	not	$t3,$poly3
___
$code.=<<___;
	ld	$a0,$res_x+$i+32($sp)
	ld	$a1,$res_x+$i+40($sp)
	ld	$a2,$res_x+$i+48($sp)
	ld	$a3,$res_x+$i+56($sp)
	std	$acc0,$i+0($rp_real)
	std	$acc1,$i+8($rp_real)
	std	$acc2,$i+16($rp_real)
	std	$acc3,$i+24($rp_real)
___
}
# Third coordinate ($i is 64 here), followed by the epilogue that restores
# the link register and r16-r31 and releases the frame.
$code.=<<___;
	ld	$acc0,$i+0($ap_real)	# in1
	ld	$acc1,$i+8($ap_real)
	ld	$acc2,$i+16($ap_real)
	ld	$acc3,$i+24($ap_real)
	andc	$t0,$t0,$in1infty
	andc	$t1,$t1,$in1infty
	andc	$t2,$t2,$in1infty
	andc	$t3,$t3,$in1infty
	and	$a0,$a0,$in1infty
	and	$a1,$a1,$in1infty
	and	$a2,$a2,$in1infty
	and	$a3,$a3,$in1infty
	or	$t0,$t0,$a0
	or	$t1,$t1,$a1
	or	$t2,$t2,$a2
	or	$t3,$t3,$a3
	andc	$acc0,$acc0,$in2infty
	andc	$acc1,$acc1,$in2infty
	andc	$acc2,$acc2,$in2infty
	andc	$acc3,$acc3,$in2infty
	and	$t0,$t0,$in2infty
	and	$t1,$t1,$in2infty
	and	$t2,$t2,$in2infty
	and	$t3,$t3,$in2infty
	or	$acc0,$acc0,$t0
	or	$acc1,$acc1,$t1
	or	$acc2,$acc2,$t2
	or	$acc3,$acc3,$t3
	std	$acc0,$i+0($rp_real)
	std	$acc1,$i+8($rp_real)
	std	$acc2,$i+16($rp_real)
	std	$acc3,$i+24($rp_real)

	mtlr	r0
	ld	r16,$FRAME-8*16($sp)
	ld	r17,$FRAME-8*15($sp)
	ld	r18,$FRAME-8*14($sp)
	ld	r19,$FRAME-8*13($sp)
	ld	r20,$FRAME-8*12($sp)
	ld	r21,$FRAME-8*11($sp)
	ld	r22,$FRAME-8*10($sp)
	ld	r23,$FRAME-8*9($sp)
	ld	r24,$FRAME-8*8($sp)
	ld	r25,$FRAME-8*7($sp)
	ld	r26,$FRAME-8*6($sp)
	ld	r27,$FRAME-8*5($sp)
	ld	r28,$FRAME-8*4($sp)
	ld	r29,$FRAME-8*3($sp)
	ld	r30,$FRAME-8*2($sp)
	ld	r31,$FRAME-8*1($sp)
	addi	$sp,$sp,$FRAME
	blr
	.long	0
	.byte	0,12,4,0,0x80,16,3,0
	.long	0
.size	ecp_nistz256_point_add_affine,.-ecp_nistz256_point_add_affine
___
}
if (1) {
my ($ordk,$ord0,$ord1,$t4) = map("r$_",(18..21));
my ($ord2,$ord3,$zr) = ($poly1,$poly3,"r0");

$code.=<<___;
########################################################################
# void	ecp_nistz256_ord_mul_mont(uint64_t res[4], uint64_t a[4],
#                                 uint64_t b[4]);
.globl	ecp_nistz256_ord_mul_mont
.align	5
ecp_nistz256_ord_mul_mont:
	stdu	$sp,-160($sp)
	std	r18,48($sp)
	std	r19,56($sp)
	std	r20,64($sp)
	std	r21,72($sp)
	std	r22,80($sp)
1749e1051a39Sopenharmony_ci std r23,88($sp) 1750e1051a39Sopenharmony_ci std r24,96($sp) 1751e1051a39Sopenharmony_ci std r25,104($sp) 1752e1051a39Sopenharmony_ci std r26,112($sp) 1753e1051a39Sopenharmony_ci std r27,120($sp) 1754e1051a39Sopenharmony_ci std r28,128($sp) 1755e1051a39Sopenharmony_ci std r29,136($sp) 1756e1051a39Sopenharmony_ci std r30,144($sp) 1757e1051a39Sopenharmony_ci std r31,152($sp) 1758e1051a39Sopenharmony_ci 1759e1051a39Sopenharmony_ci ld $a0,0($ap) 1760e1051a39Sopenharmony_ci ld $bi,0($bp) 1761e1051a39Sopenharmony_ci ld $a1,8($ap) 1762e1051a39Sopenharmony_ci ld $a2,16($ap) 1763e1051a39Sopenharmony_ci ld $a3,24($ap) 1764e1051a39Sopenharmony_ci 1765e1051a39Sopenharmony_ci lis $ordk,0xccd1 1766e1051a39Sopenharmony_ci lis $ord0,0xf3b9 1767e1051a39Sopenharmony_ci lis $ord1,0xbce6 1768e1051a39Sopenharmony_ci ori $ordk,$ordk,0xc8aa 1769e1051a39Sopenharmony_ci ori $ord0,$ord0,0xcac2 1770e1051a39Sopenharmony_ci ori $ord1,$ord1,0xfaad 1771e1051a39Sopenharmony_ci sldi $ordk,$ordk,32 1772e1051a39Sopenharmony_ci sldi $ord0,$ord0,32 1773e1051a39Sopenharmony_ci sldi $ord1,$ord1,32 1774e1051a39Sopenharmony_ci oris $ordk,$ordk,0xee00 1775e1051a39Sopenharmony_ci oris $ord0,$ord0,0xfc63 1776e1051a39Sopenharmony_ci oris $ord1,$ord1,0xa717 1777e1051a39Sopenharmony_ci ori $ordk,$ordk,0xbc4f # 0xccd1c8aaee00bc4f 1778e1051a39Sopenharmony_ci ori $ord0,$ord0,0x2551 # 0xf3b9cac2fc632551 1779e1051a39Sopenharmony_ci ori $ord1,$ord1,0x9e84 # 0xbce6faada7179e84 1780e1051a39Sopenharmony_ci li $ord2,-1 # 0xffffffffffffffff 1781e1051a39Sopenharmony_ci sldi $ord3,$ord2,32 # 0xffffffff00000000 1782e1051a39Sopenharmony_ci li $zr,0 1783e1051a39Sopenharmony_ci 1784e1051a39Sopenharmony_ci mulld $acc0,$a0,$bi # a[0]*b[0] 1785e1051a39Sopenharmony_ci mulhdu $t0,$a0,$bi 1786e1051a39Sopenharmony_ci 1787e1051a39Sopenharmony_ci mulld $acc1,$a1,$bi # a[1]*b[0] 1788e1051a39Sopenharmony_ci mulhdu $t1,$a1,$bi 1789e1051a39Sopenharmony_ci 1790e1051a39Sopenharmony_ci mulld $acc2,$a2,$bi # a[2]*b[0] 
1791e1051a39Sopenharmony_ci mulhdu $t2,$a2,$bi 1792e1051a39Sopenharmony_ci 1793e1051a39Sopenharmony_ci mulld $acc3,$a3,$bi # a[3]*b[0] 1794e1051a39Sopenharmony_ci mulhdu $acc4,$a3,$bi 1795e1051a39Sopenharmony_ci 1796e1051a39Sopenharmony_ci mulld $t4,$acc0,$ordk 1797e1051a39Sopenharmony_ci 1798e1051a39Sopenharmony_ci addc $acc1,$acc1,$t0 # accumulate high parts of multiplication 1799e1051a39Sopenharmony_ci adde $acc2,$acc2,$t1 1800e1051a39Sopenharmony_ci adde $acc3,$acc3,$t2 1801e1051a39Sopenharmony_ci addze $acc4,$acc4 1802e1051a39Sopenharmony_ci li $acc5,0 1803e1051a39Sopenharmony_ci___ 1804e1051a39Sopenharmony_cifor ($i=1;$i<4;$i++) { 1805e1051a39Sopenharmony_ci ################################################################ 1806e1051a39Sopenharmony_ci # ffff0000.ffffffff.yyyyyyyy.zzzzzzzz 1807e1051a39Sopenharmony_ci # * abcdefgh 1808e1051a39Sopenharmony_ci # + xxxxxxxx.xxxxxxxx.xxxxxxxx.xxxxxxxx.xxxxxxxx 1809e1051a39Sopenharmony_ci # 1810e1051a39Sopenharmony_ci # Now observing that ff..ff*x = (2^n-1)*x = 2^n*x-x, we 1811e1051a39Sopenharmony_ci # rewrite above as: 1812e1051a39Sopenharmony_ci # 1813e1051a39Sopenharmony_ci # xxxxxxxx.xxxxxxxx.xxxxxxxx.xxxxxxxx.xxxxxxxx 1814e1051a39Sopenharmony_ci # - 0000abcd.efgh0000.abcdefgh.00000000.00000000 1815e1051a39Sopenharmony_ci # + abcdefgh.abcdefgh.yzayzbyz.cyzdyzey.zfyzgyzh 1816e1051a39Sopenharmony_ci$code.=<<___; 1817e1051a39Sopenharmony_ci ld $bi,8*$i($bp) # b[i] 1818e1051a39Sopenharmony_ci 1819e1051a39Sopenharmony_ci sldi $t0,$t4,32 1820e1051a39Sopenharmony_ci subfc $acc2,$t4,$acc2 1821e1051a39Sopenharmony_ci srdi $t1,$t4,32 1822e1051a39Sopenharmony_ci subfe $acc3,$t0,$acc3 1823e1051a39Sopenharmony_ci subfe $acc4,$t1,$acc4 1824e1051a39Sopenharmony_ci subfe $acc5,$zr,$acc5 1825e1051a39Sopenharmony_ci 1826e1051a39Sopenharmony_ci addic $t0,$acc0,-1 # discarded 1827e1051a39Sopenharmony_ci mulhdu $t1,$ord0,$t4 1828e1051a39Sopenharmony_ci mulld $t2,$ord1,$t4 1829e1051a39Sopenharmony_ci mulhdu $t3,$ord1,$t4 
1830e1051a39Sopenharmony_ci 1831e1051a39Sopenharmony_ci adde $t2,$t2,$t1 1832e1051a39Sopenharmony_ci mulld $t0,$a0,$bi 1833e1051a39Sopenharmony_ci addze $t3,$t3 1834e1051a39Sopenharmony_ci mulld $t1,$a1,$bi 1835e1051a39Sopenharmony_ci 1836e1051a39Sopenharmony_ci addc $acc0,$acc1,$t2 1837e1051a39Sopenharmony_ci mulld $t2,$a2,$bi 1838e1051a39Sopenharmony_ci adde $acc1,$acc2,$t3 1839e1051a39Sopenharmony_ci mulld $t3,$a3,$bi 1840e1051a39Sopenharmony_ci adde $acc2,$acc3,$t4 1841e1051a39Sopenharmony_ci adde $acc3,$acc4,$t4 1842e1051a39Sopenharmony_ci addze $acc4,$acc5 1843e1051a39Sopenharmony_ci 1844e1051a39Sopenharmony_ci addc $acc0,$acc0,$t0 # accumulate low parts 1845e1051a39Sopenharmony_ci mulhdu $t0,$a0,$bi 1846e1051a39Sopenharmony_ci adde $acc1,$acc1,$t1 1847e1051a39Sopenharmony_ci mulhdu $t1,$a1,$bi 1848e1051a39Sopenharmony_ci adde $acc2,$acc2,$t2 1849e1051a39Sopenharmony_ci mulhdu $t2,$a2,$bi 1850e1051a39Sopenharmony_ci adde $acc3,$acc3,$t3 1851e1051a39Sopenharmony_ci mulhdu $t3,$a3,$bi 1852e1051a39Sopenharmony_ci addze $acc4,$acc4 1853e1051a39Sopenharmony_ci mulld $t4,$acc0,$ordk 1854e1051a39Sopenharmony_ci addc $acc1,$acc1,$t0 # accumulate high parts 1855e1051a39Sopenharmony_ci adde $acc2,$acc2,$t1 1856e1051a39Sopenharmony_ci adde $acc3,$acc3,$t2 1857e1051a39Sopenharmony_ci adde $acc4,$acc4,$t3 1858e1051a39Sopenharmony_ci addze $acc5,$zr 1859e1051a39Sopenharmony_ci___ 1860e1051a39Sopenharmony_ci} 1861e1051a39Sopenharmony_ci$code.=<<___; 1862e1051a39Sopenharmony_ci sldi $t0,$t4,32 # last reduction 1863e1051a39Sopenharmony_ci subfc $acc2,$t4,$acc2 1864e1051a39Sopenharmony_ci srdi $t1,$t4,32 1865e1051a39Sopenharmony_ci subfe $acc3,$t0,$acc3 1866e1051a39Sopenharmony_ci subfe $acc4,$t1,$acc4 1867e1051a39Sopenharmony_ci subfe $acc5,$zr,$acc5 1868e1051a39Sopenharmony_ci 1869e1051a39Sopenharmony_ci addic $t0,$acc0,-1 # discarded 1870e1051a39Sopenharmony_ci mulhdu $t1,$ord0,$t4 1871e1051a39Sopenharmony_ci mulld $t2,$ord1,$t4 1872e1051a39Sopenharmony_ci mulhdu 
$t3,$ord1,$t4 1873e1051a39Sopenharmony_ci 1874e1051a39Sopenharmony_ci adde $t2,$t2,$t1 1875e1051a39Sopenharmony_ci addze $t3,$t3 1876e1051a39Sopenharmony_ci 1877e1051a39Sopenharmony_ci addc $acc0,$acc1,$t2 1878e1051a39Sopenharmony_ci adde $acc1,$acc2,$t3 1879e1051a39Sopenharmony_ci adde $acc2,$acc3,$t4 1880e1051a39Sopenharmony_ci adde $acc3,$acc4,$t4 1881e1051a39Sopenharmony_ci addze $acc4,$acc5 1882e1051a39Sopenharmony_ci 1883e1051a39Sopenharmony_ci subfc $acc0,$ord0,$acc0 # ret -= modulus 1884e1051a39Sopenharmony_ci subfe $acc1,$ord1,$acc1 1885e1051a39Sopenharmony_ci subfe $acc2,$ord2,$acc2 1886e1051a39Sopenharmony_ci subfe $acc3,$ord3,$acc3 1887e1051a39Sopenharmony_ci subfe $acc4,$zr,$acc4 1888e1051a39Sopenharmony_ci 1889e1051a39Sopenharmony_ci and $t0,$ord0,$acc4 1890e1051a39Sopenharmony_ci and $t1,$ord1,$acc4 1891e1051a39Sopenharmony_ci addc $acc0,$acc0,$t0 # ret += modulus if borrow 1892e1051a39Sopenharmony_ci and $t3,$ord3,$acc4 1893e1051a39Sopenharmony_ci adde $acc1,$acc1,$t1 1894e1051a39Sopenharmony_ci adde $acc2,$acc2,$acc4 1895e1051a39Sopenharmony_ci adde $acc3,$acc3,$t3 1896e1051a39Sopenharmony_ci 1897e1051a39Sopenharmony_ci std $acc0,0($rp) 1898e1051a39Sopenharmony_ci std $acc1,8($rp) 1899e1051a39Sopenharmony_ci std $acc2,16($rp) 1900e1051a39Sopenharmony_ci std $acc3,24($rp) 1901e1051a39Sopenharmony_ci 1902e1051a39Sopenharmony_ci ld r18,48($sp) 1903e1051a39Sopenharmony_ci ld r19,56($sp) 1904e1051a39Sopenharmony_ci ld r20,64($sp) 1905e1051a39Sopenharmony_ci ld r21,72($sp) 1906e1051a39Sopenharmony_ci ld r22,80($sp) 1907e1051a39Sopenharmony_ci ld r23,88($sp) 1908e1051a39Sopenharmony_ci ld r24,96($sp) 1909e1051a39Sopenharmony_ci ld r25,104($sp) 1910e1051a39Sopenharmony_ci ld r26,112($sp) 1911e1051a39Sopenharmony_ci ld r27,120($sp) 1912e1051a39Sopenharmony_ci ld r28,128($sp) 1913e1051a39Sopenharmony_ci ld r29,136($sp) 1914e1051a39Sopenharmony_ci ld r30,144($sp) 1915e1051a39Sopenharmony_ci ld r31,152($sp) 1916e1051a39Sopenharmony_ci addi $sp,$sp,160 
1917e1051a39Sopenharmony_ci blr 1918e1051a39Sopenharmony_ci .long 0 1919e1051a39Sopenharmony_ci .byte 0,12,4,0,0x80,14,3,0 1920e1051a39Sopenharmony_ci .long 0 1921e1051a39Sopenharmony_ci.size ecp_nistz256_ord_mul_mont,.-ecp_nistz256_ord_mul_mont 1922e1051a39Sopenharmony_ci 1923e1051a39Sopenharmony_ci################################################################################ 1924e1051a39Sopenharmony_ci# void ecp_nistz256_ord_sqr_mont(uint64_t res[4], uint64_t a[4], 1925e1051a39Sopenharmony_ci# uint64_t rep); 1926e1051a39Sopenharmony_ci.globl ecp_nistz256_ord_sqr_mont 1927e1051a39Sopenharmony_ci.align 5 1928e1051a39Sopenharmony_ciecp_nistz256_ord_sqr_mont: 1929e1051a39Sopenharmony_ci stdu $sp,-160($sp) 1930e1051a39Sopenharmony_ci std r18,48($sp) 1931e1051a39Sopenharmony_ci std r19,56($sp) 1932e1051a39Sopenharmony_ci std r20,64($sp) 1933e1051a39Sopenharmony_ci std r21,72($sp) 1934e1051a39Sopenharmony_ci std r22,80($sp) 1935e1051a39Sopenharmony_ci std r23,88($sp) 1936e1051a39Sopenharmony_ci std r24,96($sp) 1937e1051a39Sopenharmony_ci std r25,104($sp) 1938e1051a39Sopenharmony_ci std r26,112($sp) 1939e1051a39Sopenharmony_ci std r27,120($sp) 1940e1051a39Sopenharmony_ci std r28,128($sp) 1941e1051a39Sopenharmony_ci std r29,136($sp) 1942e1051a39Sopenharmony_ci std r30,144($sp) 1943e1051a39Sopenharmony_ci std r31,152($sp) 1944e1051a39Sopenharmony_ci 1945e1051a39Sopenharmony_ci mtctr $bp 1946e1051a39Sopenharmony_ci 1947e1051a39Sopenharmony_ci ld $a0,0($ap) 1948e1051a39Sopenharmony_ci ld $a1,8($ap) 1949e1051a39Sopenharmony_ci ld $a2,16($ap) 1950e1051a39Sopenharmony_ci ld $a3,24($ap) 1951e1051a39Sopenharmony_ci 1952e1051a39Sopenharmony_ci lis $ordk,0xccd1 1953e1051a39Sopenharmony_ci lis $ord0,0xf3b9 1954e1051a39Sopenharmony_ci lis $ord1,0xbce6 1955e1051a39Sopenharmony_ci ori $ordk,$ordk,0xc8aa 1956e1051a39Sopenharmony_ci ori $ord0,$ord0,0xcac2 1957e1051a39Sopenharmony_ci ori $ord1,$ord1,0xfaad 1958e1051a39Sopenharmony_ci sldi $ordk,$ordk,32 1959e1051a39Sopenharmony_ci sldi 
$ord0,$ord0,32 1960e1051a39Sopenharmony_ci sldi $ord1,$ord1,32 1961e1051a39Sopenharmony_ci oris $ordk,$ordk,0xee00 1962e1051a39Sopenharmony_ci oris $ord0,$ord0,0xfc63 1963e1051a39Sopenharmony_ci oris $ord1,$ord1,0xa717 1964e1051a39Sopenharmony_ci ori $ordk,$ordk,0xbc4f # 0xccd1c8aaee00bc4f 1965e1051a39Sopenharmony_ci ori $ord0,$ord0,0x2551 # 0xf3b9cac2fc632551 1966e1051a39Sopenharmony_ci ori $ord1,$ord1,0x9e84 # 0xbce6faada7179e84 1967e1051a39Sopenharmony_ci li $ord2,-1 # 0xffffffffffffffff 1968e1051a39Sopenharmony_ci sldi $ord3,$ord2,32 # 0xffffffff00000000 1969e1051a39Sopenharmony_ci li $zr,0 1970e1051a39Sopenharmony_ci b .Loop_ord_sqr 1971e1051a39Sopenharmony_ci 1972e1051a39Sopenharmony_ci.align 5 1973e1051a39Sopenharmony_ci.Loop_ord_sqr: 1974e1051a39Sopenharmony_ci ################################################################ 1975e1051a39Sopenharmony_ci # | | | | | |a1*a0| | 1976e1051a39Sopenharmony_ci # | | | | |a2*a0| | | 1977e1051a39Sopenharmony_ci # | |a3*a2|a3*a0| | | | 1978e1051a39Sopenharmony_ci # | | | |a2*a1| | | | 1979e1051a39Sopenharmony_ci # | | |a3*a1| | | | | 1980e1051a39Sopenharmony_ci # *| | | | | | | | 2| 1981e1051a39Sopenharmony_ci # +|a3*a3|a2*a2|a1*a1|a0*a0| 1982e1051a39Sopenharmony_ci # |--+--+--+--+--+--+--+--| 1983e1051a39Sopenharmony_ci # |A7|A6|A5|A4|A3|A2|A1|A0|, where Ax is $accx, i.e. follow $accx 1984e1051a39Sopenharmony_ci # 1985e1051a39Sopenharmony_ci # "can't overflow" below mark carrying into high part of 1986e1051a39Sopenharmony_ci # multiplication result, which can't overflow, because it 1987e1051a39Sopenharmony_ci # can never be all ones. 
1988e1051a39Sopenharmony_ci 1989e1051a39Sopenharmony_ci mulld $acc1,$a1,$a0 # a[1]*a[0] 1990e1051a39Sopenharmony_ci mulhdu $t1,$a1,$a0 1991e1051a39Sopenharmony_ci mulld $acc2,$a2,$a0 # a[2]*a[0] 1992e1051a39Sopenharmony_ci mulhdu $t2,$a2,$a0 1993e1051a39Sopenharmony_ci mulld $acc3,$a3,$a0 # a[3]*a[0] 1994e1051a39Sopenharmony_ci mulhdu $acc4,$a3,$a0 1995e1051a39Sopenharmony_ci 1996e1051a39Sopenharmony_ci addc $acc2,$acc2,$t1 # accumulate high parts of multiplication 1997e1051a39Sopenharmony_ci mulld $t0,$a2,$a1 # a[2]*a[1] 1998e1051a39Sopenharmony_ci mulhdu $t1,$a2,$a1 1999e1051a39Sopenharmony_ci adde $acc3,$acc3,$t2 2000e1051a39Sopenharmony_ci mulld $t2,$a3,$a1 # a[3]*a[1] 2001e1051a39Sopenharmony_ci mulhdu $t3,$a3,$a1 2002e1051a39Sopenharmony_ci addze $acc4,$acc4 # can't overflow 2003e1051a39Sopenharmony_ci 2004e1051a39Sopenharmony_ci mulld $acc5,$a3,$a2 # a[3]*a[2] 2005e1051a39Sopenharmony_ci mulhdu $acc6,$a3,$a2 2006e1051a39Sopenharmony_ci 2007e1051a39Sopenharmony_ci addc $t1,$t1,$t2 # accumulate high parts of multiplication 2008e1051a39Sopenharmony_ci mulld $acc0,$a0,$a0 # a[0]*a[0] 2009e1051a39Sopenharmony_ci addze $t2,$t3 # can't overflow 2010e1051a39Sopenharmony_ci 2011e1051a39Sopenharmony_ci addc $acc3,$acc3,$t0 # accumulate low parts of multiplication 2012e1051a39Sopenharmony_ci mulhdu $a0,$a0,$a0 2013e1051a39Sopenharmony_ci adde $acc4,$acc4,$t1 2014e1051a39Sopenharmony_ci mulld $t1,$a1,$a1 # a[1]*a[1] 2015e1051a39Sopenharmony_ci adde $acc5,$acc5,$t2 2016e1051a39Sopenharmony_ci mulhdu $a1,$a1,$a1 2017e1051a39Sopenharmony_ci addze $acc6,$acc6 # can't overflow 2018e1051a39Sopenharmony_ci 2019e1051a39Sopenharmony_ci addc $acc1,$acc1,$acc1 # acc[1-6]*=2 2020e1051a39Sopenharmony_ci mulld $t2,$a2,$a2 # a[2]*a[2] 2021e1051a39Sopenharmony_ci adde $acc2,$acc2,$acc2 2022e1051a39Sopenharmony_ci mulhdu $a2,$a2,$a2 2023e1051a39Sopenharmony_ci adde $acc3,$acc3,$acc3 2024e1051a39Sopenharmony_ci mulld $t3,$a3,$a3 # a[3]*a[3] 2025e1051a39Sopenharmony_ci adde 
$acc4,$acc4,$acc4 2026e1051a39Sopenharmony_ci mulhdu $a3,$a3,$a3 2027e1051a39Sopenharmony_ci adde $acc5,$acc5,$acc5 2028e1051a39Sopenharmony_ci adde $acc6,$acc6,$acc6 2029e1051a39Sopenharmony_ci addze $acc7,$zr 2030e1051a39Sopenharmony_ci 2031e1051a39Sopenharmony_ci addc $acc1,$acc1,$a0 # +a[i]*a[i] 2032e1051a39Sopenharmony_ci mulld $t4,$acc0,$ordk 2033e1051a39Sopenharmony_ci adde $acc2,$acc2,$t1 2034e1051a39Sopenharmony_ci adde $acc3,$acc3,$a1 2035e1051a39Sopenharmony_ci adde $acc4,$acc4,$t2 2036e1051a39Sopenharmony_ci adde $acc5,$acc5,$a2 2037e1051a39Sopenharmony_ci adde $acc6,$acc6,$t3 2038e1051a39Sopenharmony_ci adde $acc7,$acc7,$a3 2039e1051a39Sopenharmony_ci___ 2040e1051a39Sopenharmony_cifor($i=0; $i<4; $i++) { # reductions 2041e1051a39Sopenharmony_ci$code.=<<___; 2042e1051a39Sopenharmony_ci addic $t0,$acc0,-1 # discarded 2043e1051a39Sopenharmony_ci mulhdu $t1,$ord0,$t4 2044e1051a39Sopenharmony_ci mulld $t2,$ord1,$t4 2045e1051a39Sopenharmony_ci mulhdu $t3,$ord1,$t4 2046e1051a39Sopenharmony_ci 2047e1051a39Sopenharmony_ci adde $t2,$t2,$t1 2048e1051a39Sopenharmony_ci addze $t3,$t3 2049e1051a39Sopenharmony_ci 2050e1051a39Sopenharmony_ci addc $acc0,$acc1,$t2 2051e1051a39Sopenharmony_ci adde $acc1,$acc2,$t3 2052e1051a39Sopenharmony_ci adde $acc2,$acc3,$t4 2053e1051a39Sopenharmony_ci adde $acc3,$zr,$t4 # can't overflow 2054e1051a39Sopenharmony_ci___ 2055e1051a39Sopenharmony_ci$code.=<<___ if ($i<3); 2056e1051a39Sopenharmony_ci mulld $t3,$acc0,$ordk 2057e1051a39Sopenharmony_ci___ 2058e1051a39Sopenharmony_ci$code.=<<___; 2059e1051a39Sopenharmony_ci sldi $t0,$t4,32 2060e1051a39Sopenharmony_ci subfc $acc1,$t4,$acc1 2061e1051a39Sopenharmony_ci srdi $t1,$t4,32 2062e1051a39Sopenharmony_ci subfe $acc2,$t0,$acc2 2063e1051a39Sopenharmony_ci subfe $acc3,$t1,$acc3 # can't borrow 2064e1051a39Sopenharmony_ci___ 2065e1051a39Sopenharmony_ci ($t3,$t4) = ($t4,$t3); 2066e1051a39Sopenharmony_ci} 2067e1051a39Sopenharmony_ci$code.=<<___; 2068e1051a39Sopenharmony_ci addc $acc0,$acc0,$acc4 
# accumulate upper half 2069e1051a39Sopenharmony_ci adde $acc1,$acc1,$acc5 2070e1051a39Sopenharmony_ci adde $acc2,$acc2,$acc6 2071e1051a39Sopenharmony_ci adde $acc3,$acc3,$acc7 2072e1051a39Sopenharmony_ci addze $acc4,$zr 2073e1051a39Sopenharmony_ci 2074e1051a39Sopenharmony_ci subfc $acc0,$ord0,$acc0 # ret -= modulus 2075e1051a39Sopenharmony_ci subfe $acc1,$ord1,$acc1 2076e1051a39Sopenharmony_ci subfe $acc2,$ord2,$acc2 2077e1051a39Sopenharmony_ci subfe $acc3,$ord3,$acc3 2078e1051a39Sopenharmony_ci subfe $acc4,$zr,$acc4 2079e1051a39Sopenharmony_ci 2080e1051a39Sopenharmony_ci and $t0,$ord0,$acc4 2081e1051a39Sopenharmony_ci and $t1,$ord1,$acc4 2082e1051a39Sopenharmony_ci addc $a0,$acc0,$t0 # ret += modulus if borrow 2083e1051a39Sopenharmony_ci and $t3,$ord3,$acc4 2084e1051a39Sopenharmony_ci adde $a1,$acc1,$t1 2085e1051a39Sopenharmony_ci adde $a2,$acc2,$acc4 2086e1051a39Sopenharmony_ci adde $a3,$acc3,$t3 2087e1051a39Sopenharmony_ci 2088e1051a39Sopenharmony_ci bdnz .Loop_ord_sqr 2089e1051a39Sopenharmony_ci 2090e1051a39Sopenharmony_ci std $a0,0($rp) 2091e1051a39Sopenharmony_ci std $a1,8($rp) 2092e1051a39Sopenharmony_ci std $a2,16($rp) 2093e1051a39Sopenharmony_ci std $a3,24($rp) 2094e1051a39Sopenharmony_ci 2095e1051a39Sopenharmony_ci ld r18,48($sp) 2096e1051a39Sopenharmony_ci ld r19,56($sp) 2097e1051a39Sopenharmony_ci ld r20,64($sp) 2098e1051a39Sopenharmony_ci ld r21,72($sp) 2099e1051a39Sopenharmony_ci ld r22,80($sp) 2100e1051a39Sopenharmony_ci ld r23,88($sp) 2101e1051a39Sopenharmony_ci ld r24,96($sp) 2102e1051a39Sopenharmony_ci ld r25,104($sp) 2103e1051a39Sopenharmony_ci ld r26,112($sp) 2104e1051a39Sopenharmony_ci ld r27,120($sp) 2105e1051a39Sopenharmony_ci ld r28,128($sp) 2106e1051a39Sopenharmony_ci ld r29,136($sp) 2107e1051a39Sopenharmony_ci ld r30,144($sp) 2108e1051a39Sopenharmony_ci ld r31,152($sp) 2109e1051a39Sopenharmony_ci addi $sp,$sp,160 2110e1051a39Sopenharmony_ci blr 2111e1051a39Sopenharmony_ci .long 0 2112e1051a39Sopenharmony_ci .byte 0,12,4,0,0x80,14,3,0 
.long	0
.size	ecp_nistz256_ord_sqr_mont,.-ecp_nistz256_ord_sqr_mont
___
} }

########################################################################
# scatter-gather subroutines
#
# The four routines below move points in and out of an interleaved
# table whose rows are 64 bytes apart.  The w5 pair works on 32-bit
# words of a three-coordinate point (X, Y, Z at byte offsets 0, 32 and
# 64 of the point); the w7 pair works on single bytes of a 64-byte
# input/output.  Both gather routines read the table unconditionally
# and AND the result with a mask derived from index instead of
# branching on it.
{
# Argument registers: out=r3, inp=r4, index=r5.  The gather routines
# reuse r5..r12 as scratch once index has been folded into the table
# pointer, which is why they name those registers literally.
my ($out,$inp,$index)=map("r$_",(3..5));
$code.=<<___;
########################################################################
# void	ecp_nistz256_scatter_w5(void *out, const P256_POINT *inp,
#				int index);
#
# Stores each 64-bit limb of X, Y and Z as two 32-bit words.  Within a
# coordinate, rows 0-3 receive the low words of limbs 0-3 and rows 4-7
# the high words.  The word goes to byte offset 4*index-4 of its row,
# which suggests callers pass a 1-based index (confirm against caller).
.globl	ecp_nistz256_scatter_w5
.align	4
ecp_nistz256_scatter_w5:
	slwi	$index,$index,2		# index*4, byte offset of the word
	add	$out,$out,$index

	ld	r8, 0($inp)		# X
	ld	r9, 8($inp)
	ld	r10,16($inp)
	ld	r11,24($inp)

	stw	r8, 64*0-4($out)	# low words, rows 0-3
	srdi	r8, r8, 32
	stw	r9, 64*1-4($out)
	srdi	r9, r9, 32
	stw	r10,64*2-4($out)
	srdi	r10,r10,32
	stw	r11,64*3-4($out)
	srdi	r11,r11,32
	stw	r8, 64*4-4($out)	# high words, rows 4-7
	stw	r9, 64*5-4($out)
	stw	r10,64*6-4($out)
	stw	r11,64*7-4($out)
	addi	$out,$out,64*8		# next coordinate, 8 rows further

	ld	r8, 32($inp)		# Y
	ld	r9, 40($inp)
	ld	r10,48($inp)
	ld	r11,56($inp)

	stw	r8, 64*0-4($out)
	srdi	r8, r8, 32
	stw	r9, 64*1-4($out)
	srdi	r9, r9, 32
	stw	r10,64*2-4($out)
	srdi	r10,r10,32
	stw	r11,64*3-4($out)
	srdi	r11,r11,32
	stw	r8, 64*4-4($out)
	stw	r9, 64*5-4($out)
	stw	r10,64*6-4($out)
	stw	r11,64*7-4($out)
	addi	$out,$out,64*8

	ld	r8, 64($inp)		# Z
	ld	r9, 72($inp)
	ld	r10,80($inp)
	ld	r11,88($inp)

	stw	r8, 64*0-4($out)
	srdi	r8, r8, 32
	stw	r9, 64*1-4($out)
	srdi	r9, r9, 32
	stw	r10,64*2-4($out)
	srdi	r10,r10,32
	stw	r11,64*3-4($out)
	srdi	r11,r11,32
	stw	r8, 64*4-4($out)
	stw	r9, 64*5-4($out)
	stw	r10,64*6-4($out)
	stw	r11,64*7-4($out)

	blr
	.long	0
	.byte	0,12,0x14,0,0,0,3,0
	.long	0
.size	ecp_nistz256_scatter_w5,.-ecp_nistz256_scatter_w5

########################################################################
# void	ecp_nistz256_gather_w5(P256_POINT *out, const void *inp,
#				int index);
#
# Reads back what scatter_w5 stored.  r0 is a mask that is all ones for
# a positive index and zero for index 0.  Adding it to index turns a
# positive index into index-1 and leaves 0 alone, so the loads always
# hit the table, and index 0 produces an all-zero point.
.globl	ecp_nistz256_gather_w5
.align	4
ecp_nistz256_gather_w5:
	neg	r0,$index
	sradi	r0,r0,63		# mask: -1 if index>0, 0 if index==0

	add	$index,$index,r0	# index-1 when mask is set
	slwi	$index,$index,2		# *4, byte offset of the word
	add	$inp,$inp,$index

	lwz	r5, 64*0($inp)		# r5 no longer holds index
	lwz	r6, 64*1($inp)
	lwz	r7, 64*2($inp)
	lwz	r8, 64*3($inp)
	lwz	r9, 64*4($inp)
	lwz	r10,64*5($inp)
	lwz	r11,64*6($inp)
	lwz	r12,64*7($inp)
	addi	$inp,$inp,64*8
	sldi	r9, r9, 32		# rows 4-7 are the high words
	sldi	r10,r10,32
	sldi	r11,r11,32
	sldi	r12,r12,32
	or	r5,r5,r9
	or	r6,r6,r10
	or	r7,r7,r11
	or	r8,r8,r12
	and	r5,r5,r0		# zero the result when index==0
	and	r6,r6,r0
	and	r7,r7,r0
	and	r8,r8,r0
	std	r5,0($out)		# X
	std	r6,8($out)
	std	r7,16($out)
	std	r8,24($out)

	lwz	r5, 64*0($inp)
	lwz	r6, 64*1($inp)
	lwz	r7, 64*2($inp)
	lwz	r8, 64*3($inp)
	lwz	r9, 64*4($inp)
	lwz	r10,64*5($inp)
	lwz	r11,64*6($inp)
	lwz	r12,64*7($inp)
	addi	$inp,$inp,64*8
	sldi	r9, r9, 32
	sldi	r10,r10,32
	sldi	r11,r11,32
	sldi	r12,r12,32
	or	r5,r5,r9
	or	r6,r6,r10
	or	r7,r7,r11
	or	r8,r8,r12
	and	r5,r5,r0
	and	r6,r6,r0
	and	r7,r7,r0
	and	r8,r8,r0
	std	r5,32($out)		# Y
	std	r6,40($out)
	std	r7,48($out)
	std	r8,56($out)

	lwz	r5, 64*0($inp)
	lwz	r6, 64*1($inp)
	lwz	r7, 64*2($inp)
	lwz	r8, 64*3($inp)
	lwz	r9, 64*4($inp)
	lwz	r10,64*5($inp)
	lwz	r11,64*6($inp)
	lwz	r12,64*7($inp)
	sldi	r9, r9, 32
	sldi	r10,r10,32
	sldi	r11,r11,32
	sldi	r12,r12,32
	or	r5,r5,r9
	or	r6,r6,r10
	or	r7,r7,r11
	or	r8,r8,r12
	and	r5,r5,r0
	and	r6,r6,r0
	and	r7,r7,r0
	and	r8,r8,r0
	std	r5,64($out)		# Z
	std	r6,72($out)
	std	r7,80($out)
	std	r8,88($out)

	blr
	.long	0
	.byte	0,12,0x14,0,0,0,3,0
	.long	0
.size	ecp_nistz256_gather_w5,.-ecp_nistz256_gather_w5

########################################################################
# void	ecp_nistz256_scatter_w7(void *out, const P256_POINT_AFFINE *inp,
#				int index);
#
# Eight iterations, each spreading one 64-bit word of inp over eight
# rows, least significant byte first, for 64 input bytes in total.
# index is added to out as-is; unlike the w5 variant there is no -1
# adjustment here.
.globl	ecp_nistz256_scatter_w7
.align	4
ecp_nistz256_scatter_w7:
	li	r0,8
	mtctr	r0			# 8 double-words to scatter
	add	$out,$out,$index
	subi	$inp,$inp,8		# bias for the pre-incrementing ldu

.Loop_scatter_w7:
	ldu	r0,8($inp)
	stb	r0,64*0($out)
	srdi	r0,r0,8
	stb	r0,64*1($out)
	srdi	r0,r0,8
	stb	r0,64*2($out)
	srdi	r0,r0,8
	stb	r0,64*3($out)
	srdi	r0,r0,8
	stb	r0,64*4($out)
	srdi	r0,r0,8
	stb	r0,64*5($out)
	srdi	r0,r0,8
	stb	r0,64*6($out)
	srdi	r0,r0,8
	stb	r0,64*7($out)
	addi	$out,$out,64*8
	bdnz	.Loop_scatter_w7

	blr
	.long	0
	.byte	0,12,0x14,0,0,0,3,0
	.long	0
.size	ecp_nistz256_scatter_w7,.-ecp_nistz256_scatter_w7

########################################################################
# void	ecp_nistz256_gather_w7(P256_POINT_AFFINE *out, const void *inp,
#				int index);
#
# Eight iterations, each rebuilding one 64-bit word from eight table
# rows with row 0 as the least significant byte.  As in gather_w5, r0
# is all ones for a positive index and zero for index 0.  A positive
# index selects column index-1, and index 0 yields all-zero output
# while still performing every load.
.globl	ecp_nistz256_gather_w7
.align	4
ecp_nistz256_gather_w7:
	li	r0,8
	mtctr	r0			# 8 double-words to gather
	neg	r0,$index
	sradi	r0,r0,63		# mask: -1 if index>0, 0 if index==0

	add	$index,$index,r0	# index-1 when mask is set
	add	$inp,$inp,$index
	subi	$out,$out,8		# bias for the pre-incrementing stdu

.Loop_gather_w7:
	lbz	r5, 64*0($inp)		# r5 no longer holds index
	lbz	r6, 64*1($inp)
	lbz	r7, 64*2($inp)
	lbz	r8, 64*3($inp)
	lbz	r9, 64*4($inp)
	lbz	r10,64*5($inp)
	lbz	r11,64*6($inp)
	lbz	r12,64*7($inp)
	addi	$inp,$inp,64*8

	sldi	r6, r6, 8
	sldi	r7, r7, 16
	sldi	r8, r8, 24
	sldi	r9, r9, 32
	sldi	r10,r10,40
	sldi	r11,r11,48
	sldi	r12,r12,56

	or	r5,r5,r6
	or	r7,r7,r8
	or	r9,r9,r10
	or	r11,r11,r12
	or	r5,r5,r7
	or	r9,r9,r11
	or	r5,r5,r9
	and	r5,r5,r0		# zero the result when index==0
	stdu	r5,8($out)
	bdnz	.Loop_gather_w7

	blr
	.long	0
	.byte	0,12,0x14,0,0,0,3,0
	.long	0
.size	ecp_nistz256_gather_w7,.-ecp_nistz256_gather_w7
___
}

# Emit the accumulated code one line at a time.  Any `...` span is
# replaced by the result of eval-ing its contents as Perl before the
# line is printed.  STDOUT is the pipe to ppc-xlate.pl opened at the
# top of the file.
foreach (split("\n",$code)) {
	s/\`([^\`]*)\`/eval $1/ge;

	print $_,"\n";
}
# A failed close is the only place a write error on the pipe shows up,
# so treat it as fatal.
close STDOUT or die "error closing STDOUT: $!";	# enforce flush