#! /usr/bin/env perl
# Copyright 2021-2023 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html
#
# ====================================================================
# Written by Amitay Isaacs <amitay@ozlabs.org> and Martin Schwenke
# <martin@meltin.net> for the OpenSSL project.
# ====================================================================
#
# p521 lower-level primitives for PPC64 using vector instructions.
#

use strict;
use warnings;

# Command line: <flavour> [other arguments ...] [<output file>].
# The first argument is the flavour handed on to ppc-xlate.pl.  Following
# arguments are consumed until one looks like a file name ("name.ext");
# that one becomes the output argument.  If none matches, "-" is used.
my $flavour = shift;
my $output = "";
while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
if (!$output) {
    $output = "-";
}

# Directory part of this script's path (including the trailing separator).
# Falls back to the empty string when $0 carries no directory component, so
# the lookups below become relative paths instead of interpolating undef.
my $dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";

# Look for the translator next to this script first, then in ../../perlasm.
my $xlate;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Everything printed to STDOUT from here on is piped through ppc-xlate.pl,
# run with the same perl binary ($^X).  The output argument is quoted so a
# path containing spaces survives the shell, and a failure to start the
# pipe is reported instead of being silently ignored.
open OUT,"| \"$^X\" $xlate $flavour \"$output\""
    or die "can't call $xlate: $!";
*STDOUT=*OUT;

# Accumulates the generated assembly text; printed once at the end.
my $code = "";

# General purpose registers used by the generated code.
# NOTE(review): $savelr is not referenced by any code visible in this file.
my ($sp, $outp, $savelr, $savesp) = ("r1", "r3", "r10", "r12");

# Register that the generated routines clear and use as a zero addend.
my $vzero = "v32";

# startproc(NAME): append the opening of a global, aligned routine NAME
# (.globl, .align 5 and the label itself) to $code.
sub startproc($)
{
    my ($name) = @_;

    $code.=<<___;
	.globl	${name}
	.align	5
${name}:

___
}

# endproc(NAME): append the return instruction and the .size directive
# that close routine NAME to $code.
sub endproc($)
{
    my ($name) = @_;

    $code.=<<___;
	blr
	.size	${name},.-${name}

___
}

# push_vrs(MIN, MAX): append a prologue to $code that opens a stack frame
# and saves vector-scalar registers MIN..MAX in it.
#
# The old stack pointer is kept in $savesp, the frame is 16*(count+1) bytes
# (one 16-byte slot per register plus one extra slot), and register i is
# stored at -16*(MAX-i+1) relative to the old stack pointer.  The
# backquoted expression is evaluated by the s///e pass at the end of this
# file, before the text is printed.
sub push_vrs($$)
{
    my ($min, $max) = @_;

    my $count = $max - $min + 1;

    $code.=<<___;
	mr		$savesp,$sp
	stdu		$sp,-16*`$count+1`($sp)

___
    for my $i ($min .. $max) {
        my $mult = $max - $i + 1;
        $code.=<<___;
	stxv		$i,-16*$mult($savesp)
___
    }

    $code.=<<___;

___
}

# pop_vrs(MIN, MAX): append the matching epilogue to $code.  The previous
# stack pointer is reloaded from offset 0 of the current frame (where stdu
# stored it), registers MIN..MAX are restored from the same slots push_vrs
# used, and the stack pointer is put back.
sub pop_vrs($$)
{
    my ($min, $max) = @_;

    $code.=<<___;
	ld		$savesp,0($sp)
___
    for my $i ($min .. $max) {
        my $mult = $max - $i + 1;
        $code.=<<___;
	lxv		$i,-16*$mult($savesp)
___
    }

    $code.=<<___;
	mr		$sp,$savesp

___
}

# load_vrs(POINTER, REG_LIST): append one lxsd per register in REG_LIST
# (an array reference), reading consecutive 8-byte elements starting at
# the address held in register POINTER.
sub load_vrs($$)
{
    my ($pointer, $reg_list) = @_;

    for my $i (0 .. $#{$reg_list}) {
        my $offset = $i * 8;
        $code.=<<___;
	lxsd		$reg_list->[$i],$offset($pointer)
___
    }

    $code.=<<___;

___
}

# store_vrs(POINTER, REG_LIST): append one stxv per register in REG_LIST
# (an array reference), writing consecutive 16-byte elements starting at
# the address held in register POINTER.
sub store_vrs($$)
{
    my ($pointer, $reg_list) = @_;

    for my $i (0 .. $#{$reg_list}) {
        my $offset = $i * 16;
        $code.=<<___;
	stxv		$reg_list->[$i],$offset($pointer)
___
    }

    $code.=<<___;

___
}

$code.=<<___;
.machine	"any"
.text

___

{
    # mul/square common
    #
    # $t1..$t4 are scratch vectors, $zero/$one are general purpose registers
    # used to build the shift-count vector, and @out holds the nine
    # accumulators that are written to the output.
    my ($t1, $t2, $t3, $t4) = ("v33", "v34", "v44", "v54");
    my ($zero, $one) = ("r8", "r9");
    my @out = map("v$_",(55..63));

    {
	#
	# p521_felem_mul
	#
	# Reads nine 8-byte limbs from each of the two input pointers and
	# writes nine 16-byte results to $outp.  For every k, out[k]
	# receives the products in1[i]*in2[j] with i+j == k, plus the
	# products with i+j == k+9 taken with in2 doubled.
	# NOTE(review): the doubling presumably implements the wrap-around
	# of the reduction modulo 2^521-1 - confirm against the C reference
	# implementation.
	#

	my ($in1p, $in2p) = ("r4", "r5");
	my @in1 = map("v$_",(45..53));
	my @in2 = map("v$_",(35..43));

	startproc("p521_felem_mul");

	# Registers 52..63 are overwritten below (part of \@in1, $t4 and
	# \@out), so they are saved here and restored before returning.
	push_vrs(52, 63);

	$code.=<<___;
	vspltisw	$vzero,0

___

	load_vrs($in1p, \@in1);
	load_vrs($in2p, \@in2);

	# First pass: products whose limb indices sum to k go into out[k].
	# xxpermdi pairs two limbs in one vector so that a single vmsumudm
	# accumulates two products.
	$code.=<<___;
	vmsumudm	$out[0],$in1[0],$in2[0],$vzero

	xxpermdi	$t1,$in1[0],$in1[1],0b00
	xxpermdi	$t2,$in2[1],$in2[0],0b00
	vmsumudm	$out[1],$t1,$t2,$vzero

	xxpermdi	$t2,$in2[2],$in2[1],0b00
	vmsumudm	$out[2],$t1,$t2,$vzero
	vmsumudm	$out[2],$in1[2],$in2[0],$out[2]

	xxpermdi	$t2,$in2[3],$in2[2],0b00
	vmsumudm	$out[3],$t1,$t2,$vzero
	xxpermdi	$t3,$in1[2],$in1[3],0b00
	xxpermdi	$t4,$in2[1],$in2[0],0b00
	vmsumudm	$out[3],$t3,$t4,$out[3]

	xxpermdi	$t2,$in2[4],$in2[3],0b00
	vmsumudm	$out[4],$t1,$t2,$vzero
	xxpermdi	$t4,$in2[2],$in2[1],0b00
	vmsumudm	$out[4],$t3,$t4,$out[4]
	vmsumudm	$out[4],$in1[4],$in2[0],$out[4]

	xxpermdi	$t2,$in2[5],$in2[4],0b00
	vmsumudm	$out[5],$t1,$t2,$vzero
	xxpermdi	$t4,$in2[3],$in2[2],0b00
	vmsumudm	$out[5],$t3,$t4,$out[5]

	xxpermdi	$t2,$in2[6],$in2[5],0b00
	vmsumudm	$out[6],$t1,$t2,$vzero
	xxpermdi	$t4,$in2[4],$in2[3],0b00
	vmsumudm	$out[6],$t3,$t4,$out[6]

	xxpermdi	$t2,$in2[7],$in2[6],0b00
	vmsumudm	$out[7],$t1,$t2,$vzero
	xxpermdi	$t4,$in2[5],$in2[4],0b00
	vmsumudm	$out[7],$t3,$t4,$out[7]

	xxpermdi	$t2,$in2[8],$in2[7],0b00
	vmsumudm	$out[8],$t1,$t2,$vzero
	xxpermdi	$t4,$in2[6],$in2[5],0b00
	vmsumudm	$out[8],$t3,$t4,$out[8]

	xxpermdi	$t1,$in1[4],$in1[5],0b00
	xxpermdi	$t2,$in2[1],$in2[0],0b00
	vmsumudm	$out[5],$t1,$t2,$out[5]

	xxpermdi	$t2,$in2[2],$in2[1],0b00
	vmsumudm	$out[6],$t1,$t2,$out[6]
	vmsumudm	$out[6],$in1[6],$in2[0],$out[6]

	xxpermdi	$t2,$in2[3],$in2[2],0b00
	vmsumudm	$out[7],$t1,$t2,$out[7]
	xxpermdi	$t3,$in1[6],$in1[7],0b00
	xxpermdi	$t4,$in2[1],$in2[0],0b00
	vmsumudm	$out[7],$t3,$t4,$out[7]

	xxpermdi	$t2,$in2[4],$in2[3],0b00
	vmsumudm	$out[8],$t1,$t2,$out[8]
	xxpermdi	$t4,$in2[2],$in2[1],0b00
	vmsumudm	$out[8],$t3,$t4,$out[8]
	vmsumudm	$out[8],$in1[8],$in2[0],$out[8]

	li		$zero,0
	li		$one,1
	mtvsrdd		$t1,$one,$zero
___

	# Shift every in2 limb left by the count just built in $t1,
	# i.e. double it in place for the second pass.
	for my $reg (@in2) {
	    $code.=<<___;
	vsld		$reg,$reg,$t1
___
	}

	# Second pass: products whose limb indices sum to k+9 are added to
	# out[k], using the doubled in2 limbs.
	$code.=<<___;

	vmsumudm	$out[7],$in1[8],$in2[8],$out[7]

	xxpermdi	$t2,$in2[8],$in2[7],0b00
	xxpermdi	$t1,$in1[7],$in1[8],0b00
	vmsumudm	$out[6],$t1,$t2,$out[6]

	xxpermdi	$t1,$in1[6],$in1[7],0b00
	vmsumudm	$out[5],$t1,$t2,$out[5]
	vmsumudm	$out[5],$in1[8],$in2[6],$out[5]

	xxpermdi	$t1,$in1[5],$in1[6],0b00
	vmsumudm	$out[4],$t1,$t2,$out[4]
	xxpermdi	$t4,$in2[6],$in2[5],0b00
	xxpermdi	$t3,$in1[7],$in1[8],0b00
	vmsumudm	$out[4],$t3,$t4,$out[4]

	xxpermdi	$t1,$in1[4],$in1[5],0b00
	vmsumudm	$out[3],$t1,$t2,$out[3]
	xxpermdi	$t3,$in1[6],$in1[7],0b00
	vmsumudm	$out[3],$t3,$t4,$out[3]
	vmsumudm	$out[3],$in1[8],$in2[4],$out[3]

	xxpermdi	$t1,$in1[3],$in1[4],0b00
	vmsumudm	$out[2],$t1,$t2,$out[2]
	xxpermdi	$t3,$in1[5],$in1[6],0b00
	vmsumudm	$out[2],$t3,$t4,$out[2]

	xxpermdi	$t1,$in1[2],$in1[3],0b00
	vmsumudm	$out[1],$t1,$t2,$out[1]
	xxpermdi	$t3,$in1[4],$in1[5],0b00
	vmsumudm	$out[1],$t3,$t4,$out[1]

	xxpermdi	$t1,$in1[1],$in1[2],0b00
	vmsumudm	$out[0],$t1,$t2,$out[0]
	xxpermdi	$t3,$in1[3],$in1[4],0b00
	vmsumudm	$out[0],$t3,$t4,$out[0]

	xxpermdi	$t2,$in2[4],$in2[3],0b00
	xxpermdi	$t1,$in1[7],$in1[8],0b00
	vmsumudm	$out[2],$t1,$t2,$out[2]

	xxpermdi	$t1,$in1[6],$in1[7],0b00
	vmsumudm	$out[1],$t1,$t2,$out[1]
	vmsumudm	$out[1],$in1[8],$in2[2],$out[1]

	xxpermdi	$t1,$in1[5],$in1[6],0b00
	vmsumudm	$out[0],$t1,$t2,$out[0]
	xxpermdi	$t4,$in2[2],$in2[1],0b00
	xxpermdi	$t3,$in1[7],$in1[8],0b00
	vmsumudm	$out[0],$t3,$t4,$out[0]

___

	store_vrs($outp, \@out);

	pop_vrs(52, 63);

	endproc("p521_felem_mul");
    }

    {
	#
	# p521_felem_square
	#
	# Reads nine 8-byte limbs from the input pointer and writes nine
	# 16-byte results to $outp.  \@inx2 first holds the doubled limbs,
	# so each cross product in[i]*in[j] (i != j) is formed once against
	# a doubled operand.  Limbs 5..8 of \@inx2 are doubled a second time
	# before the products whose indices sum to k+9 are added to out[k].
	# NOTE(review): the second doubling presumably implements the
	# wrap-around of the reduction modulo 2^521-1 - confirm against the
	# C reference implementation.
	#

	my ($inp) = ("r4");
	my @in = map("v$_",(45..53));
	my @inx2 = map("v$_",(35..43));

	startproc("p521_felem_square");

	# Registers 52..63 are overwritten below (part of \@in, $t4 and
	# \@out), so they are saved here and restored before returning.
	push_vrs(52, 63);

	$code.=<<___;
	vspltisw	$vzero,0

___

	load_vrs($inp, \@in);

	$code.=<<___;
	li		$zero,0
	li		$one,1
	mtvsrdd		$t1,$one,$zero
___

	# inx2[i] = in[i] shifted left by the count held in $t1.
	for my $i (0 .. $#in) {
	    $code.=<<___;
	vsld		$inx2[$i],$in[$i],$t1
___
	}

	$code.=<<___;
	vmsumudm	$out[0],$in[0],$in[0],$vzero

	vmsumudm	$out[1],$in[0],$inx2[1],$vzero

	xxpermdi	$t1,$in[0],$in[1],0b00
	xxpermdi	$t2,$inx2[2],$in[1],0b00
	vmsumudm	$out[2],$t1,$t2,$vzero

	xxpermdi	$t2,$inx2[3],$inx2[2],0b00
	vmsumudm	$out[3],$t1,$t2,$vzero

	xxpermdi	$t2,$inx2[4],$inx2[3],0b00
	vmsumudm	$out[4],$t1,$t2,$vzero
	vmsumudm	$out[4],$in[2],$in[2],$out[4]

	xxpermdi	$t2,$inx2[5],$inx2[4],0b00
	vmsumudm	$out[5],$t1,$t2,$vzero
	vmsumudm	$out[5],$in[2],$inx2[3],$out[5]

	xxpermdi	$t2,$inx2[6],$inx2[5],0b00
	vmsumudm	$out[6],$t1,$t2,$vzero
	xxpermdi	$t3,$in[2],$in[3],0b00
	xxpermdi	$t4,$inx2[4],$in[3],0b00
	vmsumudm	$out[6],$t3,$t4,$out[6]

	xxpermdi	$t2,$inx2[7],$inx2[6],0b00
	vmsumudm	$out[7],$t1,$t2,$vzero
	xxpermdi	$t4,$inx2[5],$inx2[4],0b00
	vmsumudm	$out[7],$t3,$t4,$out[7]

	xxpermdi	$t2,$inx2[8],$inx2[7],0b00
	vmsumudm	$out[8],$t1,$t2,$vzero
	xxpermdi	$t4,$inx2[6],$inx2[5],0b00
	vmsumudm	$out[8],$t3,$t4,$out[8]
	vmsumudm	$out[8],$in[4],$in[4],$out[8]

	vmsumudm	$out[1],$in[5],$inx2[5],$out[1]

	vmsumudm	$out[3],$in[6],$inx2[6],$out[3]

	vmsumudm	$out[5],$in[7],$inx2[7],$out[5]

	vmsumudm	$out[7],$in[8],$inx2[8],$out[7]

	mtvsrdd		$t1,$one,$zero
___

	# $t1 was reused as scratch above, so the shift count was rebuilt
	# just before this point.  Double limbs 5..8 of \@inx2 once more.
	for my $reg (@inx2[5 .. 8]) {
	    $code.=<<___;
	vsld		$reg,$reg,$t1
___
	}

	$code.=<<___;

	vmsumudm	$out[6],$in[7],$inx2[8],$out[6]

	vmsumudm	$out[5],$in[6],$inx2[8],$out[5]

	xxpermdi	$t2,$inx2[8],$inx2[7],0b00
	xxpermdi	$t1,$in[5],$in[6],0b00
	vmsumudm	$out[4],$t1,$t2,$out[4]

	xxpermdi	$t1,$in[4],$in[5],0b00
	vmsumudm	$out[3],$t1,$t2,$out[3]

	xxpermdi	$t1,$in[3],$in[4],0b00
	vmsumudm	$out[2],$t1,$t2,$out[2]
	vmsumudm	$out[2],$in[5],$inx2[6],$out[2]

	xxpermdi	$t1,$in[2],$in[3],0b00
	vmsumudm	$out[1],$t1,$t2,$out[1]
	vmsumudm	$out[1],$in[4],$inx2[6],$out[1]

	xxpermdi	$t1,$in[1],$in[2],0b00
	vmsumudm	$out[0],$t1,$t2,$out[0]
	xxpermdi	$t2,$inx2[6],$inx2[5],0b00
	xxpermdi	$t1,$in[3],$in[4],0b00
	vmsumudm	$out[0],$t1,$t2,$out[0]

___

	store_vrs($outp, \@out);

	pop_vrs(52, 63);

	endproc("p521_felem_square");
    }
}

# Evaluate the backquoted Perl expressions embedded in the generated text
# (e.g. the frame size in push_vrs), then hand the result to the translator
# pipe.  Closing STDOUT is checked so that a failure in the pipe is reported.
$code =~ s/\`([^\`]*)\`/eval $1/gem;
print $code;
close STDOUT or die "error closing STDOUT: $!";