#!/usr/bin/env perl
# Copyright 2018-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# X25519 lower-level primitives for x86_64.
#
# February 2018.
#
# This module implements radix 2^51 multiplication and squaring, and
# radix 2^64 multiplication, squaring, addition, subtraction and final
# reduction. The latter radix is used on ADCX/ADOX-capable processors
# such as Broadwell. On a related note one should mention that there
# are vector implementations that provide significantly better
# performance on some processors(*), but they are large and overly
# complex. Which in combination with them being effectively
# processor-specific makes the undertaking hard to justify. The goal
# for this implementation is rather versatility and simplicity [and
# ultimately formal verification].
#
# (*)	For example sandy2x should provide ~30% improvement on Sandy
#	Bridge, but only nominal ~5% on Haswell [and big loss on
#	Broadwell and successors].
#
######################################################################
# Improvement coefficients:
#
#                       amd64-51(*)     gcc-5.x(**)
#
# P4                    +22%            +40%
# Sandy Bridge          -3%             +11%
# Haswell               -1%             +13%
# Broadwell(***)        +30%            +35%
# Skylake(***)          +33%            +47%
# Silvermont            +20%            +26%
# Goldmont              +40%            +50%
# Bulldozer             +20%            +9%
# Ryzen(***)            +43%            +40%
# VIA                   +170%           +120%
#
# (*)	amd64-51 is popular assembly implementation with 2^51 radix,
#	only multiplication and squaring subroutines were linked
#	for comparison, but not complete ladder step; gain on most
#	processors is because this module refrains from shld, and
#	minor regression on others is because this does result in
#	higher instruction count;
# (**)	compiler is free to inline functions, in assembly one would
#	need to implement ladder step to do that, and it will improve
#	performance by several percent;
# (***)	ADCX/ADOX result for 2^64 radix, there is no corresponding
#	C implementation, so that comparison is always against
#	2^51 radix;

# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# Win64 conventions are selected either by an explicit nasm/masm/mingw64
# flavour or by an output file with the .asm extension.
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

# Locate the perlasm translator: first next to this script, then in
# ../../perlasm relative to this script's directory.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# Everything printed to STDOUT from here on is piped through the
# translator, which receives the flavour and the output file name.
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""
    or die "can't call $xlate: $!";
*STDOUT=*OUT;

# Probe the toolchain to decide whether the radix 2^64 code path may be
# emitted; $addx stays false unless one of the probes succeeds.  Version
# strings are compared as plain decimal numbers.
if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
		=~ /GNU assembler version ([2-9]\.[0-9]+)/) {
    $addx = ($1>=2.23);
}

if (!$addx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) &&
	    `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/) {
    $addx = ($1>=2.10);
}

if (!$addx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) &&
	    `ml64 2>&1` =~ /Version ([0-9]+)\./) {
    $addx = ($1>=12);
}

95e1051a39Sopenharmony_ciif (!$addx && `$ENV{CC} -v 2>&1` =~ /((?:clang|LLVM) version|.*based on LLVM) ([0-9]+)\.([0-9]+)/) { 96e1051a39Sopenharmony_ci my $ver = $2 + $3/100.0; # 3.1->3.01, 3.10->3.10 97e1051a39Sopenharmony_ci $addx = ($ver>=3.03); 98e1051a39Sopenharmony_ci} 99e1051a39Sopenharmony_ci 100e1051a39Sopenharmony_ci$code.=<<___; 101e1051a39Sopenharmony_ci.text 102e1051a39Sopenharmony_ci 103e1051a39Sopenharmony_ci.globl x25519_fe51_mul 104e1051a39Sopenharmony_ci.type x25519_fe51_mul,\@function,3 105e1051a39Sopenharmony_ci.align 32 106e1051a39Sopenharmony_cix25519_fe51_mul: 107e1051a39Sopenharmony_ci.cfi_startproc 108e1051a39Sopenharmony_ci push %rbp 109e1051a39Sopenharmony_ci.cfi_push %rbp 110e1051a39Sopenharmony_ci push %rbx 111e1051a39Sopenharmony_ci.cfi_push %rbx 112e1051a39Sopenharmony_ci push %r12 113e1051a39Sopenharmony_ci.cfi_push %r12 114e1051a39Sopenharmony_ci push %r13 115e1051a39Sopenharmony_ci.cfi_push %r13 116e1051a39Sopenharmony_ci push %r14 117e1051a39Sopenharmony_ci.cfi_push %r14 118e1051a39Sopenharmony_ci push %r15 119e1051a39Sopenharmony_ci.cfi_push %r15 120e1051a39Sopenharmony_ci lea -8*5(%rsp),%rsp 121e1051a39Sopenharmony_ci.cfi_adjust_cfa_offset 40 122e1051a39Sopenharmony_ci.Lfe51_mul_body: 123e1051a39Sopenharmony_ci 124e1051a39Sopenharmony_ci mov 8*0(%rsi),%rax # f[0] 125e1051a39Sopenharmony_ci mov 8*0(%rdx),%r11 # load g[0-4] 126e1051a39Sopenharmony_ci mov 8*1(%rdx),%r12 127e1051a39Sopenharmony_ci mov 8*2(%rdx),%r13 128e1051a39Sopenharmony_ci mov 8*3(%rdx),%rbp 129e1051a39Sopenharmony_ci mov 8*4(%rdx),%r14 130e1051a39Sopenharmony_ci 131e1051a39Sopenharmony_ci mov %rdi,8*4(%rsp) # offload 1st argument 132e1051a39Sopenharmony_ci mov %rax,%rdi 133e1051a39Sopenharmony_ci mulq %r11 # f[0]*g[0] 134e1051a39Sopenharmony_ci mov %r11,8*0(%rsp) # offload g[0] 135e1051a39Sopenharmony_ci mov %rax,%rbx # %rbx:%rcx = h0 136e1051a39Sopenharmony_ci mov %rdi,%rax 137e1051a39Sopenharmony_ci mov %rdx,%rcx 138e1051a39Sopenharmony_ci mulq %r12 # 
f[0]*g[1] 139e1051a39Sopenharmony_ci mov %r12,8*1(%rsp) # offload g[1] 140e1051a39Sopenharmony_ci mov %rax,%r8 # %r8:%r9 = h1 141e1051a39Sopenharmony_ci mov %rdi,%rax 142e1051a39Sopenharmony_ci lea (%r14,%r14,8),%r15 143e1051a39Sopenharmony_ci mov %rdx,%r9 144e1051a39Sopenharmony_ci mulq %r13 # f[0]*g[2] 145e1051a39Sopenharmony_ci mov %r13,8*2(%rsp) # offload g[2] 146e1051a39Sopenharmony_ci mov %rax,%r10 # %r10:%r11 = h2 147e1051a39Sopenharmony_ci mov %rdi,%rax 148e1051a39Sopenharmony_ci lea (%r14,%r15,2),%rdi # g[4]*19 149e1051a39Sopenharmony_ci mov %rdx,%r11 150e1051a39Sopenharmony_ci mulq %rbp # f[0]*g[3] 151e1051a39Sopenharmony_ci mov %rax,%r12 # %r12:%r13 = h3 152e1051a39Sopenharmony_ci mov 8*0(%rsi),%rax # f[0] 153e1051a39Sopenharmony_ci mov %rdx,%r13 154e1051a39Sopenharmony_ci mulq %r14 # f[0]*g[4] 155e1051a39Sopenharmony_ci mov %rax,%r14 # %r14:%r15 = h4 156e1051a39Sopenharmony_ci mov 8*1(%rsi),%rax # f[1] 157e1051a39Sopenharmony_ci mov %rdx,%r15 158e1051a39Sopenharmony_ci 159e1051a39Sopenharmony_ci mulq %rdi # f[1]*g[4]*19 160e1051a39Sopenharmony_ci add %rax,%rbx 161e1051a39Sopenharmony_ci mov 8*2(%rsi),%rax # f[2] 162e1051a39Sopenharmony_ci adc %rdx,%rcx 163e1051a39Sopenharmony_ci mulq %rdi # f[2]*g[4]*19 164e1051a39Sopenharmony_ci add %rax,%r8 165e1051a39Sopenharmony_ci mov 8*3(%rsi),%rax # f[3] 166e1051a39Sopenharmony_ci adc %rdx,%r9 167e1051a39Sopenharmony_ci mulq %rdi # f[3]*g[4]*19 168e1051a39Sopenharmony_ci add %rax,%r10 169e1051a39Sopenharmony_ci mov 8*4(%rsi),%rax # f[4] 170e1051a39Sopenharmony_ci adc %rdx,%r11 171e1051a39Sopenharmony_ci mulq %rdi # f[4]*g[4]*19 172e1051a39Sopenharmony_ci imulq \$19,%rbp,%rdi # g[3]*19 173e1051a39Sopenharmony_ci add %rax,%r12 174e1051a39Sopenharmony_ci mov 8*1(%rsi),%rax # f[1] 175e1051a39Sopenharmony_ci adc %rdx,%r13 176e1051a39Sopenharmony_ci mulq %rbp # f[1]*g[3] 177e1051a39Sopenharmony_ci mov 8*2(%rsp),%rbp # g[2] 178e1051a39Sopenharmony_ci add %rax,%r14 179e1051a39Sopenharmony_ci mov 8*2(%rsi),%rax # f[2] 
180e1051a39Sopenharmony_ci adc %rdx,%r15 181e1051a39Sopenharmony_ci 182e1051a39Sopenharmony_ci mulq %rdi # f[2]*g[3]*19 183e1051a39Sopenharmony_ci add %rax,%rbx 184e1051a39Sopenharmony_ci mov 8*3(%rsi),%rax # f[3] 185e1051a39Sopenharmony_ci adc %rdx,%rcx 186e1051a39Sopenharmony_ci mulq %rdi # f[3]*g[3]*19 187e1051a39Sopenharmony_ci add %rax,%r8 188e1051a39Sopenharmony_ci mov 8*4(%rsi),%rax # f[4] 189e1051a39Sopenharmony_ci adc %rdx,%r9 190e1051a39Sopenharmony_ci mulq %rdi # f[4]*g[3]*19 191e1051a39Sopenharmony_ci imulq \$19,%rbp,%rdi # g[2]*19 192e1051a39Sopenharmony_ci add %rax,%r10 193e1051a39Sopenharmony_ci mov 8*1(%rsi),%rax # f[1] 194e1051a39Sopenharmony_ci adc %rdx,%r11 195e1051a39Sopenharmony_ci mulq %rbp # f[1]*g[2] 196e1051a39Sopenharmony_ci add %rax,%r12 197e1051a39Sopenharmony_ci mov 8*2(%rsi),%rax # f[2] 198e1051a39Sopenharmony_ci adc %rdx,%r13 199e1051a39Sopenharmony_ci mulq %rbp # f[2]*g[2] 200e1051a39Sopenharmony_ci mov 8*1(%rsp),%rbp # g[1] 201e1051a39Sopenharmony_ci add %rax,%r14 202e1051a39Sopenharmony_ci mov 8*3(%rsi),%rax # f[3] 203e1051a39Sopenharmony_ci adc %rdx,%r15 204e1051a39Sopenharmony_ci 205e1051a39Sopenharmony_ci mulq %rdi # f[3]*g[2]*19 206e1051a39Sopenharmony_ci add %rax,%rbx 207e1051a39Sopenharmony_ci mov 8*4(%rsi),%rax # f[3] 208e1051a39Sopenharmony_ci adc %rdx,%rcx 209e1051a39Sopenharmony_ci mulq %rdi # f[4]*g[2]*19 210e1051a39Sopenharmony_ci add %rax,%r8 211e1051a39Sopenharmony_ci mov 8*1(%rsi),%rax # f[1] 212e1051a39Sopenharmony_ci adc %rdx,%r9 213e1051a39Sopenharmony_ci mulq %rbp # f[1]*g[1] 214e1051a39Sopenharmony_ci imulq \$19,%rbp,%rdi 215e1051a39Sopenharmony_ci add %rax,%r10 216e1051a39Sopenharmony_ci mov 8*2(%rsi),%rax # f[2] 217e1051a39Sopenharmony_ci adc %rdx,%r11 218e1051a39Sopenharmony_ci mulq %rbp # f[2]*g[1] 219e1051a39Sopenharmony_ci add %rax,%r12 220e1051a39Sopenharmony_ci mov 8*3(%rsi),%rax # f[3] 221e1051a39Sopenharmony_ci adc %rdx,%r13 222e1051a39Sopenharmony_ci mulq %rbp # f[3]*g[1] 223e1051a39Sopenharmony_ci 
mov 8*0(%rsp),%rbp # g[0] 224e1051a39Sopenharmony_ci add %rax,%r14 225e1051a39Sopenharmony_ci mov 8*4(%rsi),%rax # f[4] 226e1051a39Sopenharmony_ci adc %rdx,%r15 227e1051a39Sopenharmony_ci 228e1051a39Sopenharmony_ci mulq %rdi # f[4]*g[1]*19 229e1051a39Sopenharmony_ci add %rax,%rbx 230e1051a39Sopenharmony_ci mov 8*1(%rsi),%rax # f[1] 231e1051a39Sopenharmony_ci adc %rdx,%rcx 232e1051a39Sopenharmony_ci mul %rbp # f[1]*g[0] 233e1051a39Sopenharmony_ci add %rax,%r8 234e1051a39Sopenharmony_ci mov 8*2(%rsi),%rax # f[2] 235e1051a39Sopenharmony_ci adc %rdx,%r9 236e1051a39Sopenharmony_ci mul %rbp # f[2]*g[0] 237e1051a39Sopenharmony_ci add %rax,%r10 238e1051a39Sopenharmony_ci mov 8*3(%rsi),%rax # f[3] 239e1051a39Sopenharmony_ci adc %rdx,%r11 240e1051a39Sopenharmony_ci mul %rbp # f[3]*g[0] 241e1051a39Sopenharmony_ci add %rax,%r12 242e1051a39Sopenharmony_ci mov 8*4(%rsi),%rax # f[4] 243e1051a39Sopenharmony_ci adc %rdx,%r13 244e1051a39Sopenharmony_ci mulq %rbp # f[4]*g[0] 245e1051a39Sopenharmony_ci add %rax,%r14 246e1051a39Sopenharmony_ci adc %rdx,%r15 247e1051a39Sopenharmony_ci 248e1051a39Sopenharmony_ci mov 8*4(%rsp),%rdi # restore 1st argument 249e1051a39Sopenharmony_ci jmp .Lreduce51 250e1051a39Sopenharmony_ci.Lfe51_mul_epilogue: 251e1051a39Sopenharmony_ci.cfi_endproc 252e1051a39Sopenharmony_ci.size x25519_fe51_mul,.-x25519_fe51_mul 253e1051a39Sopenharmony_ci 254e1051a39Sopenharmony_ci.globl x25519_fe51_sqr 255e1051a39Sopenharmony_ci.type x25519_fe51_sqr,\@function,2 256e1051a39Sopenharmony_ci.align 32 257e1051a39Sopenharmony_cix25519_fe51_sqr: 258e1051a39Sopenharmony_ci.cfi_startproc 259e1051a39Sopenharmony_ci push %rbp 260e1051a39Sopenharmony_ci.cfi_push %rbp 261e1051a39Sopenharmony_ci push %rbx 262e1051a39Sopenharmony_ci.cfi_push %rbx 263e1051a39Sopenharmony_ci push %r12 264e1051a39Sopenharmony_ci.cfi_push %r12 265e1051a39Sopenharmony_ci push %r13 266e1051a39Sopenharmony_ci.cfi_push %r13 267e1051a39Sopenharmony_ci push %r14 268e1051a39Sopenharmony_ci.cfi_push %r14 
269e1051a39Sopenharmony_ci push %r15 270e1051a39Sopenharmony_ci.cfi_push %r15 271e1051a39Sopenharmony_ci lea -8*5(%rsp),%rsp 272e1051a39Sopenharmony_ci.cfi_adjust_cfa_offset 40 273e1051a39Sopenharmony_ci.Lfe51_sqr_body: 274e1051a39Sopenharmony_ci 275e1051a39Sopenharmony_ci mov 8*0(%rsi),%rax # g[0] 276e1051a39Sopenharmony_ci mov 8*2(%rsi),%r15 # g[2] 277e1051a39Sopenharmony_ci mov 8*4(%rsi),%rbp # g[4] 278e1051a39Sopenharmony_ci 279e1051a39Sopenharmony_ci mov %rdi,8*4(%rsp) # offload 1st argument 280e1051a39Sopenharmony_ci lea (%rax,%rax),%r14 281e1051a39Sopenharmony_ci mulq %rax # g[0]*g[0] 282e1051a39Sopenharmony_ci mov %rax,%rbx 283e1051a39Sopenharmony_ci mov 8*1(%rsi),%rax # g[1] 284e1051a39Sopenharmony_ci mov %rdx,%rcx 285e1051a39Sopenharmony_ci mulq %r14 # 2*g[0]*g[1] 286e1051a39Sopenharmony_ci mov %rax,%r8 287e1051a39Sopenharmony_ci mov %r15,%rax 288e1051a39Sopenharmony_ci mov %r15,8*0(%rsp) # offload g[2] 289e1051a39Sopenharmony_ci mov %rdx,%r9 290e1051a39Sopenharmony_ci mulq %r14 # 2*g[0]*g[2] 291e1051a39Sopenharmony_ci mov %rax,%r10 292e1051a39Sopenharmony_ci mov 8*3(%rsi),%rax 293e1051a39Sopenharmony_ci mov %rdx,%r11 294e1051a39Sopenharmony_ci imulq \$19,%rbp,%rdi # g[4]*19 295e1051a39Sopenharmony_ci mulq %r14 # 2*g[0]*g[3] 296e1051a39Sopenharmony_ci mov %rax,%r12 297e1051a39Sopenharmony_ci mov %rbp,%rax 298e1051a39Sopenharmony_ci mov %rdx,%r13 299e1051a39Sopenharmony_ci mulq %r14 # 2*g[0]*g[4] 300e1051a39Sopenharmony_ci mov %rax,%r14 301e1051a39Sopenharmony_ci mov %rbp,%rax 302e1051a39Sopenharmony_ci mov %rdx,%r15 303e1051a39Sopenharmony_ci 304e1051a39Sopenharmony_ci mulq %rdi # g[4]*g[4]*19 305e1051a39Sopenharmony_ci add %rax,%r12 306e1051a39Sopenharmony_ci mov 8*1(%rsi),%rax # g[1] 307e1051a39Sopenharmony_ci adc %rdx,%r13 308e1051a39Sopenharmony_ci 309e1051a39Sopenharmony_ci mov 8*3(%rsi),%rsi # g[3] 310e1051a39Sopenharmony_ci lea (%rax,%rax),%rbp 311e1051a39Sopenharmony_ci mulq %rax # g[1]*g[1] 312e1051a39Sopenharmony_ci add %rax,%r10 
313e1051a39Sopenharmony_ci mov 8*0(%rsp),%rax # g[2] 314e1051a39Sopenharmony_ci adc %rdx,%r11 315e1051a39Sopenharmony_ci mulq %rbp # 2*g[1]*g[2] 316e1051a39Sopenharmony_ci add %rax,%r12 317e1051a39Sopenharmony_ci mov %rbp,%rax 318e1051a39Sopenharmony_ci adc %rdx,%r13 319e1051a39Sopenharmony_ci mulq %rsi # 2*g[1]*g[3] 320e1051a39Sopenharmony_ci add %rax,%r14 321e1051a39Sopenharmony_ci mov %rbp,%rax 322e1051a39Sopenharmony_ci adc %rdx,%r15 323e1051a39Sopenharmony_ci imulq \$19,%rsi,%rbp # g[3]*19 324e1051a39Sopenharmony_ci mulq %rdi # 2*g[1]*g[4]*19 325e1051a39Sopenharmony_ci add %rax,%rbx 326e1051a39Sopenharmony_ci lea (%rsi,%rsi),%rax 327e1051a39Sopenharmony_ci adc %rdx,%rcx 328e1051a39Sopenharmony_ci 329e1051a39Sopenharmony_ci mulq %rdi # 2*g[3]*g[4]*19 330e1051a39Sopenharmony_ci add %rax,%r10 331e1051a39Sopenharmony_ci mov %rsi,%rax 332e1051a39Sopenharmony_ci adc %rdx,%r11 333e1051a39Sopenharmony_ci mulq %rbp # g[3]*g[3]*19 334e1051a39Sopenharmony_ci add %rax,%r8 335e1051a39Sopenharmony_ci mov 8*0(%rsp),%rax # g[2] 336e1051a39Sopenharmony_ci adc %rdx,%r9 337e1051a39Sopenharmony_ci 338e1051a39Sopenharmony_ci lea (%rax,%rax),%rsi 339e1051a39Sopenharmony_ci mulq %rax # g[2]*g[2] 340e1051a39Sopenharmony_ci add %rax,%r14 341e1051a39Sopenharmony_ci mov %rbp,%rax 342e1051a39Sopenharmony_ci adc %rdx,%r15 343e1051a39Sopenharmony_ci mulq %rsi # 2*g[2]*g[3]*19 344e1051a39Sopenharmony_ci add %rax,%rbx 345e1051a39Sopenharmony_ci mov %rsi,%rax 346e1051a39Sopenharmony_ci adc %rdx,%rcx 347e1051a39Sopenharmony_ci mulq %rdi # 2*g[2]*g[4]*19 348e1051a39Sopenharmony_ci add %rax,%r8 349e1051a39Sopenharmony_ci adc %rdx,%r9 350e1051a39Sopenharmony_ci 351e1051a39Sopenharmony_ci mov 8*4(%rsp),%rdi # restore 1st argument 352e1051a39Sopenharmony_ci jmp .Lreduce51 353e1051a39Sopenharmony_ci 354e1051a39Sopenharmony_ci.align 32 355e1051a39Sopenharmony_ci.Lreduce51: 356e1051a39Sopenharmony_ci mov \$0x7ffffffffffff,%rbp 357e1051a39Sopenharmony_ci 358e1051a39Sopenharmony_ci mov %r10,%rdx 
359e1051a39Sopenharmony_ci shr \$51,%r10 360e1051a39Sopenharmony_ci shl \$13,%r11 361e1051a39Sopenharmony_ci and %rbp,%rdx # %rdx = g2 = h2 & mask 362e1051a39Sopenharmony_ci or %r10,%r11 # h2>>51 363e1051a39Sopenharmony_ci add %r11,%r12 364e1051a39Sopenharmony_ci adc \$0,%r13 # h3 += h2>>51 365e1051a39Sopenharmony_ci 366e1051a39Sopenharmony_ci mov %rbx,%rax 367e1051a39Sopenharmony_ci shr \$51,%rbx 368e1051a39Sopenharmony_ci shl \$13,%rcx 369e1051a39Sopenharmony_ci and %rbp,%rax # %rax = g0 = h0 & mask 370e1051a39Sopenharmony_ci or %rbx,%rcx # h0>>51 371e1051a39Sopenharmony_ci add %rcx,%r8 # h1 += h0>>51 372e1051a39Sopenharmony_ci adc \$0,%r9 373e1051a39Sopenharmony_ci 374e1051a39Sopenharmony_ci mov %r12,%rbx 375e1051a39Sopenharmony_ci shr \$51,%r12 376e1051a39Sopenharmony_ci shl \$13,%r13 377e1051a39Sopenharmony_ci and %rbp,%rbx # %rbx = g3 = h3 & mask 378e1051a39Sopenharmony_ci or %r12,%r13 # h3>>51 379e1051a39Sopenharmony_ci add %r13,%r14 # h4 += h3>>51 380e1051a39Sopenharmony_ci adc \$0,%r15 381e1051a39Sopenharmony_ci 382e1051a39Sopenharmony_ci mov %r8,%rcx 383e1051a39Sopenharmony_ci shr \$51,%r8 384e1051a39Sopenharmony_ci shl \$13,%r9 385e1051a39Sopenharmony_ci and %rbp,%rcx # %rcx = g1 = h1 & mask 386e1051a39Sopenharmony_ci or %r8,%r9 387e1051a39Sopenharmony_ci add %r9,%rdx # g2 += h1>>51 388e1051a39Sopenharmony_ci 389e1051a39Sopenharmony_ci mov %r14,%r10 390e1051a39Sopenharmony_ci shr \$51,%r14 391e1051a39Sopenharmony_ci shl \$13,%r15 392e1051a39Sopenharmony_ci and %rbp,%r10 # %r10 = g4 = h0 & mask 393e1051a39Sopenharmony_ci or %r14,%r15 # h0>>51 394e1051a39Sopenharmony_ci 395e1051a39Sopenharmony_ci lea (%r15,%r15,8),%r14 396e1051a39Sopenharmony_ci lea (%r15,%r14,2),%r15 397e1051a39Sopenharmony_ci add %r15,%rax # g0 += (h0>>51)*19 398e1051a39Sopenharmony_ci 399e1051a39Sopenharmony_ci mov %rdx,%r8 400e1051a39Sopenharmony_ci and %rbp,%rdx # g2 &= mask 401e1051a39Sopenharmony_ci shr \$51,%r8 402e1051a39Sopenharmony_ci add %r8,%rbx # g3 += g2>>51 
403e1051a39Sopenharmony_ci 404e1051a39Sopenharmony_ci mov %rax,%r9 405e1051a39Sopenharmony_ci and %rbp,%rax # g0 &= mask 406e1051a39Sopenharmony_ci shr \$51,%r9 407e1051a39Sopenharmony_ci add %r9,%rcx # g1 += g0>>51 408e1051a39Sopenharmony_ci 409e1051a39Sopenharmony_ci mov %rax,8*0(%rdi) # save the result 410e1051a39Sopenharmony_ci mov %rcx,8*1(%rdi) 411e1051a39Sopenharmony_ci mov %rdx,8*2(%rdi) 412e1051a39Sopenharmony_ci mov %rbx,8*3(%rdi) 413e1051a39Sopenharmony_ci mov %r10,8*4(%rdi) 414e1051a39Sopenharmony_ci 415e1051a39Sopenharmony_ci mov 8*5(%rsp),%r15 416e1051a39Sopenharmony_ci.cfi_restore %r15 417e1051a39Sopenharmony_ci mov 8*6(%rsp),%r14 418e1051a39Sopenharmony_ci.cfi_restore %r14 419e1051a39Sopenharmony_ci mov 8*7(%rsp),%r13 420e1051a39Sopenharmony_ci.cfi_restore %r13 421e1051a39Sopenharmony_ci mov 8*8(%rsp),%r12 422e1051a39Sopenharmony_ci.cfi_restore %r12 423e1051a39Sopenharmony_ci mov 8*9(%rsp),%rbx 424e1051a39Sopenharmony_ci.cfi_restore %rbx 425e1051a39Sopenharmony_ci mov 8*10(%rsp),%rbp 426e1051a39Sopenharmony_ci.cfi_restore %rbp 427e1051a39Sopenharmony_ci lea 8*11(%rsp),%rsp 428e1051a39Sopenharmony_ci.cfi_adjust_cfa_offset 88 429e1051a39Sopenharmony_ci.Lfe51_sqr_epilogue: 430e1051a39Sopenharmony_ci ret 431e1051a39Sopenharmony_ci.cfi_endproc 432e1051a39Sopenharmony_ci.size x25519_fe51_sqr,.-x25519_fe51_sqr 433e1051a39Sopenharmony_ci 434e1051a39Sopenharmony_ci.globl x25519_fe51_mul121666 435e1051a39Sopenharmony_ci.type x25519_fe51_mul121666,\@function,2 436e1051a39Sopenharmony_ci.align 32 437e1051a39Sopenharmony_cix25519_fe51_mul121666: 438e1051a39Sopenharmony_ci.cfi_startproc 439e1051a39Sopenharmony_ci push %rbp 440e1051a39Sopenharmony_ci.cfi_push %rbp 441e1051a39Sopenharmony_ci push %rbx 442e1051a39Sopenharmony_ci.cfi_push %rbx 443e1051a39Sopenharmony_ci push %r12 444e1051a39Sopenharmony_ci.cfi_push %r12 445e1051a39Sopenharmony_ci push %r13 446e1051a39Sopenharmony_ci.cfi_push %r13 447e1051a39Sopenharmony_ci push %r14 
448e1051a39Sopenharmony_ci.cfi_push %r14 449e1051a39Sopenharmony_ci push %r15 450e1051a39Sopenharmony_ci.cfi_push %r15 451e1051a39Sopenharmony_ci lea -8*5(%rsp),%rsp 452e1051a39Sopenharmony_ci.cfi_adjust_cfa_offset 40 453e1051a39Sopenharmony_ci.Lfe51_mul121666_body: 454e1051a39Sopenharmony_ci mov \$121666,%eax 455e1051a39Sopenharmony_ci 456e1051a39Sopenharmony_ci mulq 8*0(%rsi) 457e1051a39Sopenharmony_ci mov %rax,%rbx # %rbx:%rcx = h0 458e1051a39Sopenharmony_ci mov \$121666,%eax 459e1051a39Sopenharmony_ci mov %rdx,%rcx 460e1051a39Sopenharmony_ci mulq 8*1(%rsi) 461e1051a39Sopenharmony_ci mov %rax,%r8 # %r8:%r9 = h1 462e1051a39Sopenharmony_ci mov \$121666,%eax 463e1051a39Sopenharmony_ci mov %rdx,%r9 464e1051a39Sopenharmony_ci mulq 8*2(%rsi) 465e1051a39Sopenharmony_ci mov %rax,%r10 # %r10:%r11 = h2 466e1051a39Sopenharmony_ci mov \$121666,%eax 467e1051a39Sopenharmony_ci mov %rdx,%r11 468e1051a39Sopenharmony_ci mulq 8*3(%rsi) 469e1051a39Sopenharmony_ci mov %rax,%r12 # %r12:%r13 = h3 470e1051a39Sopenharmony_ci mov \$121666,%eax # f[0] 471e1051a39Sopenharmony_ci mov %rdx,%r13 472e1051a39Sopenharmony_ci mulq 8*4(%rsi) 473e1051a39Sopenharmony_ci mov %rax,%r14 # %r14:%r15 = h4 474e1051a39Sopenharmony_ci mov %rdx,%r15 475e1051a39Sopenharmony_ci 476e1051a39Sopenharmony_ci jmp .Lreduce51 477e1051a39Sopenharmony_ci.Lfe51_mul121666_epilogue: 478e1051a39Sopenharmony_ci.cfi_endproc 479e1051a39Sopenharmony_ci.size x25519_fe51_mul121666,.-x25519_fe51_mul121666 480e1051a39Sopenharmony_ci___ 481e1051a39Sopenharmony_ci######################################################################## 482e1051a39Sopenharmony_ci# Base 2^64 subroutines modulo 2*(2^255-19) 483e1051a39Sopenharmony_ci# 484e1051a39Sopenharmony_ciif ($addx) { 485e1051a39Sopenharmony_cimy ($acc0,$acc1,$acc2,$acc3,$acc4,$acc5,$acc6,$acc7) = map("%r$_",(8..15)); 486e1051a39Sopenharmony_ci 487e1051a39Sopenharmony_ci$code.=<<___; 488e1051a39Sopenharmony_ci.extern OPENSSL_ia32cap_P 489e1051a39Sopenharmony_ci.globl 
x25519_fe64_eligible 490e1051a39Sopenharmony_ci.type x25519_fe64_eligible,\@abi-omnipotent 491e1051a39Sopenharmony_ci.align 32 492e1051a39Sopenharmony_cix25519_fe64_eligible: 493e1051a39Sopenharmony_ci.cfi_startproc 494e1051a39Sopenharmony_ci mov OPENSSL_ia32cap_P+8(%rip),%ecx 495e1051a39Sopenharmony_ci xor %eax,%eax 496e1051a39Sopenharmony_ci and \$0x80100,%ecx 497e1051a39Sopenharmony_ci cmp \$0x80100,%ecx 498e1051a39Sopenharmony_ci cmove %ecx,%eax 499e1051a39Sopenharmony_ci ret 500e1051a39Sopenharmony_ci.cfi_endproc 501e1051a39Sopenharmony_ci.size x25519_fe64_eligible,.-x25519_fe64_eligible 502e1051a39Sopenharmony_ci 503e1051a39Sopenharmony_ci.globl x25519_fe64_mul 504e1051a39Sopenharmony_ci.type x25519_fe64_mul,\@function,3 505e1051a39Sopenharmony_ci.align 32 506e1051a39Sopenharmony_cix25519_fe64_mul: 507e1051a39Sopenharmony_ci.cfi_startproc 508e1051a39Sopenharmony_ci push %rbp 509e1051a39Sopenharmony_ci.cfi_push %rbp 510e1051a39Sopenharmony_ci push %rbx 511e1051a39Sopenharmony_ci.cfi_push %rbx 512e1051a39Sopenharmony_ci push %r12 513e1051a39Sopenharmony_ci.cfi_push %r12 514e1051a39Sopenharmony_ci push %r13 515e1051a39Sopenharmony_ci.cfi_push %r13 516e1051a39Sopenharmony_ci push %r14 517e1051a39Sopenharmony_ci.cfi_push %r14 518e1051a39Sopenharmony_ci push %r15 519e1051a39Sopenharmony_ci.cfi_push %r15 520e1051a39Sopenharmony_ci push %rdi # offload dst 521e1051a39Sopenharmony_ci.cfi_push %rdi 522e1051a39Sopenharmony_ci lea -8*2(%rsp),%rsp 523e1051a39Sopenharmony_ci.cfi_adjust_cfa_offset 16 524e1051a39Sopenharmony_ci.Lfe64_mul_body: 525e1051a39Sopenharmony_ci 526e1051a39Sopenharmony_ci mov %rdx,%rax 527e1051a39Sopenharmony_ci mov 8*0(%rdx),%rbp # b[0] 528e1051a39Sopenharmony_ci mov 8*0(%rsi),%rdx # a[0] 529e1051a39Sopenharmony_ci mov 8*1(%rax),%rcx # b[1] 530e1051a39Sopenharmony_ci mov 8*2(%rax),$acc6 # b[2] 531e1051a39Sopenharmony_ci mov 8*3(%rax),$acc7 # b[3] 532e1051a39Sopenharmony_ci 533e1051a39Sopenharmony_ci mulx %rbp,$acc0,%rax # a[0]*b[0] 
534e1051a39Sopenharmony_ci xor %edi,%edi # cf=0,of=0 535e1051a39Sopenharmony_ci mulx %rcx,$acc1,%rbx # a[0]*b[1] 536e1051a39Sopenharmony_ci adcx %rax,$acc1 537e1051a39Sopenharmony_ci mulx $acc6,$acc2,%rax # a[0]*b[2] 538e1051a39Sopenharmony_ci adcx %rbx,$acc2 539e1051a39Sopenharmony_ci mulx $acc7,$acc3,$acc4 # a[0]*b[3] 540e1051a39Sopenharmony_ci mov 8*1(%rsi),%rdx # a[1] 541e1051a39Sopenharmony_ci adcx %rax,$acc3 542e1051a39Sopenharmony_ci mov $acc6,(%rsp) # offload b[2] 543e1051a39Sopenharmony_ci adcx %rdi,$acc4 # cf=0 544e1051a39Sopenharmony_ci 545e1051a39Sopenharmony_ci mulx %rbp,%rax,%rbx # a[1]*b[0] 546e1051a39Sopenharmony_ci adox %rax,$acc1 547e1051a39Sopenharmony_ci adcx %rbx,$acc2 548e1051a39Sopenharmony_ci mulx %rcx,%rax,%rbx # a[1]*b[1] 549e1051a39Sopenharmony_ci adox %rax,$acc2 550e1051a39Sopenharmony_ci adcx %rbx,$acc3 551e1051a39Sopenharmony_ci mulx $acc6,%rax,%rbx # a[1]*b[2] 552e1051a39Sopenharmony_ci adox %rax,$acc3 553e1051a39Sopenharmony_ci adcx %rbx,$acc4 554e1051a39Sopenharmony_ci mulx $acc7,%rax,$acc5 # a[1]*b[3] 555e1051a39Sopenharmony_ci mov 8*2(%rsi),%rdx # a[2] 556e1051a39Sopenharmony_ci adox %rax,$acc4 557e1051a39Sopenharmony_ci adcx %rdi,$acc5 # cf=0 558e1051a39Sopenharmony_ci adox %rdi,$acc5 # of=0 559e1051a39Sopenharmony_ci 560e1051a39Sopenharmony_ci mulx %rbp,%rax,%rbx # a[2]*b[0] 561e1051a39Sopenharmony_ci adcx %rax,$acc2 562e1051a39Sopenharmony_ci adox %rbx,$acc3 563e1051a39Sopenharmony_ci mulx %rcx,%rax,%rbx # a[2]*b[1] 564e1051a39Sopenharmony_ci adcx %rax,$acc3 565e1051a39Sopenharmony_ci adox %rbx,$acc4 566e1051a39Sopenharmony_ci mulx $acc6,%rax,%rbx # a[2]*b[2] 567e1051a39Sopenharmony_ci adcx %rax,$acc4 568e1051a39Sopenharmony_ci adox %rbx,$acc5 569e1051a39Sopenharmony_ci mulx $acc7,%rax,$acc6 # a[2]*b[3] 570e1051a39Sopenharmony_ci mov 8*3(%rsi),%rdx # a[3] 571e1051a39Sopenharmony_ci adcx %rax,$acc5 572e1051a39Sopenharmony_ci adox %rdi,$acc6 # of=0 573e1051a39Sopenharmony_ci adcx %rdi,$acc6 # cf=0 574e1051a39Sopenharmony_ci 
575e1051a39Sopenharmony_ci mulx %rbp,%rax,%rbx # a[3]*b[0] 576e1051a39Sopenharmony_ci adox %rax,$acc3 577e1051a39Sopenharmony_ci adcx %rbx,$acc4 578e1051a39Sopenharmony_ci mulx %rcx,%rax,%rbx # a[3]*b[1] 579e1051a39Sopenharmony_ci adox %rax,$acc4 580e1051a39Sopenharmony_ci adcx %rbx,$acc5 581e1051a39Sopenharmony_ci mulx (%rsp),%rax,%rbx # a[3]*b[2] 582e1051a39Sopenharmony_ci adox %rax,$acc5 583e1051a39Sopenharmony_ci adcx %rbx,$acc6 584e1051a39Sopenharmony_ci mulx $acc7,%rax,$acc7 # a[3]*b[3] 585e1051a39Sopenharmony_ci mov \$38,%edx 586e1051a39Sopenharmony_ci adox %rax,$acc6 587e1051a39Sopenharmony_ci adcx %rdi,$acc7 # cf=0 588e1051a39Sopenharmony_ci adox %rdi,$acc7 # of=0 589e1051a39Sopenharmony_ci 590e1051a39Sopenharmony_ci jmp .Lreduce64 591e1051a39Sopenharmony_ci.Lfe64_mul_epilogue: 592e1051a39Sopenharmony_ci.cfi_endproc 593e1051a39Sopenharmony_ci.size x25519_fe64_mul,.-x25519_fe64_mul 594e1051a39Sopenharmony_ci 595e1051a39Sopenharmony_ci.globl x25519_fe64_sqr 596e1051a39Sopenharmony_ci.type x25519_fe64_sqr,\@function,2 597e1051a39Sopenharmony_ci.align 32 598e1051a39Sopenharmony_cix25519_fe64_sqr: 599e1051a39Sopenharmony_ci.cfi_startproc 600e1051a39Sopenharmony_ci push %rbp 601e1051a39Sopenharmony_ci.cfi_push %rbp 602e1051a39Sopenharmony_ci push %rbx 603e1051a39Sopenharmony_ci.cfi_push %rbx 604e1051a39Sopenharmony_ci push %r12 605e1051a39Sopenharmony_ci.cfi_push %r12 606e1051a39Sopenharmony_ci push %r13 607e1051a39Sopenharmony_ci.cfi_push %r13 608e1051a39Sopenharmony_ci push %r14 609e1051a39Sopenharmony_ci.cfi_push %r14 610e1051a39Sopenharmony_ci push %r15 611e1051a39Sopenharmony_ci.cfi_push %r15 612e1051a39Sopenharmony_ci push %rdi # offload dst 613e1051a39Sopenharmony_ci.cfi_push %rdi 614e1051a39Sopenharmony_ci lea -8*2(%rsp),%rsp 615e1051a39Sopenharmony_ci.cfi_adjust_cfa_offset 16 616e1051a39Sopenharmony_ci.Lfe64_sqr_body: 617e1051a39Sopenharmony_ci 618e1051a39Sopenharmony_ci mov 8*0(%rsi),%rdx # a[0] 619e1051a39Sopenharmony_ci mov 8*1(%rsi),%rcx # a[1] 
620e1051a39Sopenharmony_ci mov 8*2(%rsi),%rbp # a[2] 621e1051a39Sopenharmony_ci mov 8*3(%rsi),%rsi # a[3] 622e1051a39Sopenharmony_ci 623e1051a39Sopenharmony_ci ################################################################ 624e1051a39Sopenharmony_ci mulx %rdx,$acc0,$acc7 # a[0]*a[0] 625e1051a39Sopenharmony_ci mulx %rcx,$acc1,%rax # a[0]*a[1] 626e1051a39Sopenharmony_ci xor %edi,%edi # cf=0,of=0 627e1051a39Sopenharmony_ci mulx %rbp,$acc2,%rbx # a[0]*a[2] 628e1051a39Sopenharmony_ci adcx %rax,$acc2 629e1051a39Sopenharmony_ci mulx %rsi,$acc3,$acc4 # a[0]*a[3] 630e1051a39Sopenharmony_ci mov %rcx,%rdx # a[1] 631e1051a39Sopenharmony_ci adcx %rbx,$acc3 632e1051a39Sopenharmony_ci adcx %rdi,$acc4 # cf=0 633e1051a39Sopenharmony_ci 634e1051a39Sopenharmony_ci ################################################################ 635e1051a39Sopenharmony_ci mulx %rbp,%rax,%rbx # a[1]*a[2] 636e1051a39Sopenharmony_ci adox %rax,$acc3 637e1051a39Sopenharmony_ci adcx %rbx,$acc4 638e1051a39Sopenharmony_ci mulx %rsi,%rax,$acc5 # a[1]*a[3] 639e1051a39Sopenharmony_ci mov %rbp,%rdx # a[2] 640e1051a39Sopenharmony_ci adox %rax,$acc4 641e1051a39Sopenharmony_ci adcx %rdi,$acc5 642e1051a39Sopenharmony_ci 643e1051a39Sopenharmony_ci ################################################################ 644e1051a39Sopenharmony_ci mulx %rsi,%rax,$acc6 # a[2]*a[3] 645e1051a39Sopenharmony_ci mov %rcx,%rdx # a[1] 646e1051a39Sopenharmony_ci adox %rax,$acc5 647e1051a39Sopenharmony_ci adcx %rdi,$acc6 # cf=0 648e1051a39Sopenharmony_ci adox %rdi,$acc6 # of=0 649e1051a39Sopenharmony_ci 650e1051a39Sopenharmony_ci adcx $acc1,$acc1 # acc1:6<<1 651e1051a39Sopenharmony_ci adox $acc7,$acc1 652e1051a39Sopenharmony_ci adcx $acc2,$acc2 653e1051a39Sopenharmony_ci mulx %rdx,%rax,%rbx # a[1]*a[1] 654e1051a39Sopenharmony_ci mov %rbp,%rdx # a[2] 655e1051a39Sopenharmony_ci adcx $acc3,$acc3 656e1051a39Sopenharmony_ci adox %rax,$acc2 657e1051a39Sopenharmony_ci adcx $acc4,$acc4 658e1051a39Sopenharmony_ci adox %rbx,$acc3 
659e1051a39Sopenharmony_ci mulx %rdx,%rax,%rbx # a[2]*a[2] 660e1051a39Sopenharmony_ci mov %rsi,%rdx # a[3] 661e1051a39Sopenharmony_ci adcx $acc5,$acc5 662e1051a39Sopenharmony_ci adox %rax,$acc4 663e1051a39Sopenharmony_ci adcx $acc6,$acc6 664e1051a39Sopenharmony_ci adox %rbx,$acc5 665e1051a39Sopenharmony_ci mulx %rdx,%rax,$acc7 # a[3]*a[3] 666e1051a39Sopenharmony_ci mov \$38,%edx 667e1051a39Sopenharmony_ci adox %rax,$acc6 668e1051a39Sopenharmony_ci adcx %rdi,$acc7 # cf=0 669e1051a39Sopenharmony_ci adox %rdi,$acc7 # of=0 670e1051a39Sopenharmony_ci jmp .Lreduce64 671e1051a39Sopenharmony_ci 672e1051a39Sopenharmony_ci.align 32 673e1051a39Sopenharmony_ci.Lreduce64: 674e1051a39Sopenharmony_ci mulx $acc4,%rax,%rbx 675e1051a39Sopenharmony_ci adcx %rax,$acc0 676e1051a39Sopenharmony_ci adox %rbx,$acc1 677e1051a39Sopenharmony_ci mulx $acc5,%rax,%rbx 678e1051a39Sopenharmony_ci adcx %rax,$acc1 679e1051a39Sopenharmony_ci adox %rbx,$acc2 680e1051a39Sopenharmony_ci mulx $acc6,%rax,%rbx 681e1051a39Sopenharmony_ci adcx %rax,$acc2 682e1051a39Sopenharmony_ci adox %rbx,$acc3 683e1051a39Sopenharmony_ci mulx $acc7,%rax,$acc4 684e1051a39Sopenharmony_ci adcx %rax,$acc3 685e1051a39Sopenharmony_ci adox %rdi,$acc4 686e1051a39Sopenharmony_ci adcx %rdi,$acc4 687e1051a39Sopenharmony_ci 688e1051a39Sopenharmony_ci mov 8*2(%rsp),%rdi # restore dst 689e1051a39Sopenharmony_ci imulq %rdx,$acc4 690e1051a39Sopenharmony_ci 691e1051a39Sopenharmony_ci add $acc4,$acc0 692e1051a39Sopenharmony_ci adc \$0,$acc1 693e1051a39Sopenharmony_ci adc \$0,$acc2 694e1051a39Sopenharmony_ci adc \$0,$acc3 695e1051a39Sopenharmony_ci 696e1051a39Sopenharmony_ci sbb %rax,%rax # cf -> mask 697e1051a39Sopenharmony_ci and \$38,%rax 698e1051a39Sopenharmony_ci 699e1051a39Sopenharmony_ci add %rax,$acc0 700e1051a39Sopenharmony_ci mov $acc1,8*1(%rdi) 701e1051a39Sopenharmony_ci mov $acc2,8*2(%rdi) 702e1051a39Sopenharmony_ci mov $acc3,8*3(%rdi) 703e1051a39Sopenharmony_ci mov $acc0,8*0(%rdi) 704e1051a39Sopenharmony_ci 
# ---- tail of x25519_fe64_sqr; the routine begins before this point ----
# Reload the six registers kept in frame slots 3..8 and drop the
# nine-slot frame.
	mov	8*3(%rsp),%r15
.cfi_restore	%r15
	mov	8*4(%rsp),%r14
.cfi_restore	%r14
	mov	8*5(%rsp),%r13
.cfi_restore	%r13
	mov	8*6(%rsp),%r12
.cfi_restore	%r12
	mov	8*7(%rsp),%rbx
.cfi_restore	%rbx
	mov	8*8(%rsp),%rbp
.cfi_restore	%rbp
	lea	8*9(%rsp),%rsp
# NOTE(review): the lea above releases 8*9 = 72 bytes, yet the CFA
# adjustment below is +88; the prologue is outside this chunk - confirm.
.cfi_adjust_cfa_offset	88
.Lfe64_sqr_epilogue:
	ret
.cfi_endproc
.size	x25519_fe64_sqr,.-x25519_fe64_sqr

################################################################
# x25519_fe64_mul121666(dst = %rdi, src = %rsi)
#
# Multiplies the four 64-bit limbs at src by the constant 121666 and
# stores four limbs at dst. The fifth (overflow) word of the product
# is multiplied by 38 and added back into the low limb; a carry out of
# that addition adds another 38 to the low limb.
# NOTE(review): 38 is presumably 2^256 mod (2^255-19); the modulus is
# spelled out only in x25519_fe64_tobytes - confirm.
.globl	x25519_fe64_mul121666
.type	x25519_fe64_mul121666,\@function,2
.align	32
x25519_fe64_mul121666:
.Lfe64_mul121666_body:
.cfi_startproc
	mov	\$121666,%edx		# implicit mulx factor
	mulx	8*0(%rsi),$acc0,%rcx
	mulx	8*1(%rsi),$acc1,%rax
	add	%rcx,$acc1		# chain high halves into next limb
	mulx	8*2(%rsi),$acc2,%rcx
	adc	%rax,$acc2
	mulx	8*3(%rsi),$acc3,%rax
	adc	%rcx,$acc3
	adc	\$0,%rax		# overflow word

	imulq	\$38,%rax,%rax		# fold overflow word

	add	%rax,$acc0
	adc	\$0,$acc1
	adc	\$0,$acc2
	adc	\$0,$acc3

	sbb	%rax,%rax		# cf -> mask
	and	\$38,%rax

	add	%rax,$acc0
	mov	$acc1,8*1(%rdi)
	mov	$acc2,8*2(%rdi)
	mov	$acc3,8*3(%rdi)
	mov	$acc0,8*0(%rdi)

.Lfe64_mul121666_epilogue:
	ret
.cfi_endproc
.size	x25519_fe64_mul121666,.-x25519_fe64_mul121666

################################################################
# x25519_fe64_add(dst = %rdi, a = %rsi, b = %rdx)
#
# Adds the four limbs at b to the four limbs at a. A carry out of the
# top limb is turned into +38 on the low limb and propagated; a carry
# out of that second pass adds 38 to the low limb once more. Stores
# are interleaved with the carry chain, mov leaves the flags alone.
.globl	x25519_fe64_add
.type	x25519_fe64_add,\@function,3
.align	32
x25519_fe64_add:
.Lfe64_add_body:
.cfi_startproc
	mov	8*0(%rsi),$acc0
	mov	8*1(%rsi),$acc1
	mov	8*2(%rsi),$acc2
	mov	8*3(%rsi),$acc3

	add	8*0(%rdx),$acc0
	adc	8*1(%rdx),$acc1
	adc	8*2(%rdx),$acc2
	adc	8*3(%rdx),$acc3

	sbb	%rax,%rax		# cf -> mask
	and	\$38,%rax

	add	%rax,$acc0
	adc	\$0,$acc1
	adc	\$0,$acc2
	mov	$acc1,8*1(%rdi)
	adc	\$0,$acc3
	mov	$acc2,8*2(%rdi)
	sbb	%rax,%rax		# cf -> mask
	mov	$acc3,8*3(%rdi)
	and	\$38,%rax

	add	%rax,$acc0
	mov	$acc0,8*0(%rdi)

.Lfe64_add_epilogue:
	ret
.cfi_endproc
.size	x25519_fe64_add,.-x25519_fe64_add

################################################################
# x25519_fe64_sub(dst = %rdi, a = %rsi, b = %rdx)
#
# Subtracts the four limbs at b from the four limbs at a. A borrow out
# of the top limb is turned into -38 on the low limb and propagated; a
# borrow out of that second pass subtracts 38 from the low limb once
# more. Stores are interleaved with the borrow chain, mov leaves the
# flags alone.
.globl	x25519_fe64_sub
.type	x25519_fe64_sub,\@function,3
.align	32
x25519_fe64_sub:
.Lfe64_sub_body:
.cfi_startproc
	mov	8*0(%rsi),$acc0
	mov	8*1(%rsi),$acc1
	mov	8*2(%rsi),$acc2
	mov	8*3(%rsi),$acc3

	sub	8*0(%rdx),$acc0
	sbb	8*1(%rdx),$acc1
	sbb	8*2(%rdx),$acc2
	sbb	8*3(%rdx),$acc3

	sbb	%rax,%rax		# cf -> mask
	and	\$38,%rax

	sub	%rax,$acc0
	sbb	\$0,$acc1
	sbb	\$0,$acc2
	mov	$acc1,8*1(%rdi)
	sbb	\$0,$acc3
	mov	$acc2,8*2(%rdi)
	sbb	%rax,%rax		# cf -> mask
	mov	$acc3,8*3(%rdi)
	and	\$38,%rax

	sub	%rax,$acc0
	mov	$acc0,8*0(%rdi)

.Lfe64_sub_epilogue:
	ret
.cfi_endproc
.size	x25519_fe64_sub,.-x25519_fe64_sub

################################################################
# x25519_fe64_tobytes(dst = %rdi, src = %rsi)
#
# Reduces the four limbs at src modulo 2^255-19 and stores the four
# resulting limbs at dst.
#
# Pass 1: bit 255 is stripped from the top limb and, if it was set,
# replaced by +19; a further 19 is added unconditionally so that bit
# 255 of the sum tells whether the value reached the modulus.
# Pass 2: bit 255 of the sum is stripped again; if it was clear, the
# unconditional 19 is subtracted back.
.globl	x25519_fe64_tobytes
.type	x25519_fe64_tobytes,\@function,2
.align	32
x25519_fe64_tobytes:
.Lfe64_to_body:
.cfi_startproc
	mov	8*0(%rsi),$acc0
	mov	8*1(%rsi),$acc1
	mov	8*2(%rsi),$acc2
	mov	8*3(%rsi),$acc3

	################################# reduction modulo 2^255-19
	lea	($acc3,$acc3),%rax
	sar	\$63,$acc3		# most significant bit -> mask
	shr	\$1,%rax		# most significant bit cleared
	and	\$19,$acc3
	add	\$19,$acc3		# compare to modulus in the same go

	add	$acc3,$acc0
	adc	\$0,$acc1
	adc	\$0,$acc2
	adc	\$0,%rax

	lea	(%rax,%rax),$acc3
	sar	\$63,%rax		# most significant bit -> mask
	shr	\$1,$acc3		# most significant bit cleared
	not	%rax
	and	\$19,%rax

	sub	%rax,$acc0
	sbb	\$0,$acc1
	sbb	\$0,$acc2
	sbb	\$0,$acc3

	mov	$acc0,8*0(%rdi)
	mov	$acc1,8*1(%rdi)
	mov	$acc2,8*2(%rdi)
	mov	$acc3,8*3(%rdi)

.Lfe64_to_epilogue:
	ret
.cfi_endproc
.size	x25519_fe64_tobytes,.-x25519_fe64_tobytes
___
# NOTE(review): the matching "if" is outside this chunk; this branch is
# presumably taken when $addx is false - confirm.
} else {
$code.=<<___;
.globl	x25519_fe64_eligible
.type	x25519_fe64_eligible,\@abi-omnipotent
.align	32
# Stub: returns 0 in %eax.
# NOTE(review): presumably tells the caller that the fe64 code path is
# unavailable - confirm against the C caller.
x25519_fe64_eligible:
.cfi_startproc
	xor	%eax,%eax
	ret
.cfi_endproc
.size	x25519_fe64_eligible,.-x25519_fe64_eligible

# Every other fe64 entry point is an alias for one trapping stub, so
# the symbols exist but executing any of them raises ud2.
.globl	x25519_fe64_mul
.type	x25519_fe64_mul,\@abi-omnipotent
.globl	x25519_fe64_sqr
.globl	x25519_fe64_mul121666
.globl	x25519_fe64_add
.globl	x25519_fe64_sub
.globl	x25519_fe64_tobytes
x25519_fe64_mul:
x25519_fe64_sqr:
x25519_fe64_mul121666:
x25519_fe64_add:
x25519_fe64_sub:
x25519_fe64_tobytes:
.cfi_startproc
	.byte	0x0f,0x0b	# ud2
	ret
.cfi_endproc
.size	x25519_fe64_mul,.-x25519_fe64_mul
___
}
$code.=<<___;
.asciz	"X25519 primitives for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
___

# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
#		CONTEXT *context,DISPATCHER_CONTEXT *disp)
#
# Win64 only: two language-specific exception handlers sharing one
# tail. Both rebuild the interrupted routine's caller-visible register
# state in the CONTEXT record, then let RtlVirtualUnwind continue and
# return ExceptionContinueSearch.
if ($win64) {
# Registers carrying the four handler arguments listed above.
$rec="%rcx";
$frame="%rdx";
$context="%r8";
$disp="%r9";

$code.=<<___;
.extern	__imp_RtlVirtualUnwind

# short_handler: HandlerData[] holds one RVA (the body label). When Rip
# is below it the stack pointer is taken from context->Rax, otherwise
# from context->Rsp; no other registers are recovered.
# NOTE(review): presumably the generated Win64 prologue keeps the entry
# stack pointer in %rax and spills %rdi/%rsi at 8 and 16 above it -
# confirm against x86_64-xlate.pl.
.type	short_handler,\@abi-omnipotent
.align	16
short_handler:
	push	%rsi
	push	%rdi
	push	%rbx
	push	%rbp
	push	%r12
	push	%r13
	push	%r14
	push	%r15
	pushfq
	sub	\$64,%rsp		# room for the outgoing call arguments

	mov	120($context),%rax	# pull context->Rax
	mov	248($context),%rbx	# pull context->Rip

	mov	8($disp),%rsi		# disp->ImageBase
	mov	56($disp),%r11		# disp->HandlerData

	mov	0(%r11),%r10d		# HandlerData[0]
	lea	(%rsi,%r10),%r10	# end of prologue label
	cmp	%r10,%rbx		# context->Rip<end of prologue label
	jb	.Lcommon_seh_tail

	mov	152($context),%rax	# pull context->Rsp
	jmp	.Lcommon_seh_tail
.size	short_handler,.-short_handler

# full_handler: HandlerData[] holds the body label RVA, the epilogue
# label RVA and a frame size. Between the two labels the frame size is
# added to context->Rsp and %rbp, %rbx, %r12-%r15 are reloaded from
# the six slots just below the resulting address.
.type	full_handler,\@abi-omnipotent
.align	16
full_handler:
	push	%rsi
	push	%rdi
	push	%rbx
	push	%rbp
	push	%r12
	push	%r13
	push	%r14
	push	%r15
	pushfq
	sub	\$64,%rsp		# room for the outgoing call arguments

	mov	120($context),%rax	# pull context->Rax
	mov	248($context),%rbx	# pull context->Rip

	mov	8($disp),%rsi		# disp->ImageBase
	mov	56($disp),%r11		# disp->HandlerData

	mov	0(%r11),%r10d		# HandlerData[0]
	lea	(%rsi,%r10),%r10	# end of prologue label
	cmp	%r10,%rbx		# context->Rip<end of prologue label
	jb	.Lcommon_seh_tail

	mov	152($context),%rax	# pull context->Rsp

	mov	4(%r11),%r10d		# HandlerData[1]
	lea	(%rsi,%r10),%r10	# epilogue label
	cmp	%r10,%rbx		# context->Rip>=epilogue label
	jae	.Lcommon_seh_tail

	mov	8(%r11),%r10d		# HandlerData[2]
	lea	(%rax,%r10),%rax

	mov	-8(%rax),%rbp
	mov	-16(%rax),%rbx
	mov	-24(%rax),%r12
	mov	-32(%rax),%r13
	mov	-40(%rax),%r14
	mov	-48(%rax),%r15
	mov	%rbx,144($context)	# restore context->Rbx
	mov	%rbp,160($context)	# restore context->Rbp
	mov	%r12,216($context)	# restore context->R12
	mov	%r13,224($context)	# restore context->R13
	mov	%r14,232($context)	# restore context->R14
	mov	%r15,240($context)	# restore context->R15

# Shared tail, entered with the recovered stack pointer in %rax.
.Lcommon_seh_tail:
	mov	8(%rax),%rdi
	mov	16(%rax),%rsi
	mov	%rax,152($context)	# restore context->Rsp
	mov	%rsi,168($context)	# restore context->Rsi
	mov	%rdi,176($context)	# restore context->Rdi

	mov	40($disp),%rdi		# disp->ContextRecord
	mov	$context,%rsi		# context
	mov	\$154,%ecx		# sizeof(CONTEXT)
	.long	0xa548f3fc		# cld; rep movsq

	mov	$disp,%rsi
	xor	%rcx,%rcx		# arg1, UNW_FLAG_NHANDLER
	mov	8(%rsi),%rdx		# arg2, disp->ImageBase
	mov	0(%rsi),%r8		# arg3, disp->ControlPc
	mov	16(%rsi),%r9		# arg4, disp->FunctionEntry
	mov	40(%rsi),%r10		# disp->ContextRecord
	lea	56(%rsi),%r11		# &disp->HandlerData
	lea	24(%rsi),%r12		# &disp->EstablisherFrame
	mov	%r10,32(%rsp)		# arg5
	mov	%r11,40(%rsp)		# arg6
	mov	%r12,48(%rsp)		# arg7
	mov	%rcx,56(%rsp)		# arg8, (NULL)
	call	*__imp_RtlVirtualUnwind(%rip)

	mov	\$1,%eax		# ExceptionContinueSearch
	add	\$64,%rsp
	popfq
	pop	%r15
	pop	%r14
	pop	%r13
	pop	%r12
	pop	%rbp
	pop	%rbx
	pop	%rdi
	pop	%rsi
	ret
.size	full_handler,.-full_handler

# Function table: one begin/end/info RVA triple per routine.
.section	.pdata
.align	4
	.rva	.LSEH_begin_x25519_fe51_mul
	.rva	.LSEH_end_x25519_fe51_mul
# Third field of the x25519_fe51_mul row; its begin/end RVAs precede it.
	.rva	.LSEH_info_x25519_fe51_mul

	.rva	.LSEH_begin_x25519_fe51_sqr
	.rva	.LSEH_end_x25519_fe51_sqr
	.rva	.LSEH_info_x25519_fe51_sqr

	.rva	.LSEH_begin_x25519_fe51_mul121666
	.rva	.LSEH_end_x25519_fe51_mul121666
	.rva	.LSEH_info_x25519_fe51_mul121666
___
# Rows for the fe64 routines are emitted only when $addx is set.
$code.=<<___	if ($addx);
	.rva	.LSEH_begin_x25519_fe64_mul
	.rva	.LSEH_end_x25519_fe64_mul
	.rva	.LSEH_info_x25519_fe64_mul

	.rva	.LSEH_begin_x25519_fe64_sqr
	.rva	.LSEH_end_x25519_fe64_sqr
	.rva	.LSEH_info_x25519_fe64_sqr

	.rva	.LSEH_begin_x25519_fe64_mul121666
	.rva	.LSEH_end_x25519_fe64_mul121666
	.rva	.LSEH_info_x25519_fe64_mul121666

	.rva	.LSEH_begin_x25519_fe64_add
	.rva	.LSEH_end_x25519_fe64_add
	.rva	.LSEH_info_x25519_fe64_add

	.rva	.LSEH_begin_x25519_fe64_sub
	.rva	.LSEH_end_x25519_fe64_sub
	.rva	.LSEH_info_x25519_fe64_sub

	.rva	.LSEH_begin_x25519_fe64_tobytes
	.rva	.LSEH_end_x25519_fe64_tobytes
	.rva	.LSEH_info_x25519_fe64_tobytes
___
$code.=<<___;
# Unwind records. Each one is a four-byte header, the handler RVA and
# that handler's HandlerData[]: body label, epilogue label and, for
# full_handler, the frame size it adds to the stack pointer.
# NOTE(review): header byte 9 is presumably version 1 with the
# exception-handler flag set, and the trailing 0 after the frame size
# presumably padding - confirm against the Win64 unwind format.
.section	.xdata
.align	8
.LSEH_info_x25519_fe51_mul:
	.byte	9,0,0,0
	.rva	full_handler
	.rva	.Lfe51_mul_body,.Lfe51_mul_epilogue	# HandlerData[]
	.long	88,0
.LSEH_info_x25519_fe51_sqr:
	.byte	9,0,0,0
	.rva	full_handler
	.rva	.Lfe51_sqr_body,.Lfe51_sqr_epilogue	# HandlerData[]
	.long	88,0
.LSEH_info_x25519_fe51_mul121666:
	.byte	9,0,0,0
	.rva	full_handler
	.rva	.Lfe51_mul121666_body,.Lfe51_mul121666_epilogue	# HandlerData[]
	.long	88,0
___
# Records for the fe64 routines, again only when $addx is set. The
# routines served by short_handler carry no frame size.
$code.=<<___	if ($addx);
.LSEH_info_x25519_fe64_mul:
	.byte	9,0,0,0
	.rva	full_handler
	.rva	.Lfe64_mul_body,.Lfe64_mul_epilogue	# HandlerData[]
	.long	72,0
.LSEH_info_x25519_fe64_sqr:
	.byte	9,0,0,0
	.rva	full_handler
	.rva	.Lfe64_sqr_body,.Lfe64_sqr_epilogue	# HandlerData[]
	.long	72,0
.LSEH_info_x25519_fe64_mul121666:
	.byte	9,0,0,0
	.rva	short_handler
	.rva	.Lfe64_mul121666_body,.Lfe64_mul121666_epilogue	# HandlerData[]
.LSEH_info_x25519_fe64_add:
	.byte	9,0,0,0
	.rva	short_handler
	.rva	.Lfe64_add_body,.Lfe64_add_epilogue	# HandlerData[]
.LSEH_info_x25519_fe64_sub:
	.byte	9,0,0,0
	.rva	short_handler
	.rva	.Lfe64_sub_body,.Lfe64_sub_epilogue	# HandlerData[]
.LSEH_info_x25519_fe64_tobytes:
	.byte	9,0,0,0
	.rva	short_handler
	.rva	.Lfe64_to_body,.Lfe64_to_epilogue	# HandlerData[]
___
}

# Replace every back-tick quoted expression in the generated text with
# the result of evaluating it as Perl, then emit the text. The close is
# checked so that a failed write to STDOUT is reported.
$code =~ s/\`([^\`]*)\`/eval $1/gem;
print $code;
close STDOUT or die "error closing STDOUT: $!";