#! /usr/bin/env perl
# Copyright 2011-2023 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# September 2011
#
# Assembler helpers for Padlock engine. See also e_padlock-x86.pl for
# details.

# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# Win64 argument registers are selected for nasm/masm/mingw64 flavours or
# when the output file name ends in .asm.
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

# Locate the x86_64-xlate.pl translator: first next to this script, then in
# ../../crypto/perlasm/ relative to it.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../crypto/perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# Everything printed to STDOUT from here on is piped through the translator.
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""
    or die "can't call $xlate: $!";
*STDOUT=*OUT;

# $code accumulates the whole assembly module; it is printed at the very end.
$code=".text\n";

# Modes listed here get the additional "prefetch crosses page" handling
# emitted by generate_mode; the value is the byte count used in that check.
%PADLOCK_PREFETCH=(ecb=>128, cbc=>64, ctr32=>32);	# prefetch errata
$PADLOCK_CHUNK=512;	# Must be a power of 2 between 32 and 2^20

# Register names used by the generated padlock_*_encrypt routines
# (out, inp, ctx, len follow the Unix argument order listed below).
$ctx="%rdx";
$out="%rdi";
$inp="%rsi";
$len="%rcx";
$chunk="%rbx";

($arg1,$arg2,$arg3,$arg4)=$win64?("%rcx","%rdx","%r8", "%r9") : # Win64 order
                                 ("%rdi","%rsi","%rdx","%rcx"); # Unix order

# Fixed helper routines emitted below:
#
#   padlock_capability     - runs cpuid leaf 0 and accepts the vendor strings
#                            "CentaurHauls" or "  Shanghai  " (compared as
#                            little-endian dwords in ebx/edx/ecx).  If leaf
#                            0xC0000000 reports at least 0xC0000001, returns
#                            edx of leaf 0xC0000001 with bit 4 forced on;
#                            returns 0 in every other case.
#   padlock_key_bswap      - reads the 32-bit value at offset 240 of its
#                            argument, and byte-swaps (value+1)*4 consecutive
#                            32-bit words in place starting at the argument.
#   padlock_verify_context - pushes the flags and calls _padlock_verify_ctx
#                            with the argument as the context pointer.
#   _padlock_verify_ctx    - if bit 30 of the flags word pushed by the caller
#                            is set and the context differs from the one
#                            remembered in .Lpadlock_saved_context, executes
#                            pushf/popf; always remembers the new context.
#   padlock_reload_key     - executes pushf/popf.
#   padlock_aes_block      - one "rep xcryptecb" iteration (count = 1) with
#                            the key at ctx+32 and the control word at ctx+16.
#   padlock_xstore         - executes xstore with edx set from the second
#                            argument.
#   padlock_sha*_oneshot,
#   padlock_sha*_blocks    - copy the hash state to a stack buffer, run the
#                            corresponding "rep xsha*" instruction with the
#                            third argument as count, and copy the state
#                            back.  The sha1/sha256 _oneshot variants load
#                            rax with 0, their _blocks variants load it
#                            with -1.
$code.=<<___;
.globl	padlock_capability
.type	padlock_capability,\@abi-omnipotent
.align	16
padlock_capability:
	mov	%rbx,%r8
	xor	%eax,%eax
	cpuid
	xor	%eax,%eax
	cmp	\$`"0x".unpack("H*",'tneC')`,%ebx
	jne	.Lzhaoxin
	cmp	\$`"0x".unpack("H*",'Hrua')`,%edx
	jne	.Lnoluck
	cmp	\$`"0x".unpack("H*",'slua')`,%ecx
	jne	.Lnoluck
	jmp	.LzhaoxinEnd
.Lzhaoxin:
	cmp	\$`"0x".unpack("H*",'hS  ')`,%ebx
	jne	.Lnoluck
	cmp	\$`"0x".unpack("H*",'hgna')`,%edx
	jne	.Lnoluck
	cmp	\$`"0x".unpack("H*",'  ia')`,%ecx
	jne	.Lnoluck
.LzhaoxinEnd:
	mov	\$0xC0000000,%eax
	cpuid
	mov	%eax,%edx
	xor	%eax,%eax
	cmp	\$0xC0000001,%edx
	jb	.Lnoluck
	mov	\$0xC0000001,%eax
	cpuid
	mov	%edx,%eax
	and	\$0xffffffef,%eax
	or	\$0x10,%eax		# set Nano bit#4
.Lnoluck:
	mov	%r8,%rbx
	ret
.size	padlock_capability,.-padlock_capability

.globl	padlock_key_bswap
.type	padlock_key_bswap,\@abi-omnipotent,0
.align	16
padlock_key_bswap:
	mov	240($arg1),%edx
	inc	%edx
	shl	\$2,%edx
.Lbswap_loop:
	mov	($arg1),%eax
	bswap	%eax
	mov	%eax,($arg1)
	lea	4($arg1),$arg1
	sub	\$1,%edx
	jnz	.Lbswap_loop
	ret
.size	padlock_key_bswap,.-padlock_key_bswap

.globl	padlock_verify_context
.type	padlock_verify_context,\@abi-omnipotent
.align	16
padlock_verify_context:
	mov	$arg1,$ctx
	pushf
	lea	.Lpadlock_saved_context(%rip),%rax
	call	_padlock_verify_ctx
	lea	8(%rsp),%rsp
	ret
.size	padlock_verify_context,.-padlock_verify_context

.type	_padlock_verify_ctx,\@abi-omnipotent
.align	16
_padlock_verify_ctx:
	mov	8(%rsp),%r8
	bt	\$30,%r8
	jnc	.Lverified
	cmp	(%rax),$ctx
	je	.Lverified
	pushf
	popf
.Lverified:
	mov	$ctx,(%rax)
	ret
.size	_padlock_verify_ctx,.-_padlock_verify_ctx

.globl	padlock_reload_key
.type	padlock_reload_key,\@abi-omnipotent
.align	16
padlock_reload_key:
	pushf
	popf
	ret
.size	padlock_reload_key,.-padlock_reload_key

.globl	padlock_aes_block
.type	padlock_aes_block,\@function,3
.align	16
padlock_aes_block:
	mov	%rbx,%r8
	mov	\$1,$len
	lea	32($ctx),%rbx		# key
	lea	16($ctx),$ctx		# control word
	.byte	0xf3,0x0f,0xa7,0xc8	# rep xcryptecb
	mov	%r8,%rbx
	ret
.size	padlock_aes_block,.-padlock_aes_block

.globl	padlock_xstore
.type	padlock_xstore,\@function,2
.align	16
padlock_xstore:
	mov	%esi,%edx
	.byte	0x0f,0xa7,0xc0		# xstore
	ret
.size	padlock_xstore,.-padlock_xstore

.globl	padlock_sha1_oneshot
.type	padlock_sha1_oneshot,\@function,3
.align	16
padlock_sha1_oneshot:
	mov	%rdx,%rcx
	mov	%rdi,%rdx		# put aside %rdi
	movups	(%rdi),%xmm0		# copy-in context
	sub	\$128+8,%rsp
	mov	16(%rdi),%eax
	movaps	%xmm0,(%rsp)
	mov	%rsp,%rdi
	mov	%eax,16(%rsp)
	xor	%rax,%rax
	.byte	0xf3,0x0f,0xa6,0xc8	# rep xsha1
	movaps	(%rsp),%xmm0
	mov	16(%rsp),%eax
	add	\$128+8,%rsp
	movups	%xmm0,(%rdx)		# copy-out context
	mov	%eax,16(%rdx)
	ret
.size	padlock_sha1_oneshot,.-padlock_sha1_oneshot

.globl	padlock_sha1_blocks
.type	padlock_sha1_blocks,\@function,3
.align	16
padlock_sha1_blocks:
	mov	%rdx,%rcx
	mov	%rdi,%rdx		# put aside %rdi
	movups	(%rdi),%xmm0		# copy-in context
	sub	\$128+8,%rsp
	mov	16(%rdi),%eax
	movaps	%xmm0,(%rsp)
	mov	%rsp,%rdi
	mov	%eax,16(%rsp)
	mov	\$-1,%rax
	.byte	0xf3,0x0f,0xa6,0xc8	# rep xsha1
	movaps	(%rsp),%xmm0
	mov	16(%rsp),%eax
	add	\$128+8,%rsp
	movups	%xmm0,(%rdx)		# copy-out context
	mov	%eax,16(%rdx)
	ret
.size	padlock_sha1_blocks,.-padlock_sha1_blocks

.globl	padlock_sha256_oneshot
.type	padlock_sha256_oneshot,\@function,3
.align	16
padlock_sha256_oneshot:
	mov	%rdx,%rcx
	mov	%rdi,%rdx		# put aside %rdi
	movups	(%rdi),%xmm0		# copy-in context
	sub	\$128+8,%rsp
	movups	16(%rdi),%xmm1
	movaps	%xmm0,(%rsp)
	mov	%rsp,%rdi
	movaps	%xmm1,16(%rsp)
	xor	%rax,%rax
	.byte	0xf3,0x0f,0xa6,0xd0	# rep xsha256
	movaps	(%rsp),%xmm0
	movaps	16(%rsp),%xmm1
	add	\$128+8,%rsp
	movups	%xmm0,(%rdx)		# copy-out context
	movups	%xmm1,16(%rdx)
	ret
.size	padlock_sha256_oneshot,.-padlock_sha256_oneshot

.globl	padlock_sha256_blocks
.type	padlock_sha256_blocks,\@function,3
.align	16
padlock_sha256_blocks:
	mov	%rdx,%rcx
	mov	%rdi,%rdx		# put aside %rdi
	movups	(%rdi),%xmm0		# copy-in context
	sub	\$128+8,%rsp
	movups	16(%rdi),%xmm1
	movaps	%xmm0,(%rsp)
	mov	%rsp,%rdi
	movaps	%xmm1,16(%rsp)
	mov	\$-1,%rax
	.byte	0xf3,0x0f,0xa6,0xd0	# rep xsha256
	movaps	(%rsp),%xmm0
	movaps	16(%rsp),%xmm1
	add	\$128+8,%rsp
	movups	%xmm0,(%rdx)		# copy-out context
	movups	%xmm1,16(%rdx)
	ret
.size	padlock_sha256_blocks,.-padlock_sha256_blocks

.globl	padlock_sha512_blocks
.type	padlock_sha512_blocks,\@function,3
.align	16
padlock_sha512_blocks:
	mov	%rdx,%rcx
	mov	%rdi,%rdx		# put aside %rdi
	movups	(%rdi),%xmm0		# copy-in context
	sub	\$128+8,%rsp
	movups	16(%rdi),%xmm1
	movups	32(%rdi),%xmm2
	movups	48(%rdi),%xmm3
	movaps	%xmm0,(%rsp)
	mov	%rsp,%rdi
	movaps	%xmm1,16(%rsp)
	movaps	%xmm2,32(%rsp)
	movaps	%xmm3,48(%rsp)
	.byte	0xf3,0x0f,0xa6,0xe0	# rep xsha512
	movaps	(%rsp),%xmm0
	movaps	16(%rsp),%xmm1
	movaps	32(%rsp),%xmm2
	movaps	48(%rsp),%xmm3
	add	\$128+8,%rsp
	movups	%xmm0,(%rdx)		# copy-out context
	movups	%xmm1,16(%rdx)
	movups	%xmm2,32(%rdx)
	movups	%xmm3,48(%rdx)
	ret
.size	padlock_sha512_blocks,.-padlock_sha512_blocks
___

# generate_mode($mode, $opcode)
#
# Appends the assembly for padlock_${mode}_encrypt to $code.
#
#   $mode   - mode name used in the symbol and in every local label; it also
#             selects the optional pieces: "ctr32" adds 32-bit counter
#             handling, modes other than ecb/ctr copy the IV back after each
#             xcrypt, and modes present in %PADLOCK_PREFETCH get the
#             page-boundary (prefetch errata) handling.
#   $opcode - last byte of the "rep xcrypt*" instruction, which is emitted
#             with .byte as 0xf3,0x0f,0xa7,$opcode.
#
# The generated routine returns 0 (via the _abort label) when ctx or len is
# not a multiple of 16, and 1 otherwise.  When the control word's align bit
# is clear and inp or out is not 16-byte aligned, data is bounced through a
# temporary buffer carved from the stack, which is zeroed before returning.
sub generate_mode {
my ($mode,$opcode) = @_;
# int padlock_$mode_encrypt(void *out, const void *inp,
#		struct padlock_cipher_data *ctx, size_t len);
$code.=<<___;
.globl	padlock_${mode}_encrypt
.type	padlock_${mode}_encrypt,\@function,4
.align	16
padlock_${mode}_encrypt:
	push	%rbp
	push	%rbx

	xor	%eax,%eax
	test	\$15,$ctx
	jnz	.L${mode}_abort
	test	\$15,$len
	jnz	.L${mode}_abort
	lea	.Lpadlock_saved_context(%rip),%rax
	pushf
	cld
	call	_padlock_verify_ctx
	lea	16($ctx),$ctx		# control word
	xor	%eax,%eax
	xor	%ebx,%ebx
	testl	\$`1<<5`,($ctx)		# align bit in control word
	jnz	.L${mode}_aligned
	test	\$0x0f,$out
	setz	%al			# !out_misaligned
	test	\$0x0f,$inp
	setz	%bl			# !inp_misaligned
	test	%ebx,%eax
	jnz	.L${mode}_aligned
	neg	%rax
	mov	\$$PADLOCK_CHUNK,$chunk
	not	%rax			# out_misaligned?-1:0
	lea	(%rsp),%rbp
	cmp	$chunk,$len
	cmovc	$len,$chunk		# chunk=len>PADLOCK_CHUNK?PADLOCK_CHUNK:len
	and	$chunk,%rax		# out_misaligned?chunk:0
	mov	$len,$chunk
	neg	%rax
	and	\$$PADLOCK_CHUNK-1,$chunk	# chunk%=PADLOCK_CHUNK
	lea	(%rax,%rbp),%rsp
	mov	\$$PADLOCK_CHUNK,%rax
	cmovz	%rax,$chunk			# chunk=chunk?:PADLOCK_CHUNK
___
$code.=<<___				if ($mode eq "ctr32");
.L${mode}_reenter:
	mov	-4($ctx),%eax		# pull 32-bit counter
	bswap	%eax
	neg	%eax
	and	\$`$PADLOCK_CHUNK/16-1`,%eax
	mov	\$$PADLOCK_CHUNK,$chunk
	shl	\$4,%eax
	cmovz	$chunk,%rax
	cmp	%rax,$len
	cmova	%rax,$chunk		# don't let counter cross PADLOCK_CHUNK
	cmovbe	$len,$chunk
___
$code.=<<___				if ($PADLOCK_PREFETCH{$mode});
	cmp	$chunk,$len
	ja	.L${mode}_loop
	mov	$inp,%rax		# check if prefetch crosses page
	cmp	%rsp,%rbp
	cmove	$out,%rax
	add	$len,%rax
	neg	%rax
	and	\$0xfff,%rax		# distance to page boundary
	cmp	\$$PADLOCK_PREFETCH{$mode},%rax
	mov	\$-$PADLOCK_PREFETCH{$mode},%rax
	cmovae	$chunk,%rax		# mask=distance<prefetch?-prefetch:-1
	and	%rax,$chunk
	jz	.L${mode}_unaligned_tail
___
$code.=<<___;
	jmp	.L${mode}_loop
.align	16
.L${mode}_loop:
	cmp	$len,$chunk		# ctr32 artefact
	cmova	$len,$chunk		# ctr32 artefact
	mov	$out,%r8		# save parameters
	mov	$inp,%r9
	mov	$len,%r10
	mov	$chunk,$len
	mov	$chunk,%r11
	test	\$0x0f,$out		# out_misaligned
	cmovnz	%rsp,$out
	test	\$0x0f,$inp		# inp_misaligned
	jz	.L${mode}_inp_aligned
	shr	\$3,$len
	.byte	0xf3,0x48,0xa5		# rep movsq
	sub	$chunk,$out
	mov	$chunk,$len
	mov	$out,$inp
.L${mode}_inp_aligned:
	lea	-16($ctx),%rax		# ivp
	lea	16($ctx),%rbx		# key
	shr	\$4,$len
	.byte	0xf3,0x0f,0xa7,$opcode	# rep xcrypt*
___
$code.=<<___				if ($mode !~ /ecb|ctr/);
	movdqa	(%rax),%xmm0
	movdqa	%xmm0,-16($ctx)		# copy [or refresh] iv
___
$code.=<<___				if ($mode eq "ctr32");
	mov	-4($ctx),%eax		# pull 32-bit counter
	test	\$0xffff0000,%eax
	jnz	.L${mode}_no_carry
	bswap	%eax
	add	\$0x10000,%eax
	bswap	%eax
	mov	%eax,-4($ctx)
.L${mode}_no_carry:
___
$code.=<<___;
	mov	%r8,$out		# restore parameters
	mov	%r11,$chunk
	test	\$0x0f,$out
	jz	.L${mode}_out_aligned
	mov	$chunk,$len
	lea	(%rsp),$inp
	shr	\$3,$len
	.byte	0xf3,0x48,0xa5		# rep movsq
	sub	$chunk,$out
.L${mode}_out_aligned:
	mov	%r9,$inp
	mov	%r10,$len
	add	$chunk,$out
	add	$chunk,$inp
	sub	$chunk,$len
	mov	\$$PADLOCK_CHUNK,$chunk
___
					if (!$PADLOCK_PREFETCH{$mode}) {
$code.=<<___;
	jnz	.L${mode}_loop
___
					} else {
$code.=<<___;
	jz	.L${mode}_break
	cmp	$chunk,$len
	jae	.L${mode}_loop
___
$code.=<<___				if ($mode eq "ctr32");
	mov	$len,$chunk
	mov	$inp,%rax		# check if prefetch crosses page
	cmp	%rsp,%rbp
	cmove	$out,%rax
	add	$len,%rax
	neg	%rax
	and	\$0xfff,%rax		# distance to page boundary
	cmp	\$$PADLOCK_PREFETCH{$mode},%rax
	mov	\$-$PADLOCK_PREFETCH{$mode},%rax
	cmovae	$chunk,%rax
	and	%rax,$chunk
	jnz	.L${mode}_loop
___
$code.=<<___;
.L${mode}_unaligned_tail:
	xor	%eax,%eax
	cmp	%rsp,%rbp
	cmove	$len,%rax
	mov	$out,%r8		# save parameters
	mov	$len,$chunk
	sub	%rax,%rsp		# alloca
	shr	\$3,$len
	lea	(%rsp),$out
	.byte	0xf3,0x48,0xa5		# rep movsq
	mov	%rsp,$inp
	mov	%r8, $out		# restore parameters
	mov	$chunk,$len
	jmp	.L${mode}_loop
.align	16
.L${mode}_break:
___
					}
$code.=<<___;
	cmp	%rbp,%rsp
	je	.L${mode}_done

	pxor	%xmm0,%xmm0
	lea	(%rsp),%rax
.L${mode}_bzero:
	movaps	%xmm0,(%rax)
	lea	16(%rax),%rax
	cmp	%rax,%rbp
	ja	.L${mode}_bzero

.L${mode}_done:
	lea	(%rbp),%rsp
	jmp	.L${mode}_exit

.align	16
.L${mode}_aligned:
___
$code.=<<___				if ($mode eq "ctr32");
	mov	-4($ctx),%eax		# pull 32-bit counter
	bswap	%eax
	neg	%eax
	and	\$0xffff,%eax
	mov	\$`16*0x10000`,$chunk
	shl	\$4,%eax
	cmovz	$chunk,%rax
	cmp	%rax,$len
	cmova	%rax,$chunk		# don't let counter cross 2^16
	cmovbe	$len,$chunk
	jbe	.L${mode}_aligned_skip

.L${mode}_aligned_loop:
	mov	$len,%r10		# save parameters
	mov	$chunk,$len
	mov	$chunk,%r11

	lea	-16($ctx),%rax		# ivp
	lea	16($ctx),%rbx		# key
	shr	\$4,$len		# len/=AES_BLOCK_SIZE
	.byte	0xf3,0x0f,0xa7,$opcode	# rep xcrypt*

	mov	-4($ctx),%eax		# pull 32-bit counter
	bswap	%eax
	add	\$0x10000,%eax
	bswap	%eax
	mov	%eax,-4($ctx)

	mov	%r10,$len		# restore parameters
	sub	%r11,$len
	mov	\$`16*0x10000`,$chunk
	jz	.L${mode}_exit
	cmp	$chunk,$len
	jae	.L${mode}_aligned_loop

.L${mode}_aligned_skip:
___
$code.=<<___				if ($PADLOCK_PREFETCH{$mode});
	lea	($inp,$len),%rbp
	neg	%rbp
	and	\$0xfff,%rbp		# distance to page boundary
	xor	%eax,%eax
	cmp	\$$PADLOCK_PREFETCH{$mode},%rbp
	mov	\$$PADLOCK_PREFETCH{$mode}-1,%rbp
	cmovae	%rax,%rbp
	and	$len,%rbp		# remainder
	sub	%rbp,$len
	jz	.L${mode}_aligned_tail
___
$code.=<<___;
	lea	-16($ctx),%rax		# ivp
	lea	16($ctx),%rbx		# key
	shr	\$4,$len		# len/=AES_BLOCK_SIZE
	.byte	0xf3,0x0f,0xa7,$opcode	# rep xcrypt*
___
$code.=<<___				if ($mode !~ /ecb|ctr/);
	movdqa	(%rax),%xmm0
	movdqa	%xmm0,-16($ctx)		# copy [or refresh] iv
___
$code.=<<___				if ($PADLOCK_PREFETCH{$mode});
	test	%rbp,%rbp		# check remainder
	jz	.L${mode}_exit

.L${mode}_aligned_tail:
	mov	$out,%r8
	mov	%rbp,$chunk
	mov	%rbp,$len
	lea	(%rsp),%rbp
	sub	$len,%rsp
	shr	\$3,$len
	lea	(%rsp),$out
	.byte	0xf3,0x48,0xa5		# rep movsq
	lea	(%r8),$out
	lea	(%rsp),$inp
	mov	$chunk,$len
	jmp	.L${mode}_loop
___
$code.=<<___;
.L${mode}_exit:
	mov	\$1,%eax
	lea	8(%rsp),%rsp
.L${mode}_abort:
	pop	%rbx
	pop	%rbp
	ret
.size	padlock_${mode}_encrypt,.-padlock_${mode}_encrypt
___
}

# Emit one padlock_<mode>_encrypt routine per cipher mode; the second
# argument is the final opcode byte of the matching "rep xcrypt*" instruction.
generate_mode("ecb",0xc8);
generate_mode("cbc",0xd0);
generate_mode("cfb",0xe0);
generate_mode("ofb",0xe8);
generate_mode("ctr32",0xd8);	# all 64-bit CPUs have working CTR...

# Module identification string, followed by the data slot in which
# _padlock_verify_ctx remembers the most recently used context pointer.
$code.=<<___;
.asciz	"VIA Padlock x86_64 module, CRYPTOGAMS by <appro\@openssl.org>"
.align	16
.data
.align	8
.Lpadlock_saved_context:
	.quad	0
___
# Replace every backtick-quoted expression in the generated text with the
# result of evaluating it as Perl.
$code =~ s/\`([^\`]*)\`/eval($1)/gem;

print $code;

# STDOUT is a pipe to the translator, so a failed close means the translator
# failed or the output could not be written; do not ignore it.
close STDOUT or die "error closing STDOUT: $!";