#!/usr/bin/env perl
# Copyright 2017-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# Keccak-1600 for s390x.
#
# June 2017.
#
# The code below is a [lane complementing] KECCAK_2X implementation (see
# sha/keccak1600.c) with C[5] and D[5] held in the register bank. Though
# instead of actually unrolling the loop pair-wise I simply flip the
# pointers to T[][] and A[][] at the end of each round. Since the number
# of rounds is even, the last round writes to A[][] and everything works
# out. In a nutshell it's a transliteration of the x86_64 module, because
# both architectures have similar capabilities/limitations. Performance
# measurement is problematic as I don't have access to an idle system.
# It looks like z13 processes one byte [out of long message] in ~14
# cycles. At least the result is consistent with an estimate based on
# the number of instructions and the assumed instruction issue rate.
# It's ~2.5x faster than compiler-generated code.

# Command line: [flavour] [output-file]
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# A flavour containing "31" or "32" selects 4-byte pointers and the
# 32-bit instruction forms (empty suffix); anything else, including no
# flavour at all, selects 8-byte pointers and the "g" instruction suffix.
if (defined $flavour && $flavour =~ /3[12]/) {
	$SIZE_T=4;
	$g="";
} else {
	$SIZE_T=8;
	$g="g";
}

# Redirect the generated assembly to the requested file.  Three-argument
# open keeps characters in the file name from being read as part of the
# mode, and a failure is reported here instead of surfacing (or not) at
# the final close.
if ($output) {
	open STDOUT,'>',$output or die "can't open $output: $!";
}

# Byte offsets of the 5x5 state lanes: $A[$y][$x] == 8*(5*$y+$x).
my @A = map([ 8*$_, 8*($_+1), 8*($_+2), 8*($_+3), 8*($_+4) ], (0,5,10,15,20));

# Register allocation.
my @C = map("%r$_",(0,1,5..7));
my @D = map("%r$_",(8..12));
my @T = map("%r$_",(13..14));
my ($src,$dst,$iotas) = map("%r$_",(2..4));
my $sp = "%r15";

# $stdframe is 16 pointer-sized slots plus 4*8 bytes; $frame adds room
# for a 25-lane (25*8 byte) temporary copy of the state on top of that.
$stdframe=16*$SIZE_T+4*8;
$frame=$stdframe+25*8;

# Per-lane rotation amounts, indexed the same way as @A.
my @rhotates = ([  0,  1, 62, 28, 27 ],
                [ 36, 44,  6, 55, 20 ],
                [  3, 10, 43, 25, 39 ],
                [ 41, 45, 15, 21,  8 ],
                [ 18,  2, 61, 56, 14 ]);
########################################################################
# __KeccakF1600: internal permutation routine.
#
# Reads the state through $src and writes each round's result through
# $dst; the two pointers are exchanged (triple xor) at the end of every
# round.  %r14 is saved on entry because it doubles as scratch @T[1].
# The loop stops when the low 8 bits of $iotas become zero: the iotas
# table emitted at the bottom of this file sits 64 bytes into a
# 256-byte aligned area and is 24*8 bytes long, so that happens right
# after the last round constant has been consumed.
#
# The instruction stream below is order-sensitive (register reuse and
# the Perl-side renaming of @C/@D between heredocs); do not reorder.
{ my @C = @C;	# copy, because we mess them up...
  my @D = @D;

$code.=<<___;
.text

.type	__KeccakF1600,\@function
.align	32
__KeccakF1600:
	st${g}	%r14,$SIZE_T*14($sp)
	lg	@C[0],$A[4][0]($src)
	lg	@C[1],$A[4][1]($src)
	lg	@C[2],$A[4][2]($src)
	lg	@C[3],$A[4][3]($src)
	lg	@C[4],$A[4][4]($src)
	larl	$iotas,iotas
	j	.Loop

.align	16
.Loop:
	lg	@D[0],$A[0][0]($src)
	lg	@D[1],$A[1][1]($src)
	lg	@D[2],$A[2][2]($src)
	lg	@D[3],$A[3][3]($src)

	xgr	@C[0],@D[0]
	xg	@C[1],$A[0][1]($src)
	xg	@C[2],$A[0][2]($src)
	xg	@C[3],$A[0][3]($src)
	lgr	@D[4],@C[4]
	xg	@C[4],$A[0][4]($src)

	xg	@C[0],$A[1][0]($src)
	xgr	@C[1],@D[1]
	xg	@C[2],$A[1][2]($src)
	xg	@C[3],$A[1][3]($src)
	xg	@C[4],$A[1][4]($src)

	xg	@C[0],$A[2][0]($src)
	xg	@C[1],$A[2][1]($src)
	xgr	@C[2],@D[2]
	xg	@C[3],$A[2][3]($src)
	xg	@C[4],$A[2][4]($src)

	xg	@C[0],$A[3][0]($src)
	xg	@C[1],$A[3][1]($src)
	xg	@C[2],$A[3][2]($src)
	xgr	@C[3],@D[3]
	xg	@C[4],$A[3][4]($src)

	lgr	@T[0],@C[2]
	rllg	@C[2],@C[2],1
	xgr	@C[2],@C[0]		# D[1] = ROL64(C[2], 1) ^ C[0]

	rllg	@C[0],@C[0],1
	xgr	@C[0],@C[3]		# D[4] = ROL64(C[0], 1) ^ C[3]

	rllg	@C[3],@C[3],1
	xgr	@C[3],@C[1]		# D[2] = ROL64(C[3], 1) ^ C[1]

	rllg	@C[1],@C[1],1
	xgr	@C[1],@C[4]		# D[0] = ROL64(C[1], 1) ^ C[4]

	rllg	@C[4],@C[4],1
	xgr	@C[4],@T[0]		# D[3] = ROL64(C[4], 1) ^ C[2]
___
	# Rename registers on the Perl side: the D[] values just computed
	# live in the old @C registers, and the diagonal lanes loaded at
	# the loop top (old @D) become the new @C.
	(@D[0..4], @C) = (@C[1..4,0], @D);
$code.=<<___;
	xgr	@C[1],@D[1]
	xgr	@C[2],@D[2]
	xgr	@C[3],@D[3]
	rllg	@C[1],@C[1],$rhotates[1][1]
	xgr	@C[4],@D[4]
	rllg	@C[2],@C[2],$rhotates[2][2]
	xgr	@C[0],@D[0]

	lgr	@T[0],@C[1]
	ogr	@C[1],@C[2]
	rllg	@C[3],@C[3],$rhotates[3][3]
	xgr	@C[1],@C[0]		#	    C[0] ^ ( C[1] | C[2])
	rllg	@C[4],@C[4],$rhotates[4][4]
	xg	@C[1],0($iotas)
	la	$iotas,8($iotas)
	stg	@C[1],$A[0][0]($dst)	# R[0][0] = C[0] ^ ( C[1] | C[2]) ^ iotas[i]

	lgr	@T[1],@C[4]
	ngr	@C[4],@C[3]
	lghi	@C[1],-1		# no 'not' instruction :-(
	xgr	@C[4],@C[2]		#	    C[2] ^ ( C[4] & C[3])
	xgr	@C[2],@C[1]		# not	@C[2]
	stg	@C[4],$A[0][2]($dst)	# R[0][2] = C[2] ^ ( C[4] & C[3])
	ogr	@C[2],@C[3]
	xgr	@C[2],@T[0]		#	    C[1] ^ (~C[2] | C[3])

	ngr	@T[0],@C[0]
	stg	@C[2],$A[0][1]($dst)	# R[0][1] = C[1] ^ (~C[2] | C[3])
	xgr	@T[0],@T[1]		#	    C[4] ^ ( C[1] & C[0])
	ogr	@T[1],@C[0]
	stg	@T[0],$A[0][4]($dst)	# R[0][4] = C[4] ^ ( C[1] & C[0])
	xgr	@T[1],@C[3]		#	    C[3] ^ ( C[4] | C[0])
	stg	@T[1],$A[0][3]($dst)	# R[0][3] = C[3] ^ ( C[4] | C[0])


	lg	@C[0],$A[0][3]($src)
	lg	@C[4],$A[4][2]($src)
	lg	@C[3],$A[3][1]($src)
	lg	@C[1],$A[1][4]($src)
	lg	@C[2],$A[2][0]($src)

	xgr	@C[0],@D[3]
	xgr	@C[4],@D[2]
	rllg	@C[0],@C[0],$rhotates[0][3]
	xgr	@C[3],@D[1]
	rllg	@C[4],@C[4],$rhotates[4][2]
	xgr	@C[1],@D[4]
	rllg	@C[3],@C[3],$rhotates[3][1]
	xgr	@C[2],@D[0]

	lgr	@T[0],@C[0]
	ogr	@C[0],@C[4]
	rllg	@C[1],@C[1],$rhotates[1][4]
	xgr	@C[0],@C[3]		#	    C[3] ^ (C[0] |  C[4])
	rllg	@C[2],@C[2],$rhotates[2][0]
	stg	@C[0],$A[1][3]($dst)	# R[1][3] = C[3] ^ (C[0] |  C[4])

	lgr	@T[1],@C[1]
	ngr	@C[1],@T[0]
	lghi	@C[0],-1		# no 'not' instruction :-(
	xgr	@C[1],@C[4]		#	    C[4] ^ (C[1] &  C[0])
	xgr	@C[4],@C[0]		# not	@C[4]
	stg	@C[1],$A[1][4]($dst)	# R[1][4] = C[4] ^ (C[1] &  C[0])

	ogr	@C[4],@C[3]
	xgr	@C[4],@C[2]		#	    C[2] ^ (~C[4] | C[3])

	ngr	@C[3],@C[2]
	stg	@C[4],$A[1][2]($dst)	# R[1][2] = C[2] ^ (~C[4] | C[3])
	xgr	@C[3],@T[1]		#	    C[1] ^ (C[3] &  C[2])
	ogr	@T[1],@C[2]
	stg	@C[3],$A[1][1]($dst)	# R[1][1] = C[1] ^ (C[3] &  C[2])
	xgr	@T[1],@T[0]		#	    C[0] ^ (C[1] |  C[2])
	stg	@T[1],$A[1][0]($dst)	# R[1][0] = C[0] ^ (C[1] |  C[2])


	lg	@C[2],$A[2][3]($src)
	lg	@C[3],$A[3][4]($src)
	lg	@C[1],$A[1][2]($src)
	lg	@C[4],$A[4][0]($src)
	lg	@C[0],$A[0][1]($src)

	xgr	@C[2],@D[3]
	xgr	@C[3],@D[4]
	rllg	@C[2],@C[2],$rhotates[2][3]
	xgr	@C[1],@D[2]
	rllg	@C[3],@C[3],$rhotates[3][4]
	xgr	@C[4],@D[0]
	rllg	@C[1],@C[1],$rhotates[1][2]
	xgr	@C[0],@D[1]

	lgr	@T[0],@C[2]
	ngr	@C[2],@C[3]
	rllg	@C[4],@C[4],$rhotates[4][0]
	xgr	@C[2],@C[1]		#	     C[1] ^ ( C[2] & C[3])
	lghi	@T[1],-1		# no 'not' instruction :-(
	stg	@C[2],$A[2][1]($dst)	# R[2][1] =  C[1] ^ ( C[2] & C[3])

	xgr	@C[3],@T[1]		# not	@C[3]
	lgr	@T[1],@C[4]
	ngr	@C[4],@C[3]
	rllg	@C[0],@C[0],$rhotates[0][1]
	xgr	@C[4],@T[0]		#	     C[2] ^ ( C[4] & ~C[3])
	ogr	@T[0],@C[1]
	stg	@C[4],$A[2][2]($dst)	# R[2][2] =  C[2] ^ ( C[4] & ~C[3])
	xgr	@T[0],@C[0]		#	     C[0] ^ ( C[2] | C[1])

	ngr	@C[1],@C[0]
	stg	@T[0],$A[2][0]($dst)	# R[2][0] =  C[0] ^ ( C[2] | C[1])
	xgr	@C[1],@T[1]		#	     C[4] ^ ( C[1] & C[0])
	ogr	@C[0],@T[1]
	stg	@C[1],$A[2][4]($dst)	# R[2][4] =  C[4] ^ ( C[1] & C[0])
	xgr	@C[0],@C[3]		#	    ~C[3] ^ ( C[0] | C[4])
	stg	@C[0],$A[2][3]($dst)	# R[2][3] = ~C[3] ^ ( C[0] | C[4])


	lg	@C[2],$A[2][1]($src)
	lg	@C[3],$A[3][2]($src)
	lg	@C[1],$A[1][0]($src)
	lg	@C[4],$A[4][3]($src)
	lg	@C[0],$A[0][4]($src)

	xgr	@C[2],@D[1]
	xgr	@C[3],@D[2]
	rllg	@C[2],@C[2],$rhotates[2][1]
	xgr	@C[1],@D[0]
	rllg	@C[3],@C[3],$rhotates[3][2]
	xgr	@C[4],@D[3]
	rllg	@C[1],@C[1],$rhotates[1][0]
	xgr	@C[0],@D[4]
	rllg	@C[4],@C[4],$rhotates[4][3]

	lgr	@T[0],@C[2]
	ogr	@C[2],@C[3]
	lghi	@T[1],-1		# no 'not' instruction :-(
	xgr	@C[2],@C[1]		#	     C[1] ^ ( C[2] | C[3])
	xgr	@C[3],@T[1]		# not	@C[3]
	stg	@C[2],$A[3][1]($dst)	# R[3][1] =  C[1] ^ ( C[2] | C[3])

	lgr	@T[1],@C[4]
	ogr	@C[4],@C[3]
	rllg	@C[0],@C[0],$rhotates[0][4]
	xgr	@C[4],@T[0]		#	     C[2] ^ ( C[4] | ~C[3])
	ngr	@T[0],@C[1]
	stg	@C[4],$A[3][2]($dst)	# R[3][2] =  C[2] ^ ( C[4] | ~C[3])
	xgr	@T[0],@C[0]		#	     C[0] ^ ( C[2] & C[1])

	ogr	@C[1],@C[0]
	stg	@T[0],$A[3][0]($dst)	# R[3][0] =  C[0] ^ ( C[2] & C[1])
	xgr	@C[1],@T[1]		#	     C[4] ^ ( C[1] | C[0])
	ngr	@C[0],@T[1]
	stg	@C[1],$A[3][4]($dst)	# R[3][4] =  C[4] ^ ( C[1] | C[0])
	xgr	@C[0],@C[3]		#	    ~C[3] ^ ( C[0] & C[4])
	stg	@C[0],$A[3][3]($dst)	# R[3][3] = ~C[3] ^ ( C[0] & C[4])


	xg	@D[2],$A[0][2]($src)
	xg	@D[3],$A[1][3]($src)
	xg	@D[1],$A[4][1]($src)
	xg	@D[4],$A[2][4]($src)
	xgr	$dst,$src		# xchg	$dst,$src
	rllg	@D[2],@D[2],$rhotates[0][2]
	xg	@D[0],$A[3][0]($src)
	rllg	@D[3],@D[3],$rhotates[1][3]
	xgr	$src,$dst
	rllg	@D[1],@D[1],$rhotates[4][1]
	xgr	$dst,$src
	rllg	@D[4],@D[4],$rhotates[2][4]
___
	# The last row is computed in place in the @D registers; from here
	# on $src names the buffer that was $dst during this round.
	@C = @D[2..4,0,1];
$code.=<<___;
	lgr	@T[0],@C[0]
	ngr	@C[0],@C[1]
	lghi	@T[1],-1		# no 'not' instruction :-(
	xgr	@C[0],@C[4]		#	     C[4] ^ ( C[0] & C[1])
	xgr	@C[1],@T[1]		# not	@C[1]
	stg	@C[0],$A[4][4]($src)	# R[4][4] =  C[4] ^ ( C[0] & C[1])

	lgr	@T[1],@C[2]
	ngr	@C[2],@C[1]
	rllg	@D[0],@D[0],$rhotates[3][0]
	xgr	@C[2],@T[0]		#	     C[0] ^ ( C[2] & ~C[1])
	ogr	@T[0],@C[4]
	stg	@C[2],$A[4][0]($src)	# R[4][0] =  C[0] ^ ( C[2] & ~C[1])
	xgr	@T[0],@C[3]		#	     C[3] ^ ( C[0] | C[4])

	ngr	@C[4],@C[3]
	stg	@T[0],$A[4][3]($src)	# R[4][3] =  C[3] ^ ( C[0] | C[4])
	xgr	@C[4],@T[1]		#	     C[2] ^ ( C[4] & C[3])
	ogr	@C[3],@T[1]
	stg	@C[4],$A[4][2]($src)	# R[4][2] =  C[2] ^ ( C[4] & C[3])
	xgr	@C[3],@C[1]		#	    ~C[1] ^ ( C[2] | C[3])

	lgr	@C[1],@C[0]		# harmonize with the loop top
	lgr	@C[0],@T[0]
	stg	@C[3],$A[4][1]($src)	# R[4][1] = ~C[1] ^ ( C[2] | C[3])

	tmll	$iotas,255
	jnz	.Loop

	l${g}	%r14,$SIZE_T*14($sp)
	br	%r14
.size	__KeccakF1600,.-__KeccakF1600
___
}

########################################################################
# Lane complementing: invert (xor with all-ones) the six state lanes
# A[0][1], A[0][2], A[1][3], A[2][2], A[3][2] and A[4][0] in place at
# ($src).  The public entry points apply this once before and once
# after running __KeccakF1600.  The sequence used to be spelled out four
# times; it is generated once here and interpolated below.  The value
# ends with a newline, so a line consisting solely of $complement_lanes
# inside a heredoc yields the sequence followed by one blank line.
my $complement_lanes = <<___;
	lghi	@D[0],-1		# no 'not' instruction :-(
	lghi	@D[1],-1
	lghi	@D[2],-1
	lghi	@D[3],-1
	lghi	@D[4],-1
	lghi	@T[0],-1
	xg	@D[0],$A[0][1]($src)
	xg	@D[1],$A[0][2]($src)
	xg	@D[2],$A[1][3]($src)
	xg	@D[3],$A[2][2]($src)
	xg	@D[4],$A[3][2]($src)
	xg	@T[0],$A[4][0]($src)
	stmg	@D[0],@D[1],$A[0][1]($src)
	stg	@D[2],$A[1][3]($src)
	stg	@D[3],$A[2][2]($src)
	stg	@D[4],$A[3][2]($src)
	stg	@T[0],$A[4][0]($src)
___

########################################################################
# KeccakF1600: permute the state at ($src) in place.  Saves %r6-%r15,
# allocates $frame bytes of stack and passes the area at $stdframe($sp)
# to __KeccakF1600 as its second buffer.  .LKeccakF1600 is a local alias
# used by SHA3_squeeze.
{
$code.=<<___;
.type	KeccakF1600,\@function
.align	32
KeccakF1600:
.LKeccakF1600:
	lghi	%r1,-$frame
	stm${g}	%r6,%r15,$SIZE_T*6($sp)
	lgr	%r0,$sp
	la	$sp,0(%r1,$sp)
	st${g}	%r0,0($sp)

$complement_lanes
	la	$dst,$stdframe($sp)

	bras	%r14,__KeccakF1600

$complement_lanes
	lm${g}	%r6,%r15,$frame+6*$SIZE_T($sp)
	br	%r14
.size	KeccakF1600,.-KeccakF1600
___
}

########################################################################
# SHA3_absorb(A_flat, inp, len, bsz) with arguments in %r2..%r5.
# While len >= bsz, xors bsz bytes of input (byte-reversed 8 bytes at a
# time with lrvg) into the state and runs __KeccakF1600.  The number of
# bytes left unprocessed is returned in %r2.
{ my ($A_flat,$inp,$len,$bsz) = map("%r$_",(2..5));

$code.=<<___;
.globl	SHA3_absorb
.type	SHA3_absorb,\@function
.align	32
SHA3_absorb:
	lghi	%r1,-$frame
	stm${g}	%r5,%r15,$SIZE_T*5($sp)
	lgr	%r0,$sp
	la	$sp,0(%r1,$sp)
	st${g}	%r0,0($sp)

$complement_lanes
.Loop_absorb:
	cl${g}r	$len,$bsz
	jl	.Ldone_absorb

	srl${g}	$bsz,3
	la	%r1,0($A_flat)

.Lblock_absorb:
	lrvg	%r0,0($inp)
	la	$inp,8($inp)
	xg	%r0,0(%r1)
	a${g}hi	$len,-8
	stg	%r0,0(%r1)
	la	%r1,8(%r1)
	brct	$bsz,.Lblock_absorb

	stm${g}	$inp,$len,$frame+3*$SIZE_T($sp)
	la	$dst,$stdframe($sp)
	bras	%r14,__KeccakF1600
	lm${g}	$inp,$bsz,$frame+3*$SIZE_T($sp)
	j	.Loop_absorb

.align	16
.Ldone_absorb:
$complement_lanes
	lgr	%r2,$len		# return value

	lm${g}	%r6,%r15,$frame+6*$SIZE_T($sp)
	br	%r14
.size	SHA3_absorb,.-SHA3_absorb
___
}

########################################################################
# SHA3_squeeze(A_flat, out, len, bsz) with arguments in %r2..%r5.
# Copies len bytes of state to out, 8 bytes at a time (byte-reversed
# with lrvg), calling KeccakF1600 whenever bsz bytes have been emitted
# and more output is still wanted.  A final partial lane is written
# byte by byte.
#
# The tail now starts with a zero-length test.  Previously len == 0 on
# entry fell straight into the byte loop, stored one byte and let brct
# wrap the counter from 0 to 0xffffffff, writing far beyond the output
# buffer.
{ my ($A_flat,$out,$len,$bsz) = map("%r$_",(2..5));

$code.=<<___;
.globl	SHA3_squeeze
.type	SHA3_squeeze,\@function
.align	32
SHA3_squeeze:
	srl${g}	$bsz,3
	st${g}	%r14,2*$SIZE_T($sp)
	lghi	%r14,8
	st${g}	$bsz,5*$SIZE_T($sp)
	la	%r1,0($A_flat)

	j	.Loop_squeeze

.align	16
.Loop_squeeze:
	cl${g}r	$len,%r14
	jl	.Ltail_squeeze

	lrvg	%r0,0(%r1)
	la	%r1,8(%r1)
	stg	%r0,0($out)
	la	$out,8($out)
	a${g}hi	$len,-8			# len -= 8
	jz	.Ldone_squeeze

	brct	$bsz,.Loop_squeeze	# bsz--

	stm${g}	$out,$len,3*$SIZE_T($sp)
	bras	%r14,.LKeccakF1600
	lm${g}	$out,$bsz,3*$SIZE_T($sp)
	lghi	%r14,8
	la	%r1,0($A_flat)
	j	.Loop_squeeze

.Ltail_squeeze:
	lt${g}r	$len,$len		# len == 0: nothing to emit
	jz	.Ldone_squeeze
	lg	%r0,0(%r1)
.Loop_tail_squeeze:
	stc	%r0,0($out)
	la	$out,1($out)
	srlg	%r0,8
	brct	$len,.Loop_tail_squeeze

.Ldone_squeeze:
	l${g}	%r14,2*$SIZE_T($sp)
	br	%r14
.size	SHA3_squeeze,.-SHA3_squeeze
___
}

########################################################################
# Round constants.  The 8 zero quads after ".align 256" place the table
# 64 bytes into a 256-byte aligned area, so the address just past the
# 24th constant has its low 8 bits clear; __KeccakF1600 relies on that
# for its loop-termination test.
$code.=<<___;
.align	256
	.quad	0,0,0,0,0,0,0,0
.type	iotas,\@object
iotas:
	.quad	0x0000000000000001
	.quad	0x0000000000008082
	.quad	0x800000000000808a
	.quad	0x8000000080008000
	.quad	0x000000000000808b
	.quad	0x0000000080000001
	.quad	0x8000000080008081
	.quad	0x8000000000008009
	.quad	0x000000000000008a
	.quad	0x0000000000000088
	.quad	0x0000000080008009
	.quad	0x000000008000000a
	.quad	0x000000008000808b
	.quad	0x800000000000008b
	.quad	0x8000000000008089
	.quad	0x8000000000008003
	.quad	0x8000000000008002
	.quad	0x8000000000000080
	.quad	0x000000000000800a
	.quad	0x800000008000000a
	.quad	0x8000000080008081
	.quad	0x8000000000008080
	.quad	0x0000000080000001
	.quad	0x8000000080008008
.size	iotas,.-iotas
.asciz	"Keccak-1600 absorb and squeeze for s390x, CRYPTOGAMS by <appro\@openssl.org>"
___

# unlike 32-bit shift 64-bit one takes three arguments
$code =~ s/(srlg\s+)(%r[0-9]+),/$1$2,$2,/gm;

print $code;
close STDOUT or die "error closing STDOUT: $!";