1e1051a39Sopenharmony_ci#!/usr/bin/env perl 2e1051a39Sopenharmony_ci# Copyright 2017-2020 The OpenSSL Project Authors. All Rights Reserved. 3e1051a39Sopenharmony_ci# 4e1051a39Sopenharmony_ci# Licensed under the Apache License 2.0 (the "License"). You may not use 5e1051a39Sopenharmony_ci# this file except in compliance with the License. You can obtain a copy 6e1051a39Sopenharmony_ci# in the file LICENSE in the source distribution or at 7e1051a39Sopenharmony_ci# https://www.openssl.org/source/license.html 8e1051a39Sopenharmony_ci# 9e1051a39Sopenharmony_ci# ==================================================================== 10e1051a39Sopenharmony_ci# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL 11e1051a39Sopenharmony_ci# project. The module is, however, dual licensed under OpenSSL and 12e1051a39Sopenharmony_ci# CRYPTOGAMS licenses depending on where you obtain it. For further 13e1051a39Sopenharmony_ci# details see http://www.openssl.org/~appro/cryptogams/. 14e1051a39Sopenharmony_ci# ==================================================================== 15e1051a39Sopenharmony_ci# 16e1051a39Sopenharmony_ci# Keccak-1600 for PPC64. 17e1051a39Sopenharmony_ci# 18e1051a39Sopenharmony_ci# June 2017. 19e1051a39Sopenharmony_ci# 20e1051a39Sopenharmony_ci# This is straightforward KECCAK_1X_ALT implementation that works on 21e1051a39Sopenharmony_ci# *any* PPC64. Then PowerISA 2.07 adds 2x64-bit vector rotate, and 22e1051a39Sopenharmony_ci# it's possible to achieve performance better than below, but that is 23e1051a39Sopenharmony_ci# naturally option only for POWER8 and successors... 24e1051a39Sopenharmony_ci# 25e1051a39Sopenharmony_ci###################################################################### 26e1051a39Sopenharmony_ci# Numbers are cycles per processed byte. 
27e1051a39Sopenharmony_ci# 28e1051a39Sopenharmony_ci# r=1088(*) 29e1051a39Sopenharmony_ci# 30e1051a39Sopenharmony_ci# PPC970/G5 14.0/+130% 31e1051a39Sopenharmony_ci# POWER7 9.7/+110% 32e1051a39Sopenharmony_ci# POWER8 10.6/+100% 33e1051a39Sopenharmony_ci# POWER9 8.2/+66% 34e1051a39Sopenharmony_ci# 35e1051a39Sopenharmony_ci# (*) Corresponds to SHA3-256. Percentage after slash is improvement 36e1051a39Sopenharmony_ci# over gcc-4.x-generated KECCAK_1X_ALT code. Newer compilers do 37e1051a39Sopenharmony_ci# much better (but watch out for them generating code specific 38e1051a39Sopenharmony_ci# to processor they execute on). 39e1051a39Sopenharmony_ci 40e1051a39Sopenharmony_ci# $output is the last argument if it looks like a file (it has an extension) 41e1051a39Sopenharmony_ci# $flavour is the first argument if it doesn't look like a file 42e1051a39Sopenharmony_ci$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef; 43e1051a39Sopenharmony_ci$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? 
shift : undef; 44e1051a39Sopenharmony_ci 45e1051a39Sopenharmony_ciif ($flavour =~ /64/) { 46e1051a39Sopenharmony_ci $SIZE_T =8; 47e1051a39Sopenharmony_ci $LRSAVE =2*$SIZE_T; 48e1051a39Sopenharmony_ci $UCMP ="cmpld"; 49e1051a39Sopenharmony_ci $STU ="stdu"; 50e1051a39Sopenharmony_ci $POP ="ld"; 51e1051a39Sopenharmony_ci $PUSH ="std"; 52e1051a39Sopenharmony_ci} else { die "nonsense $flavour"; } 53e1051a39Sopenharmony_ci 54e1051a39Sopenharmony_ci$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; 55e1051a39Sopenharmony_ci( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or 56e1051a39Sopenharmony_ci( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or 57e1051a39Sopenharmony_cidie "can't locate ppc-xlate.pl"; 58e1051a39Sopenharmony_ci 59e1051a39Sopenharmony_ciopen STDOUT,"| $^X $xlate $flavour \"$output\"" 60e1051a39Sopenharmony_ci or die "can't call $xlate: $!"; 61e1051a39Sopenharmony_ci 62e1051a39Sopenharmony_ci$FRAME=24*$SIZE_T+6*$SIZE_T+32; 63e1051a39Sopenharmony_ci$LOCALS=6*$SIZE_T; 64e1051a39Sopenharmony_ci$TEMP=$LOCALS+6*$SIZE_T; 65e1051a39Sopenharmony_ci 66e1051a39Sopenharmony_cimy $sp ="r1"; 67e1051a39Sopenharmony_ci 68e1051a39Sopenharmony_cimy @A = map([ "r$_", "r".($_+1), "r".($_+2), "r".($_+3), "r".($_+4) ], 69e1051a39Sopenharmony_ci (7, 12, 17, 22, 27)); 70e1051a39Sopenharmony_ci $A[1][1] = "r6"; # r13 is reserved 71e1051a39Sopenharmony_ci 72e1051a39Sopenharmony_cimy @C = map("r$_", (0,3,4,5)); 73e1051a39Sopenharmony_ci 74e1051a39Sopenharmony_cimy @rhotates = ([ 0, 1, 62, 28, 27 ], 75e1051a39Sopenharmony_ci [ 36, 44, 6, 55, 20 ], 76e1051a39Sopenharmony_ci [ 3, 10, 43, 25, 39 ], 77e1051a39Sopenharmony_ci [ 41, 45, 15, 21, 8 ], 78e1051a39Sopenharmony_ci [ 18, 2, 61, 56, 14 ]); 79e1051a39Sopenharmony_ci 80e1051a39Sopenharmony_ci$code.=<<___; 81e1051a39Sopenharmony_ci.text 82e1051a39Sopenharmony_ci 83e1051a39Sopenharmony_ci.type KeccakF1600_int,\@function 84e1051a39Sopenharmony_ci.align 5 85e1051a39Sopenharmony_ciKeccakF1600_int: 86e1051a39Sopenharmony_ci li r0,24 
87e1051a39Sopenharmony_ci mtctr r0 88e1051a39Sopenharmony_ci b .Loop 89e1051a39Sopenharmony_ci.align 4 90e1051a39Sopenharmony_ci.Loop: 91e1051a39Sopenharmony_ci xor $C[0],$A[0][0],$A[1][0] ; Theta 92e1051a39Sopenharmony_ci std $A[0][4],`$TEMP+0`($sp) 93e1051a39Sopenharmony_ci xor $C[1],$A[0][1],$A[1][1] 94e1051a39Sopenharmony_ci std $A[1][4],`$TEMP+8`($sp) 95e1051a39Sopenharmony_ci xor $C[2],$A[0][2],$A[1][2] 96e1051a39Sopenharmony_ci std $A[2][4],`$TEMP+16`($sp) 97e1051a39Sopenharmony_ci xor $C[3],$A[0][3],$A[1][3] 98e1051a39Sopenharmony_ci std $A[3][4],`$TEMP+24`($sp) 99e1051a39Sopenharmony_ci___ 100e1051a39Sopenharmony_ci $C[4]=$A[0][4]; 101e1051a39Sopenharmony_ci $C[5]=$A[1][4]; 102e1051a39Sopenharmony_ci $C[6]=$A[2][4]; 103e1051a39Sopenharmony_ci $C[7]=$A[3][4]; 104e1051a39Sopenharmony_ci$code.=<<___; 105e1051a39Sopenharmony_ci xor $C[4],$A[0][4],$A[1][4] 106e1051a39Sopenharmony_ci xor $C[0],$C[0],$A[2][0] 107e1051a39Sopenharmony_ci xor $C[1],$C[1],$A[2][1] 108e1051a39Sopenharmony_ci xor $C[2],$C[2],$A[2][2] 109e1051a39Sopenharmony_ci xor $C[3],$C[3],$A[2][3] 110e1051a39Sopenharmony_ci xor $C[4],$C[4],$A[2][4] 111e1051a39Sopenharmony_ci xor $C[0],$C[0],$A[3][0] 112e1051a39Sopenharmony_ci xor $C[1],$C[1],$A[3][1] 113e1051a39Sopenharmony_ci xor $C[2],$C[2],$A[3][2] 114e1051a39Sopenharmony_ci xor $C[3],$C[3],$A[3][3] 115e1051a39Sopenharmony_ci xor $C[4],$C[4],$A[3][4] 116e1051a39Sopenharmony_ci xor $C[0],$C[0],$A[4][0] 117e1051a39Sopenharmony_ci xor $C[2],$C[2],$A[4][2] 118e1051a39Sopenharmony_ci xor $C[1],$C[1],$A[4][1] 119e1051a39Sopenharmony_ci xor $C[3],$C[3],$A[4][3] 120e1051a39Sopenharmony_ci rotldi $C[5],$C[2],1 121e1051a39Sopenharmony_ci xor $C[4],$C[4],$A[4][4] 122e1051a39Sopenharmony_ci rotldi $C[6],$C[3],1 123e1051a39Sopenharmony_ci xor $C[5],$C[5],$C[0] 124e1051a39Sopenharmony_ci rotldi $C[7],$C[4],1 125e1051a39Sopenharmony_ci 126e1051a39Sopenharmony_ci xor $A[0][1],$A[0][1],$C[5] 127e1051a39Sopenharmony_ci xor $A[1][1],$A[1][1],$C[5] 
128e1051a39Sopenharmony_ci xor $A[2][1],$A[2][1],$C[5] 129e1051a39Sopenharmony_ci xor $A[3][1],$A[3][1],$C[5] 130e1051a39Sopenharmony_ci xor $A[4][1],$A[4][1],$C[5] 131e1051a39Sopenharmony_ci 132e1051a39Sopenharmony_ci rotldi $C[5],$C[0],1 133e1051a39Sopenharmony_ci xor $C[6],$C[6],$C[1] 134e1051a39Sopenharmony_ci xor $C[2],$C[2],$C[7] 135e1051a39Sopenharmony_ci rotldi $C[7],$C[1],1 136e1051a39Sopenharmony_ci xor $C[3],$C[3],$C[5] 137e1051a39Sopenharmony_ci xor $C[4],$C[4],$C[7] 138e1051a39Sopenharmony_ci 139e1051a39Sopenharmony_ci xor $C[1], $A[0][2],$C[6] ;mr $C[1],$A[0][2] 140e1051a39Sopenharmony_ci xor $A[1][2],$A[1][2],$C[6] 141e1051a39Sopenharmony_ci xor $A[2][2],$A[2][2],$C[6] 142e1051a39Sopenharmony_ci xor $A[3][2],$A[3][2],$C[6] 143e1051a39Sopenharmony_ci xor $A[4][2],$A[4][2],$C[6] 144e1051a39Sopenharmony_ci 145e1051a39Sopenharmony_ci xor $A[0][0],$A[0][0],$C[4] 146e1051a39Sopenharmony_ci xor $A[1][0],$A[1][0],$C[4] 147e1051a39Sopenharmony_ci xor $A[2][0],$A[2][0],$C[4] 148e1051a39Sopenharmony_ci xor $A[3][0],$A[3][0],$C[4] 149e1051a39Sopenharmony_ci xor $A[4][0],$A[4][0],$C[4] 150e1051a39Sopenharmony_ci___ 151e1051a39Sopenharmony_ci $C[4]=undef; 152e1051a39Sopenharmony_ci $C[5]=undef; 153e1051a39Sopenharmony_ci $C[6]=undef; 154e1051a39Sopenharmony_ci $C[7]=undef; 155e1051a39Sopenharmony_ci$code.=<<___; 156e1051a39Sopenharmony_ci ld $A[0][4],`$TEMP+0`($sp) 157e1051a39Sopenharmony_ci xor $C[0], $A[0][3],$C[2] ;mr $C[0],$A[0][3] 158e1051a39Sopenharmony_ci ld $A[1][4],`$TEMP+8`($sp) 159e1051a39Sopenharmony_ci xor $A[1][3],$A[1][3],$C[2] 160e1051a39Sopenharmony_ci ld $A[2][4],`$TEMP+16`($sp) 161e1051a39Sopenharmony_ci xor $A[2][3],$A[2][3],$C[2] 162e1051a39Sopenharmony_ci ld $A[3][4],`$TEMP+24`($sp) 163e1051a39Sopenharmony_ci xor $A[3][3],$A[3][3],$C[2] 164e1051a39Sopenharmony_ci xor $A[4][3],$A[4][3],$C[2] 165e1051a39Sopenharmony_ci 166e1051a39Sopenharmony_ci xor $C[2], $A[0][4],$C[3] ;mr $C[2],$A[0][4] 167e1051a39Sopenharmony_ci xor $A[1][4],$A[1][4],$C[3] 
168e1051a39Sopenharmony_ci xor $A[2][4],$A[2][4],$C[3] 169e1051a39Sopenharmony_ci xor $A[3][4],$A[3][4],$C[3] 170e1051a39Sopenharmony_ci xor $A[4][4],$A[4][4],$C[3] 171e1051a39Sopenharmony_ci 172e1051a39Sopenharmony_ci mr $C[3],$A[0][1] ; Rho+Pi 173e1051a39Sopenharmony_ci rotldi $A[0][1],$A[1][1],$rhotates[1][1] 174e1051a39Sopenharmony_ci ;mr $C[1],$A[0][2] 175e1051a39Sopenharmony_ci rotldi $A[0][2],$A[2][2],$rhotates[2][2] 176e1051a39Sopenharmony_ci ;mr $C[0],$A[0][3] 177e1051a39Sopenharmony_ci rotldi $A[0][3],$A[3][3],$rhotates[3][3] 178e1051a39Sopenharmony_ci ;mr $C[2],$A[0][4] 179e1051a39Sopenharmony_ci rotldi $A[0][4],$A[4][4],$rhotates[4][4] 180e1051a39Sopenharmony_ci 181e1051a39Sopenharmony_ci rotldi $A[1][1],$A[1][4],$rhotates[1][4] 182e1051a39Sopenharmony_ci rotldi $A[2][2],$A[2][3],$rhotates[2][3] 183e1051a39Sopenharmony_ci rotldi $A[3][3],$A[3][2],$rhotates[3][2] 184e1051a39Sopenharmony_ci rotldi $A[4][4],$A[4][1],$rhotates[4][1] 185e1051a39Sopenharmony_ci 186e1051a39Sopenharmony_ci rotldi $A[1][4],$A[4][2],$rhotates[4][2] 187e1051a39Sopenharmony_ci rotldi $A[2][3],$A[3][4],$rhotates[3][4] 188e1051a39Sopenharmony_ci rotldi $A[3][2],$A[2][1],$rhotates[2][1] 189e1051a39Sopenharmony_ci rotldi $A[4][1],$A[1][3],$rhotates[1][3] 190e1051a39Sopenharmony_ci 191e1051a39Sopenharmony_ci rotldi $A[4][2],$A[2][4],$rhotates[2][4] 192e1051a39Sopenharmony_ci rotldi $A[3][4],$A[4][3],$rhotates[4][3] 193e1051a39Sopenharmony_ci rotldi $A[2][1],$A[1][2],$rhotates[1][2] 194e1051a39Sopenharmony_ci rotldi $A[1][3],$A[3][1],$rhotates[3][1] 195e1051a39Sopenharmony_ci 196e1051a39Sopenharmony_ci rotldi $A[2][4],$A[4][0],$rhotates[4][0] 197e1051a39Sopenharmony_ci rotldi $A[4][3],$A[3][0],$rhotates[3][0] 198e1051a39Sopenharmony_ci rotldi $A[1][2],$A[2][0],$rhotates[2][0] 199e1051a39Sopenharmony_ci rotldi $A[3][1],$A[1][0],$rhotates[1][0] 200e1051a39Sopenharmony_ci 201e1051a39Sopenharmony_ci rotldi $A[1][0],$C[0],$rhotates[0][3] 202e1051a39Sopenharmony_ci rotldi 
$A[2][0],$C[3],$rhotates[0][1] 203e1051a39Sopenharmony_ci rotldi $A[3][0],$C[2],$rhotates[0][4] 204e1051a39Sopenharmony_ci rotldi $A[4][0],$C[1],$rhotates[0][2] 205e1051a39Sopenharmony_ci 206e1051a39Sopenharmony_ci andc $C[0],$A[0][2],$A[0][1] ; Chi+Iota 207e1051a39Sopenharmony_ci andc $C[1],$A[0][3],$A[0][2] 208e1051a39Sopenharmony_ci andc $C[2],$A[0][0],$A[0][4] 209e1051a39Sopenharmony_ci andc $C[3],$A[0][1],$A[0][0] 210e1051a39Sopenharmony_ci xor $A[0][0],$A[0][0],$C[0] 211e1051a39Sopenharmony_ci andc $C[0],$A[0][4],$A[0][3] 212e1051a39Sopenharmony_ci xor $A[0][1],$A[0][1],$C[1] 213e1051a39Sopenharmony_ci ld $C[1],`$LOCALS+4*$SIZE_T`($sp) 214e1051a39Sopenharmony_ci xor $A[0][3],$A[0][3],$C[2] 215e1051a39Sopenharmony_ci xor $A[0][4],$A[0][4],$C[3] 216e1051a39Sopenharmony_ci xor $A[0][2],$A[0][2],$C[0] 217e1051a39Sopenharmony_ci ldu $C[3],8($C[1]) ; Iota[i++] 218e1051a39Sopenharmony_ci 219e1051a39Sopenharmony_ci andc $C[0],$A[1][2],$A[1][1] 220e1051a39Sopenharmony_ci std $C[1],`$LOCALS+4*$SIZE_T`($sp) 221e1051a39Sopenharmony_ci andc $C[1],$A[1][3],$A[1][2] 222e1051a39Sopenharmony_ci andc $C[2],$A[1][0],$A[1][4] 223e1051a39Sopenharmony_ci xor $A[0][0],$A[0][0],$C[3] ; A[0][0] ^= Iota 224e1051a39Sopenharmony_ci andc $C[3],$A[1][1],$A[1][0] 225e1051a39Sopenharmony_ci xor $A[1][0],$A[1][0],$C[0] 226e1051a39Sopenharmony_ci andc $C[0],$A[1][4],$A[1][3] 227e1051a39Sopenharmony_ci xor $A[1][1],$A[1][1],$C[1] 228e1051a39Sopenharmony_ci xor $A[1][3],$A[1][3],$C[2] 229e1051a39Sopenharmony_ci xor $A[1][4],$A[1][4],$C[3] 230e1051a39Sopenharmony_ci xor $A[1][2],$A[1][2],$C[0] 231e1051a39Sopenharmony_ci 232e1051a39Sopenharmony_ci andc $C[0],$A[2][2],$A[2][1] 233e1051a39Sopenharmony_ci andc $C[1],$A[2][3],$A[2][2] 234e1051a39Sopenharmony_ci andc $C[2],$A[2][0],$A[2][4] 235e1051a39Sopenharmony_ci andc $C[3],$A[2][1],$A[2][0] 236e1051a39Sopenharmony_ci xor $A[2][0],$A[2][0],$C[0] 237e1051a39Sopenharmony_ci andc $C[0],$A[2][4],$A[2][3] 238e1051a39Sopenharmony_ci xor 
$A[2][1],$A[2][1],$C[1] 239e1051a39Sopenharmony_ci xor $A[2][3],$A[2][3],$C[2] 240e1051a39Sopenharmony_ci xor $A[2][4],$A[2][4],$C[3] 241e1051a39Sopenharmony_ci xor $A[2][2],$A[2][2],$C[0] 242e1051a39Sopenharmony_ci 243e1051a39Sopenharmony_ci andc $C[0],$A[3][2],$A[3][1] 244e1051a39Sopenharmony_ci andc $C[1],$A[3][3],$A[3][2] 245e1051a39Sopenharmony_ci andc $C[2],$A[3][0],$A[3][4] 246e1051a39Sopenharmony_ci andc $C[3],$A[3][1],$A[3][0] 247e1051a39Sopenharmony_ci xor $A[3][0],$A[3][0],$C[0] 248e1051a39Sopenharmony_ci andc $C[0],$A[3][4],$A[3][3] 249e1051a39Sopenharmony_ci xor $A[3][1],$A[3][1],$C[1] 250e1051a39Sopenharmony_ci xor $A[3][3],$A[3][3],$C[2] 251e1051a39Sopenharmony_ci xor $A[3][4],$A[3][4],$C[3] 252e1051a39Sopenharmony_ci xor $A[3][2],$A[3][2],$C[0] 253e1051a39Sopenharmony_ci 254e1051a39Sopenharmony_ci andc $C[0],$A[4][2],$A[4][1] 255e1051a39Sopenharmony_ci andc $C[1],$A[4][3],$A[4][2] 256e1051a39Sopenharmony_ci andc $C[2],$A[4][0],$A[4][4] 257e1051a39Sopenharmony_ci andc $C[3],$A[4][1],$A[4][0] 258e1051a39Sopenharmony_ci xor $A[4][0],$A[4][0],$C[0] 259e1051a39Sopenharmony_ci andc $C[0],$A[4][4],$A[4][3] 260e1051a39Sopenharmony_ci xor $A[4][1],$A[4][1],$C[1] 261e1051a39Sopenharmony_ci xor $A[4][3],$A[4][3],$C[2] 262e1051a39Sopenharmony_ci xor $A[4][4],$A[4][4],$C[3] 263e1051a39Sopenharmony_ci xor $A[4][2],$A[4][2],$C[0] 264e1051a39Sopenharmony_ci 265e1051a39Sopenharmony_ci bdnz .Loop 266e1051a39Sopenharmony_ci 267e1051a39Sopenharmony_ci blr 268e1051a39Sopenharmony_ci .long 0 269e1051a39Sopenharmony_ci .byte 0,12,0x14,0,0,0,0,0 270e1051a39Sopenharmony_ci.size KeccakF1600_int,.-KeccakF1600_int 271e1051a39Sopenharmony_ci 272e1051a39Sopenharmony_ci.type KeccakF1600,\@function 273e1051a39Sopenharmony_ci.align 5 274e1051a39Sopenharmony_ciKeccakF1600: 275e1051a39Sopenharmony_ci $STU $sp,-$FRAME($sp) 276e1051a39Sopenharmony_ci mflr r0 277e1051a39Sopenharmony_ci $PUSH r14,`$FRAME-$SIZE_T*18`($sp) 278e1051a39Sopenharmony_ci $PUSH r15,`$FRAME-$SIZE_T*17`($sp) 
279e1051a39Sopenharmony_ci $PUSH r16,`$FRAME-$SIZE_T*16`($sp) 280e1051a39Sopenharmony_ci $PUSH r17,`$FRAME-$SIZE_T*15`($sp) 281e1051a39Sopenharmony_ci $PUSH r18,`$FRAME-$SIZE_T*14`($sp) 282e1051a39Sopenharmony_ci $PUSH r19,`$FRAME-$SIZE_T*13`($sp) 283e1051a39Sopenharmony_ci $PUSH r20,`$FRAME-$SIZE_T*12`($sp) 284e1051a39Sopenharmony_ci $PUSH r21,`$FRAME-$SIZE_T*11`($sp) 285e1051a39Sopenharmony_ci $PUSH r22,`$FRAME-$SIZE_T*10`($sp) 286e1051a39Sopenharmony_ci $PUSH r23,`$FRAME-$SIZE_T*9`($sp) 287e1051a39Sopenharmony_ci $PUSH r24,`$FRAME-$SIZE_T*8`($sp) 288e1051a39Sopenharmony_ci $PUSH r25,`$FRAME-$SIZE_T*7`($sp) 289e1051a39Sopenharmony_ci $PUSH r26,`$FRAME-$SIZE_T*6`($sp) 290e1051a39Sopenharmony_ci $PUSH r27,`$FRAME-$SIZE_T*5`($sp) 291e1051a39Sopenharmony_ci $PUSH r28,`$FRAME-$SIZE_T*4`($sp) 292e1051a39Sopenharmony_ci $PUSH r29,`$FRAME-$SIZE_T*3`($sp) 293e1051a39Sopenharmony_ci $PUSH r30,`$FRAME-$SIZE_T*2`($sp) 294e1051a39Sopenharmony_ci $PUSH r31,`$FRAME-$SIZE_T*1`($sp) 295e1051a39Sopenharmony_ci $PUSH r0,`$FRAME+$LRSAVE`($sp) 296e1051a39Sopenharmony_ci 297e1051a39Sopenharmony_ci bl PICmeup 298e1051a39Sopenharmony_ci subi r12,r12,8 ; prepare for ldu 299e1051a39Sopenharmony_ci 300e1051a39Sopenharmony_ci $PUSH r3,`$LOCALS+0*$SIZE_T`($sp) 301e1051a39Sopenharmony_ci ;$PUSH r4,`$LOCALS+1*$SIZE_T`($sp) 302e1051a39Sopenharmony_ci ;$PUSH r5,`$LOCALS+2*$SIZE_T`($sp) 303e1051a39Sopenharmony_ci ;$PUSH r6,`$LOCALS+3*$SIZE_T`($sp) 304e1051a39Sopenharmony_ci $PUSH r12,`$LOCALS+4*$SIZE_T`($sp) 305e1051a39Sopenharmony_ci 306e1051a39Sopenharmony_ci ld $A[0][0],`8*0`(r3) ; load A[5][5] 307e1051a39Sopenharmony_ci ld $A[0][1],`8*1`(r3) 308e1051a39Sopenharmony_ci ld $A[0][2],`8*2`(r3) 309e1051a39Sopenharmony_ci ld $A[0][3],`8*3`(r3) 310e1051a39Sopenharmony_ci ld $A[0][4],`8*4`(r3) 311e1051a39Sopenharmony_ci ld $A[1][0],`8*5`(r3) 312e1051a39Sopenharmony_ci ld $A[1][1],`8*6`(r3) 313e1051a39Sopenharmony_ci ld $A[1][2],`8*7`(r3) 314e1051a39Sopenharmony_ci ld $A[1][3],`8*8`(r3) 
315e1051a39Sopenharmony_ci ld $A[1][4],`8*9`(r3) 316e1051a39Sopenharmony_ci ld $A[2][0],`8*10`(r3) 317e1051a39Sopenharmony_ci ld $A[2][1],`8*11`(r3) 318e1051a39Sopenharmony_ci ld $A[2][2],`8*12`(r3) 319e1051a39Sopenharmony_ci ld $A[2][3],`8*13`(r3) 320e1051a39Sopenharmony_ci ld $A[2][4],`8*14`(r3) 321e1051a39Sopenharmony_ci ld $A[3][0],`8*15`(r3) 322e1051a39Sopenharmony_ci ld $A[3][1],`8*16`(r3) 323e1051a39Sopenharmony_ci ld $A[3][2],`8*17`(r3) 324e1051a39Sopenharmony_ci ld $A[3][3],`8*18`(r3) 325e1051a39Sopenharmony_ci ld $A[3][4],`8*19`(r3) 326e1051a39Sopenharmony_ci ld $A[4][0],`8*20`(r3) 327e1051a39Sopenharmony_ci ld $A[4][1],`8*21`(r3) 328e1051a39Sopenharmony_ci ld $A[4][2],`8*22`(r3) 329e1051a39Sopenharmony_ci ld $A[4][3],`8*23`(r3) 330e1051a39Sopenharmony_ci ld $A[4][4],`8*24`(r3) 331e1051a39Sopenharmony_ci 332e1051a39Sopenharmony_ci bl KeccakF1600_int 333e1051a39Sopenharmony_ci 334e1051a39Sopenharmony_ci $POP r3,`$LOCALS+0*$SIZE_T`($sp) 335e1051a39Sopenharmony_ci std $A[0][0],`8*0`(r3) ; return A[5][5] 336e1051a39Sopenharmony_ci std $A[0][1],`8*1`(r3) 337e1051a39Sopenharmony_ci std $A[0][2],`8*2`(r3) 338e1051a39Sopenharmony_ci std $A[0][3],`8*3`(r3) 339e1051a39Sopenharmony_ci std $A[0][4],`8*4`(r3) 340e1051a39Sopenharmony_ci std $A[1][0],`8*5`(r3) 341e1051a39Sopenharmony_ci std $A[1][1],`8*6`(r3) 342e1051a39Sopenharmony_ci std $A[1][2],`8*7`(r3) 343e1051a39Sopenharmony_ci std $A[1][3],`8*8`(r3) 344e1051a39Sopenharmony_ci std $A[1][4],`8*9`(r3) 345e1051a39Sopenharmony_ci std $A[2][0],`8*10`(r3) 346e1051a39Sopenharmony_ci std $A[2][1],`8*11`(r3) 347e1051a39Sopenharmony_ci std $A[2][2],`8*12`(r3) 348e1051a39Sopenharmony_ci std $A[2][3],`8*13`(r3) 349e1051a39Sopenharmony_ci std $A[2][4],`8*14`(r3) 350e1051a39Sopenharmony_ci std $A[3][0],`8*15`(r3) 351e1051a39Sopenharmony_ci std $A[3][1],`8*16`(r3) 352e1051a39Sopenharmony_ci std $A[3][2],`8*17`(r3) 353e1051a39Sopenharmony_ci std $A[3][3],`8*18`(r3) 354e1051a39Sopenharmony_ci std $A[3][4],`8*19`(r3) 
355e1051a39Sopenharmony_ci std $A[4][0],`8*20`(r3) 356e1051a39Sopenharmony_ci std $A[4][1],`8*21`(r3) 357e1051a39Sopenharmony_ci std $A[4][2],`8*22`(r3) 358e1051a39Sopenharmony_ci std $A[4][3],`8*23`(r3) 359e1051a39Sopenharmony_ci std $A[4][4],`8*24`(r3) 360e1051a39Sopenharmony_ci 361e1051a39Sopenharmony_ci $POP r0,`$FRAME+$LRSAVE`($sp) 362e1051a39Sopenharmony_ci $POP r14,`$FRAME-$SIZE_T*18`($sp) 363e1051a39Sopenharmony_ci $POP r15,`$FRAME-$SIZE_T*17`($sp) 364e1051a39Sopenharmony_ci $POP r16,`$FRAME-$SIZE_T*16`($sp) 365e1051a39Sopenharmony_ci $POP r17,`$FRAME-$SIZE_T*15`($sp) 366e1051a39Sopenharmony_ci $POP r18,`$FRAME-$SIZE_T*14`($sp) 367e1051a39Sopenharmony_ci $POP r19,`$FRAME-$SIZE_T*13`($sp) 368e1051a39Sopenharmony_ci $POP r20,`$FRAME-$SIZE_T*12`($sp) 369e1051a39Sopenharmony_ci $POP r21,`$FRAME-$SIZE_T*11`($sp) 370e1051a39Sopenharmony_ci $POP r22,`$FRAME-$SIZE_T*10`($sp) 371e1051a39Sopenharmony_ci $POP r23,`$FRAME-$SIZE_T*9`($sp) 372e1051a39Sopenharmony_ci $POP r24,`$FRAME-$SIZE_T*8`($sp) 373e1051a39Sopenharmony_ci $POP r25,`$FRAME-$SIZE_T*7`($sp) 374e1051a39Sopenharmony_ci $POP r26,`$FRAME-$SIZE_T*6`($sp) 375e1051a39Sopenharmony_ci $POP r27,`$FRAME-$SIZE_T*5`($sp) 376e1051a39Sopenharmony_ci $POP r28,`$FRAME-$SIZE_T*4`($sp) 377e1051a39Sopenharmony_ci $POP r29,`$FRAME-$SIZE_T*3`($sp) 378e1051a39Sopenharmony_ci $POP r30,`$FRAME-$SIZE_T*2`($sp) 379e1051a39Sopenharmony_ci $POP r31,`$FRAME-$SIZE_T*1`($sp) 380e1051a39Sopenharmony_ci mtlr r0 381e1051a39Sopenharmony_ci addi $sp,$sp,$FRAME 382e1051a39Sopenharmony_ci blr 383e1051a39Sopenharmony_ci .long 0 384e1051a39Sopenharmony_ci .byte 0,12,4,1,0x80,18,1,0 385e1051a39Sopenharmony_ci .long 0 386e1051a39Sopenharmony_ci.size KeccakF1600,.-KeccakF1600 387e1051a39Sopenharmony_ci 388e1051a39Sopenharmony_ci.type dword_le_load,\@function 389e1051a39Sopenharmony_ci.align 5 390e1051a39Sopenharmony_cidword_le_load: 391e1051a39Sopenharmony_ci lbz r0,1(r3) 392e1051a39Sopenharmony_ci lbz r4,2(r3) 393e1051a39Sopenharmony_ci lbz 
r5,3(r3) 394e1051a39Sopenharmony_ci insrdi r0,r4,8,48 395e1051a39Sopenharmony_ci lbz r4,4(r3) 396e1051a39Sopenharmony_ci insrdi r0,r5,8,40 397e1051a39Sopenharmony_ci lbz r5,5(r3) 398e1051a39Sopenharmony_ci insrdi r0,r4,8,32 399e1051a39Sopenharmony_ci lbz r4,6(r3) 400e1051a39Sopenharmony_ci insrdi r0,r5,8,24 401e1051a39Sopenharmony_ci lbz r5,7(r3) 402e1051a39Sopenharmony_ci insrdi r0,r4,8,16 403e1051a39Sopenharmony_ci lbzu r4,8(r3) 404e1051a39Sopenharmony_ci insrdi r0,r5,8,8 405e1051a39Sopenharmony_ci insrdi r0,r4,8,0 406e1051a39Sopenharmony_ci blr 407e1051a39Sopenharmony_ci .long 0 408e1051a39Sopenharmony_ci .byte 0,12,0x14,0,0,0,1,0 409e1051a39Sopenharmony_ci .long 0 410e1051a39Sopenharmony_ci.size dword_le_load,.-dword_le_load 411e1051a39Sopenharmony_ci 412e1051a39Sopenharmony_ci.globl SHA3_absorb 413e1051a39Sopenharmony_ci.type SHA3_absorb,\@function 414e1051a39Sopenharmony_ci.align 5 415e1051a39Sopenharmony_ciSHA3_absorb: 416e1051a39Sopenharmony_ci $STU $sp,-$FRAME($sp) 417e1051a39Sopenharmony_ci mflr r0 418e1051a39Sopenharmony_ci $PUSH r14,`$FRAME-$SIZE_T*18`($sp) 419e1051a39Sopenharmony_ci $PUSH r15,`$FRAME-$SIZE_T*17`($sp) 420e1051a39Sopenharmony_ci $PUSH r16,`$FRAME-$SIZE_T*16`($sp) 421e1051a39Sopenharmony_ci $PUSH r17,`$FRAME-$SIZE_T*15`($sp) 422e1051a39Sopenharmony_ci $PUSH r18,`$FRAME-$SIZE_T*14`($sp) 423e1051a39Sopenharmony_ci $PUSH r19,`$FRAME-$SIZE_T*13`($sp) 424e1051a39Sopenharmony_ci $PUSH r20,`$FRAME-$SIZE_T*12`($sp) 425e1051a39Sopenharmony_ci $PUSH r21,`$FRAME-$SIZE_T*11`($sp) 426e1051a39Sopenharmony_ci $PUSH r22,`$FRAME-$SIZE_T*10`($sp) 427e1051a39Sopenharmony_ci $PUSH r23,`$FRAME-$SIZE_T*9`($sp) 428e1051a39Sopenharmony_ci $PUSH r24,`$FRAME-$SIZE_T*8`($sp) 429e1051a39Sopenharmony_ci $PUSH r25,`$FRAME-$SIZE_T*7`($sp) 430e1051a39Sopenharmony_ci $PUSH r26,`$FRAME-$SIZE_T*6`($sp) 431e1051a39Sopenharmony_ci $PUSH r27,`$FRAME-$SIZE_T*5`($sp) 432e1051a39Sopenharmony_ci $PUSH r28,`$FRAME-$SIZE_T*4`($sp) 433e1051a39Sopenharmony_ci $PUSH 
r29,`$FRAME-$SIZE_T*3`($sp) 434e1051a39Sopenharmony_ci $PUSH r30,`$FRAME-$SIZE_T*2`($sp) 435e1051a39Sopenharmony_ci $PUSH r31,`$FRAME-$SIZE_T*1`($sp) 436e1051a39Sopenharmony_ci $PUSH r0,`$FRAME+$LRSAVE`($sp) 437e1051a39Sopenharmony_ci 438e1051a39Sopenharmony_ci bl PICmeup 439e1051a39Sopenharmony_ci subi r4,r4,1 ; prepare for lbzu 440e1051a39Sopenharmony_ci subi r12,r12,8 ; prepare for ldu 441e1051a39Sopenharmony_ci 442e1051a39Sopenharmony_ci $PUSH r3,`$LOCALS+0*$SIZE_T`($sp) ; save A[][] 443e1051a39Sopenharmony_ci $PUSH r4,`$LOCALS+1*$SIZE_T`($sp) ; save inp 444e1051a39Sopenharmony_ci $PUSH r5,`$LOCALS+2*$SIZE_T`($sp) ; save len 445e1051a39Sopenharmony_ci $PUSH r6,`$LOCALS+3*$SIZE_T`($sp) ; save bsz 446e1051a39Sopenharmony_ci mr r0,r6 447e1051a39Sopenharmony_ci $PUSH r12,`$LOCALS+4*$SIZE_T`($sp) 448e1051a39Sopenharmony_ci 449e1051a39Sopenharmony_ci ld $A[0][0],`8*0`(r3) ; load A[5][5] 450e1051a39Sopenharmony_ci ld $A[0][1],`8*1`(r3) 451e1051a39Sopenharmony_ci ld $A[0][2],`8*2`(r3) 452e1051a39Sopenharmony_ci ld $A[0][3],`8*3`(r3) 453e1051a39Sopenharmony_ci ld $A[0][4],`8*4`(r3) 454e1051a39Sopenharmony_ci ld $A[1][0],`8*5`(r3) 455e1051a39Sopenharmony_ci ld $A[1][1],`8*6`(r3) 456e1051a39Sopenharmony_ci ld $A[1][2],`8*7`(r3) 457e1051a39Sopenharmony_ci ld $A[1][3],`8*8`(r3) 458e1051a39Sopenharmony_ci ld $A[1][4],`8*9`(r3) 459e1051a39Sopenharmony_ci ld $A[2][0],`8*10`(r3) 460e1051a39Sopenharmony_ci ld $A[2][1],`8*11`(r3) 461e1051a39Sopenharmony_ci ld $A[2][2],`8*12`(r3) 462e1051a39Sopenharmony_ci ld $A[2][3],`8*13`(r3) 463e1051a39Sopenharmony_ci ld $A[2][4],`8*14`(r3) 464e1051a39Sopenharmony_ci ld $A[3][0],`8*15`(r3) 465e1051a39Sopenharmony_ci ld $A[3][1],`8*16`(r3) 466e1051a39Sopenharmony_ci ld $A[3][2],`8*17`(r3) 467e1051a39Sopenharmony_ci ld $A[3][3],`8*18`(r3) 468e1051a39Sopenharmony_ci ld $A[3][4],`8*19`(r3) 469e1051a39Sopenharmony_ci ld $A[4][0],`8*20`(r3) 470e1051a39Sopenharmony_ci ld $A[4][1],`8*21`(r3) 471e1051a39Sopenharmony_ci ld $A[4][2],`8*22`(r3) 
472e1051a39Sopenharmony_ci ld $A[4][3],`8*23`(r3) 473e1051a39Sopenharmony_ci ld $A[4][4],`8*24`(r3) 474e1051a39Sopenharmony_ci 475e1051a39Sopenharmony_ci mr r3,r4 476e1051a39Sopenharmony_ci mr r4,r5 477e1051a39Sopenharmony_ci mr r5,r0 478e1051a39Sopenharmony_ci 479e1051a39Sopenharmony_ci b .Loop_absorb 480e1051a39Sopenharmony_ci 481e1051a39Sopenharmony_ci.align 4 482e1051a39Sopenharmony_ci.Loop_absorb: 483e1051a39Sopenharmony_ci $UCMP r4,r5 ; len < bsz? 484e1051a39Sopenharmony_ci blt .Labsorbed 485e1051a39Sopenharmony_ci 486e1051a39Sopenharmony_ci sub r4,r4,r5 ; len -= bsz 487e1051a39Sopenharmony_ci srwi r5,r5,3 488e1051a39Sopenharmony_ci $PUSH r4,`$LOCALS+2*$SIZE_T`($sp) ; save len 489e1051a39Sopenharmony_ci mtctr r5 490e1051a39Sopenharmony_ci bl dword_le_load ; *inp++ 491e1051a39Sopenharmony_ci xor $A[0][0],$A[0][0],r0 492e1051a39Sopenharmony_ci bdz .Lprocess_block 493e1051a39Sopenharmony_ci bl dword_le_load ; *inp++ 494e1051a39Sopenharmony_ci xor $A[0][1],$A[0][1],r0 495e1051a39Sopenharmony_ci bdz .Lprocess_block 496e1051a39Sopenharmony_ci bl dword_le_load ; *inp++ 497e1051a39Sopenharmony_ci xor $A[0][2],$A[0][2],r0 498e1051a39Sopenharmony_ci bdz .Lprocess_block 499e1051a39Sopenharmony_ci bl dword_le_load ; *inp++ 500e1051a39Sopenharmony_ci xor $A[0][3],$A[0][3],r0 501e1051a39Sopenharmony_ci bdz .Lprocess_block 502e1051a39Sopenharmony_ci bl dword_le_load ; *inp++ 503e1051a39Sopenharmony_ci xor $A[0][4],$A[0][4],r0 504e1051a39Sopenharmony_ci bdz .Lprocess_block 505e1051a39Sopenharmony_ci bl dword_le_load ; *inp++ 506e1051a39Sopenharmony_ci xor $A[1][0],$A[1][0],r0 507e1051a39Sopenharmony_ci bdz .Lprocess_block 508e1051a39Sopenharmony_ci bl dword_le_load ; *inp++ 509e1051a39Sopenharmony_ci xor $A[1][1],$A[1][1],r0 510e1051a39Sopenharmony_ci bdz .Lprocess_block 511e1051a39Sopenharmony_ci bl dword_le_load ; *inp++ 512e1051a39Sopenharmony_ci xor $A[1][2],$A[1][2],r0 513e1051a39Sopenharmony_ci bdz .Lprocess_block 514e1051a39Sopenharmony_ci bl dword_le_load ; *inp++ 
515e1051a39Sopenharmony_ci xor $A[1][3],$A[1][3],r0 516e1051a39Sopenharmony_ci bdz .Lprocess_block 517e1051a39Sopenharmony_ci bl dword_le_load ; *inp++ 518e1051a39Sopenharmony_ci xor $A[1][4],$A[1][4],r0 519e1051a39Sopenharmony_ci bdz .Lprocess_block 520e1051a39Sopenharmony_ci bl dword_le_load ; *inp++ 521e1051a39Sopenharmony_ci xor $A[2][0],$A[2][0],r0 522e1051a39Sopenharmony_ci bdz .Lprocess_block 523e1051a39Sopenharmony_ci bl dword_le_load ; *inp++ 524e1051a39Sopenharmony_ci xor $A[2][1],$A[2][1],r0 525e1051a39Sopenharmony_ci bdz .Lprocess_block 526e1051a39Sopenharmony_ci bl dword_le_load ; *inp++ 527e1051a39Sopenharmony_ci xor $A[2][2],$A[2][2],r0 528e1051a39Sopenharmony_ci bdz .Lprocess_block 529e1051a39Sopenharmony_ci bl dword_le_load ; *inp++ 530e1051a39Sopenharmony_ci xor $A[2][3],$A[2][3],r0 531e1051a39Sopenharmony_ci bdz .Lprocess_block 532e1051a39Sopenharmony_ci bl dword_le_load ; *inp++ 533e1051a39Sopenharmony_ci xor $A[2][4],$A[2][4],r0 534e1051a39Sopenharmony_ci bdz .Lprocess_block 535e1051a39Sopenharmony_ci bl dword_le_load ; *inp++ 536e1051a39Sopenharmony_ci xor $A[3][0],$A[3][0],r0 537e1051a39Sopenharmony_ci bdz .Lprocess_block 538e1051a39Sopenharmony_ci bl dword_le_load ; *inp++ 539e1051a39Sopenharmony_ci xor $A[3][1],$A[3][1],r0 540e1051a39Sopenharmony_ci bdz .Lprocess_block 541e1051a39Sopenharmony_ci bl dword_le_load ; *inp++ 542e1051a39Sopenharmony_ci xor $A[3][2],$A[3][2],r0 543e1051a39Sopenharmony_ci bdz .Lprocess_block 544e1051a39Sopenharmony_ci bl dword_le_load ; *inp++ 545e1051a39Sopenharmony_ci xor $A[3][3],$A[3][3],r0 546e1051a39Sopenharmony_ci bdz .Lprocess_block 547e1051a39Sopenharmony_ci bl dword_le_load ; *inp++ 548e1051a39Sopenharmony_ci xor $A[3][4],$A[3][4],r0 549e1051a39Sopenharmony_ci bdz .Lprocess_block 550e1051a39Sopenharmony_ci bl dword_le_load ; *inp++ 551e1051a39Sopenharmony_ci xor $A[4][0],$A[4][0],r0 552e1051a39Sopenharmony_ci bdz .Lprocess_block 553e1051a39Sopenharmony_ci bl dword_le_load ; *inp++ 
554e1051a39Sopenharmony_ci xor $A[4][1],$A[4][1],r0 555e1051a39Sopenharmony_ci bdz .Lprocess_block 556e1051a39Sopenharmony_ci bl dword_le_load ; *inp++ 557e1051a39Sopenharmony_ci xor $A[4][2],$A[4][2],r0 558e1051a39Sopenharmony_ci bdz .Lprocess_block 559e1051a39Sopenharmony_ci bl dword_le_load ; *inp++ 560e1051a39Sopenharmony_ci xor $A[4][3],$A[4][3],r0 561e1051a39Sopenharmony_ci bdz .Lprocess_block 562e1051a39Sopenharmony_ci bl dword_le_load ; *inp++ 563e1051a39Sopenharmony_ci xor $A[4][4],$A[4][4],r0 564e1051a39Sopenharmony_ci 565e1051a39Sopenharmony_ci.Lprocess_block: 566e1051a39Sopenharmony_ci $PUSH r3,`$LOCALS+1*$SIZE_T`($sp) ; save inp 567e1051a39Sopenharmony_ci 568e1051a39Sopenharmony_ci bl KeccakF1600_int 569e1051a39Sopenharmony_ci 570e1051a39Sopenharmony_ci $POP r0,`$LOCALS+4*$SIZE_T`($sp) ; pull iotas[24] 571e1051a39Sopenharmony_ci $POP r5,`$LOCALS+3*$SIZE_T`($sp) ; restore bsz 572e1051a39Sopenharmony_ci $POP r4,`$LOCALS+2*$SIZE_T`($sp) ; restore len 573e1051a39Sopenharmony_ci $POP r3,`$LOCALS+1*$SIZE_T`($sp) ; restore inp 574e1051a39Sopenharmony_ci addic r0,r0,`-8*24` ; rewind iotas 575e1051a39Sopenharmony_ci $PUSH r0,`$LOCALS+4*$SIZE_T`($sp) 576e1051a39Sopenharmony_ci 577e1051a39Sopenharmony_ci b .Loop_absorb 578e1051a39Sopenharmony_ci 579e1051a39Sopenharmony_ci.align 4 580e1051a39Sopenharmony_ci.Labsorbed: 581e1051a39Sopenharmony_ci $POP r3,`$LOCALS+0*$SIZE_T`($sp) 582e1051a39Sopenharmony_ci std $A[0][0],`8*0`(r3) ; return A[5][5] 583e1051a39Sopenharmony_ci std $A[0][1],`8*1`(r3) 584e1051a39Sopenharmony_ci std $A[0][2],`8*2`(r3) 585e1051a39Sopenharmony_ci std $A[0][3],`8*3`(r3) 586e1051a39Sopenharmony_ci std $A[0][4],`8*4`(r3) 587e1051a39Sopenharmony_ci std $A[1][0],`8*5`(r3) 588e1051a39Sopenharmony_ci std $A[1][1],`8*6`(r3) 589e1051a39Sopenharmony_ci std $A[1][2],`8*7`(r3) 590e1051a39Sopenharmony_ci std $A[1][3],`8*8`(r3) 591e1051a39Sopenharmony_ci std $A[1][4],`8*9`(r3) 592e1051a39Sopenharmony_ci std $A[2][0],`8*10`(r3) 593e1051a39Sopenharmony_ci std 
$A[2][1],`8*11`(r3) 594e1051a39Sopenharmony_ci std $A[2][2],`8*12`(r3) 595e1051a39Sopenharmony_ci std $A[2][3],`8*13`(r3) 596e1051a39Sopenharmony_ci std $A[2][4],`8*14`(r3) 597e1051a39Sopenharmony_ci std $A[3][0],`8*15`(r3) 598e1051a39Sopenharmony_ci std $A[3][1],`8*16`(r3) 599e1051a39Sopenharmony_ci std $A[3][2],`8*17`(r3) 600e1051a39Sopenharmony_ci std $A[3][3],`8*18`(r3) 601e1051a39Sopenharmony_ci std $A[3][4],`8*19`(r3) 602e1051a39Sopenharmony_ci std $A[4][0],`8*20`(r3) 603e1051a39Sopenharmony_ci std $A[4][1],`8*21`(r3) 604e1051a39Sopenharmony_ci std $A[4][2],`8*22`(r3) 605e1051a39Sopenharmony_ci std $A[4][3],`8*23`(r3) 606e1051a39Sopenharmony_ci std $A[4][4],`8*24`(r3) 607e1051a39Sopenharmony_ci 608e1051a39Sopenharmony_ci mr r3,r4 ; return value 609e1051a39Sopenharmony_ci $POP r0,`$FRAME+$LRSAVE`($sp) 610e1051a39Sopenharmony_ci $POP r14,`$FRAME-$SIZE_T*18`($sp) 611e1051a39Sopenharmony_ci $POP r15,`$FRAME-$SIZE_T*17`($sp) 612e1051a39Sopenharmony_ci $POP r16,`$FRAME-$SIZE_T*16`($sp) 613e1051a39Sopenharmony_ci $POP r17,`$FRAME-$SIZE_T*15`($sp) 614e1051a39Sopenharmony_ci $POP r18,`$FRAME-$SIZE_T*14`($sp) 615e1051a39Sopenharmony_ci $POP r19,`$FRAME-$SIZE_T*13`($sp) 616e1051a39Sopenharmony_ci $POP r20,`$FRAME-$SIZE_T*12`($sp) 617e1051a39Sopenharmony_ci $POP r21,`$FRAME-$SIZE_T*11`($sp) 618e1051a39Sopenharmony_ci $POP r22,`$FRAME-$SIZE_T*10`($sp) 619e1051a39Sopenharmony_ci $POP r23,`$FRAME-$SIZE_T*9`($sp) 620e1051a39Sopenharmony_ci $POP r24,`$FRAME-$SIZE_T*8`($sp) 621e1051a39Sopenharmony_ci $POP r25,`$FRAME-$SIZE_T*7`($sp) 622e1051a39Sopenharmony_ci $POP r26,`$FRAME-$SIZE_T*6`($sp) 623e1051a39Sopenharmony_ci $POP r27,`$FRAME-$SIZE_T*5`($sp) 624e1051a39Sopenharmony_ci $POP r28,`$FRAME-$SIZE_T*4`($sp) 625e1051a39Sopenharmony_ci $POP r29,`$FRAME-$SIZE_T*3`($sp) 626e1051a39Sopenharmony_ci $POP r30,`$FRAME-$SIZE_T*2`($sp) 627e1051a39Sopenharmony_ci $POP r31,`$FRAME-$SIZE_T*1`($sp) 628e1051a39Sopenharmony_ci mtlr r0 629e1051a39Sopenharmony_ci addi $sp,$sp,$FRAME 
; Tail of SHA3_absorb: return to the caller.
	blr
	.long	0
	.byte	0,12,4,1,0x80,18,4,0
	.long	0
.size	SHA3_absorb,.-SHA3_absorb
___
{
# SHA3_squeeze: copy output bytes out of the Keccak state.
#
# Incoming registers, as read by the prologue below:
#   r3 - address of the 64-bit state lanes
#   r4 - output buffer
#   r5 - number of bytes to produce (len)
#   r6 - number of bytes to emit between KeccakF1600 calls (bsz)
#
# The arguments are copied to r28-r31, which are saved and restored
# around the body; the code relies on KeccakF1600 leaving r28-r31 intact.
# Each lane is loaded with ldu and written least significant byte first.
# r6 counts down the bytes left in the current block; once it is used up
# KeccakF1600 is called on the state and reading restarts at lane 0.
# A final run of fewer than 8 bytes is written by the CTR loop at
# .Lsqueeze_tail.
#
# A zero len is rejected up front.  Without that check the tail loop
# would be entered with CTR == 0, and because bdnz decrements CTR before
# testing it, the loop would keep storing bytes far beyond the buffer.
my ($A_flat,$out,$len,$bsz) = map("r$_",(28..31));
$code.=<<___;
.globl	SHA3_squeeze
.type	SHA3_squeeze,\@function
.align	5
SHA3_squeeze:
	$STU	$sp,`-10*$SIZE_T`($sp)
	mflr	r0
	$PUSH	r28,`6*$SIZE_T`($sp)
	$PUSH	r29,`7*$SIZE_T`($sp)
	$PUSH	r30,`8*$SIZE_T`($sp)
	$PUSH	r31,`9*$SIZE_T`($sp)
	$PUSH	r0,`10*$SIZE_T+$LRSAVE`($sp)

	mr	$A_flat,r3
	subi	r3,r3,8			; prepare for ldu
	subi	$out,r4,1		; prepare for stbu
	mr	$len,r5
	mr	$bsz,r6
	${UCMP}i $len,0			; nothing requested?
	beq	.Lsqueeze_done		; then store nothing at all
	b	.Loop_squeeze

.align	4
.Loop_squeeze:
	ldu	r0,8(r3)		; next lane
	${UCMP}i $len,8
	blt	.Lsqueeze_tail		; fewer than 8 bytes left to write

	stb	r0,1($out)		; least significant byte first
	srdi	r0,r0,8
	stb	r0,2($out)
	srdi	r0,r0,8
	stb	r0,3($out)
	srdi	r0,r0,8
	stb	r0,4($out)
	srdi	r0,r0,8
	stb	r0,5($out)
	srdi	r0,r0,8
	stb	r0,6($out)
	srdi	r0,r0,8
	stb	r0,7($out)
	srdi	r0,r0,8
	stbu	r0,8($out)		; last byte, advance out by 8

	subic.	$len,$len,8
	beq	.Lsqueeze_done

	subic.	r6,r6,8			; bytes left in the current block
	bgt	.Loop_squeeze

	mr	r3,$A_flat
	bl	KeccakF1600
	subi	r3,$A_flat,8		; prepare for ldu
	mr	r6,$bsz			; start a fresh block
	b	.Loop_squeeze

.align	4
.Lsqueeze_tail:
	mtctr	$len			; 1..7 bytes remain
.Loop_tail:
	stbu	r0,1($out)
	srdi	r0,r0,8
	bdnz	.Loop_tail

.Lsqueeze_done:
	$POP	r0,`10*$SIZE_T+$LRSAVE`($sp)
	$POP	r28,`6*$SIZE_T`($sp)
	$POP	r29,`7*$SIZE_T`($sp)
	$POP	r30,`8*$SIZE_T`($sp)
	$POP	r31,`9*$SIZE_T`($sp)
	mtlr	r0
	addi	$sp,$sp,`10*$SIZE_T`
	blr
	.long	0
	.byte	0,12,4,1,0x80,4,4,0
	.long	0
.size	SHA3_squeeze,.-SHA3_squeeze
___
}

# Ugly hack here, because PPC assembler syntax seems to vary too
# much from platform to platform...
# PICmeup: position-independent lookup of the iotas table.  The bcl
# puts the address of the following mflr (PICmeup+8) into LR; adding
# 64-8 to it gives PICmeup+64.  PICmeup is 64-byte aligned and its nine
# 32-bit words of code and data are padded with .space to 64 bytes, so
# that sum is the address of iotas, returned in r12.  The caller's LR is
# parked in r0 and restored before returning.
$code.=<<___;
.align	6
PICmeup:
	mflr	r0
	bcl	20,31,\$+4
	mflr	r12   ; vvvvvv "distance" between . and 1st data entry
	addi	r12,r12,`64-8`
	mtlr	r0
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
	.space	`64-9*4`
.type	iotas,\@object
iotas:
___

# The 24 64-bit entries of the iotas table.  They are kept as strings so
# that they are emitted verbatim, whatever the integer width of the perl
# running this script.
my @iota_quads = qw(
    0x0000000000000001 0x0000000000008082
    0x800000000000808a 0x8000000080008000
    0x000000000000808b 0x0000000080000001
    0x8000000080008081 0x8000000000008009
    0x000000000000008a 0x0000000000000088
    0x0000000080008009 0x000000008000000a
    0x000000008000808b 0x800000000000008b
    0x8000000000008089 0x8000000000008003
    0x8000000000008002 0x8000000000000080
    0x000000000000800a 0x800000008000000a
    0x8000000080008081 0x8000000000008080
    0x0000000080000001 0x8000000080008008
);
$code .= join("", map { "\t.quad\t$_\n" } @iota_quads);

$code.=<<___;
.size	iotas,.-iotas
.asciz	"Keccak-1600 absorb and squeeze for PPC64, CRYPTOGAMS by <appro\@openssl.org>"
___

# Replace every backtick-quoted span in the accumulated text with the
# result of evaluating it as a perl expression, then emit the assembly.
$code =~ s/\`([^\`]*)\`/eval $1/gem;
print $code;
close(STDOUT) or die "error closing STDOUT: $!";