#!/usr/bin/env perl
# Copyright 2017-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# Keccak-1600 for PPC64.
#
# June 2017.
#
# This is straightforward KECCAK_1X_ALT implementation that works on
# *any* PPC64. Then PowerISA 2.07 adds 2x64-bit vector rotate, and
# it's possible to achieve performance better than below, but that is
# naturally option only for POWER8 and successors...
#
######################################################################
# Numbers are cycles per processed byte.
#
#		r=1088(*)
#
# PPC970/G5	14.0/+130%
# POWER7	9.7/+110%
# POWER8	10.6/+100%
# POWER9	8.2/+66%
#
# (*)	Corresponds to SHA3-256. Percentage after slash is improvement
#	over gcc-4.x-generated KECCAK_1X_ALT code. Newer compilers do
#	much better (but watch out for them generating code specific
#	to processor they execute on).

# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# Only 64-bit flavours are supported; everything below is parameterised
# on the pointer size and on the mnemonics selected here.
if ($flavour =~ /64/) {
	$SIZE_T	=8;
	$LRSAVE	=2*$SIZE_T;
	$UCMP	="cmpld";
	$STU	="stdu";
	$POP	="ld";
	$PUSH	="std";
} else { die "nonsense $flavour"; }

# Look for the ppc-xlate.pl translator next to this script first, then
# in ../../perlasm/ relative to it.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Everything subsequently printed to STDOUT is piped through the translator.
open STDOUT,"| $^X $xlate $flavour \"$output\""
    or die "can't call $xlate: $!";

# Stack frame used by KeccakF1600 and SHA3_absorb.  With SIZE_T=8 this
# evaluates to FRAME=272, LOCALS=48 and TEMP=96:
#   LOCALS+0*SIZE_T  pointer to A[][]
#   LOCALS+1*SIZE_T  inp   (SHA3_absorb only)
#   LOCALS+2*SIZE_T  len   (SHA3_absorb only)
#   LOCALS+3*SIZE_T  bsz   (SHA3_absorb only)
#   LOCALS+4*SIZE_T  running pointer into the iotas table
#   TEMP+0..TEMP+24  spill slots for A[0..3][4] during Theta
#   FRAME-18*SIZE_T and up: saved r14..r31
$FRAME=24*$SIZE_T+6*$SIZE_T+32;
$LOCALS=6*$SIZE_T;
$TEMP=$LOCALS+6*$SIZE_T;

my $sp	="r1";

# The whole 5x5 state is kept in registers: row i occupies five
# consecutive GPRs starting at r7, r12, r17, r22 and r27.  That mapping
# would put A[1][1] in r13, so r6 is substituted for it.
my @A = map([ "r$_", "r".($_+1), "r".($_+2), "r".($_+3), "r".($_+4) ],
            (7, 12, 17, 22, 27));
   $A[1][1] = "r6"; # r13 is reserved

# Scratch registers.  Only four are free; C[4..7] are temporarily aliased
# to state registers inside Theta (see below).
my @C = map("r$_", (0,3,4,5));

# Rotation amounts applied in the Rho+Pi step, indexed as
# $rhotates[row][column].
my @rhotates = ([  0,  1, 62, 28, 27 ],
                [ 36, 44,  6, 55, 20 ],
                [  3, 10, 43, 25, 39 ],
                [ 41, 45, 15, 21,  8 ],
                [ 18,  2, 61, 56, 14 ]);

# KeccakF1600_int: runs 24 iterations of .Loop (CTR is loaded with 24)
# over the state held in @A registers.  It has no frame of its own: it
# uses the caller's TEMP slots for spills and reads/advances the iotas
# pointer kept in the caller's LOCALS+4*SIZE_T slot, so it must only be
# called from a routine that has set up the FRAME described above.
#
# Theta starts by spilling A[0..3][4] to TEMP so that their registers can
# double as C[4..7].
$code.=<<___;
.text

.type	KeccakF1600_int,\@function
.align	5
KeccakF1600_int:
	li	r0,24
	mtctr	r0
	b	.Loop
.align	4
.Loop:
	xor	$C[0],$A[0][0],$A[1][0]		; Theta
	std	$A[0][4],`$TEMP+0`($sp)
	xor	$C[1],$A[0][1],$A[1][1]
	std	$A[1][4],`$TEMP+8`($sp)
	xor	$C[2],$A[0][2],$A[1][2]
	std	$A[2][4],`$TEMP+16`($sp)
	xor	$C[3],$A[0][3],$A[1][3]
	std	$A[3][4],`$TEMP+24`($sp)
___
	# From here until the values are reloaded from TEMP, the registers
	# of A[0..3][4] serve as four extra scratch registers.
	$C[4]=$A[0][4];
	$C[5]=$A[1][4];
	$C[6]=$A[2][4];
	$C[7]=$A[3][4];
$code.=<<___;
	xor	$C[4],$A[0][4],$A[1][4]
	xor	$C[0],$C[0],$A[2][0]
	xor	$C[1],$C[1],$A[2][1]
	xor	$C[2],$C[2],$A[2][2]
	xor	$C[3],$C[3],$A[2][3]
	xor	$C[4],$C[4],$A[2][4]
	xor	$C[0],$C[0],$A[3][0]
	xor	$C[1],$C[1],$A[3][1]
	xor	$C[2],$C[2],$A[3][2]
	xor	$C[3],$C[3],$A[3][3]
	xor	$C[4],$C[4],$A[3][4]
	xor	$C[0],$C[0],$A[4][0]
	xor	$C[2],$C[2],$A[4][2]
	xor	$C[1],$C[1],$A[4][1]
	xor	$C[3],$C[3],$A[4][3]
	rotldi	$C[5],$C[2],1
	xor	$C[4],$C[4],$A[4][4]
	rotldi	$C[6],$C[3],1
	xor	$C[5],$C[5],$C[0]
	rotldi	$C[7],$C[4],1

	xor	$A[0][1],$A[0][1],$C[5]
	xor	$A[1][1],$A[1][1],$C[5]
	xor	$A[2][1],$A[2][1],$C[5]
	xor	$A[3][1],$A[3][1],$C[5]
	xor	$A[4][1],$A[4][1],$C[5]

	rotldi	$C[5],$C[0],1
	xor	$C[6],$C[6],$C[1]
	xor	$C[2],$C[2],$C[7]
	rotldi	$C[7],$C[1],1
	xor	$C[3],$C[3],$C[5]
	xor	$C[4],$C[4],$C[7]

	xor	$C[1], $A[0][2],$C[6]			;mr	$C[1],$A[0][2]
	xor	$A[1][2],$A[1][2],$C[6]
	xor	$A[2][2],$A[2][2],$C[6]
	xor	$A[3][2],$A[3][2],$C[6]
	xor	$A[4][2],$A[4][2],$C[6]

	xor	$A[0][0],$A[0][0],$C[4]
	xor	$A[1][0],$A[1][0],$C[4]
	xor	$A[2][0],$A[2][0],$C[4]
	xor	$A[3][0],$A[3][0],$C[4]
	xor	$A[4][0],$A[4][0],$C[4]
___
	# The borrowed registers are about to be reloaded with A[0..3][4];
	# drop the aliases so they cannot be used by mistake.
	$C[4]=undef;
	$C[5]=undef;
	$C[6]=undef;
	$C[7]=undef;
$code.=<<___;
	ld	$A[0][4],`$TEMP+0`($sp)
	xor	$C[0], $A[0][3],$C[2]			;mr	$C[0],$A[0][3]
	ld	$A[1][4],`$TEMP+8`($sp)
	xor	$A[1][3],$A[1][3],$C[2]
	ld	$A[2][4],`$TEMP+16`($sp)
	xor	$A[2][3],$A[2][3],$C[2]
	ld	$A[3][4],`$TEMP+24`($sp)
	xor	$A[3][3],$A[3][3],$C[2]
	xor	$A[4][3],$A[4][3],$C[2]

	xor	$C[2], $A[0][4],$C[3]			;mr	$C[2],$A[0][4]
	xor	$A[1][4],$A[1][4],$C[3]
	xor	$A[2][4],$A[2][4],$C[3]
	xor	$A[3][4],$A[3][4],$C[3]
	xor	$A[4][4],$A[4][4],$C[3]

	mr	$C[3],$A[0][1]				; Rho+Pi
	rotldi	$A[0][1],$A[1][1],$rhotates[1][1]
	;mr	$C[1],$A[0][2]
	rotldi	$A[0][2],$A[2][2],$rhotates[2][2]
	;mr	$C[0],$A[0][3]
	rotldi	$A[0][3],$A[3][3],$rhotates[3][3]
	;mr	$C[2],$A[0][4]
	rotldi	$A[0][4],$A[4][4],$rhotates[4][4]

	rotldi	$A[1][1],$A[1][4],$rhotates[1][4]
	rotldi	$A[2][2],$A[2][3],$rhotates[2][3]
	rotldi	$A[3][3],$A[3][2],$rhotates[3][2]
	rotldi	$A[4][4],$A[4][1],$rhotates[4][1]

	rotldi	$A[1][4],$A[4][2],$rhotates[4][2]
	rotldi	$A[2][3],$A[3][4],$rhotates[3][4]
	rotldi	$A[3][2],$A[2][1],$rhotates[2][1]
	rotldi	$A[4][1],$A[1][3],$rhotates[1][3]

	rotldi	$A[4][2],$A[2][4],$rhotates[2][4]
	rotldi	$A[3][4],$A[4][3],$rhotates[4][3]
	rotldi	$A[2][1],$A[1][2],$rhotates[1][2]
	rotldi	$A[1][3],$A[3][1],$rhotates[3][1]

	rotldi	$A[2][4],$A[4][0],$rhotates[4][0]
	rotldi	$A[4][3],$A[3][0],$rhotates[3][0]
	rotldi	$A[1][2],$A[2][0],$rhotates[2][0]
	rotldi	$A[3][1],$A[1][0],$rhotates[1][0]

	rotldi	$A[1][0],$C[0],$rhotates[0][3]
	rotldi	$A[2][0],$C[3],$rhotates[0][1]
	rotldi	$A[3][0],$C[2],$rhotates[0][4]
	rotldi	$A[4][0],$C[1],$rhotates[0][2]

	andc	$C[0],$A[0][2],$A[0][1]			; Chi+Iota
	andc	$C[1],$A[0][3],$A[0][2]
	andc	$C[2],$A[0][0],$A[0][4]
	andc	$C[3],$A[0][1],$A[0][0]
	xor	$A[0][0],$A[0][0],$C[0]
	andc	$C[0],$A[0][4],$A[0][3]
	xor	$A[0][1],$A[0][1],$C[1]
	 ld	$C[1],`$LOCALS+4*$SIZE_T`($sp)
	xor	$A[0][3],$A[0][3],$C[2]
	xor	$A[0][4],$A[0][4],$C[3]
	xor	$A[0][2],$A[0][2],$C[0]
	 ldu	$C[3],8($C[1])				; Iota[i++]

	andc	$C[0],$A[1][2],$A[1][1]
	 std	$C[1],`$LOCALS+4*$SIZE_T`($sp)
	andc	$C[1],$A[1][3],$A[1][2]
	andc	$C[2],$A[1][0],$A[1][4]
	 xor	$A[0][0],$A[0][0],$C[3]			; A[0][0] ^= Iota
	andc	$C[3],$A[1][1],$A[1][0]
	xor	$A[1][0],$A[1][0],$C[0]
	andc	$C[0],$A[1][4],$A[1][3]
	xor	$A[1][1],$A[1][1],$C[1]
	xor	$A[1][3],$A[1][3],$C[2]
	xor	$A[1][4],$A[1][4],$C[3]
	xor	$A[1][2],$A[1][2],$C[0]

	andc	$C[0],$A[2][2],$A[2][1]
	andc	$C[1],$A[2][3],$A[2][2]
	andc	$C[2],$A[2][0],$A[2][4]
	andc	$C[3],$A[2][1],$A[2][0]
	xor	$A[2][0],$A[2][0],$C[0]
	andc	$C[0],$A[2][4],$A[2][3]
	xor	$A[2][1],$A[2][1],$C[1]
	xor	$A[2][3],$A[2][3],$C[2]
	xor	$A[2][4],$A[2][4],$C[3]
	xor	$A[2][2],$A[2][2],$C[0]

	andc	$C[0],$A[3][2],$A[3][1]
	andc	$C[1],$A[3][3],$A[3][2]
	andc	$C[2],$A[3][0],$A[3][4]
	andc	$C[3],$A[3][1],$A[3][0]
	xor	$A[3][0],$A[3][0],$C[0]
	andc	$C[0],$A[3][4],$A[3][3]
	xor	$A[3][1],$A[3][1],$C[1]
	xor	$A[3][3],$A[3][3],$C[2]
	xor	$A[3][4],$A[3][4],$C[3]
	xor	$A[3][2],$A[3][2],$C[0]

	andc	$C[0],$A[4][2],$A[4][1]
	andc	$C[1],$A[4][3],$A[4][2]
	andc	$C[2],$A[4][0],$A[4][4]
	andc	$C[3],$A[4][1],$A[4][0]
	xor	$A[4][0],$A[4][0],$C[0]
	andc	$C[0],$A[4][4],$A[4][3]
	xor	$A[4][1],$A[4][1],$C[1]
	xor	$A[4][3],$A[4][3],$C[2]
	xor	$A[4][4],$A[4][4],$C[3]
	xor	$A[4][2],$A[4][2],$C[0]

	bdnz	.Loop

	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
.size	KeccakF1600_int,.-KeccakF1600_int
___

# KeccakF1600: file-local entry point (no .globl) that applies the
# permutation to a state in memory.
#   r3 - pointer to 25 consecutive 64-bit lanes, A[0][0] .. A[4][4]
# Saves r14..r31 and LR, loads the lanes into @A, calls KeccakF1600_int
# and stores the lanes back through the saved r3.
# PICmeup is defined outside this section; the code here relies on it
# leaving the address of the iotas table in r12.  r12 is biased by -8
# because KeccakF1600_int fetches each constant with a pre-incrementing
# ldu.
$code.=<<___;

.type	KeccakF1600,\@function
.align	5
KeccakF1600:
	$STU	$sp,-$FRAME($sp)
	mflr	r0
	$PUSH	r14,`$FRAME-$SIZE_T*18`($sp)
	$PUSH	r15,`$FRAME-$SIZE_T*17`($sp)
	$PUSH	r16,`$FRAME-$SIZE_T*16`($sp)
	$PUSH	r17,`$FRAME-$SIZE_T*15`($sp)
	$PUSH	r18,`$FRAME-$SIZE_T*14`($sp)
	$PUSH	r19,`$FRAME-$SIZE_T*13`($sp)
	$PUSH	r20,`$FRAME-$SIZE_T*12`($sp)
	$PUSH	r21,`$FRAME-$SIZE_T*11`($sp)
	$PUSH	r22,`$FRAME-$SIZE_T*10`($sp)
	$PUSH	r23,`$FRAME-$SIZE_T*9`($sp)
	$PUSH	r24,`$FRAME-$SIZE_T*8`($sp)
	$PUSH	r25,`$FRAME-$SIZE_T*7`($sp)
	$PUSH	r26,`$FRAME-$SIZE_T*6`($sp)
	$PUSH	r27,`$FRAME-$SIZE_T*5`($sp)
	$PUSH	r28,`$FRAME-$SIZE_T*4`($sp)
	$PUSH	r29,`$FRAME-$SIZE_T*3`($sp)
	$PUSH	r30,`$FRAME-$SIZE_T*2`($sp)
	$PUSH	r31,`$FRAME-$SIZE_T*1`($sp)
	$PUSH	r0,`$FRAME+$LRSAVE`($sp)

	bl	PICmeup
	subi	r12,r12,8			; prepare for ldu

	$PUSH	r3,`$LOCALS+0*$SIZE_T`($sp)
	;$PUSH	r4,`$LOCALS+1*$SIZE_T`($sp)
	;$PUSH	r5,`$LOCALS+2*$SIZE_T`($sp)
	;$PUSH	r6,`$LOCALS+3*$SIZE_T`($sp)
	$PUSH	r12,`$LOCALS+4*$SIZE_T`($sp)

	ld	$A[0][0],`8*0`(r3)		; load A[5][5]
	ld	$A[0][1],`8*1`(r3)
	ld	$A[0][2],`8*2`(r3)
	ld	$A[0][3],`8*3`(r3)
	ld	$A[0][4],`8*4`(r3)
	ld	$A[1][0],`8*5`(r3)
	ld	$A[1][1],`8*6`(r3)
	ld	$A[1][2],`8*7`(r3)
	ld	$A[1][3],`8*8`(r3)
	ld	$A[1][4],`8*9`(r3)
	ld	$A[2][0],`8*10`(r3)
	ld	$A[2][1],`8*11`(r3)
	ld	$A[2][2],`8*12`(r3)
	ld	$A[2][3],`8*13`(r3)
	ld	$A[2][4],`8*14`(r3)
	ld	$A[3][0],`8*15`(r3)
	ld	$A[3][1],`8*16`(r3)
	ld	$A[3][2],`8*17`(r3)
	ld	$A[3][3],`8*18`(r3)
	ld	$A[3][4],`8*19`(r3)
	ld	$A[4][0],`8*20`(r3)
	ld	$A[4][1],`8*21`(r3)
	ld	$A[4][2],`8*22`(r3)
	ld	$A[4][3],`8*23`(r3)
	ld	$A[4][4],`8*24`(r3)

	bl	KeccakF1600_int

	$POP	r3,`$LOCALS+0*$SIZE_T`($sp)
	std	$A[0][0],`8*0`(r3)		; return A[5][5]
	std	$A[0][1],`8*1`(r3)
	std	$A[0][2],`8*2`(r3)
	std	$A[0][3],`8*3`(r3)
	std	$A[0][4],`8*4`(r3)
	std	$A[1][0],`8*5`(r3)
	std	$A[1][1],`8*6`(r3)
	std	$A[1][2],`8*7`(r3)
	std	$A[1][3],`8*8`(r3)
	std	$A[1][4],`8*9`(r3)
	std	$A[2][0],`8*10`(r3)
	std	$A[2][1],`8*11`(r3)
	std	$A[2][2],`8*12`(r3)
	std	$A[2][3],`8*13`(r3)
	std	$A[2][4],`8*14`(r3)
	std	$A[3][0],`8*15`(r3)
	std	$A[3][1],`8*16`(r3)
	std	$A[3][2],`8*17`(r3)
	std	$A[3][3],`8*18`(r3)
	std	$A[3][4],`8*19`(r3)
	std	$A[4][0],`8*20`(r3)
	std	$A[4][1],`8*21`(r3)
	std	$A[4][2],`8*22`(r3)
	std	$A[4][3],`8*23`(r3)
	std	$A[4][4],`8*24`(r3)

	$POP	r0,`$FRAME+$LRSAVE`($sp)
	$POP	r14,`$FRAME-$SIZE_T*18`($sp)
	$POP	r15,`$FRAME-$SIZE_T*17`($sp)
	$POP	r16,`$FRAME-$SIZE_T*16`($sp)
	$POP	r17,`$FRAME-$SIZE_T*15`($sp)
	$POP	r18,`$FRAME-$SIZE_T*14`($sp)
	$POP	r19,`$FRAME-$SIZE_T*13`($sp)
	$POP	r20,`$FRAME-$SIZE_T*12`($sp)
	$POP	r21,`$FRAME-$SIZE_T*11`($sp)
	$POP	r22,`$FRAME-$SIZE_T*10`($sp)
	$POP	r23,`$FRAME-$SIZE_T*9`($sp)
	$POP	r24,`$FRAME-$SIZE_T*8`($sp)
	$POP	r25,`$FRAME-$SIZE_T*7`($sp)
	$POP	r26,`$FRAME-$SIZE_T*6`($sp)
	$POP	r27,`$FRAME-$SIZE_T*5`($sp)
	$POP	r28,`$FRAME-$SIZE_T*4`($sp)
	$POP	r29,`$FRAME-$SIZE_T*3`($sp)
	$POP	r30,`$FRAME-$SIZE_T*2`($sp)
	$POP	r31,`$FRAME-$SIZE_T*1`($sp)
	mtlr	r0
	addi	$sp,$sp,$FRAME
	blr
	.long	0
	.byte	0,12,4,1,0x80,18,1,0
	.long	0
.size	KeccakF1600,.-KeccakF1600
___

# dword_le_load: assembles one 64-bit value in r0 from the eight bytes at
# 1(r3) .. 8(r3), the byte at offset 1 ending up least significant, and
# advances r3 by 8 (the final load is an lbzu).  The caller therefore
# passes r3 pointing one byte *before* the data.  Clobbers r4 and r5.
$code.=<<___;

.type	dword_le_load,\@function
.align	5
dword_le_load:
	lbz	r0,1(r3)
	lbz	r4,2(r3)
	lbz	r5,3(r3)
	insrdi	r0,r4,8,48
	lbz	r4,4(r3)
	insrdi	r0,r5,8,40
	lbz	r5,5(r3)
	insrdi	r0,r4,8,32
	lbz	r4,6(r3)
	insrdi	r0,r5,8,24
	lbz	r5,7(r3)
	insrdi	r0,r4,8,16
	lbzu	r4,8(r3)
	insrdi	r0,r5,8,8
	insrdi	r0,r4,8,0
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,1,0
	.long	0
.size	dword_le_load,.-dword_le_load
___

# SHA3_absorb: exported.
#   r3 - pointer to the state A[][]
#   r4 - inp
#   r5 - len
#   r6 - bsz (block size in bytes)
# While len >= bsz, bsz/8 little-endian lanes are read with dword_le_load
# and xor-ed into the state in A[0][0], A[0][1], ... order, then
# KeccakF1600_int is run.  len and bsz are kept in their LOCALS slots
# across a block because dword_le_load clobbers r4/r5; after each
# permutation the iotas pointer is rewound by 24 entries.  On exit the
# state is written back and the remaining len is returned in r3.
$code.=<<___;

.globl	SHA3_absorb
.type	SHA3_absorb,\@function
.align	5
SHA3_absorb:
	$STU	$sp,-$FRAME($sp)
	mflr	r0
	$PUSH	r14,`$FRAME-$SIZE_T*18`($sp)
	$PUSH	r15,`$FRAME-$SIZE_T*17`($sp)
	$PUSH	r16,`$FRAME-$SIZE_T*16`($sp)
	$PUSH	r17,`$FRAME-$SIZE_T*15`($sp)
	$PUSH	r18,`$FRAME-$SIZE_T*14`($sp)
	$PUSH	r19,`$FRAME-$SIZE_T*13`($sp)
	$PUSH	r20,`$FRAME-$SIZE_T*12`($sp)
	$PUSH	r21,`$FRAME-$SIZE_T*11`($sp)
	$PUSH	r22,`$FRAME-$SIZE_T*10`($sp)
	$PUSH	r23,`$FRAME-$SIZE_T*9`($sp)
	$PUSH	r24,`$FRAME-$SIZE_T*8`($sp)
	$PUSH	r25,`$FRAME-$SIZE_T*7`($sp)
	$PUSH	r26,`$FRAME-$SIZE_T*6`($sp)
	$PUSH	r27,`$FRAME-$SIZE_T*5`($sp)
	$PUSH	r28,`$FRAME-$SIZE_T*4`($sp)
	$PUSH	r29,`$FRAME-$SIZE_T*3`($sp)
	$PUSH	r30,`$FRAME-$SIZE_T*2`($sp)
	$PUSH	r31,`$FRAME-$SIZE_T*1`($sp)
	$PUSH	r0,`$FRAME+$LRSAVE`($sp)

	bl	PICmeup
	subi	r4,r4,1				; prepare for lbzu
	subi	r12,r12,8			; prepare for ldu

	$PUSH	r3,`$LOCALS+0*$SIZE_T`($sp)	; save A[][]
	$PUSH	r4,`$LOCALS+1*$SIZE_T`($sp)	; save inp
	$PUSH	r5,`$LOCALS+2*$SIZE_T`($sp)	; save len
	$PUSH	r6,`$LOCALS+3*$SIZE_T`($sp)	; save bsz
	mr	r0,r6
	$PUSH	r12,`$LOCALS+4*$SIZE_T`($sp)

	ld	$A[0][0],`8*0`(r3)		; load A[5][5]
	ld	$A[0][1],`8*1`(r3)
	ld	$A[0][2],`8*2`(r3)
	ld	$A[0][3],`8*3`(r3)
	ld	$A[0][4],`8*4`(r3)
	ld	$A[1][0],`8*5`(r3)
	ld	$A[1][1],`8*6`(r3)
	ld	$A[1][2],`8*7`(r3)
	ld	$A[1][3],`8*8`(r3)
	ld	$A[1][4],`8*9`(r3)
	ld	$A[2][0],`8*10`(r3)
	ld	$A[2][1],`8*11`(r3)
	ld	$A[2][2],`8*12`(r3)
	ld	$A[2][3],`8*13`(r3)
	ld	$A[2][4],`8*14`(r3)
	ld	$A[3][0],`8*15`(r3)
	ld	$A[3][1],`8*16`(r3)
	ld	$A[3][2],`8*17`(r3)
	ld	$A[3][3],`8*18`(r3)
	ld	$A[3][4],`8*19`(r3)
	ld	$A[4][0],`8*20`(r3)
	ld	$A[4][1],`8*21`(r3)
	ld	$A[4][2],`8*22`(r3)
	ld	$A[4][3],`8*23`(r3)
	ld	$A[4][4],`8*24`(r3)

	mr	r3,r4
	mr	r4,r5
	mr	r5,r0

	b	.Loop_absorb

.align	4
.Loop_absorb:
	$UCMP	r4,r5				; len < bsz?
	blt	.Labsorbed

	sub	r4,r4,r5			; len -= bsz
	srwi	r5,r5,3
	$PUSH	r4,`$LOCALS+2*$SIZE_T`($sp)	; save len
	mtctr	r5
	bl	dword_le_load			; *inp++
	xor	$A[0][0],$A[0][0],r0
	bdz	.Lprocess_block
	bl	dword_le_load			; *inp++
	xor	$A[0][1],$A[0][1],r0
	bdz	.Lprocess_block
	bl	dword_le_load			; *inp++
	xor	$A[0][2],$A[0][2],r0
	bdz	.Lprocess_block
	bl	dword_le_load			; *inp++
	xor	$A[0][3],$A[0][3],r0
	bdz	.Lprocess_block
	bl	dword_le_load			; *inp++
	xor	$A[0][4],$A[0][4],r0
	bdz	.Lprocess_block
	bl	dword_le_load			; *inp++
	xor	$A[1][0],$A[1][0],r0
	bdz	.Lprocess_block
	bl	dword_le_load			; *inp++
	xor	$A[1][1],$A[1][1],r0
	bdz	.Lprocess_block
	bl	dword_le_load			; *inp++
	xor	$A[1][2],$A[1][2],r0
	bdz	.Lprocess_block
	bl	dword_le_load			; *inp++
	xor	$A[1][3],$A[1][3],r0
	bdz	.Lprocess_block
	bl	dword_le_load			; *inp++
	xor	$A[1][4],$A[1][4],r0
	bdz	.Lprocess_block
	bl	dword_le_load			; *inp++
	xor	$A[2][0],$A[2][0],r0
	bdz	.Lprocess_block
	bl	dword_le_load			; *inp++
	xor	$A[2][1],$A[2][1],r0
	bdz	.Lprocess_block
	bl	dword_le_load			; *inp++
	xor	$A[2][2],$A[2][2],r0
	bdz	.Lprocess_block
	bl	dword_le_load			; *inp++
	xor	$A[2][3],$A[2][3],r0
	bdz	.Lprocess_block
	bl	dword_le_load			; *inp++
	xor	$A[2][4],$A[2][4],r0
	bdz	.Lprocess_block
	bl	dword_le_load			; *inp++
	xor	$A[3][0],$A[3][0],r0
	bdz	.Lprocess_block
	bl	dword_le_load			; *inp++
	xor	$A[3][1],$A[3][1],r0
	bdz	.Lprocess_block
	bl	dword_le_load			; *inp++
	xor	$A[3][2],$A[3][2],r0
	bdz	.Lprocess_block
	bl	dword_le_load			; *inp++
	xor	$A[3][3],$A[3][3],r0
	bdz	.Lprocess_block
	bl	dword_le_load			; *inp++
	xor	$A[3][4],$A[3][4],r0
	bdz	.Lprocess_block
	bl	dword_le_load			; *inp++
	xor	$A[4][0],$A[4][0],r0
	bdz	.Lprocess_block
	bl	dword_le_load			; *inp++
	xor	$A[4][1],$A[4][1],r0
	bdz	.Lprocess_block
	bl	dword_le_load			; *inp++
	xor	$A[4][2],$A[4][2],r0
	bdz	.Lprocess_block
	bl	dword_le_load			; *inp++
	xor	$A[4][3],$A[4][3],r0
	bdz	.Lprocess_block
	bl	dword_le_load			; *inp++
	xor	$A[4][4],$A[4][4],r0

.Lprocess_block:
	$PUSH	r3,`$LOCALS+1*$SIZE_T`($sp)	; save inp

	bl	KeccakF1600_int

	$POP	r0,`$LOCALS+4*$SIZE_T`($sp)	; pull iotas[24]
	$POP	r5,`$LOCALS+3*$SIZE_T`($sp)	; restore bsz
	$POP	r4,`$LOCALS+2*$SIZE_T`($sp)	; restore len
	$POP	r3,`$LOCALS+1*$SIZE_T`($sp)	; restore inp
	addic	r0,r0,`-8*24`			; rewind iotas
	$PUSH	r0,`$LOCALS+4*$SIZE_T`($sp)

	b	.Loop_absorb

.align	4
.Labsorbed:
	$POP	r3,`$LOCALS+0*$SIZE_T`($sp)
	std	$A[0][0],`8*0`(r3)		; return A[5][5]
	std	$A[0][1],`8*1`(r3)
	std	$A[0][2],`8*2`(r3)
	std	$A[0][3],`8*3`(r3)
	std	$A[0][4],`8*4`(r3)
	std	$A[1][0],`8*5`(r3)
	std	$A[1][1],`8*6`(r3)
	std	$A[1][2],`8*7`(r3)
	std	$A[1][3],`8*8`(r3)
	std	$A[1][4],`8*9`(r3)
	std	$A[2][0],`8*10`(r3)
	std	$A[2][1],`8*11`(r3)
	std	$A[2][2],`8*12`(r3)
	std	$A[2][3],`8*13`(r3)
	std	$A[2][4],`8*14`(r3)
	std	$A[3][0],`8*15`(r3)
	std	$A[3][1],`8*16`(r3)
	std	$A[3][2],`8*17`(r3)
	std	$A[3][3],`8*18`(r3)
	std	$A[3][4],`8*19`(r3)
	std	$A[4][0],`8*20`(r3)
	std	$A[4][1],`8*21`(r3)
	std	$A[4][2],`8*22`(r3)
	std	$A[4][3],`8*23`(r3)
	std	$A[4][4],`8*24`(r3)

	mr	r3,r4				; return value
	$POP	r0,`$FRAME+$LRSAVE`($sp)
	$POP	r14,`$FRAME-$SIZE_T*18`($sp)
	$POP	r15,`$FRAME-$SIZE_T*17`($sp)
	$POP	r16,`$FRAME-$SIZE_T*16`($sp)
	$POP	r17,`$FRAME-$SIZE_T*15`($sp)
	$POP	r18,`$FRAME-$SIZE_T*14`($sp)
	$POP	r19,`$FRAME-$SIZE_T*13`($sp)
	$POP	r20,`$FRAME-$SIZE_T*12`($sp)
	$POP	r21,`$FRAME-$SIZE_T*11`($sp)
	$POP	r22,`$FRAME-$SIZE_T*10`($sp)
	$POP	r23,`$FRAME-$SIZE_T*9`($sp)
	$POP	r24,`$FRAME-$SIZE_T*8`($sp)
	$POP	r25,`$FRAME-$SIZE_T*7`($sp)
	$POP	r26,`$FRAME-$SIZE_T*6`($sp)
	$POP	r27,`$FRAME-$SIZE_T*5`($sp)
	$POP	r28,`$FRAME-$SIZE_T*4`($sp)
	$POP	r29,`$FRAME-$SIZE_T*3`($sp)
	$POP	r30,`$FRAME-$SIZE_T*2`($sp)
	$POP	r31,`$FRAME-$SIZE_T*1`($sp)
	mtlr	r0
	addi	$sp,$sp,$FRAME
	blr
	.long	0
	.byte	0,12,4,1,0x80,18,4,0
	.long	0
.size	SHA3_absorb,.-SHA3_absorb
___

# SHA3_squeeze: exported.
#   r3 - pointer to the state, read as a flat array of 64-bit lanes
#   r4 - out
#   r5 - len (number of bytes to produce)
#   r6 - bsz (block size in bytes)
# Each lane is written out least significant byte first.  r6 counts down
# the bytes left in the current block; once it is used up, KeccakF1600
# is called on the state and reading restarts from the first lane.  A
# final partial lane (len < 8) is emitted byte by byte in .Loop_tail.
# The arguments are kept in r28..r31, which KeccakF1600 preserves; the
# routine uses its own 10*SIZE_T frame with those registers saved in
# slots 6..9.
{
my ($A_flat,$out,$len,$bsz) = map("r$_",(28..31));
$code.=<<___;
.globl	SHA3_squeeze
.type	SHA3_squeeze,\@function
.align	5
SHA3_squeeze:
	$STU	$sp,`-10*$SIZE_T`($sp)
	mflr	r0
	$PUSH	r28,`6*$SIZE_T`($sp)
	$PUSH	r29,`7*$SIZE_T`($sp)
	$PUSH	r30,`8*$SIZE_T`($sp)
	$PUSH	r31,`9*$SIZE_T`($sp)
	$PUSH	r0,`10*$SIZE_T+$LRSAVE`($sp)

	mr	$A_flat,r3
	subi	r3,r3,8			; prepare for ldu
	subi	$out,r4,1		; prepare for stbu
	mr	$len,r5
	mr	$bsz,r6
	${UCMP}i $len,0			; nothing to squeeze?
	beq	.Lsqueeze_done		; zero in CTR would make .Loop_tail run away
	b	.Loop_squeeze

.align	4
.Loop_squeeze:
	ldu	r0,8(r3)
	${UCMP}i $len,8
	blt	.Lsqueeze_tail

	stb	r0,1($out)
	srdi	r0,r0,8
	stb	r0,2($out)
	srdi	r0,r0,8
	stb	r0,3($out)
	srdi	r0,r0,8
	stb	r0,4($out)
	srdi	r0,r0,8
	stb	r0,5($out)
	srdi	r0,r0,8
	stb	r0,6($out)
	srdi	r0,r0,8
	stb	r0,7($out)
	srdi	r0,r0,8
	stbu	r0,8($out)

	subic.	$len,$len,8
	beq	.Lsqueeze_done

	subic.	r6,r6,8
	bgt	.Loop_squeeze

	mr	r3,$A_flat
	bl	KeccakF1600
	subi	r3,$A_flat,8		; prepare for ldu
	mr	r6,$bsz
	b	.Loop_squeeze

.align	4
.Lsqueeze_tail:
	mtctr	$len
.Loop_tail:
	stbu	r0,1($out)
	srdi	r0,r0,8
	bdnz	.Loop_tail

.Lsqueeze_done:
	$POP	r0,`10*$SIZE_T+$LRSAVE`($sp)
	$POP	r28,`6*$SIZE_T`($sp)
	$POP	r29,`7*$SIZE_T`($sp)
	$POP	r30,`8*$SIZE_T`($sp)
	$POP	r31,`9*$SIZE_T`($sp)
	mtlr	r0
	addi	$sp,$sp,`10*$SIZE_T`
	blr
	.long	0
	.byte	0,12,4,1,0x80,4,4,0
	.long	0
.size	SHA3_squeeze,.-SHA3_squeeze
___
}

# Ugly hack here, because PPC assembler syntax seems to vary too
# much from platform to platform...
# PICmeup leaves the run-time address of the iotas table in r12.
# bcl/mflr capture the address of the second mflr (PICmeup+8); the
# routine together with its trailing words and padding occupies exactly
# 64 bytes (9 words of code/data plus the .space filler), so the table
# begins 64-8 bytes past the captured address.  The caller's LR is
# parked in r0 and put back before returning.
$code.=<<___;
.align	6
PICmeup:
	mflr	r0
	bcl	20,31,\$+4
	mflr	r12   ; vvvvvv "distance" between . and 1st data entry
	addi	r12,r12,`64-8`
	mtlr	r0
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
	.space	`64-9*4`
___

# The 24 Keccak-f[1600] round constants, in round order.  They are kept
# as strings so that no 64-bit numeric conversion happens inside Perl.
my @round_constants = qw(
	0x0000000000000001	0x0000000000008082
	0x800000000000808a	0x8000000080008000
	0x000000000000808b	0x0000000080000001
	0x8000000080008081	0x8000000000008009
	0x000000000000008a	0x0000000000000088
	0x0000000080008009	0x000000008000000a
	0x000000008000808b	0x800000000000008b
	0x8000000000008089	0x8000000000008003
	0x8000000000008002	0x8000000000000080
	0x000000000000800a	0x800000008000000a
	0x8000000080008081	0x8000000000008080
	0x0000000080000001	0x8000000080008008
);

# The table must directly follow the padded PICmeup code above.
$code .= join("\n",
	".type\tiotas,\@object",
	"iotas:",
	(map { "\t.quad\t$_" } @round_constants),
	".size\tiotas,.-iotas",
	qq{.asciz\t"Keccak-1600 absorb and squeeze for PPC64, CRYPTOGAMS by <appro\@openssl.org>"},
) . "\n";

# Replace every backtick-quoted span with the value of the Perl
# expression it encloses, then emit the finished text.
$code =~ s{`([^`]*)`}{eval $1}ge;
print $code;
close(STDOUT) or die "error closing STDOUT: $!";