#! /usr/bin/env perl
# Copyright 2010-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# March 2010
#
# The module implements "4-bit" GCM GHASH function and underlying
# single multiplication operation in GF(2^128). "4-bit" means that it
# uses 256 bytes per-key table [+128 bytes shared table]. Even though
# loops are aggressively modulo-scheduled in respect to references to
# Htbl and Z.hi updates for 8 cycles per byte, measured performance is
# ~12 cycles per processed byte on 21264 CPU. It seems to be a dynamic
# scheduling "glitch," because uprofile(1) indicates uniform sample
# distribution, as if all instruction bundles execute in 1.5 cycles.
# Meaning that it could have been even faster, yet 12 cycles is ~60%
# better than gcc-generated code and ~80% than code generated by vendor
# compiler.

use strict;
use warnings;

# Accumulates the generated assembly text; printed once at the very end.
my $code;

# Symbolic register names interpolated into the assembly templates below.
# The trailing "# $n" notes are the original author's register-number hints.
my $cnt="v0";		# $0
my $t0="t0";
my $t1="t1";
my $t2="t2";
my $Thi0="t3";		# $4
my $Tlo0="t4";
my $Thi1="t5";
my $Tlo1="t6";
my $rem="t7";		# $8
#################
my $Xi="a0";		# $16, input argument block
my $Htbl="a1";
my $inp="a2";
my $len="a3";
my $nlo="a4";		# $20
my $nhi="a5";
my $Zhi="t8";
my $Zlo="t9";
my $Xhi="t10";		# $24
my $Xlo="t11";
my $remp="t12";
my $rem_4bit="AT";	# $28

# loop() appends one copy of the shared inner sequence to $code.  Both
# gcm_gmult_4bit and gcm_ghash_4bit expand it after loading $Xhi/$Xlo and
# after picmeup has pointed $rem_4bit at the rem_4bit table.
#
# The emitted code walks byte 7 down to byte 0 of $Xlo and then byte 7 down
# to byte 0 of $Xhi (extbl), splits every byte into two nibbles scaled to
# 16-byte offsets into $Htbl, and after every 4-bit shift of $Zhi:$Zlo
# folds in the rem_4bit entry selected by the four bits shifted out.  The
# accumulated value is left in $Zhi/$Zlo; the callers byte-swap it and
# store it back through $Xi.
#
# $N counts expansions so the .Looplo/.Loophi labels stay unique when the
# template is emitted more than once into the same output file.
{ my $N;
  sub loop {

	$N++;
$code.=<<___;
.align	4
	extbl	$Xlo,7,$nlo
	and	$nlo,0xf0,$nhi
	sll	$nlo,4,$nlo
	and	$nlo,0xf0,$nlo

	addq	$nlo,$Htbl,$nlo
	ldq	$Zlo,8($nlo)
	addq	$nhi,$Htbl,$nhi
	ldq	$Zhi,0($nlo)

	and	$Zlo,0x0f,$remp
	sll	$Zhi,60,$t0
	lda	$cnt,6(zero)
	extbl	$Xlo,6,$nlo

	ldq	$Tlo1,8($nhi)
	s8addq	$remp,$rem_4bit,$remp
	ldq	$Thi1,0($nhi)
	srl	$Zlo,4,$Zlo

	ldq	$rem,0($remp)
	srl	$Zhi,4,$Zhi
	xor	$t0,$Zlo,$Zlo
	and	$nlo,0xf0,$nhi

	xor	$Tlo1,$Zlo,$Zlo
	sll	$nlo,4,$nlo
	xor	$Thi1,$Zhi,$Zhi
	and	$nlo,0xf0,$nlo

	addq	$nlo,$Htbl,$nlo
	ldq	$Tlo0,8($nlo)
	addq	$nhi,$Htbl,$nhi
	ldq	$Thi0,0($nlo)

.Looplo$N:
	and	$Zlo,0x0f,$remp
	sll	$Zhi,60,$t0
	subq	$cnt,1,$cnt
	srl	$Zlo,4,$Zlo

	ldq	$Tlo1,8($nhi)
	xor	$rem,$Zhi,$Zhi
	ldq	$Thi1,0($nhi)
	s8addq	$remp,$rem_4bit,$remp

	ldq	$rem,0($remp)
	srl	$Zhi,4,$Zhi
	xor	$t0,$Zlo,$Zlo
	extbl	$Xlo,$cnt,$nlo

	and	$nlo,0xf0,$nhi
	xor	$Thi0,$Zhi,$Zhi
	xor	$Tlo0,$Zlo,$Zlo
	sll	$nlo,4,$nlo


	and	$Zlo,0x0f,$remp
	sll	$Zhi,60,$t0
	and	$nlo,0xf0,$nlo
	srl	$Zlo,4,$Zlo

	s8addq	$remp,$rem_4bit,$remp
	xor	$rem,$Zhi,$Zhi
	addq	$nlo,$Htbl,$nlo
	addq	$nhi,$Htbl,$nhi

	ldq	$rem,0($remp)
	srl	$Zhi,4,$Zhi
	ldq	$Tlo0,8($nlo)
	xor	$t0,$Zlo,$Zlo

	xor	$Tlo1,$Zlo,$Zlo
	xor	$Thi1,$Zhi,$Zhi
	ldq	$Thi0,0($nlo)
	bne	$cnt,.Looplo$N


	and	$Zlo,0x0f,$remp
	sll	$Zhi,60,$t0
	lda	$cnt,7(zero)
	srl	$Zlo,4,$Zlo

	ldq	$Tlo1,8($nhi)
	xor	$rem,$Zhi,$Zhi
	ldq	$Thi1,0($nhi)
	s8addq	$remp,$rem_4bit,$remp

	ldq	$rem,0($remp)
	srl	$Zhi,4,$Zhi
	xor	$t0,$Zlo,$Zlo
	extbl	$Xhi,$cnt,$nlo

	and	$nlo,0xf0,$nhi
	xor	$Thi0,$Zhi,$Zhi
	xor	$Tlo0,$Zlo,$Zlo
	sll	$nlo,4,$nlo

	and	$Zlo,0x0f,$remp
	sll	$Zhi,60,$t0
	and	$nlo,0xf0,$nlo
	srl	$Zlo,4,$Zlo

	s8addq	$remp,$rem_4bit,$remp
	xor	$rem,$Zhi,$Zhi
	addq	$nlo,$Htbl,$nlo
	addq	$nhi,$Htbl,$nhi

	ldq	$rem,0($remp)
	srl	$Zhi,4,$Zhi
	ldq	$Tlo0,8($nlo)
	xor	$t0,$Zlo,$Zlo

	xor	$Tlo1,$Zlo,$Zlo
	xor	$Thi1,$Zhi,$Zhi
	ldq	$Thi0,0($nlo)
	unop


.Loophi$N:
	and	$Zlo,0x0f,$remp
	sll	$Zhi,60,$t0
	subq	$cnt,1,$cnt
	srl	$Zlo,4,$Zlo

	ldq	$Tlo1,8($nhi)
	xor	$rem,$Zhi,$Zhi
	ldq	$Thi1,0($nhi)
	s8addq	$remp,$rem_4bit,$remp

	ldq	$rem,0($remp)
	srl	$Zhi,4,$Zhi
	xor	$t0,$Zlo,$Zlo
	extbl	$Xhi,$cnt,$nlo

	and	$nlo,0xf0,$nhi
	xor	$Thi0,$Zhi,$Zhi
	xor	$Tlo0,$Zlo,$Zlo
	sll	$nlo,4,$nlo


	and	$Zlo,0x0f,$remp
	sll	$Zhi,60,$t0
	and	$nlo,0xf0,$nlo
	srl	$Zlo,4,$Zlo

	s8addq	$remp,$rem_4bit,$remp
	xor	$rem,$Zhi,$Zhi
	addq	$nlo,$Htbl,$nlo
	addq	$nhi,$Htbl,$nhi

	ldq	$rem,0($remp)
	srl	$Zhi,4,$Zhi
	ldq	$Tlo0,8($nlo)
	xor	$t0,$Zlo,$Zlo

	xor	$Tlo1,$Zlo,$Zlo
	xor	$Thi1,$Zhi,$Zhi
	ldq	$Thi0,0($nlo)
	bne	$cnt,.Loophi$N


	and	$Zlo,0x0f,$remp
	sll	$Zhi,60,$t0
	srl	$Zlo,4,$Zlo

	ldq	$Tlo1,8($nhi)
	xor	$rem,$Zhi,$Zhi
	ldq	$Thi1,0($nhi)
	s8addq	$remp,$rem_4bit,$remp

	ldq	$rem,0($remp)
	srl	$Zhi,4,$Zhi
	xor	$t0,$Zlo,$Zlo

	xor	$Tlo0,$Zlo,$Zlo
	xor	$Thi0,$Zhi,$Zhi

	and	$Zlo,0x0f,$remp
	sll	$Zhi,60,$t0
	srl	$Zlo,4,$Zlo

	s8addq	$remp,$rem_4bit,$remp
	xor	$rem,$Zhi,$Zhi

	ldq	$rem,0($remp)
	srl	$Zhi,4,$Zhi
	xor	$Tlo1,$Zlo,$Zlo
	xor	$Thi1,$Zhi,$Zhi
	xor	$t0,$Zlo,$Zlo
	xor	$rem,$Zhi,$Zhi
___
}}

# gcm_gmult_4bit: loads the two 64-bit halves of the block at $Xi, runs one
# expansion of the shared sequence, byte-swaps $Zlo/$Zhi and stores them
# back to $Xi.
$code=<<___;
#ifdef __linux__
#include <asm/regdef.h>
#else
#include <asm.h>
#include <regdef.h>
#endif

.text

.set	noat
.set	noreorder
.globl	gcm_gmult_4bit
.align	4
.ent	gcm_gmult_4bit
gcm_gmult_4bit:
	.frame	sp,0,ra
	.prologue 0

	ldq	$Xlo,8($Xi)
	ldq	$Xhi,0($Xi)

	bsr	$t0,picmeup
	nop
___

	loop();

$code.=<<___;
	srl	$Zlo,24,$t0	# byte swap
	srl	$Zlo,8,$t1

	sll	$Zlo,8,$t2
	sll	$Zlo,24,$Zlo
	zapnot	$t0,0x11,$t0
	zapnot	$t1,0x22,$t1

	zapnot	$Zlo,0x88,$Zlo
	or	$t0,$t1,$t0
	zapnot	$t2,0x44,$t2

	or	$Zlo,$t0,$Zlo
	srl	$Zhi,24,$t0
	srl	$Zhi,8,$t1

	or	$Zlo,$t2,$Zlo
	sll	$Zhi,8,$t2
	sll	$Zhi,24,$Zhi

	srl	$Zlo,32,$Xlo
	sll	$Zlo,32,$Zlo

	zapnot	$t0,0x11,$t0
	zapnot	$t1,0x22,$t1
	or	$Zlo,$Xlo,$Xlo

	zapnot	$Zhi,0x88,$Zhi
	or	$t0,$t1,$t0
	zapnot	$t2,0x44,$t2

	or	$Zhi,$t0,$Zhi
	or	$Zhi,$t2,$Zhi

	srl	$Zhi,32,$Xhi
	sll	$Zhi,32,$Zhi

	or	$Zhi,$Xhi,$Xhi
	stq	$Xlo,8($Xi)
	stq	$Xhi,0($Xi)

	ret	(ra)
.end	gcm_gmult_4bit
___

# Two extra registers hold the current 16-byte input block in
# gcm_ghash_4bit; the routine saves s0/s1 on entry and restores them on
# exit.
my $inhi="s0";
my $inlo="s1";

# gcm_ghash_4bit: per 16 bytes at $inp, assembles the block from ldq_u
# pairs with extql/extqh, XORs it into $Xhi/$Xlo, runs the shared sequence
# and byte-swaps the result.  $len is decremented by 16 per block and the
# loop exits through .Ldone when it reaches zero.  picmeup and the
# rem_4bit table follow in the same heredoc.
$code.=<<___;
.globl	gcm_ghash_4bit
.align	4
.ent	gcm_ghash_4bit
gcm_ghash_4bit:
	lda	sp,-32(sp)
	stq	ra,0(sp)
	stq	s0,8(sp)
	stq	s1,16(sp)
	.mask	0x04000600,-32
	.frame	sp,32,ra
	.prologue 0

	ldq_u	$inhi,0($inp)
	ldq_u	$Thi0,7($inp)
	ldq_u	$inlo,8($inp)
	ldq_u	$Tlo0,15($inp)
	ldq	$Xhi,0($Xi)
	ldq	$Xlo,8($Xi)

	bsr	$t0,picmeup
	nop

.Louter:
	extql	$inhi,$inp,$inhi
	extqh	$Thi0,$inp,$Thi0
	or	$inhi,$Thi0,$inhi
	lda	$inp,16($inp)

	extql	$inlo,$inp,$inlo
	extqh	$Tlo0,$inp,$Tlo0
	or	$inlo,$Tlo0,$inlo
	subq	$len,16,$len

	xor	$Xlo,$inlo,$Xlo
	xor	$Xhi,$inhi,$Xhi
___

	loop();

$code.=<<___;
	srl	$Zlo,24,$t0	# byte swap
	srl	$Zlo,8,$t1

	sll	$Zlo,8,$t2
	sll	$Zlo,24,$Zlo
	zapnot	$t0,0x11,$t0
	zapnot	$t1,0x22,$t1

	zapnot	$Zlo,0x88,$Zlo
	or	$t0,$t1,$t0
	zapnot	$t2,0x44,$t2

	or	$Zlo,$t0,$Zlo
	srl	$Zhi,24,$t0
	srl	$Zhi,8,$t1

	or	$Zlo,$t2,$Zlo
	sll	$Zhi,8,$t2
	sll	$Zhi,24,$Zhi

	srl	$Zlo,32,$Xlo
	sll	$Zlo,32,$Zlo
	beq	$len,.Ldone

	zapnot	$t0,0x11,$t0
	zapnot	$t1,0x22,$t1
	or	$Zlo,$Xlo,$Xlo
	ldq_u	$inhi,0($inp)

	zapnot	$Zhi,0x88,$Zhi
	or	$t0,$t1,$t0
	zapnot	$t2,0x44,$t2
	ldq_u	$Thi0,7($inp)

	or	$Zhi,$t0,$Zhi
	or	$Zhi,$t2,$Zhi
	ldq_u	$inlo,8($inp)
	ldq_u	$Tlo0,15($inp)

	srl	$Zhi,32,$Xhi
	sll	$Zhi,32,$Zhi

	or	$Zhi,$Xhi,$Xhi
	br	zero,.Louter

.Ldone:
	zapnot	$t0,0x11,$t0
	zapnot	$t1,0x22,$t1
	or	$Zlo,$Xlo,$Xlo

	zapnot	$Zhi,0x88,$Zhi
	or	$t0,$t1,$t0
	zapnot	$t2,0x44,$t2

	or	$Zhi,$t0,$Zhi
	or	$Zhi,$t2,$Zhi

	srl	$Zhi,32,$Xhi
	sll	$Zhi,32,$Zhi

	or	$Zhi,$Xhi,$Xhi

	stq	$Xlo,8($Xi)
	stq	$Xhi,0($Xi)

	.set	noreorder
	/*ldq	ra,0(sp)*/
	ldq	s0,8(sp)
	ldq	s1,16(sp)
	lda	sp,32(sp)
	ret	(ra)
.end	gcm_ghash_4bit

.align	4
.ent	picmeup
picmeup:
	.frame	sp,0,$t0
	.prologue 0
	br	$rem_4bit,.Lpic
.Lpic:	lda	$rem_4bit,12($rem_4bit)
	ret	($t0)
.end	picmeup
	nop
rem_4bit:
	.long	0,0x0000<<16, 0,0x1C20<<16, 0,0x3840<<16, 0,0x2460<<16
	.long	0,0x7080<<16, 0,0x6CA0<<16, 0,0x48C0<<16, 0,0x54E0<<16
	.long	0,0xE100<<16, 0,0xFD20<<16, 0,0xD940<<16, 0,0xC560<<16
	.long	0,0x9180<<16, 0,0x8DA0<<16, 0,0xA9C0<<16, 0,0xB5E0<<16
.ascii	"GHASH for Alpha, CRYPTOGAMS by <appro\@openssl.org>"
.align	4

___

# The last command-line argument, when present and true, names the output
# file; otherwise the assembly goes to the inherited STDOUT.  The open is
# checked so a bad path aborts the run instead of silently leaving the
# target file unwritten.
my $output = pop;
if ($output) {
	open STDOUT, '>', $output or die "can't open $output: $!";
}
print $code;
close STDOUT or die "error closing STDOUT: $!";