#! /usr/bin/env perl
# Copyright 2007-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# sha1_block for Thumb.
#
# January 2007.
#
# The code does not present direct interest to OpenSSL, because of low
# performance. Its purpose is to establish _size_ benchmark. Pretty
# useless one I must say, because 30% or 88 bytes larger ARMv4 code
# [available on demand] is almost _twice_ as fast. It should also be
# noted that in-lining of .Lcommon and .Lrotate improves performance
# by over 40%, while code increases by only 10% or 32 bytes. But once
# again, the goal was to establish _size_ benchmark, not performance.

# The last command-line argument, if present, names the output file and
# STDOUT is redirected to it; otherwise the assembly goes to STDOUT.
$output=pop and open STDOUT,">$output";

# $inline selects how the shared per-round fragments are emitted:
#   0 - once, as the .Lcommon/.Lrotate subroutines reached with "bl";
#   1 - expanded in place inside every round body.
$inline=0;
#$cheat_on_binutils=1;

# Register allocation used by the generated code.
$t0="r0";
$t1="r1";
$t2="r2";
$a="r3";
$b="r4";
$c="r5";
$d="r6";
$e="r7";
$K="r8";	# "upper" registers can be used in add/sub and mov insns
$ctx="r9";
$inp="r10";
$len="r11";
$Xi="r12";

# Returns the assembly text shared by every round: steps $t0 down by one
# word, loads X[i] from it into $t1 and accumulates E+K+ROR(A,27)+X[i]
# in $t2.  $e is used as scratch for the right-shifted half of A.
sub common {
<<___;
	sub	$t0,#4
	ldr	$t1,[$t0]
	add	$e,$K			@ E+=K_xx_xx
	lsl	$t2,$a,#5
	add	$t2,$e
	lsr	$e,$a,#27
	add	$t2,$e			@ E+=ROR(A,27)
	add	$t2,$t1			@ E+=X[i]
___
}
# Returns the assembly text that shifts the working variables by one
# position (E=D, D=C, C=ROR(B,2), B=A) and forms the new A from the sum
# left in $t2 by common() plus the round function value left in $t1.
sub rotate {
<<___;
	mov	$e,$d			@ E=D
	mov	$d,$c			@ D=C
	lsl	$c,$b,#30
	lsr	$b,$b,#2
	orr	$c,$b			@ C=ROR(B,2)
	mov	$b,$a			@ B=A
	add	$a,$t2,$t1		@ A=E+F_xx_xx(B,C,D)
___
}

# Appends one round of rounds 0..19 to $code: common part, then
# F_00_19 = ((C^D)&B)^D computed into $t1, then the rotate part.
sub BODY_00_19 {
$code.=$inline?&common():"\tbl	.Lcommon\n";
$code.=<<___;
	mov	$t1,$c
	eor	$t1,$d
	and	$t1,$b
	eor	$t1,$d			@ F_00_19(B,C,D)
___
$code.=$inline?&rotate():"\tbl	.Lrotate\n";
}

# Appends one round of rounds 20..39 (also reused for 60..79) to $code:
# F_20_39 = B^C^D computed into $t1.
sub BODY_20_39 {
$code.=$inline?&common():"\tbl	.Lcommon\n";
$code.=<<___;
	mov	$t1,$b
	eor	$t1,$c
	eor	$t1,$d			@ F_20_39(B,C,D)
___
$code.=$inline?&rotate():"\tbl	.Lrotate\n";
}

# Appends one round of rounds 40..59 to $code:
# F_40_59 = (B&C)|((B|C)&D) computed into $t1, with $e as scratch.
sub BODY_40_59 {
$code.=$inline?&common():"\tbl	.Lcommon\n";
$code.=<<___;
	mov	$t1,$b
	and	$t1,$c
	mov	$e,$b
	orr	$e,$c
	and	$e,$d
	orr	$t1,$e			@ F_40_59(B,C,D)
___
$code.=$inline?&rotate():"\tbl	.Lrotate\n";
}

# Section/symbol preamble and the function entry label.
$code=<<___;
.text
.code	16

.global	sha1_block_data_order
.type	sha1_block_data_order,%function

.align	2
sha1_block_data_order:
___
# Optional ARM-mode entry stub that switches to Thumb; only emitted when
# $cheat_on_binutils is set (its assignment above is commented out).
if ($cheat_on_binutils) {
$code.=<<___;
.code	32
	add	r3,pc,#1
	bx	r3			@ switch to Thumb ISA
.code	16
___
}
# Prologue, X[] load/expansion loops and setup for rounds 0..19.
# On entry r0 holds ctx and r1 holds inp (per the comments in the emitted
# code); r2 is shifted left by 6 and added to inp to form the end pointer,
# so it is presumably a count of 64-byte blocks.
$code.=<<___;
	push	{r4-r7}
	mov	r3,r8
	mov	r4,r9
	mov	r5,r10
	mov	r6,r11
	mov	r7,r12
	push	{r3-r7,lr}
	lsl	r2,#6
	mov	$ctx,r0			@ save context
	mov	$inp,r1			@ save inp
	mov	$len,r2			@ save len
	add	$len,$inp		@ $len to point at inp end

.Lloop:
	mov	$Xi,sp
	mov	$t2,sp
	sub	$t2,#16*4		@ [3]
.LXload:
	ldrb	$a,[$t1,#0]		@ $t1 is r1 and holds inp
	ldrb	$b,[$t1,#1]
	ldrb	$c,[$t1,#2]
	ldrb	$d,[$t1,#3]
	lsl	$a,#24
	lsl	$b,#16
	lsl	$c,#8
	orr	$a,$b
	orr	$a,$c
	orr	$a,$d
	add	$t1,#4
	push	{$a}
	cmp	sp,$t2
	bne	.LXload			@ [+14*16]

	mov	$inp,$t1		@ update $inp
	sub	$t2,#32*4
	sub	$t2,#32*4
	mov	$e,#31			@ [+4]
.LXupdate:
	ldr	$a,[sp,#15*4]
	ldr	$b,[sp,#13*4]
	ldr	$c,[sp,#7*4]
	ldr	$d,[sp,#2*4]
	eor	$a,$b
	eor	$a,$c
	eor	$a,$d
	ror	$a,$e
	push	{$a}
	cmp	sp,$t2
	bne	.LXupdate		@ [+(11+1)*64]

	ldmia	$t0!,{$a,$b,$c,$d,$e}	@ $t0 is r0 and holds ctx
	mov	$t0,$Xi

	ldr	$t2,.LK_00_19
	mov	$t1,$t0
	sub	$t1,#20*4
	mov	$Xi,$t1
	mov	$K,$t2			@ [+7+4]
.L_00_19:
___
	# Loop body for rounds 0..19.
	&BODY_00_19();
$code.=<<___;
	cmp	$Xi,$t0
	bne	.L_00_19		@ [+(2+9+4+2+8+2)*20]

	ldr	$t2,.LK_20_39
	mov	$t1,$t0
	sub	$t1,#20*4
	mov	$Xi,$t1
	mov	$K,$t2			@ [+5]
.L_20_39_or_60_79:
___
	# Loop body shared by rounds 20..39 and 60..79.
	&BODY_20_39();
$code.=<<___;
	cmp	$Xi,$t0
	bne	.L_20_39_or_60_79	@ [+(2+9+3+2+8+2)*20*2]
	cmp	sp,$t0
	beq	.Ldone			@ [+2]

	ldr	$t2,.LK_40_59
	mov	$t1,$t0
	sub	$t1,#20*4
	mov	$Xi,$t1
	mov	$K,$t2			@ [+5]
.L_40_59:
___
	# Loop body for rounds 40..59.
	&BODY_40_59();
# Setup for rounds 60..79 (which branch back into the 20..39 loop), then
# the state update, stack frame release, block loop and epilogue.
$code.=<<___;
	cmp	$Xi,$t0
	bne	.L_40_59		@ [+(2+9+6+2+8+2)*20]

	ldr	$t2,.LK_60_79
	mov	$Xi,sp
	mov	$K,$t2
	b	.L_20_39_or_60_79	@ [+4]
.Ldone:
	mov	$t0,$ctx
	ldr	$t1,[$t0,#0]
	ldr	$t2,[$t0,#4]
	add	$a,$t1
	ldr	$t1,[$t0,#8]
	add	$b,$t2
	ldr	$t2,[$t0,#12]
	add	$c,$t1
	ldr	$t1,[$t0,#16]
	add	$d,$t2
	add	$e,$t1
	stmia	$t0!,{$a,$b,$c,$d,$e}	@ [+20]

	add	sp,#80*4		@ deallocate stack frame
	mov	$t0,$ctx		@ restore ctx
	mov	$t1,$inp		@ restore inp
	cmp	$t1,$len
	beq	.Lexit
	b	.Lloop			@ [+6] total 3212 cycles
.Lexit:
	pop	{r2-r7}
	mov	r8,r2
	mov	r9,r3
	mov	r10,r4
	mov	r11,r5
	mov	r12,r6
	mov	lr,r7
	pop	{r4-r7}
	bx	lr
.align	2
___
# Out-of-line copies of the shared fragments, only needed when the round
# bodies reach them with "bl"; each returns with "mov pc,lr".
$code.=".Lcommon:\n".&common()."\tmov	pc,lr\n" if (!$inline);
$code.=".Lrotate:\n".&rotate()."\tmov	pc,lr\n" if (!$inline);
# Round constants (loaded pc-relative by the code above), symbol size and
# an identification string.
$code.=<<___;
.align	2
.LK_00_19:	.word	0x5a827999
.LK_20_39:	.word	0x6ed9eba1
.LK_40_59:	.word	0x8f1bbcdc
.LK_60_79:	.word	0xca62c1d6
.size	sha1_block_data_order,.-sha1_block_data_order
.asciz	"SHA1 block transform for Thumb, CRYPTOGAMS by <appro\@openssl.org>"
___

print $code;
close STDOUT or die "error closing STDOUT: $!"; # enforce flush