#include "s390x_arch.h"

# ====================================================================
# AES for s390x / z-Architecture (GNU as syntax).
# Software path: classic 4x 256-entry T-table implementation.
# Hardware path: KM (cipher-message) instruction when the key
# structure carries a km function code instead of a round count
# (see AES_set_encrypt_key: offset 240 holds either the round
# count 10/12/14 or the km code 18/19/20).
# ====================================================================

.text

# AES_Te: encryption tables.
#   Te0[256] -- each 32-bit entry is stored TWICE (8 bytes per entry),
#   so that a 4-byte load at byte offset 0..3 within an entry yields
#   the value rotated by 0/8/16/24 bits; this replaces the four
#   separate rotated tables Te0..Te3. The round loop below indexes
#   with displacements 0..3 accordingly.
#   Te4[256] follows at AES_Te+2048 (plain S-box bytes), then rcon[].
.type	AES_Te,@object
.align	256
AES_Te:
.long	0xc66363a5,0xc66363a5
.long	0xf87c7c84,0xf87c7c84
.long	0xee777799,0xee777799
.long	0xf67b7b8d,0xf67b7b8d
.long	0xfff2f20d,0xfff2f20d
.long	0xd66b6bbd,0xd66b6bbd
.long	0xde6f6fb1,0xde6f6fb1
.long	0x91c5c554,0x91c5c554
.long	0x60303050,0x60303050
.long	0x02010103,0x02010103
.long	0xce6767a9,0xce6767a9
.long	0x562b2b7d,0x562b2b7d
.long	0xe7fefe19,0xe7fefe19
.long	0xb5d7d762,0xb5d7d762
.long	0x4dababe6,0x4dababe6
.long	0xec76769a,0xec76769a
.long	0x8fcaca45,0x8fcaca45
.long	0x1f82829d,0x1f82829d
.long	0x89c9c940,0x89c9c940
.long	0xfa7d7d87,0xfa7d7d87
.long	0xeffafa15,0xeffafa15
.long	0xb25959eb,0xb25959eb
.long	0x8e4747c9,0x8e4747c9
.long	0xfbf0f00b,0xfbf0f00b
.long	0x41adadec,0x41adadec
.long	0xb3d4d467,0xb3d4d467
.long	0x5fa2a2fd,0x5fa2a2fd
.long	0x45afafea,0x45afafea
.long	0x239c9cbf,0x239c9cbf
.long	0x53a4a4f7,0x53a4a4f7
.long	0xe4727296,0xe4727296
.long	0x9bc0c05b,0x9bc0c05b
.long	0x75b7b7c2,0x75b7b7c2
.long	0xe1fdfd1c,0xe1fdfd1c
.long	0x3d9393ae,0x3d9393ae
.long	0x4c26266a,0x4c26266a
.long	0x6c36365a,0x6c36365a
.long	0x7e3f3f41,0x7e3f3f41
.long	0xf5f7f702,0xf5f7f702
.long	0x83cccc4f,0x83cccc4f
.long	0x6834345c,0x6834345c
.long	0x51a5a5f4,0x51a5a5f4
.long	0xd1e5e534,0xd1e5e534
.long	0xf9f1f108,0xf9f1f108
.long	0xe2717193,0xe2717193
.long	0xabd8d873,0xabd8d873
.long	0x62313153,0x62313153
.long	0x2a15153f,0x2a15153f
.long	0x0804040c,0x0804040c
.long	0x95c7c752,0x95c7c752
.long	0x46232365,0x46232365
.long	0x9dc3c35e,0x9dc3c35e
.long	0x30181828,0x30181828
.long	0x379696a1,0x379696a1
.long	0x0a05050f,0x0a05050f
.long	0x2f9a9ab5,0x2f9a9ab5
.long	0x0e070709,0x0e070709
.long	0x24121236,0x24121236
.long	0x1b80809b,0x1b80809b
.long	0xdfe2e23d,0xdfe2e23d
.long	0xcdebeb26,0xcdebeb26
.long	0x4e272769,0x4e272769
.long	0x7fb2b2cd,0x7fb2b2cd
.long	0xea75759f,0xea75759f
.long	0x1209091b,0x1209091b
.long	0x1d83839e,0x1d83839e
.long	0x582c2c74,0x582c2c74
.long	0x341a1a2e,0x341a1a2e
.long	0x361b1b2d,0x361b1b2d
.long	0xdc6e6eb2,0xdc6e6eb2
.long	0xb45a5aee,0xb45a5aee
.long	0x5ba0a0fb,0x5ba0a0fb
.long	0xa45252f6,0xa45252f6
.long	0x763b3b4d,0x763b3b4d
.long	0xb7d6d661,0xb7d6d661
.long	0x7db3b3ce,0x7db3b3ce
.long	0x5229297b,0x5229297b
.long	0xdde3e33e,0xdde3e33e
.long	0x5e2f2f71,0x5e2f2f71
.long	0x13848497,0x13848497
.long	0xa65353f5,0xa65353f5
.long	0xb9d1d168,0xb9d1d168
.long	0x00000000,0x00000000
.long	0xc1eded2c,0xc1eded2c
.long	0x40202060,0x40202060
.long	0xe3fcfc1f,0xe3fcfc1f
.long	0x79b1b1c8,0x79b1b1c8
.long	0xb65b5bed,0xb65b5bed
.long	0xd46a6abe,0xd46a6abe
.long	0x8dcbcb46,0x8dcbcb46
.long	0x67bebed9,0x67bebed9
.long	0x7239394b,0x7239394b
.long	0x944a4ade,0x944a4ade
.long	0x984c4cd4,0x984c4cd4
.long	0xb05858e8,0xb05858e8
.long	0x85cfcf4a,0x85cfcf4a
.long	0xbbd0d06b,0xbbd0d06b
.long	0xc5efef2a,0xc5efef2a
.long	0x4faaaae5,0x4faaaae5
.long	0xedfbfb16,0xedfbfb16
.long	0x864343c5,0x864343c5
.long	0x9a4d4dd7,0x9a4d4dd7
.long	0x66333355,0x66333355
.long	0x11858594,0x11858594
.long	0x8a4545cf,0x8a4545cf
.long	0xe9f9f910,0xe9f9f910
.long	0x04020206,0x04020206
.long	0xfe7f7f81,0xfe7f7f81
.long	0xa05050f0,0xa05050f0
.long	0x783c3c44,0x783c3c44
.long	0x259f9fba,0x259f9fba
.long	0x4ba8a8e3,0x4ba8a8e3
.long	0xa25151f3,0xa25151f3
.long	0x5da3a3fe,0x5da3a3fe
.long	0x804040c0,0x804040c0
.long	0x058f8f8a,0x058f8f8a
.long	0x3f9292ad,0x3f9292ad
.long	0x219d9dbc,0x219d9dbc
.long	0x70383848,0x70383848
.long	0xf1f5f504,0xf1f5f504
.long	0x63bcbcdf,0x63bcbcdf
.long	0x77b6b6c1,0x77b6b6c1
.long	0xafdada75,0xafdada75
.long	0x42212163,0x42212163
.long	0x20101030,0x20101030
.long	0xe5ffff1a,0xe5ffff1a
.long	0xfdf3f30e,0xfdf3f30e
.long	0xbfd2d26d,0xbfd2d26d
.long	0x81cdcd4c,0x81cdcd4c
.long	0x180c0c14,0x180c0c14
.long	0x26131335,0x26131335
.long	0xc3ecec2f,0xc3ecec2f
.long	0xbe5f5fe1,0xbe5f5fe1
.long	0x359797a2,0x359797a2
.long	0x884444cc,0x884444cc
.long	0x2e171739,0x2e171739
.long	0x93c4c457,0x93c4c457
.long	0x55a7a7f2,0x55a7a7f2
.long	0xfc7e7e82,0xfc7e7e82
.long	0x7a3d3d47,0x7a3d3d47
.long	0xc86464ac,0xc86464ac
.long	0xba5d5de7,0xba5d5de7
.long	0x3219192b,0x3219192b
.long	0xe6737395,0xe6737395
.long	0xc06060a0,0xc06060a0
.long	0x19818198,0x19818198
.long	0x9e4f4fd1,0x9e4f4fd1
.long	0xa3dcdc7f,0xa3dcdc7f
.long	0x44222266,0x44222266
.long	0x542a2a7e,0x542a2a7e
.long	0x3b9090ab,0x3b9090ab
.long	0x0b888883,0x0b888883
.long	0x8c4646ca,0x8c4646ca
.long	0xc7eeee29,0xc7eeee29
.long	0x6bb8b8d3,0x6bb8b8d3
.long	0x2814143c,0x2814143c
.long	0xa7dede79,0xa7dede79
.long	0xbc5e5ee2,0xbc5e5ee2
.long	0x160b0b1d,0x160b0b1d
.long	0xaddbdb76,0xaddbdb76
.long	0xdbe0e03b,0xdbe0e03b
.long	0x64323256,0x64323256
.long	0x743a3a4e,0x743a3a4e
.long	0x140a0a1e,0x140a0a1e
.long	0x924949db,0x924949db
.long	0x0c06060a,0x0c06060a
.long	0x4824246c,0x4824246c
.long	0xb85c5ce4,0xb85c5ce4
.long	0x9fc2c25d,0x9fc2c25d
.long	0xbdd3d36e,0xbdd3d36e
.long	0x43acacef,0x43acacef
.long	0xc46262a6,0xc46262a6
.long	0x399191a8,0x399191a8
.long	0x319595a4,0x319595a4
.long	0xd3e4e437,0xd3e4e437
.long	0xf279798b,0xf279798b
.long	0xd5e7e732,0xd5e7e732
.long	0x8bc8c843,0x8bc8c843
.long	0x6e373759,0x6e373759
.long	0xda6d6db7,0xda6d6db7
.long	0x018d8d8c,0x018d8d8c
.long	0xb1d5d564,0xb1d5d564
.long	0x9c4e4ed2,0x9c4e4ed2
.long	0x49a9a9e0,0x49a9a9e0
.long	0xd86c6cb4,0xd86c6cb4
.long	0xac5656fa,0xac5656fa
.long	0xf3f4f407,0xf3f4f407
.long	0xcfeaea25,0xcfeaea25
.long	0xca6565af,0xca6565af
.long	0xf47a7a8e,0xf47a7a8e
.long	0x47aeaee9,0x47aeaee9
.long	0x10080818,0x10080818
.long	0x6fbabad5,0x6fbabad5
.long	0xf0787888,0xf0787888
.long	0x4a25256f,0x4a25256f
.long	0x5c2e2e72,0x5c2e2e72
.long	0x381c1c24,0x381c1c24
.long	0x57a6a6f1,0x57a6a6f1
.long	0x73b4b4c7,0x73b4b4c7
.long	0x97c6c651,0x97c6c651
.long	0xcbe8e823,0xcbe8e823
.long	0xa1dddd7c,0xa1dddd7c
.long	0xe874749c,0xe874749c
.long	0x3e1f1f21,0x3e1f1f21
.long	0x964b4bdd,0x964b4bdd
.long	0x61bdbddc,0x61bdbddc
.long	0x0d8b8b86,0x0d8b8b86
.long	0x0f8a8a85,0x0f8a8a85
.long	0xe0707090,0xe0707090
.long	0x7c3e3e42,0x7c3e3e42
.long	0x71b5b5c4,0x71b5b5c4
.long	0xcc6666aa,0xcc6666aa
.long	0x904848d8,0x904848d8
.long	0x06030305,0x06030305
.long	0xf7f6f601,0xf7f6f601
.long	0x1c0e0e12,0x1c0e0e12
.long	0xc26161a3,0xc26161a3
.long	0x6a35355f,0x6a35355f
.long	0xae5757f9,0xae5757f9
.long	0x69b9b9d0,0x69b9b9d0
.long	0x17868691,0x17868691
.long	0x99c1c158,0x99c1c158
.long	0x3a1d1d27,0x3a1d1d27
.long	0x279e9eb9,0x279e9eb9
.long	0xd9e1e138,0xd9e1e138
.long	0xebf8f813,0xebf8f813
.long	0x2b9898b3,0x2b9898b3
.long	0x22111133,0x22111133
.long	0xd26969bb,0xd26969bb
.long	0xa9d9d970,0xa9d9d970
.long	0x078e8e89,0x078e8e89
.long	0x339494a7,0x339494a7
.long	0x2d9b9bb6,0x2d9b9bb6
.long	0x3c1e1e22,0x3c1e1e22
.long	0x15878792,0x15878792
.long	0xc9e9e920,0xc9e9e920
.long	0x87cece49,0x87cece49
.long	0xaa5555ff,0xaa5555ff
.long	0x50282878,0x50282878
.long	0xa5dfdf7a,0xa5dfdf7a
.long	0x038c8c8f,0x038c8c8f
.long	0x59a1a1f8,0x59a1a1f8
.long	0x09898980,0x09898980
.long	0x1a0d0d17,0x1a0d0d17
.long	0x65bfbfda,0x65bfbfda
.long	0xd7e6e631,0xd7e6e631
.long	0x844242c6,0x844242c6
.long	0xd06868b8,0xd06868b8
.long	0x824141c3,0x824141c3
.long	0x299999b0,0x299999b0
.long	0x5a2d2d77,0x5a2d2d77
.long	0x1e0f0f11,0x1e0f0f11
.long	0x7bb0b0cb,0x7bb0b0cb
.long	0xa85454fc,0xa85454fc
.long	0x6dbbbbd6,0x6dbbbbd6
.long	0x2c16163a,0x2c16163a
# Te4[256] -- forward S-box, used for the final round and key schedule
.byte	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
.byte	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
.byte	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
.byte	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
.byte	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
.byte	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
.byte	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
.byte	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
.byte	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
.byte	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
.byte	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
.byte	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
.byte	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
.byte	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
.byte	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
.byte	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
.byte	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
.byte	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
.byte	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
.byte	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
.byte	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
.byte	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
.byte	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
.byte	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
.byte	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
.byte	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
.byte	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
.byte	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
.byte	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
.byte	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
.byte	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
.byte	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
# rcon[] -- round constants for the key schedule (AES_Te+2048+256)
.long	0x01000000, 0x02000000, 0x04000000, 0x08000000
.long	0x10000000, 0x20000000, 0x40000000, 0x80000000
.long	0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
.align	256
.size	AES_Te,.-AES_Te

# void AES_encrypt(const unsigned char *inp, unsigned char *out,
#                  const AES_KEY *key) {
# Encrypt a single 16-byte block.
# In:  %r2 = inp, %r3 = out, %r4 = key.
# key+240 holds either the software round count (10/12/14) or,
# if >= 16, a KM function code (18/19/20) set up by
# AES_set_encrypt_key -- in which case the hardware path is taken.
.globl	AES_encrypt
.type	AES_encrypt,@function
AES_encrypt:
	l	%r0,240(%r4)
	lhi	%r1,16
	clr	%r0,%r1			# round count (<16) => software path
	jl	.Lesoft

	la	%r1,0(%r4)		# %r1 = parameter block (the raw key)
	#la	%r2,0(%r2)
	la	%r4,0(%r3)		# %r4 = destination for km
	lghi	%r3,16			# single block length
	.long	0xb92e0042		# km %r4,%r2
	brc	1,.-4			# can this happen?
	br	%r14
.align	64
.Lesoft:
	stm	%r3,%r14,3*4(%r15)	# save out ptr + non-volatile regs

	llgf	%r8,0(%r2)		# load plaintext into s0..s3
	llgf	%r9,4(%r2)
	llgf	%r10,8(%r2)
	llgf	%r11,12(%r2)

	larl	%r12,AES_Te
	bras	%r14,_s390x_AES_encrypt

	l	%r3,3*4(%r15)		# recover out pointer
	st	%r8,0(%r3)		# store ciphertext
	st	%r9,4(%r3)
	st	%r10,8(%r3)
	st	%r11,12(%r3)

	lm	%r6,%r14,6*4(%r15)
	br	%r14
.size	AES_encrypt,.-AES_encrypt

# _s390x_AES_encrypt: software round function.
# In:  %r4 = key schedule, %r12 = AES_Te, %r8..%r11 = state s0..s3.
# Out: %r8..%r11 = encrypted state; %r14 saved/restored at 15*4(%r15).
# %r0 = 0x7f8: byte mask pre-scaled by the 8-byte Te0 entry stride;
# the shift amounts (3/5/13/21) extract each state byte multiplied
# by 8 in one operation. Displacements 0..3 on the table loads pick
# the rotation of the duplicated Te0 entry (Te0/Te3/Te2/Te1).
.type	_s390x_AES_encrypt,@function
.align	16
_s390x_AES_encrypt:
	st	%r14,15*4(%r15)
	x	%r8,0(%r4)		# whitening: state ^= rk[0..3]
	x	%r9,4(%r4)
	x	%r10,8(%r4)
	x	%r11,12(%r4)
	l	%r13,240(%r4)		# %r13 = number of rounds
	llill	%r0,2040		# 0x7f8, see header comment
	aghi	%r13,-1			# last round is done out of line
	j	.Lenc_loop
.align	16
.Lenc_loop:
	sllg	%r1,%r8,3
	srlg	%r2,%r8,5
	srlg	%r3,%r8,13
	srl	%r8,21
	nr	%r8,%r0
	ngr	%r1,%r0
	nr	%r2,%r0
	nr	%r3,%r0

	srlg	%r5,%r9,13		# i0
	sllg	%r6,%r9,3
	srlg	%r7,%r9,5
	srl	%r9,21
	nr	%r5,%r0
	nr	%r9,%r0
	ngr	%r6,%r0
	nr	%r7,%r0

	l	%r8,0(%r8,%r12)		# Te0[s0>>24]
	l	%r1,1(%r1,%r12)		# Te3[s0>>0]
	l	%r2,2(%r2,%r12)		# Te2[s0>>8]
	l	%r3,3(%r3,%r12)		# Te1[s0>>16]

	x	%r8,3(%r5,%r12)		# Te1[s1>>16]
	l	%r9,0(%r9,%r12)		# Te0[s1>>24]
	x	%r2,1(%r6,%r12)		# Te3[s1>>0]
	x	%r3,2(%r7,%r12)		# Te2[s1>>8]

	srlg	%r5,%r10,5		# i0
	srlg	%r6,%r10,13		# i1
	nr	%r5,%r0
	nr	%r6,%r0
	sllg	%r7,%r10,3
	srl	%r10,21
	nr	%r10,%r0
	ngr	%r7,%r0

	xr	%r9,%r1
	srlg	%r14,%r11,5		# i1
	sllg	%r1,%r11,3		# i0
	nr	%r14,%r0
	la	%r4,16(%r4)		# advance to next round key
	ngr	%r1,%r0

	x	%r8,2(%r5,%r12)		# Te2[s2>>8]
	x	%r9,3(%r6,%r12)		# Te1[s2>>16]
	l	%r10,0(%r10,%r12)	# Te0[s2>>24]
	x	%r3,1(%r7,%r12)		# Te3[s2>>0]

	srlg	%r7,%r11,13		# i2
	xr	%r10,%r2
	srl	%r11,21
	nr	%r7,%r0
	nr	%r11,%r0

	x	%r8,0(%r4)		# state ^= round key
	x	%r9,4(%r4)
	x	%r10,8(%r4)
	x	%r3,12(%r4)		# note: %r3 carries s3 here

	x	%r8,1(%r1,%r12)		# Te3[s3>>0]
	x	%r9,2(%r14,%r12)	# Te2[s3>>8]
	x	%r10,3(%r7,%r12)	# Te1[s3>>16]
	l	%r11,0(%r11,%r12)	# Te0[s3>>24]
	xr	%r11,%r3

	brct	%r13,.Lenc_loop
.align	16

	# final round: Te4 (plain S-box at AES_Te+2048, hence disp 2
	# relative to the 8-byte-scaled index) instead of MixColumns
	sllg	%r1,%r8,3
	srlg	%r2,%r8,5
	ngr	%r1,%r0
	srlg	%r3,%r8,13
	srl	%r8,21
	nr	%r8,%r0
	nr	%r2,%r0
	nr	%r3,%r0

	srlg	%r5,%r9,13		# i0
	sllg	%r6,%r9,3
	ngr	%r6,%r0
	srlg	%r7,%r9,5
	srl	%r9,21
	nr	%r5,%r0
	nr	%r9,%r0
	nr	%r7,%r0

	llgc	%r8,2(%r8,%r12)		# Te4[s0>>24]
	llgc	%r1,2(%r1,%r12)		# Te4[s0>>0]
	sll	%r8,24
	llgc	%r2,2(%r2,%r12)		# Te4[s0>>8]
	llgc	%r3,2(%r3,%r12)		# Te4[s0>>16]
	sll	%r2,8
	sll	%r3,16

	llgc	%r5,2(%r5,%r12)		# Te4[s1>>16]
	llgc	%r9,2(%r9,%r12)		# Te4[s1>>24]
	llgc	%r6,2(%r6,%r12)		# Te4[s1>>0]
	llgc	%r7,2(%r7,%r12)		# Te4[s1>>8]
	sll	%r5,16
	sll	%r9,24
	sll	%r7,8
	or	%r8,%r5
	or	%r9,%r1
	or	%r2,%r6
	or	%r3,%r7

	srlg	%r5,%r10,5		# i0
	srlg	%r6,%r10,13		# i1
	nr	%r5,%r0
	nr	%r6,%r0
	sllg	%r7,%r10,3
	srl	%r10,21
	ngr	%r7,%r0
	nr	%r10,%r0

	sllg	%r1,%r11,3		# i0
	srlg	%r14,%r11,5		# i1
	ngr	%r1,%r0

	llgc	%r5,2(%r5,%r12)		# Te4[s2>>8]
	llgc	%r6,2(%r6,%r12)		# Te4[s2>>16]
	sll	%r5,8
	llgc	%r10,2(%r10,%r12)	# Te4[s2>>24]
	llgc	%r7,2(%r7,%r12)		# Te4[s2>>0]
	sll	%r6,16
	nr	%r14,%r0
	sll	%r10,24
	or	%r8,%r5
	or	%r9,%r6
	or	%r10,%r2
	or	%r3,%r7

	srlg	%r7,%r11,13		# i2
	srl	%r11,21
	nr	%r7,%r0
	nr	%r11,%r0

	l	%r0,16(%r4)		# last round key words 0,1
	l	%r2,20(%r4)

	llgc	%r5,2(%r1,%r12)		# Te4[s3>>0]
	llgc	%r6,2(%r14,%r12)	# Te4[s3>>8]
	llgc	%r7,2(%r7,%r12)		# Te4[s3>>16]
	llgc	%r11,2(%r11,%r12)	# Te4[s3>>24]
	sll	%r6,8
	sll	%r7,16
	sll	%r11,24
	or	%r8,%r5
	or	%r9,%r6
	or	%r10,%r7
	or	%r11,%r3

	l	%r14,15*4(%r15)		# restore return address
	xr	%r8,%r0			# ^= last round key
	xr	%r9,%r2
	x	%r10,24(%r4)
	x	%r11,28(%r4)

	br	%r14
.size	_s390x_AES_encrypt,.-_s390x_AES_encrypt

# AES_Td: decryption tables, same layout trick as AES_Te:
#   Td0[256] with each entry duplicated (8 bytes/entry), then
#   Td4[256] (inverse S-box bytes) at AES_Td+2048.
.type	AES_Td,@object
.align	256
AES_Td:
.long	0x51f4a750,0x51f4a750
.long	0x7e416553,0x7e416553
.long	0x1a17a4c3,0x1a17a4c3
.long	0x3a275e96,0x3a275e96
.long	0x3bab6bcb,0x3bab6bcb
.long	0x1f9d45f1,0x1f9d45f1
.long	0xacfa58ab,0xacfa58ab
.long	0x4be30393,0x4be30393
.long	0x2030fa55,0x2030fa55
.long	0xad766df6,0xad766df6
.long	0x88cc7691,0x88cc7691
.long	0xf5024c25,0xf5024c25
.long	0x4fe5d7fc,0x4fe5d7fc
.long	0xc52acbd7,0xc52acbd7
.long	0x26354480,0x26354480
.long	0xb562a38f,0xb562a38f
.long	0xdeb15a49,0xdeb15a49
.long	0x25ba1b67,0x25ba1b67
.long	0x45ea0e98,0x45ea0e98
.long	0x5dfec0e1,0x5dfec0e1
.long	0xc32f7502,0xc32f7502
.long	0x814cf012,0x814cf012
.long	0x8d4697a3,0x8d4697a3
.long	0x6bd3f9c6,0x6bd3f9c6
.long	0x038f5fe7,0x038f5fe7
.long	0x15929c95,0x15929c95
.long	0xbf6d7aeb,0xbf6d7aeb
.long	0x955259da,0x955259da
.long	0xd4be832d,0xd4be832d
.long	0x587421d3,0x587421d3
.long	0x49e06929,0x49e06929
.long	0x8ec9c844,0x8ec9c844
.long	0x75c2896a,0x75c2896a
.long	0xf48e7978,0xf48e7978
.long	0x99583e6b,0x99583e6b
.long	0x27b971dd,0x27b971dd
.long	0xbee14fb6,0xbee14fb6
.long	0xf088ad17,0xf088ad17
.long	0xc920ac66,0xc920ac66
.long	0x7dce3ab4,0x7dce3ab4
.long	0x63df4a18,0x63df4a18
.long	0xe51a3182,0xe51a3182
.long	0x97513360,0x97513360
.long	0x62537f45,0x62537f45
.long	0xb16477e0,0xb16477e0
.long	0xbb6bae84,0xbb6bae84
.long	0xfe81a01c,0xfe81a01c
.long	0xf9082b94,0xf9082b94
.long	0x70486858,0x70486858
.long	0x8f45fd19,0x8f45fd19
.long	0x94de6c87,0x94de6c87
.long	0x527bf8b7,0x527bf8b7
.long	0xab73d323,0xab73d323
.long	0x724b02e2,0x724b02e2
.long	0xe31f8f57,0xe31f8f57
.long	0x6655ab2a,0x6655ab2a
.long	0xb2eb2807,0xb2eb2807
.long	0x2fb5c203,0x2fb5c203
.long	0x86c57b9a,0x86c57b9a
.long	0xd33708a5,0xd33708a5
.long	0x302887f2,0x302887f2
.long	0x23bfa5b2,0x23bfa5b2
.long	0x02036aba,0x02036aba
.long	0xed16825c,0xed16825c
.long	0x8acf1c2b,0x8acf1c2b
.long	0xa779b492,0xa779b492
.long	0xf307f2f0,0xf307f2f0
.long	0x4e69e2a1,0x4e69e2a1
.long	0x65daf4cd,0x65daf4cd
.long	0x0605bed5,0x0605bed5
.long	0xd134621f,0xd134621f
.long	0xc4a6fe8a,0xc4a6fe8a
.long	0x342e539d,0x342e539d
.long	0xa2f355a0,0xa2f355a0
.long	0x058ae132,0x058ae132
.long	0xa4f6eb75,0xa4f6eb75
.long	0x0b83ec39,0x0b83ec39
.long	0x4060efaa,0x4060efaa
.long	0x5e719f06,0x5e719f06
.long	0xbd6e1051,0xbd6e1051
.long	0x3e218af9,0x3e218af9
.long	0x96dd063d,0x96dd063d
.long	0xdd3e05ae,0xdd3e05ae
.long	0x4de6bd46,0x4de6bd46
.long	0x91548db5,0x91548db5
.long	0x71c45d05,0x71c45d05
.long	0x0406d46f,0x0406d46f
.long	0x605015ff,0x605015ff
.long	0x1998fb24,0x1998fb24
.long	0xd6bde997,0xd6bde997
.long	0x894043cc,0x894043cc
.long	0x67d99e77,0x67d99e77
.long	0xb0e842bd,0xb0e842bd
.long	0x07898b88,0x07898b88
.long	0xe7195b38,0xe7195b38
.long	0x79c8eedb,0x79c8eedb
.long	0xa17c0a47,0xa17c0a47
.long	0x7c420fe9,0x7c420fe9
.long	0xf8841ec9,0xf8841ec9
.long	0x00000000,0x00000000
.long	0x09808683,0x09808683
.long	0x322bed48,0x322bed48
.long	0x1e1170ac,0x1e1170ac
.long	0x6c5a724e,0x6c5a724e
.long	0xfd0efffb,0xfd0efffb
.long	0x0f853856,0x0f853856
.long	0x3daed51e,0x3daed51e
.long	0x362d3927,0x362d3927
.long	0x0a0fd964,0x0a0fd964
.long	0x685ca621,0x685ca621
.long	0x9b5b54d1,0x9b5b54d1
.long	0x24362e3a,0x24362e3a
.long	0x0c0a67b1,0x0c0a67b1
.long	0x9357e70f,0x9357e70f
.long	0xb4ee96d2,0xb4ee96d2
.long	0x1b9b919e,0x1b9b919e
.long	0x80c0c54f,0x80c0c54f
.long	0x61dc20a2,0x61dc20a2
.long	0x5a774b69,0x5a774b69
.long	0x1c121a16,0x1c121a16
.long	0xe293ba0a,0xe293ba0a
.long	0xc0a02ae5,0xc0a02ae5
.long	0x3c22e043,0x3c22e043
.long	0x121b171d,0x121b171d
.long	0x0e090d0b,0x0e090d0b
.long	0xf28bc7ad,0xf28bc7ad
.long	0x2db6a8b9,0x2db6a8b9
.long	0x141ea9c8,0x141ea9c8
.long	0x57f11985,0x57f11985
.long	0xaf75074c,0xaf75074c
.long	0xee99ddbb,0xee99ddbb
.long	0xa37f60fd,0xa37f60fd
.long	0xf701269f,0xf701269f
.long	0x5c72f5bc,0x5c72f5bc
.long	0x44663bc5,0x44663bc5
.long	0x5bfb7e34,0x5bfb7e34
.long	0x8b432976,0x8b432976
.long	0xcb23c6dc,0xcb23c6dc
.long	0xb6edfc68,0xb6edfc68
.long	0xb8e4f163,0xb8e4f163
.long	0xd731dcca,0xd731dcca
.long	0x42638510,0x42638510
.long	0x13972240,0x13972240
.long	0x84c61120,0x84c61120
.long	0x854a247d,0x854a247d
.long	0xd2bb3df8,0xd2bb3df8
.long	0xaef93211,0xaef93211
.long	0xc729a16d,0xc729a16d
.long	0x1d9e2f4b,0x1d9e2f4b
.long	0xdcb230f3,0xdcb230f3
.long	0x0d8652ec,0x0d8652ec
.long	0x77c1e3d0,0x77c1e3d0
.long	0x2bb3166c,0x2bb3166c
.long	0xa970b999,0xa970b999
.long	0x119448fa,0x119448fa
.long	0x47e96422,0x47e96422
.long	0xa8fc8cc4,0xa8fc8cc4
.long	0xa0f03f1a,0xa0f03f1a
.long	0x567d2cd8,0x567d2cd8
.long	0x223390ef,0x223390ef
.long	0x87494ec7,0x87494ec7
.long	0xd938d1c1,0xd938d1c1
.long	0x8ccaa2fe,0x8ccaa2fe
.long	0x98d40b36,0x98d40b36
.long	0xa6f581cf,0xa6f581cf
.long	0xa57ade28,0xa57ade28
.long	0xdab78e26,0xdab78e26
.long	0x3fadbfa4,0x3fadbfa4
.long	0x2c3a9de4,0x2c3a9de4
.long	0x5078920d,0x5078920d
.long	0x6a5fcc9b,0x6a5fcc9b
.long	0x547e4662,0x547e4662
.long	0xf68d13c2,0xf68d13c2
.long	0x90d8b8e8,0x90d8b8e8
.long	0x2e39f75e,0x2e39f75e
.long	0x82c3aff5,0x82c3aff5
.long	0x9f5d80be,0x9f5d80be
.long	0x69d0937c,0x69d0937c
.long	0x6fd52da9,0x6fd52da9
.long	0xcf2512b3,0xcf2512b3
.long	0xc8ac993b,0xc8ac993b
.long	0x10187da7,0x10187da7
.long	0xe89c636e,0xe89c636e
.long	0xdb3bbb7b,0xdb3bbb7b
.long	0xcd267809,0xcd267809
.long	0x6e5918f4,0x6e5918f4
.long	0xec9ab701,0xec9ab701
.long	0x834f9aa8,0x834f9aa8
.long	0xe6956e65,0xe6956e65
.long	0xaaffe67e,0xaaffe67e
.long	0x21bccf08,0x21bccf08
.long	0xef15e8e6,0xef15e8e6
.long	0xbae79bd9,0xbae79bd9
.long	0x4a6f36ce,0x4a6f36ce
.long	0xea9f09d4,0xea9f09d4
.long	0x29b07cd6,0x29b07cd6
.long	0x31a4b2af,0x31a4b2af
.long	0x2a3f2331,0x2a3f2331
.long	0xc6a59430,0xc6a59430
.long	0x35a266c0,0x35a266c0
.long	0x744ebc37,0x744ebc37
.long	0xfc82caa6,0xfc82caa6
.long	0xe090d0b0,0xe090d0b0
.long	0x33a7d815,0x33a7d815
.long	0xf104984a,0xf104984a
.long	0x41ecdaf7,0x41ecdaf7
.long	0x7fcd500e,0x7fcd500e
.long	0x1791f62f,0x1791f62f
.long	0x764dd68d,0x764dd68d
.long	0x43efb04d,0x43efb04d
.long	0xccaa4d54,0xccaa4d54
.long	0xe49604df,0xe49604df
.long	0x9ed1b5e3,0x9ed1b5e3
.long	0x4c6a881b,0x4c6a881b
.long	0xc12c1fb8,0xc12c1fb8
.long	0x4665517f,0x4665517f
.long	0x9d5eea04,0x9d5eea04
.long	0x018c355d,0x018c355d
.long	0xfa877473,0xfa877473
.long	0xfb0b412e,0xfb0b412e
.long	0xb3671d5a,0xb3671d5a
.long	0x92dbd252,0x92dbd252
.long	0xe9105633,0xe9105633
.long	0x6dd64713,0x6dd64713
.long	0x9ad7618c,0x9ad7618c
.long	0x37a10c7a,0x37a10c7a
.long	0x59f8148e,0x59f8148e
.long	0xeb133c89,0xeb133c89
.long	0xcea927ee,0xcea927ee
.long	0xb761c935,0xb761c935
.long	0xe11ce5ed,0xe11ce5ed
.long	0x7a47b13c,0x7a47b13c
.long	0x9cd2df59,0x9cd2df59
.long	0x55f2733f,0x55f2733f
.long	0x1814ce79,0x1814ce79
.long	0x73c737bf,0x73c737bf
.long	0x53f7cdea,0x53f7cdea
.long	0x5ffdaa5b,0x5ffdaa5b
.long	0xdf3d6f14,0xdf3d6f14
.long	0x7844db86,0x7844db86
.long	0xcaaff381,0xcaaff381
.long	0xb968c43e,0xb968c43e
.long	0x3824342c,0x3824342c
.long	0xc2a3405f,0xc2a3405f
.long	0x161dc372,0x161dc372
.long	0xbce2250c,0xbce2250c
.long	0x283c498b,0x283c498b
.long	0xff0d9541,0xff0d9541
.long	0x39a80171,0x39a80171
.long	0x080cb3de,0x080cb3de
.long	0xd8b4e49c,0xd8b4e49c
.long	0x6456c190,0x6456c190
.long	0x7bcb8461,0x7bcb8461
.long	0xd532b670,0xd532b670
.long	0x486c5c74,0x486c5c74
.long	0xd0b85742,0xd0b85742
# Td4[256] -- inverse S-box, used for the final decryption round
.byte	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
.byte	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
.byte	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
.byte	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
.byte	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
.byte	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
.byte	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
.byte	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
.byte	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
.byte	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
.byte	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
.byte	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
.byte	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
.byte	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
.byte	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
.byte	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
.byte	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
.byte	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
.byte	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
.byte	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
.byte	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
.byte	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
.byte	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
.byte	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
.byte	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
.byte	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
.byte	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
.byte	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
.byte	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
.byte	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
.byte	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
.byte	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
.size	AES_Td,.-AES_Td

# void AES_decrypt(const unsigned char *inp, unsigned char *out,
#                  const AES_KEY *key) {
# Decrypt a single 16-byte block; mirrors AES_encrypt
# (hardware KM path when key+240 >= 16, software otherwise).
.globl	AES_decrypt
.type	AES_decrypt,@function
AES_decrypt:
	l	%r0,240(%r4)
	lhi	%r1,16
	clr	%r0,%r1			# round count (<16) => software path
	jl	.Ldsoft

	la	%r1,0(%r4)		# %r1 = parameter block (the raw key)
	#la	%r2,0(%r2)
	la	%r4,0(%r3)		# %r4 = destination for km
	lghi	%r3,16			# single block length
	.long	0xb92e0042		# km %r4,%r2
	brc	1,.-4			# can this happen?
	br	%r14
.align	64
.Ldsoft:
	stm	%r3,%r14,3*4(%r15)	# save out ptr + non-volatile regs

	llgf	%r8,0(%r2)		# load ciphertext into s0..s3
	llgf	%r9,4(%r2)
	llgf	%r10,8(%r2)
	llgf	%r11,12(%r2)

	larl	%r12,AES_Td
	bras	%r14,_s390x_AES_decrypt

	l	%r3,3*4(%r15)		# recover out pointer
	st	%r8,0(%r3)		# store plaintext
	st	%r9,4(%r3)
	st	%r10,8(%r3)
	st	%r11,12(%r3)

	lm	%r6,%r14,6*4(%r15)
	br	%r14
.size	AES_decrypt,.-AES_decrypt

# _s390x_AES_decrypt: software round function (inverse cipher).
# Same register contract and indexing scheme as _s390x_AES_encrypt,
# but with Td0..Td3 (rotations of the duplicated Td0 entries) and
# Td4 at AES_Td+2048 for the final round.
.type	_s390x_AES_decrypt,@function
.align	16
_s390x_AES_decrypt:
	st	%r14,15*4(%r15)
	x	%r8,0(%r4)		# whitening: state ^= rk[0..3]
	x	%r9,4(%r4)
	x	%r10,8(%r4)
	x	%r11,12(%r4)
	l	%r13,240(%r4)		# %r13 = number of rounds
	llill	%r0,2040		# 0x7f8 byte mask * 8-byte stride
	aghi	%r13,-1			# last round is done out of line
	j	.Ldec_loop
.align	16
.Ldec_loop:
	srlg	%r1,%r8,13
	srlg	%r2,%r8,5
	sllg	%r3,%r8,3
	srl	%r8,21
	nr	%r8,%r0
	nr	%r1,%r0
	nr	%r2,%r0
	ngr	%r3,%r0

	sllg	%r5,%r9,3		# i0
	srlg	%r6,%r9,13
	srlg	%r7,%r9,5
	srl	%r9,21
	ngr	%r5,%r0
	nr	%r9,%r0
	nr	%r6,%r0
	nr	%r7,%r0

	l	%r8,0(%r8,%r12)		# Td0[s0>>24]
	l	%r1,3(%r1,%r12)		# Td1[s0>>16]
	l	%r2,2(%r2,%r12)		# Td2[s0>>8]
	l	%r3,1(%r3,%r12)		# Td3[s0>>0]

	x	%r8,1(%r5,%r12)		# Td3[s1>>0]
	l	%r9,0(%r9,%r12)		# Td0[s1>>24]
	x	%r2,3(%r6,%r12)		# Td1[s1>>16]
	x	%r3,2(%r7,%r12)		# Td2[s1>>8]

	srlg	%r5,%r10,5		# i0
	sllg	%r6,%r10,3		# i1
	srlg	%r7,%r10,13
	srl	%r10,21
	nr	%r5,%r0
	ngr	%r6,%r0
	nr	%r10,%r0
	nr	%r7,%r0

	xr	%r9,%r1
	srlg	%r14,%r11,5		# i1
	srlg	%r1,%r11,13		# i0
	nr	%r14,%r0
	la	%r4,16(%r4)		# advance to next round key
	nr	%r1,%r0

	x	%r8,2(%r5,%r12)		# Td2[s2>>8]
	x	%r9,1(%r6,%r12)		# Td3[s2>>0]
	l	%r10,0(%r10,%r12)	# Td0[s2>>24]
	x	%r3,3(%r7,%r12)		# Td1[s2>>16]

	sllg	%r7,%r11,3		# i2
	srl	%r11,21
	ngr	%r7,%r0
	nr	%r11,%r0

	xr	%r10,%r2
	x	%r8,0(%r4)		# state ^= round key
	x	%r9,4(%r4)
	x	%r10,8(%r4)
	x	%r3,12(%r4)		# note: %r3 carries s3 here

	x	%r8,3(%r1,%r12)		# Td1[s3>>16]
	x	%r9,2(%r14,%r12)	# Td2[s3>>8]
	x	%r10,1(%r7,%r12)	# Td3[s3>>0]
	l	%r11,0(%r11,%r12)	# Td0[s3>>24]
	xr	%r11,%r3

	brct	%r13,.Ldec_loop
.align	16

	# final round: plain byte lookups in Td4 (AES_Td+2048)
	l	%r1,2048(%r12)		# prefetch Td4
	l	%r2,2112(%r12)
	l	%r3,2176(%r12)
	l	%r5,2240(%r12)
	llill	%r0,0xff		# plain byte mask from here on

	srlg	%r7,%r8,24		# i0
	srlg	%r1,%r8,16
	srlg	%r2,%r8,8
	nr	%r8,%r0			# i3
	nr	%r1,%r0

	srlg	%r5,%r9,24
	nr	%r2,%r0
	srlg	%r6,%r9,16
	srlg	%r14,%r9,8
	nr	%r9,%r0			# i0
	nr	%r6,%r0
	nr	%r14,%r0

	llgc	%r7,2048(%r7,%r12)	# Td4[s0>>24]
	llgc	%r1,2048(%r1,%r12)	# Td4[s0>>16]
	llgc	%r2,2048(%r2,%r12)	# Td4[s0>>8]
	sll	%r1,16
	llgc	%r3,2048(%r8,%r12)	# Td4[s0>>0]
	sllg	%r8,%r7,24
	sll	%r2,8

	llgc	%r9,2048(%r9,%r12)	# Td4[s1>>0]
	llgc	%r5,2048(%r5,%r12)	# Td4[s1>>24]
	llgc	%r6,2048(%r6,%r12)	# Td4[s1>>16]
	sll	%r5,24
	llgc	%r7,2048(%r14,%r12)	# Td4[s1>>8]
	sll	%r6,16
	sll	%r7,8
	or	%r8,%r9
	or	%r1,%r5
	or	%r2,%r6
	or	%r3,%r7

	srlg	%r5,%r10,8		# i0
	srlg	%r6,%r10,24
	srlg	%r7,%r10,16
	nr	%r10,%r0		# i1
	nr	%r5,%r0
	nr	%r7,%r0
	llgc	%r5,2048(%r5,%r12)	# Td4[s2>>8]
	llgc	%r9,2048(%r10,%r12)	# Td4[s2>>0]
	llgc	%r6,2048(%r6,%r12)	# Td4[s2>>24]
	llgc	%r7,2048(%r7,%r12)	# Td4[s2>>16]
	sll	%r5,8
	sll	%r6,24
	or	%r8,%r5
	sll	%r7,16
	or	%r2,%r6
	or	%r3,%r7

	srlg	%r5,%r11,16		# i0
	srlg	%r6,%r11,8		# i1
	srlg	%r7,%r11,24
	nr	%r11,%r0		# i2
	nr	%r5,%r0
	nr	%r6,%r0

	l	%r14,15*4(%r15)		# restore return address
	or	%r9,%r1
	l	%r0,16(%r4)		# last round key words 0,1
	l	%r1,20(%r4)

	llgc	%r5,2048(%r5,%r12)	# Td4[s3>>16]
	llgc	%r6,2048(%r6,%r12)	# Td4[s3>>8]
	sll	%r5,16
	llgc	%r10,2048(%r11,%r12)	# Td4[s3>>0]
	llgc	%r11,2048(%r7,%r12)	# Td4[s3>>24]
	sll	%r6,8
	sll	%r11,24
	or	%r8,%r5
	or	%r9,%r6
	or	%r10,%r2
	or	%r11,%r3

	xr	%r8,%r0			# ^= last round key
	xr	%r9,%r1
	x	%r10,24(%r4)
	x	%r11,28(%r4)

	br	%r14
.size	_s390x_AES_decrypt,.-_s390x_AES_decrypt
# void AES_set_encrypt_key(const unsigned char *in, int bits,
#                          AES_KEY *key) {
# AES_set_encrypt_key(in=%r2, bits=%r3, key=%r4) -> %r2 (0 ok,
# -1 null argument, -2 unsupported key length).
# If the CPU supports both KM and KMC for the requested key length
# (probed via OPENSSL_s390xcap_P), the raw key is simply copied and
# the km function code (18/19/20) is stored at key+240; otherwise
# the round keys are expanded in software (.Lekey_internal) and the
# round count (10/12/14) is stored at key+240.
.globl	AES_set_encrypt_key
.type	AES_set_encrypt_key,@function
.align	16
AES_set_encrypt_key:
_s390x_AES_set_encrypt_key:
	lghi	%r0,0
	clr	%r2,%r0			# NULL in?
	je	.Lminus1
	clr	%r4,%r0			# NULL key?
	je	.Lminus1

	lghi	%r0,128			# only 128/192/256 are accepted
	clr	%r3,%r0
	je	.Lproceed
	lghi	%r0,192
	clr	%r3,%r0
	je	.Lproceed
	lghi	%r0,256
	clr	%r3,%r0
	je	.Lproceed
	lghi	%r2,-2			# invalid number of bits
	br	%r14

.align	16
.Lproceed:
	# convert bits to km(c) code, [128,192,256]->[18,19,20]
	lhi	%r5,-128
	lhi	%r0,18
	ar	%r5,%r3
	srl	%r5,6
	ar	%r5,%r0

	larl	%r1,OPENSSL_s390xcap_P
	llihh	%r0,0x8000
	srlg	%r0,%r0,0(%r5)		# bit for this function code
	ng	%r0,S390X_KM(%r1)	# check availability of both km...
	ng	%r0,S390X_KMC(%r1)	# ...and kmc support for given key length
	jz	.Lekey_internal		# not supported -> software schedule

	lmg	%r0,%r1,0(%r2)		# just copy 128 bits...
	stmg	%r0,%r1,0(%r4)
	lhi	%r0,192
	cr	%r3,%r0
	jl	1f			# 128-bit key: done copying
	lg	%r1,16(%r2)		# ...192-bit: copy 64 more bits...
	stg	%r1,16(%r4)
	je	1f
	lg	%r1,24(%r2)		# ...256-bit: and 64 more
	stg	%r1,24(%r4)
1:	st	%r3,236(%r4)		# save bits [for debugging purposes]
	lgr	%r0,%r5
	st	%r5,240(%r4)		# save km(c) code
	lghi	%r2,0			# success
	br	%r14
.align	16
.Lekey_internal:
	stm	%r4,%r13,4*4(%r15)	# all non-volatile regs and %r4

	larl	%r12,AES_Te+2048	# %r12 = Te4 (S-box bytes)

	llgf	%r8,0(%r2)		# rk[0..3] = first 128 key bits
	llgf	%r9,4(%r2)
	llgf	%r10,8(%r2)
	llgf	%r11,12(%r2)
	st	%r8,0(%r4)
	st	%r9,4(%r4)
	st	%r10,8(%r4)
	st	%r11,12(%r4)
	lghi	%r0,128
	cr	%r3,%r0
	jne	.Lnot128

	# ---- 128-bit key schedule: 10 iterations, 4 words each ----
	llill	%r0,0xff
	lghi	%r3,0			# i=0
	lghi	%r13,10
	st	%r13,240(%r4)		# 10 rounds

	llgfr	%r2,%r11		# temp=rk[3]
	srlg	%r5,%r11,8
	srlg	%r6,%r11,16
	srlg	%r7,%r11,24
	nr	%r2,%r0
	nr	%r5,%r0
	nr	%r6,%r0

.align	16
.L128_loop:
	# SubWord(RotWord(temp)) assembled byte by byte via icm
	la	%r2,0(%r2,%r12)
	la	%r5,0(%r5,%r12)
	la	%r6,0(%r6,%r12)
	la	%r7,0(%r7,%r12)
	icm	%r2,2,0(%r2)		# Te4[rk[3]>>0]<<8
	icm	%r2,4,0(%r5)		# Te4[rk[3]>>8]<<16
	icm	%r2,8,0(%r6)		# Te4[rk[3]>>16]<<24
	icm	%r2,1,0(%r7)		# Te4[rk[3]>>24]
	x	%r2,256(%r3,%r12)	# rcon[i]
	xr	%r8,%r2			# rk[4]=rk[0]^...
	xr	%r9,%r8			# rk[5]=rk[1]^rk[4]
	xr	%r10,%r9		# rk[6]=rk[2]^rk[5]
	xr	%r11,%r10		# rk[7]=rk[3]^rk[6]

	llgfr	%r2,%r11		# temp=rk[3]
	srlg	%r5,%r11,8
	srlg	%r6,%r11,16
	nr	%r2,%r0
	nr	%r5,%r0
	srlg	%r7,%r11,24
	nr	%r6,%r0

	st	%r8,16(%r4)
	st	%r9,20(%r4)
	st	%r10,24(%r4)
	st	%r11,28(%r4)
	la	%r4,16(%r4)		# key+=4
	la	%r3,4(%r3)		# i++
	brct	%r13,.L128_loop
	lghi	%r0,10
	lghi	%r2,0			# success
	lm	%r4,%r13,4*4(%r15)
	br	%r14

.align	16
.Lnot128:
	llgf	%r0,16(%r2)		# rk[4..5] = next 64 key bits
	llgf	%r1,20(%r2)
	st	%r0,16(%r4)
	st	%r1,20(%r4)
	lghi	%r0,192
	cr	%r3,%r0
	jne	.Lnot192

	# ---- 192-bit key schedule: 8 iterations, 6 words each ----
	llill	%r0,0xff
	lghi	%r3,0			# i=0
	lghi	%r13,12
	st	%r13,240(%r4)		# 12 rounds
	lghi	%r13,8			# loop counter

	srlg	%r5,%r1,8
	srlg	%r6,%r1,16
	srlg	%r7,%r1,24
	nr	%r1,%r0
	nr	%r5,%r0
	nr	%r6,%r0

.align	16
.L192_loop:
	la	%r1,0(%r1,%r12)
	la	%r5,0(%r5,%r12)
	la	%r6,0(%r6,%r12)
	la	%r7,0(%r7,%r12)
	icm	%r1,2,0(%r1)		# Te4[rk[5]>>0]<<8
	icm	%r1,4,0(%r5)		# Te4[rk[5]>>8]<<16
	icm	%r1,8,0(%r6)		# Te4[rk[5]>>16]<<24
	icm	%r1,1,0(%r7)		# Te4[rk[5]>>24]
	x	%r1,256(%r3,%r12)	# rcon[i]
	xr	%r8,%r1			# rk[6]=rk[0]^...
	xr	%r9,%r8			# rk[7]=rk[1]^rk[6]
	xr	%r10,%r9		# rk[8]=rk[2]^rk[7]
	xr	%r11,%r10		# rk[9]=rk[3]^rk[8]

	st	%r8,24(%r4)
	st	%r9,28(%r4)
	st	%r10,32(%r4)
	st	%r11,36(%r4)
	brct	%r13,.L192_continue	# last iteration stops after 4 words
	lghi	%r0,12
	lghi	%r2,0			# success
	lm	%r4,%r13,4*4(%r15)
	br	%r14

.align	16
.L192_continue:
	lgr	%r1,%r11
	x	%r1,16(%r4)		# rk[10]=rk[4]^rk[9]
	st	%r1,40(%r4)
	x	%r1,20(%r4)		# rk[11]=rk[5]^rk[10]
	st	%r1,44(%r4)

	srlg	%r5,%r1,8
	srlg	%r6,%r1,16
	srlg	%r7,%r1,24
	nr	%r1,%r0
	nr	%r5,%r0
	nr	%r6,%r0

	la	%r4,24(%r4)		# key+=6
	la	%r3,4(%r3)		# i++
	j	.L192_loop

.align	16
.Lnot192:
	# ---- 256-bit key schedule: 7 iterations, 8 words each ----
	llgf	%r0,24(%r2)		# rk[6..7] = last 64 key bits
	llgf	%r1,28(%r2)
	st	%r0,24(%r4)
	st	%r1,28(%r4)
	llill	%r0,0xff
	lghi	%r3,0			# i=0
	lghi	%r13,14
	st	%r13,240(%r4)		# 14 rounds
	lghi	%r13,7			# loop counter

	srlg	%r5,%r1,8
	srlg	%r6,%r1,16
	srlg	%r7,%r1,24
	nr	%r1,%r0
	nr	%r5,%r0
	nr	%r6,%r0

.align	16
.L256_loop:
	la	%r1,0(%r1,%r12)
	la	%r5,0(%r5,%r12)
	la	%r6,0(%r6,%r12)
	la	%r7,0(%r7,%r12)
	icm	%r1,2,0(%r1)		# Te4[rk[7]>>0]<<8
	icm	%r1,4,0(%r5)		# Te4[rk[7]>>8]<<16
	icm	%r1,8,0(%r6)		# Te4[rk[7]>>16]<<24
	icm	%r1,1,0(%r7)		# Te4[rk[7]>>24]
	x	%r1,256(%r3,%r12)	# rcon[i]
	xr	%r8,%r1			# rk[8]=rk[0]^...
	xr	%r9,%r8			# rk[9]=rk[1]^rk[8]
	xr	%r10,%r9		# rk[10]=rk[2]^rk[9]
	xr	%r11,%r10		# rk[11]=rk[3]^rk[10]
	st	%r8,32(%r4)
	st	%r9,36(%r4)
	st	%r10,40(%r4)
	st	%r11,44(%r4)
	brct	%r13,.L256_continue	# last iteration stops after 4 words
	lghi	%r0,14
	lghi	%r2,0			# success
	lm	%r4,%r13,4*4(%r15)
	br	%r14

.align	16
.L256_continue:
	# second half: SubWord (no rotate, no rcon) on rk[11]
	lgr	%r1,%r11		# temp=rk[11]
	srlg	%r5,%r11,8
	srlg	%r6,%r11,16
	srlg	%r7,%r11,24
	nr	%r1,%r0
	nr	%r5,%r0
	nr	%r6,%r0
	la	%r1,0(%r1,%r12)
	la	%r5,0(%r5,%r12)
	la	%r6,0(%r6,%r12)
	la	%r7,0(%r7,%r12)
	llgc	%r1,0(%r1)		# Te4[rk[11]>>0]
	icm	%r1,2,0(%r5)		# Te4[rk[11]>>8]<<8
	icm	%r1,4,0(%r6)		# Te4[rk[11]>>16]<<16
	icm	%r1,8,0(%r7)		# Te4[rk[11]>>24]<<24
	x	%r1,16(%r4)		# rk[12]=rk[4]^...
	st	%r1,48(%r4)
	x	%r1,20(%r4)		# rk[13]=rk[5]^rk[12]
	st	%r1,52(%r4)
	x	%r1,24(%r4)		# rk[14]=rk[6]^rk[13]
	st	%r1,56(%r4)
	x	%r1,28(%r4)		# rk[15]=rk[7]^rk[14]
	st	%r1,60(%r4)

	srlg	%r5,%r1,8
	srlg	%r6,%r1,16
	srlg	%r7,%r1,24
	nr	%r1,%r0
	nr	%r5,%r0
	nr	%r6,%r0

	la	%r4,32(%r4)		# key+=8
	la	%r3,4(%r3)		# i++
	j	.L256_loop

.Lminus1:
	lghi	%r2,-1			# null-pointer argument
	br	%r14
.size	AES_set_encrypt_key,.-AES_set_encrypt_key

# void AES_set_decrypt_key(const unsigned char *in, int bits,
#                          AES_KEY *key) {
.globl	AES_set_decrypt_key
.type	AES_set_decrypt_key,@function
.align	16
AES_set_decrypt_key:
	#st	%r4,4*4(%r15)	# I rely on AES_set_encrypt_key to
	st	%r14,14*4(%r15)	# save non-volatile registers and %r4!
	bras	%r14,_s390x_AES_set_encrypt_key
	#l	%r4,4*4(%r15)
	l	%r14,14*4(%r15)
	ltgr	%r2,%r2			# propagate error from encrypt-key
	bnzr	%r14
	#l	%r0,240(%r4)		# %r0 still holds rounds/km code
	lhi	%r1,16
	cr	%r0,%r1			# hardware path leaves code>=16? no:
	jl	.Lgo			# <16 means rounds, i.e. software path
	oill	%r0,S390X_DECRYPT	# set "decrypt" bit
	st	%r0,240(%r4)
	br	%r14
.align	16
# Software path: reverse the order of the round keys in place...
.Lgo:	lgr	%r13,%r0	#llgf	%r13,240(%r4)
	la	%r5,0(%r4)		# %r5 -> first round key
	sllg	%r6,%r13,4
	la	%r6,0(%r6,%r4)		# %r6 -> last round key
	srl	%r13,1			# rounds/2 swap iterations
	lghi	%r1,-16

.align	16
.Linv:	lmg	%r8,%r9,0(%r5)		# swap 16-byte round keys end-for-end
	lmg	%r10,%r11,0(%r6)
	stmg	%r8,%r9,0(%r6)
	stmg	%r10,%r11,0(%r5)
	la	%r5,16(%r5)
	la	%r6,0(%r1,%r6)
	brct	%r13,.Linv
	llgf	%r13,240(%r4)
	aghi	%r13,-1
	sll	%r13,2			# (rounds-1)*4
# ...then apply InvMixColumns to all inner round keys, using the
# branchless GF(2^8) xtime trick on four packed bytes per word:
# %r5 = 0x80808080 (high-bit mask), %r6 = 0x1b1b1b1b (reduction
# polynomial), %r7 = 0xfefefefe (shifted-byte mask).
	llilh	%r5,0x8080
	llilh	%r6,0x1b1b
	llilh	%r7,0xfefe
	oill	%r5,0x8080
	oill	%r6,0x1b1b
	oill	%r7,0xfefe

.align	16
.Lmix:	l	%r8,16(%r4)		# tp1
	lr	%r9,%r8
	ngr	%r9,%r5
	srlg	%r1,%r9,7
	slr	%r9,%r1
	nr	%r9,%r6
	sllg	%r1,%r8,1
	nr	%r1,%r7
	xr	%r9,%r1			# tp2 = xtime(tp1)

	lr	%r10,%r9
	ngr	%r10,%r5
	srlg	%r1,%r10,7
	slr	%r10,%r1
	nr	%r10,%r6
	sllg	%r1,%r9,1
	nr	%r1,%r7
	xr	%r10,%r1		# tp4 = xtime(tp2)

	lr	%r11,%r10
	ngr	%r11,%r5
	srlg	%r1,%r11,7
	slr	%r11,%r1
	nr	%r11,%r6
	sllg	%r1,%r10,1
	nr	%r1,%r7
	xr	%r11,%r1		# tp8 = xtime(tp4)

	xr	%r9,%r8			# tp2^tp1
	xr	%r10,%r8		# tp4^tp1
	rll	%r8,%r8,24		# = ROTATE(tp1,8)
	xr	%r10,%r11		# ^=tp8
	xr	%r8,%r9			# ^=tp2^tp1
	xr	%r9,%r11		# tp2^tp1^tp8
	xr	%r8,%r10		# ^=tp4^tp1^tp8
	rll	%r9,%r9,8
	rll	%r10,%r10,16
	xr	%r8,%r9			# ^= ROTATE(tp8^tp2^tp1,24)
	rll	%r11,%r11,24
	xr	%r8,%r10		# ^= ROTATE(tp8^tp4^tp1,16)
	xr	%r8,%r11		# ^= ROTATE(tp8,8)

	st	%r8,16(%r4)
	la	%r4,4(%r4)
	brct	%r13,.Lmix

	lm	%r6,%r13,6*4(%r15)# as was saved by AES_set_encrypt_key!
	lghi	%r2,0
	br	%r14
.size	AES_set_decrypt_key,.-AES_set_decrypt_key

#------------------------------------------------------------------
# void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
#                      size_t length, const AES_KEY *key,
#                      unsigned char *ivec, const int enc)
# In:  %r2=in, %r3=out, %r4=length, %r5=key, %r6=ivec,
#      enc on the stack at 96(%r15).
# Uses the KMC instruction when key+240 holds a kmc function code
# (>16), handling a non-multiple-of-16 tail via an EX-patched MVC;
# otherwise falls back to the table-driven software routines.
#------------------------------------------------------------------
.globl	AES_cbc_encrypt
.type	AES_cbc_encrypt,@function
.align	16
AES_cbc_encrypt:
	xgr	%r3,%r4		# flip %r3 and %r4, out and len
	xgr	%r4,%r3
	xgr	%r3,%r4
	lhi	%r0,16
	cl	%r0,240(%r5)	# software rounds (<=16) or kmc code?
	jh	.Lcbc_software

	lg	%r0,0(%r6)	# copy ivec
	lg	%r1,8(%r6)
	stmg	%r0,%r1,16(%r15)
	lmg	%r0,%r1,0(%r5)	# copy key, cover 256 bit
	stmg	%r0,%r1,32(%r15)
	lmg	%r0,%r1,16(%r5)
	stmg	%r0,%r1,48(%r15)
	l	%r0,240(%r5)	# load kmc code
	lghi	%r5,15		# res=len%16, len-=res;
	ngr	%r5,%r3
	slr	%r3,%r5
	la	%r1,16(%r15)	# parameter block - ivec || key
	jz	.Lkmc_truncated
	.long	0xb92f0042	# kmc %r4,%r2
	brc	1,.-4		# pay attention to "partial completion"
	ltr	%r5,%r5		# any residue left?
	jnz	.Lkmc_truncated
.Lkmc_done:
	lmg	%r0,%r1,16(%r15)	# copy ivec to caller
	stg	%r0,0(%r6)
	stg	%r1,8(%r6)
	br	%r14
.align	16
# Residue handling: process the final partial block through a
# zero-padded 16-byte stack buffer, copying with an EX-patched MVC.
.Lkmc_truncated:
	ahi	%r5,-1		# it's the way it's encoded in mvc
	tmll	%r0,S390X_DECRYPT
	jnz	.Lkmc_truncated_dec
	lghi	%r1,0
	stg	%r1,16*4(%r15)	# wipe the pad buffer
	stg	%r1,16*4+8(%r15)
	bras	%r1,1f
	mvc	16*4(1,%r15),0(%r2)	# template, length patched by ex
1:	ex	%r5,0(%r1)
	la	%r1,16(%r15)	# restore parameter block
	la	%r2,16*4(%r15)
	lghi	%r3,16
	.long	0xb92f0042	# kmc %r4,%r2
	j	.Lkmc_done
.align	16
.Lkmc_truncated_dec:
	st	%r4,4*4(%r15)
	la	%r4,16*4(%r15)	# decrypt last block to the pad buffer
	lghi	%r3,16
	.long	0xb92f0042	# kmc %r4,%r2
	l	%r4,4*4(%r15)
	bras	%r1,2f
	mvc	0(1,%r4),16*4(%r15)	# template, length patched by ex
2:	ex	%r5,0(%r1)
	j	.Lkmc_done
.align	16
# Software CBC via _s390x_AES_encrypt/_s390x_AES_decrypt.
.Lcbc_software:
	stm	%r5,%r14,5*4(%r15)
	lhi	%r0,0
	cl	%r0,96(%r15)	# enc flag from the caller's stack
	je	.Lcbc_decrypt

	larl	%r12,AES_Te

	llgf	%r8,0(%r6)	# load iv into %r8..%r11
	llgf	%r9,4(%r6)
	llgf	%r10,8(%r6)
	llgf	%r11,12(%r6)

	lghi	%r0,16
	slr	%r3,%r0
	brc	4,.Lcbc_enc_tail	# if borrow
.Lcbc_enc_loop:
	stm	%r2,%r4,2*4(%r15)
	x	%r8,0(%r2)	# iv ^= plaintext block
	x	%r9,4(%r2)
	x	%r10,8(%r2)
	x	%r11,12(%r2)
	lgr	%r4,%r5		# key pointer for the cipher routine

	bras	%r14,_s390x_AES_encrypt

	lm	%r2,%r5,2*4(%r15)
	st	%r8,0(%r4)	# store ciphertext, which is also next iv
	st	%r9,4(%r4)
	st	%r10,8(%r4)
	st	%r11,12(%r4)

	la	%r2,16(%r2)
	la	%r4,16(%r4)
	lghi	%r0,16
	ltr	%r3,%r3
	jz	.Lcbc_enc_done
	slr	%r3,%r0
	brc	4,.Lcbc_enc_tail	# if borrow
	j	.Lcbc_enc_loop
.align	16
.Lcbc_enc_done:
	l	%r6,6*4(%r15)
	st	%r8,0(%r6)	# write back final iv
	st	%r9,4(%r6)
	st	%r10,8(%r6)
	st	%r11,12(%r6)

	lm	%r7,%r14,7*4(%r15)
	br	%r14

.align	16
.Lcbc_enc_tail:
	aghi	%r3,15		# residue-1, for ex-patched mvc
	lghi	%r0,0
	stg	%r0,16*4(%r15)	# zero-pad buffer for the last block
	stg	%r0,16*4+8(%r15)
	bras	%r1,3f
	mvc	16*4(1,%r15),0(%r2)	# template, length patched by ex
3:	ex	%r3,0(%r1)
	lghi	%r3,0
	la	%r2,16*4(%r15)	# encrypt the padded block
	j	.Lcbc_enc_loop

.align	16
.Lcbc_decrypt:
	larl	%r12,AES_Td

	lg	%r0,0(%r6)	# keep iv in the stack slot at 16*4
	lg	%r1,8(%r6)
	stmg	%r0,%r1,16*4(%r15)

.Lcbc_dec_loop:
	stm	%r2,%r4,2*4(%r15)
	llgf	%r8,0(%r2)
	llgf	%r9,4(%r2)
	llgf	%r10,8(%r2)
	llgf	%r11,12(%r2)
	lgr	%r4,%r5

	bras	%r14,_s390x_AES_decrypt

	lm	%r2,%r5,2*4(%r15)
	sllg	%r8,%r8,32	# repack 4x32-bit words into 2x64-bit
	sllg	%r10,%r10,32
	lr	%r8,%r9
	lr	%r10,%r11

	lg	%r0,0(%r2)	# current ciphertext = next iv
	lg	%r1,8(%r2)
	xg	%r8,16*4(%r15)	# plaintext = decrypted ^ iv
	xg	%r10,16*4+8(%r15)
	lghi	%r9,16
	slr	%r3,%r9
	brc	4,.Lcbc_dec_tail	# if borrow
	brc	2,.Lcbc_dec_done	# if zero
	stg	%r8,0(%r4)
	stg	%r10,8(%r4)
	stmg	%r0,%r1,16*4(%r15)	# stash next iv

	la	%r2,16(%r2)
	la	%r4,16(%r4)
	j	.Lcbc_dec_loop

.Lcbc_dec_done:
	stg	%r8,0(%r4)
	stg	%r10,8(%r4)
.Lcbc_dec_exit:
	lm	%r6,%r14,6*4(%r15)
	stmg	%r0,%r1,0(%r6)	# write back final iv

	br	%r14

.align	16
.Lcbc_dec_tail:
	aghi	%r3,15		# residue-1, for ex-patched mvc
	stg	%r8,16*4(%r15)
	stg	%r10,16*4+8(%r15)
	bras	%r9,4f
	mvc	0(1,%r4),16*4(%r15)	# template, length patched by ex
4:	ex	%r3,0(%r9)
	j	.Lcbc_dec_exit
.size	AES_cbc_encrypt,.-AES_cbc_encrypt

#------------------------------------------------------------------
# void AES_ctr32_encrypt(const unsigned char *in, unsigned char *out,
#                        size_t blocks, const AES_KEY *key,
#                        const unsigned char *ivec)
# In:  %r2=in, %r3=out, %r4=blocks, %r5=key, %r6=ivec.
# Three paths: KMA-CTR (fastest, >16 blocks and KMA available),
# KM over a stack-built vector of counter blocks, or the software
# routine. Only the low 32 bits of the counter are incremented.
#------------------------------------------------------------------
.globl	AES_ctr32_encrypt
.type	AES_ctr32_encrypt,@function
.align	16
AES_ctr32_encrypt:
	xgr	%r3,%r4		# flip %r3 and %r4, %r4 and %r3
	xgr	%r4,%r3
	xgr	%r3,%r4
	llgfr	%r3,%r3		# safe in ctr32 subroutine even in 64-bit case
	l	%r0,240(%r5)
	lhi	%r1,16
	clr	%r0,%r1		# software rounds or km function code?
	jl	.Lctr32_software

	st	%r10,10*4(%r15)
	st	%r11,11*4(%r15)

	clr	%r3,%r1		# does work even in 64-bit mode
	jle	.Lctr32_nokma	# kma is slower for <= 16 blocks

	larl	%r1,OPENSSL_s390xcap_P
	lr	%r10,%r0
	llihh	%r11,0x8000
	srlg	%r11,%r11,0(%r10)
	ng	%r11,S390X_KMA(%r1)	# check kma capability vector
	jz	.Lctr32_nokma

	lhi	%r1,-96-112	# frame + kma parameter block
	lr	%r11,%r15
	la	%r15,0(%r1,%r15)	# prepare parameter block

	lhi	%r1,0x0600
	sllg	%r3,%r3,4	# blocks -> bytes
	or	%r0,%r1		# set HS and LAAD flags

	st	%r11,0(%r15)	# backchain
	la	%r1,96(%r15)

	lmg	%r10,%r11,0(%r5)	# copy key
	stg	%r10,96+80(%r15)
	stg	%r11,96+88(%r15)
	lmg	%r10,%r11,16(%r5)
	stg	%r10,96+96(%r15)
	stg	%r11,96+104(%r15)

	lmg	%r10,%r11,0(%r6)	# copy iv
	stg	%r10,96+64(%r15)
	ahi	%r11,-1		# kma requires counter-1
	stg	%r11,96+72(%r15)
	st	%r11,96+12(%r15)	# copy counter

	lghi	%r10,0		# no AAD
	lghi	%r11,0

	.long	0xb929a042	# kma %r4,%r10,%r2
	brc	1,.-4		# pay attention to "partial completion"

	stg	%r0,96+80(%r15)	# wipe key
	stg	%r0,96+88(%r15)
	stg	%r0,96+96(%r15)
	stg	%r0,96+104(%r15)
	la	%r15,96+112(%r15)	# pop the parameter-block frame

	lm	%r10,%r11,10*4(%r15)
	br	%r14

.align	16
# KM path: build up to a buffer-full of counter blocks on the stack,
# ECB-encrypt them in one km call, then xor with the input.
.Lctr32_nokma:
	stm	%r6,%r9,6*4(%r15)

	slgr	%r4,%r2		# out-in delta
	la	%r1,0(%r5)	# %r1 is permanent copy of %r5
	lg	%r5,0(%r6)	# load ivec
	lg	%r6,8(%r6)

	# prepare and allocate stack frame at the top of 4K page
	# with 1K reserved for eventual signal handling
	lghi	%r8,-1024-256-16# guarantee at least 256-bytes buffer
	lghi	%r9,-4096
	algr	%r8,%r15
	lgr	%r7,%r15
	ngr	%r8,%r9		# align at page boundary
	slgr	%r7,%r8		# total buffer size
	lgr	%r10,%r15
	lghi	%r9,1024+16	# sl[g]fi is extended-immediate facility
	slgr	%r7,%r9		# deduct reservation to get usable buffer size
	# buffer size is at least 256 and at most 3072+256-16

	la	%r15,1024(%r8)	# alloca
	srlg	%r7,%r7,4	# convert bytes to blocks, minimum 16
	st	%r10,0(%r15)	# back-chain
	st	%r7,4(%r15)

	slgr	%r3,%r7
	brc	1,.Lctr32_hw_switch	# not zero, no borrow
	algr	%r7,%r3		# input is shorter than allocated buffer
	lghi	%r3,0
	st	%r7,4(%r15)

.Lctr32_hw_switch:
.Lctr32_km_loop:
	la	%r10,16(%r15)
	lgr	%r11,%r7
.Lctr32_km_prepare:
	stg	%r5,0(%r10)	# lay out counter blocks
	stg	%r6,8(%r10)
	la	%r10,16(%r10)
	ahi	%r6,1		# 32-bit increment, preserves upper half
	brct	%r11,.Lctr32_km_prepare

	la	%r8,16(%r15)	# inp
	sllg	%r9,%r7,4	# len
	la	%r10,16(%r15)	# out
	.long	0xb92e00a8	# km %r10,%r8
	brc	1,.-4		# pay attention to "partial completion"

	la	%r10,16(%r15)
	lgr	%r11,%r7
	slgr	%r10,%r2	# keystream-inp delta for indexed access
.Lctr32_km_xor:
	lg	%r8,0(%r2)	# out = in ^ keystream
	lg	%r9,8(%r2)
	xg	%r8,0(%r10,%r2)
	xg	%r9,8(%r10,%r2)
	stg	%r8,0(%r4,%r2)
	stg	%r9,8(%r4,%r2)
	la	%r2,16(%r2)
	brct	%r11,.Lctr32_km_xor

	slgr	%r3,%r7
	brc	1,.Lctr32_km_loop	# not zero, no borrow
	algr	%r7,%r3		# final, shorter pass
	lghi	%r3,0
	brc	4+1,.Lctr32_km_loop	# not zero

	l	%r8,0(%r15)	# back-chain
	l	%r9,4(%r15)	# blocks used
	la	%r10,16(%r15)
.Lctr32_km_zap:
	stg	%r8,0(%r10)	# wipe keystream buffer
	stg	%r8,8(%r10)
	la	%r10,16(%r10)
	brct	%r9,.Lctr32_km_zap

	la	%r15,0(%r8)	# restore caller's stack pointer
	lm	%r6,%r11,6*4(%r15)
	br	%r14
.align	16
# Software CTR path via _s390x_AES_encrypt.
.Lctr32_software:
	stm	%r5,%r14,5*4(%r15)
	slr	%r2,%r4		# in-out delta
	larl	%r12,AES_Te
	llgf	%r1,12(%r6)	# 32-bit counter word

.Lctr32_loop:
	stm	%r2,%r4,2*4(%r15)
	llgf	%r8,0(%r6)	# counter block = iv[0..2] || counter
	llgf	%r9,4(%r6)
	llgf	%r10,8(%r6)
	lgr	%r11,%r1
	st	%r1,16*4(%r15)	# stash counter across the call
	lgr	%r4,%r5

	bras	%r14,_s390x_AES_encrypt

	lm	%r2,%r6,2*4(%r15)
	llgf	%r1,16*4(%r15)
	x	%r8,0(%r2,%r4)	# out = keystream ^ in
	x	%r9,4(%r2,%r4)
	x	%r10,8(%r2,%r4)
	x	%r11,12(%r2,%r4)
	stm	%r8,%r11,0(%r4)

	la	%r4,16(%r4)
	ahi	%r1,1		# 32-bit increment
	brct	%r3,.Lctr32_loop

	lm	%r6,%r14,6*4(%r15)
	br	%r14
.size	AES_ctr32_encrypt,.-AES_ctr32_encrypt

#------------------------------------------------------------------
# Internal XTS bulk-processing helper shared by AES_xts_encrypt and
# AES_xts_decrypt. On entry %r0 holds the km function code, %r1/%r5
# the key, %r2=inp, %r3=len, %r4=out-inp delta, and the current
# tweak sits at 80(%r15). If the CPU has the dedicated km-xts code
# (function code+32) it is used directly; otherwise tweak blocks are
# generated in software and fed through plain km ECB.
# On return %r8:%r9 hold the last tweak (little-endian), %r3 the
# residue length (len%16), and the condition code is zero iff there
# is no residue.
#------------------------------------------------------------------
.type	_s390x_xts_km,@function
.align	16
_s390x_xts_km:
	llgfr	%r8,%r0		# put aside the function code
	lghi	%r9,0x7f
	nr	%r9,%r0
	larl	%r1,OPENSSL_s390xcap_P
	llihh	%r0,0x8000
	srlg	%r0,%r0,32(%r9)	# check for 32+function code
	ng	%r0,S390X_KM(%r1)	# check km capability vector
	lgr	%r0,%r8		# restore the function code
	la	%r1,0(%r5)	# restore %r5
	jz	.Lxts_km_vanilla

	lmg	%r6,%r7,80(%r15)	# put aside the tweak value
	algr	%r4,%r2

	oill	%r0,32		# switch to xts function code
	aghi	%r9,-18		# base function code for aes-128
	sllg	%r9,%r9,3	# (function code - 18)*8, 0 or 16
	la	%r1,80-16(%r15)
	slgr	%r1,%r9		# parameter block position
	lmg	%r8,%r11,0(%r5)	# load 256 bits of key material,
	stmg	%r8,%r11,0(%r1)	# and copy it to parameter block.
				# yes, it contains junk and overlaps
				# with the tweak in 128-bit case.
				# it's done to avoid conditional
				# branch.
	stmg	%r6,%r7,80(%r15)	# "re-seat" the tweak value

	.long	0xb92e0042	# km %r4,%r2
	brc	1,.-4		# pay attention to "partial completion"

	lrvg	%r8,80+0(%r15)	# load the last tweak
	lrvg	%r9,80+8(%r15)
	stmg	%r0,%r3,80-32(%r15)	# wipe copy of the key

	nill	%r0,0xffdf	# switch back to original function code
	la	%r1,0(%r5)	# restore pointer to %r5
	slgr	%r4,%r2

	llgc	%r3,2*4-1(%r15)
	nill	%r3,0x0f	# %r3%=16
	br	%r14

.align	16
# No km-xts support: generate tweaks in software and use plain km.
.Lxts_km_vanilla:
	# prepare and allocate stack frame at the top of 4K page
	# with 1K reserved for eventual signal handling
	lghi	%r8,-1024-256-16# guarantee at least 256-bytes buffer
	lghi	%r9,-4096
	algr	%r8,%r15
	lgr	%r7,%r15
	ngr	%r8,%r9		# align at page boundary
	slgr	%r7,%r8		# total buffer size
	lgr	%r10,%r15
	lghi	%r9,1024+16	# sl[g]fi is extended-immediate facility
	slgr	%r7,%r9		# deduct reservation to get usable buffer size
	# buffer size is at least 256 and at most 3072+256-16

	la	%r15,1024(%r8)	# alloca
	nill	%r7,0xfff0	# round to 16*n
	st	%r10,0(%r15)	# back-chain
	nill	%r3,0xfff0	# redundant
	st	%r7,4(%r15)

	slgr	%r3,%r7
	brc	1,.Lxts_km_go	# not zero, no borrow
	algr	%r7,%r3		# input is shorter than allocated buffer
	lghi	%r3,0
	st	%r7,4(%r15)

.Lxts_km_go:
	lrvg	%r8,80+0(%r10)	# load the tweak value in little-endian
	lrvg	%r9,80+8(%r10)

	la	%r10,16(%r15)	# vector of ascending tweak values
	slgr	%r10,%r2
	srlg	%r11,%r7,4
	j	.Lxts_km_start

.Lxts_km_loop:
	la	%r10,16(%r15)
	slgr	%r10,%r2
	srlg	%r11,%r7,4
.Lxts_km_prepare:
	# multiply the 128-bit tweak by x in GF(2^128), poly 0x87
	lghi	%r5,0x87
	srag	%r6,%r9,63	# broadcast upper bit
	ngr	%r5,%r6		# rem
	algr	%r8,%r8
	alcgr	%r9,%r9
	xgr	%r8,%r5
.Lxts_km_start:
	lrvgr	%r5,%r8		# flip byte order
	lrvgr	%r6,%r9
	stg	%r5,0(%r10,%r2)	# save tweak for the post-km xor
	stg	%r6,8(%r10,%r2)
	xg	%r5,0(%r2)	# pre-whiten input block
	xg	%r6,8(%r2)
	stg	%r5,0(%r4,%r2)
	stg	%r6,8(%r4,%r2)
	la	%r2,16(%r2)
	brct	%r11,.Lxts_km_prepare

	slgr	%r2,%r7		# rewind %r2
	la	%r10,0(%r4,%r2)
	lgr	%r11,%r7
	.long	0xb92e00aa	# km %r10,%r10
	brc	1,.-4		# pay attention to "partial completion"

	la	%r10,16(%r15)
	slgr	%r10,%r2
	srlg	%r11,%r7,4
.Lxts_km_xor:
	lg	%r5,0(%r4,%r2)	# post-whiten with the saved tweaks
	lg	%r6,8(%r4,%r2)
	xg	%r5,0(%r10,%r2)
	xg	%r6,8(%r10,%r2)
	stg	%r5,0(%r4,%r2)
	stg	%r6,8(%r4,%r2)
	la	%r2,16(%r2)
	brct	%r11,.Lxts_km_xor

	slgr	%r3,%r7
	brc	1,.Lxts_km_loop	# not zero, no borrow
	algr	%r7,%r3		# final, shorter pass
	lghi	%r3,0
	brc	4+1,.Lxts_km_loop	# not zero

	l	%r5,0(%r15)	# back-chain
	llgf	%r7,4(%r15)	# bytes used
	la	%r6,16(%r15)
	srlg	%r7,%r7,4
.Lxts_km_zap:
	stg	%r5,0(%r6)	# wipe the tweak buffer
	stg	%r5,8(%r6)
	la	%r6,16(%r6)
	brct	%r7,.Lxts_km_zap

	la	%r15,0(%r5)	# restore caller's stack pointer
	llgc	%r3,2*4-1(%r5)
	nill	%r3,0x0f	# %r3%=16
	bzr	%r14		# no residue: return with cc zero

	# generate one more tweak...
	lghi	%r5,0x87
	srag	%r6,%r9,63	# broadcast upper bit
	ngr	%r5,%r6		# rem
	algr	%r8,%r8
	alcgr	%r9,%r9
	xgr	%r8,%r5

	ltr	%r3,%r3		# clear zero flag
	br	%r14
.size	_s390x_xts_km,.-_s390x_xts_km

#------------------------------------------------------------------
# void AES_xts_encrypt(const unsigned char *inp, unsigned char *out,
#                      size_t len, const AES_KEY *key1,
#                      const AES_KEY *key2, const unsigned char iv[16])
# In:  %r2=inp, %r3=out, %r4=len, %r5=key1 (data key),
#      %r6=key2 (tweak key), iv pointer at 96(%r15).
# len < 16 is rejected; a non-multiple-of-16 tail is handled with
# ciphertext stealing. Hardware (km) and software paths share the
# tweak slot at 80(%r15).
#------------------------------------------------------------------
.globl	AES_xts_encrypt
.type	AES_xts_encrypt,@function
.align	16
AES_xts_encrypt:
	xgr	%r3,%r4		# flip %r3 and %r4, %r4 and %r3
	xgr	%r4,%r3
	xgr	%r3,%r4
	llgfr	%r3,%r3
	st	%r3,1*4(%r15)	# save copy of %r3
	srag	%r3,%r3,4	# formally wrong, because it expands
				# sign byte, but who can afford asking
				# to process more than 2^63-1 bytes?
				# I use it, because it sets condition
				# code...
	bcr	8,%r14		# abort if zero (i.e. less than 16)
	llgf	%r0,240(%r6)
	lhi	%r1,16
	clr	%r0,%r1		# software rounds or km function code?
	jl	.Lxts_enc_software

	st	%r14,5*4(%r15)
	stm	%r6,%r11,6*4(%r15)

	sllg	%r3,%r3,4	# %r3&=~15
	slgr	%r4,%r2		# out-inp delta

	# generate the tweak value
	l	%r11,96(%r15)	# pointer to iv
	la	%r10,80(%r15)
	lmg	%r8,%r9,0(%r11)
	lghi	%r11,16
	stmg	%r8,%r9,0(%r10)
	la	%r1,0(%r6)	# %r6 is not needed anymore
	.long	0xb92e00aa	# km %r10,%r10, generate the tweak
	brc	1,.-4		# can this happen?

	l	%r0,240(%r5)
	la	%r1,0(%r5)	# %r5 is not needed anymore
	bras	%r14,_s390x_xts_km
	jz	.Lxts_enc_km_done	# no residue, no stealing needed

	# ciphertext stealing: swap the tail with the last full block
	aghi	%r2,-16		# take one step back
	la	%r7,0(%r4,%r2)	# put aside real %r4
.Lxts_enc_km_steal:
	llgc	%r5,16(%r2)
	llgc	%r6,0(%r4,%r2)
	stc	%r5,0(%r4,%r2)
	stc	%r6,16(%r4,%r2)
	la	%r2,1(%r2)
	brct	%r3,.Lxts_enc_km_steal

	la	%r10,0(%r7)
	lghi	%r11,16
	lrvgr	%r5,%r8		# flip byte order
	lrvgr	%r6,%r9
	xg	%r5,0(%r10)	# pre-whiten the stolen block
	xg	%r6,8(%r10)
	stg	%r5,0(%r10)
	stg	%r6,8(%r10)
	.long	0xb92e00aa	# km %r10,%r10
	brc	1,.-4		# can this happen?
	lrvgr	%r5,%r8		# flip byte order
	lrvgr	%r6,%r9
	xg	%r5,0(%r7)	# post-whiten the stolen block
	xg	%r6,8(%r7)
	stg	%r5,0(%r7)
	stg	%r6,8(%r7)

.Lxts_enc_km_done:
	stg	%r15,80+0(%r15)	# wipe tweak
	stg	%r15,80+8(%r15)
	l	%r14,5*4(%r15)
	lm	%r6,%r11,6*4(%r15)
	br	%r14
.align	16
# Software XTS encrypt via _s390x_AES_encrypt; the tweak itself is
# generated by encrypting the iv with key2.
.Lxts_enc_software:
	stm	%r6,%r14,6*4(%r15)

	slgr	%r4,%r2		# out-inp delta

	l	%r11,96(%r15)	# ivp
	llgf	%r8,0(%r11)	# load iv
	llgf	%r9,4(%r11)
	llgf	%r10,8(%r11)
	llgf	%r11,12(%r11)
	stm	%r2,%r5,2*4(%r15)
	la	%r4,0(%r6)
	larl	%r12,AES_Te
	bras	%r14,_s390x_AES_encrypt	# generate the tweak
	lm	%r2,%r5,2*4(%r15)
	stm	%r8,%r11,80(%r15)	# save the tweak
	j	.Lxts_enc_enter

.align	16
.Lxts_enc_loop:
	# advance tweak: multiply by x in GF(2^128), poly 0x87
	lrvg	%r9,80+0(%r15)	# load the tweak in little-endian
	lrvg	%r11,80+8(%r15)
	lghi	%r1,0x87
	srag	%r0,%r11,63	# broadcast upper bit
	ngr	%r1,%r0		# rem
	algr	%r9,%r9
	alcgr	%r11,%r11
	xgr	%r9,%r1
	lrvgr	%r9,%r9		# flip byte order
	lrvgr	%r11,%r11
	srlg	%r8,%r9,32	# smash the tweak to 4x32-bits
	stg	%r9,80+0(%r15)	# save the tweak
	llgfr	%r9,%r9
	srlg	%r10,%r11,32
	stg	%r11,80+8(%r15)
	llgfr	%r11,%r11
	la	%r2,16(%r2)	# %r2+=16
.Lxts_enc_enter:
	x	%r8,0(%r2)	# ^=*(%r2), pre-whiten
	x	%r9,4(%r2)
	x	%r10,8(%r2)
	x	%r11,12(%r2)
	stm	%r2,%r3,2*4(%r15)	# only two registers are changing
	la	%r4,0(%r5)
	bras	%r14,_s390x_AES_encrypt
	lm	%r2,%r5,2*4(%r15)
	x	%r8,80+0(%r15)	# ^=tweak, post-whiten
	x	%r9,80+4(%r15)
	x	%r10,80+8(%r15)
	x	%r11,80+12(%r15)
	st	%r8,0(%r4,%r2)
	st	%r9,4(%r4,%r2)
	st	%r10,8(%r4,%r2)
	st	%r11,12(%r4,%r2)
	brct	%r3,.Lxts_enc_loop

	llgc	%r3,7(%r15)	# low byte of saved len
	nill	%r3,0x0f	# %r3%16
	jz	.Lxts_enc_done

	# ciphertext stealing: swap the tail with the last full block
	la	%r7,0(%r2,%r4)	# put aside real %r4
.Lxts_enc_steal:
	llgc	%r0,16(%r2)
	llgc	%r1,0(%r4,%r2)
	stc	%r0,0(%r4,%r2)
	stc	%r1,16(%r4,%r2)
	la	%r2,1(%r2)
	brct	%r3,.Lxts_enc_steal
	la	%r4,0(%r7)	# restore real %r4

	# generate last tweak...
	lrvg	%r9,80+0(%r15)	# load the tweak in little-endian
	lrvg	%r11,80+8(%r15)
	lghi	%r1,0x87
	srag	%r0,%r11,63	# broadcast upper bit
	ngr	%r1,%r0		# rem
	algr	%r9,%r9
	alcgr	%r11,%r11
	xgr	%r9,%r1
	lrvgr	%r9,%r9		# flip byte order
	lrvgr	%r11,%r11
	srlg	%r8,%r9,32	# smash the tweak to 4x32-bits
	stg	%r9,80+0(%r15)	# save the tweak
	llgfr	%r9,%r9
	srlg	%r10,%r11,32
	stg	%r11,80+8(%r15)
	llgfr	%r11,%r11

	x	%r8,0(%r4)	# ^=*(inp)|stolen cipher-text
	x	%r9,4(%r4)
	x	%r10,8(%r4)
	x	%r11,12(%r4)
	st	%r4,4*4(%r15)
	la	%r4,0(%r5)
	bras	%r14,_s390x_AES_encrypt
	l	%r4,4*4(%r15)
	x	%r8,80(%r15)	# ^=tweak
	x	%r9,84(%r15)
	x	%r10,88(%r15)
	x	%r11,92(%r15)
	st	%r8,0(%r4)
	st	%r9,4(%r4)
	st	%r10,8(%r4)
	st	%r11,12(%r4)

.Lxts_enc_done:
	stg	%r15,80+0(%r15)	# wipe tweak
	stg	%r15,80+8(%r15)
	lm	%r6,%r14,6*4(%r15)
	br	%r14
.size	AES_xts_encrypt,.-AES_xts_encrypt

#------------------------------------------------------------------
# void AES_xts_decrypt(const unsigned char *inp, unsigned char *out,
#                      size_t len, const AES_KEY *key1,
#                      const AES_KEY *key2, const unsigned char iv[16])
# In:  %r2=inp, %r3=out, %r4=len, %r5=key1 (data key),
#      %r6=key2 (tweak key), iv pointer at 96(%r15).
# Mirrors AES_xts_encrypt, except stealing needs the 2nd-to-last
# tweak for the final full block, so two tweaks are tracked.
#------------------------------------------------------------------
.globl	AES_xts_decrypt
.type	AES_xts_decrypt,@function
.align	16
AES_xts_decrypt:
	xgr	%r3,%r4		# flip %r3 and %r4, %r4 and %r3
	xgr	%r4,%r3
	xgr	%r3,%r4
	llgfr	%r3,%r3
	st	%r3,1*4(%r15)	# save copy of %r3
	aghi	%r3,-16
	bcr	4,%r14		# abort if less than zero. formally
				# wrong, because %r3 is unsigned,
				# but who can afford asking to
				# process more than 2^63-1 bytes?
	tmll	%r3,0x0f	# reserve last block for stealing?
	jnz	.Lxts_dec_proceed
	aghi	%r3,16		# exact multiple: no stealing, take it back
.Lxts_dec_proceed:
	llgf	%r0,240(%r6)
	lhi	%r1,16
	clr	%r0,%r1		# software rounds or km function code?
	jl	.Lxts_dec_software

	st	%r14,5*4(%r15)
	stm	%r6,%r11,6*4(%r15)

	nill	%r3,0xfff0	# %r3&=~15
	slgr	%r4,%r2		# out-inp delta

	# generate the tweak value
	l	%r11,96(%r15)	# pointer to iv
	la	%r10,80(%r15)
	lmg	%r8,%r9,0(%r11)
	lghi	%r11,16
	stmg	%r8,%r9,0(%r10)
	la	%r1,0(%r6)	# %r6 is not needed past this point
	.long	0xb92e00aa	# km %r10,%r10, generate the tweak
	brc	1,.-4		# can this happen?

	l	%r0,240(%r5)
	la	%r1,0(%r5)	# %r5 is not needed anymore

	ltgr	%r3,%r3
	jz	.Lxts_dec_km_short	# only the stolen pair remains
	bras	%r14,_s390x_xts_km
	jz	.Lxts_dec_km_done	# no residue

	lrvgr	%r10,%r8	# make copy in reverse byte order
	lrvgr	%r11,%r9
	j	.Lxts_dec_km_2ndtweak

.Lxts_dec_km_short:
	llgc	%r3,7(%r15)
	nill	%r3,0x0f	# %r3%=16
	lrvg	%r8,80+0(%r15)	# load the tweak
	lrvg	%r9,80+8(%r15)
	lrvgr	%r10,%r8	# make copy in reverse byte order
	lrvgr	%r11,%r9

.Lxts_dec_km_2ndtweak:
	# advance tweak: multiply by x in GF(2^128), poly 0x87
	lghi	%r5,0x87
	srag	%r6,%r9,63	# broadcast upper bit
	ngr	%r5,%r6		# rem
	algr	%r8,%r8
	alcgr	%r9,%r9
	xgr	%r8,%r5
	lrvgr	%r5,%r8		# flip byte order
	lrvgr	%r6,%r9

	xg	%r5,0(%r2)	# decrypt last full block with 2nd tweak
	xg	%r6,8(%r2)
	stg	%r5,0(%r4,%r2)
	stg	%r6,8(%r4,%r2)
	la	%r6,0(%r4,%r2)
	lghi	%r7,16
	.long	0xb92e0066	# km %r6,%r6
	brc	1,.-4		# can this happen?
	lrvgr	%r5,%r8		# post-whiten with the 2nd tweak
	lrvgr	%r6,%r9
	xg	%r5,0(%r4,%r2)
	xg	%r6,8(%r4,%r2)
	stg	%r5,0(%r4,%r2)
	stg	%r6,8(%r4,%r2)

	# ciphertext stealing: swap the tail with the last full block
	la	%r7,0(%r4,%r2)	# put aside real %r4
.Lxts_dec_km_steal:
	llgc	%r5,16(%r2)
	llgc	%r6,0(%r4,%r2)
	stc	%r5,0(%r4,%r2)
	stc	%r6,16(%r4,%r2)
	la	%r2,1(%r2)
	brct	%r3,.Lxts_dec_km_steal

	lgr	%r8,%r10	# decrypt stolen block with the 1st tweak
	lgr	%r9,%r11
	xg	%r8,0(%r7)
	xg	%r9,8(%r7)
	stg	%r8,0(%r7)
	stg	%r9,8(%r7)
	la	%r8,0(%r7)
	lghi	%r9,16
	.long	0xb92e0088	# km %r8,%r8
	brc	1,.-4		# can this happen?
	xg	%r10,0(%r7)	# post-whiten with the 1st tweak
	xg	%r11,8(%r7)
	stg	%r10,0(%r7)
	stg	%r11,8(%r7)
.Lxts_dec_km_done:
	stg	%r15,80+0(%r15)	# wipe tweak
	stg	%r15,80+8(%r15)
	l	%r14,5*4(%r15)
	lm	%r6,%r11,6*4(%r15)
	br	%r14
.align	16
# Software XTS decrypt: tweak is generated by ENcrypting the iv with
# key2 (AES_Te), data blocks are decrypted with key1 (AES_Td).
.Lxts_dec_software:
	stm	%r6,%r14,6*4(%r15)

	srlg	%r3,%r3,4	# full blocks to process
	slgr	%r4,%r2		# out-inp delta

	l	%r11,96(%r15)	# ivp
	llgf	%r8,0(%r11)	# load iv
	llgf	%r9,4(%r11)
	llgf	%r10,8(%r11)
	llgf	%r11,12(%r11)
	stm	%r2,%r5,2*4(%r15)
	la	%r4,0(%r6)
	larl	%r12,AES_Te
	bras	%r14,_s390x_AES_encrypt	# generate the tweak
	lm	%r2,%r5,2*4(%r15)
	larl	%r12,AES_Td
	ltr	%r3,%r3
	stm	%r8,%r11,80(%r15)	# save the tweak
	jz	.Lxts_dec_short		# nothing but the stolen pair
	j	.Lxts_dec_enter

.align	16
.Lxts_dec_loop:
	# advance tweak: multiply by x in GF(2^128), poly 0x87
	lrvg	%r9,80+0(%r15)	# load the tweak in little-endian
	lrvg	%r11,80+8(%r15)
	lghi	%r1,0x87
	srag	%r0,%r11,63	# broadcast upper bit
	ngr	%r1,%r0		# rem
	algr	%r9,%r9
	alcgr	%r11,%r11
	xgr	%r9,%r1
	lrvgr	%r9,%r9		# flip byte order
	lrvgr	%r11,%r11
	srlg	%r8,%r9,32	# smash the tweak to 4x32-bits
	stg	%r9,80+0(%r15)	# save the tweak
	llgfr	%r9,%r9
	srlg	%r10,%r11,32
	stg	%r11,80+8(%r15)
	llgfr	%r11,%r11
.Lxts_dec_enter:
	x	%r8,0(%r2)	# tweak^=*(inp), pre-whiten
	x	%r9,4(%r2)
	x	%r10,8(%r2)
	x	%r11,12(%r2)
	stm	%r2,%r3,2*4(%r15)	# only two registers are changing
	la	%r4,0(%r5)
	bras	%r14,_s390x_AES_decrypt
	lm	%r2,%r5,2*4(%r15)
	x	%r8,80+0(%r15)	# ^=tweak, post-whiten
	x	%r9,80+4(%r15)
	x	%r10,80+8(%r15)
	x	%r11,80+12(%r15)
	st	%r8,0(%r4,%r2)
	st	%r9,4(%r4,%r2)
	st	%r10,8(%r4,%r2)
	st	%r11,12(%r4,%r2)
	la	%r2,16(%r2)
	brct	%r3,.Lxts_dec_loop

	llgc	%r3,7(%r15)	# low byte of saved len
	nill	%r3,0x0f	# %r3%16
	jz	.Lxts_dec_done

	# generate pair of tweaks...
	lrvg	%r9,80+0(%r15)	# load the tweak in little-endian
	lrvg	%r11,80+8(%r15)
	lghi	%r1,0x87
	srag	%r0,%r11,63	# broadcast upper bit
	ngr	%r1,%r0		# rem
	algr	%r9,%r9
	alcgr	%r11,%r11
	xgr	%r9,%r1
	lrvgr	%r6,%r9		# flip byte order
	lrvgr	%r7,%r11
	stmg	%r6,%r7,80(%r15)	# save the 1st tweak
	j	.Lxts_dec_2ndtweak

.align	16
.Lxts_dec_short:
	llgc	%r3,7(%r15)
	nill	%r3,0x0f	# %r3%16
	lrvg	%r9,80+0(%r15)	# load the tweak in little-endian
	lrvg	%r11,80+8(%r15)
.Lxts_dec_2ndtweak:
	lghi	%r1,0x87
	srag	%r0,%r11,63	# broadcast upper bit
	ngr	%r1,%r0		# rem
	algr	%r9,%r9
	alcgr	%r11,%r11
	xgr	%r9,%r1
	lrvgr	%r9,%r9		# flip byte order
	lrvgr	%r11,%r11
	srlg	%r8,%r9,32	# smash the tweak to 4x32-bits
	stg	%r9,80-16+0(%r15)	# save the 2nd tweak
	llgfr	%r9,%r9
	srlg	%r10,%r11,32
	stg	%r11,80-16+8(%r15)
	llgfr	%r11,%r11

	x	%r8,0(%r2)	# tweak_the_2nd^=*(inp)
	x	%r9,4(%r2)
	x	%r10,8(%r2)
	x	%r11,12(%r2)
	stm	%r2,%r3,2*4(%r15)
	la	%r4,0(%r5)
	bras	%r14,_s390x_AES_decrypt
	lm	%r2,%r5,2*4(%r15)
	x	%r8,80-16+0(%r15)	# ^=tweak_the_2nd
	x	%r9,80-16+4(%r15)
	x	%r10,80-16+8(%r15)
	x	%r11,80-16+12(%r15)
	st	%r8,0(%r4,%r2)
	st	%r9,4(%r4,%r2)
	st	%r10,8(%r4,%r2)
	st	%r11,12(%r4,%r2)

	# ciphertext stealing: swap the tail with the last full block
	la	%r7,0(%r4,%r2)	# put aside real %r4
.Lxts_dec_steal:
	llgc	%r0,16(%r2)
	llgc	%r1,0(%r4,%r2)
	stc	%r0,0(%r4,%r2)
	stc	%r1,16(%r4,%r2)
	la	%r2,1(%r2)
	brct	%r3,.Lxts_dec_steal
	la	%r4,0(%r7)	# restore real %r4

	lm	%r8,%r11,80(%r15)	# load the 1st tweak
	x	%r8,0(%r4)	# tweak^=*(inp)|stolen cipher-text
	x	%r9,4(%r4)
	x	%r10,8(%r4)
	x	%r11,12(%r4)
	st	%r4,4*4(%r15)
	la	%r4,0(%r5)
	bras	%r14,_s390x_AES_decrypt
	l	%r4,4*4(%r15)
	x	%r8,80+0(%r15)	# ^=tweak
	x	%r9,80+4(%r15)
	x	%r10,80+8(%r15)
	x	%r11,80+12(%r15)
	st	%r8,0(%r4)
	st	%r9,4(%r4)
	st	%r10,8(%r4)
	st	%r11,12(%r4)
	stg	%r15,80-16+0(%r15)	# wipe 2nd tweak
	stg	%r15,80-16+8(%r15)
.Lxts_dec_done:
	stg	%r15,80+0(%r15)	# wipe tweak
	stg	%r15,80+8(%r15)
	lm	%r6,%r14,6*4(%r15)
	br	%r14
.size	AES_xts_decrypt,.-AES_xts_decrypt
.string	"AES for s390x, CRYPTOGAMS by <appro@openssl.org>"