.text
# Single-block AES encrypt; cdecl: 4(%esp)=in, 8(%esp)=out, 12(%esp)=key schedule (round count at key+240).
.globl _aesni_encrypt
.align 4
_aesni_encrypt:
L_aesni_encrypt_begin:
#ifdef __CET__

.byte 243,15,30,251 # endbr32
#endif

movl 4(%esp),%eax
movl 12(%esp),%edx
movups (%eax),%xmm2
movl 240(%edx),%ecx
movl 8(%esp),%eax
movups (%edx),%xmm0
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
L000enc1_loop_1:
.byte 102,15,56,220,209 # aesenc %xmm1,%xmm2
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
jnz L000enc1_loop_1
.byte 102,15,56,221,209 # aesenclast %xmm1,%xmm2
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
movups %xmm2,(%eax)
pxor %xmm2,%xmm2
ret
# Single-block AES decrypt; same cdecl layout as _aesni_encrypt.
.globl _aesni_decrypt
.align 4
_aesni_decrypt:
L_aesni_decrypt_begin:
#ifdef __CET__

.byte 243,15,30,251
#endif

movl 4(%esp),%eax
movl 12(%esp),%edx
movups (%eax),%xmm2
movl 240(%edx),%ecx
movl 8(%esp),%eax
movups (%edx),%xmm0
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
L001dec1_loop_2:
.byte 102,15,56,222,209 # aesdec %xmm1,%xmm2
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
jnz L001dec1_loop_2
.byte 102,15,56,223,209 # aesdeclast %xmm1,%xmm2
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
movups %xmm2,(%eax)
pxor %xmm2,%xmm2
ret
# 2-block interleaved encrypt helper: %edx=key, %ecx=rounds, blocks in %xmm2,%xmm3.
.align 4
__aesni_encrypt2:
#ifdef __CET__

.byte 243,15,30,251
#endif

movups (%edx),%xmm0
shll $4,%ecx
movups 16(%edx),%xmm1
xorps %xmm0,%xmm2
pxor %xmm0,%xmm3
movups 32(%edx),%xmm0
leal 32(%edx,%ecx,1),%edx
negl %ecx
addl $16,%ecx
L002enc2_loop:
.byte 102,15,56,220,209
.byte 102,15,56,220,217
movups (%edx,%ecx,1),%xmm1
addl $32,%ecx
.byte 102,15,56,220,208
.byte 102,15,56,220,216
movups -16(%edx,%ecx,1),%xmm0
jnz L002enc2_loop
.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,221,208
.byte 102,15,56,221,216
ret
# 2-block interleaved decrypt helper.
.align 4
__aesni_decrypt2:
#ifdef __CET__

.byte 243,15,30,251
#endif

movups (%edx),%xmm0
shll $4,%ecx
movups 16(%edx),%xmm1
xorps %xmm0,%xmm2
pxor %xmm0,%xmm3
movups 32(%edx),%xmm0
leal 32(%edx,%ecx,1),%edx
negl %ecx
addl $16,%ecx
L003dec2_loop:
.byte 102,15,56,222,209
.byte 102,15,56,222,217
movups (%edx,%ecx,1),%xmm1
addl $32,%ecx
.byte 102,15,56,222,208
.byte 102,15,56,222,216
movups -16(%edx,%ecx,1),%xmm0
jnz L003dec2_loop
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,223,208
.byte 102,15,56,223,216
ret
# 3-block interleaved encrypt helper (%xmm2..%xmm4).
.align 4
__aesni_encrypt3:
#ifdef __CET__

.byte 243,15,30,251
#endif

movups (%edx),%xmm0
shll $4,%ecx
movups 16(%edx),%xmm1
xorps %xmm0,%xmm2
pxor %xmm0,%xmm3
pxor %xmm0,%xmm4
movups 32(%edx),%xmm0
leal 32(%edx,%ecx,1),%edx
negl %ecx
addl $16,%ecx
L004enc3_loop:
.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
movups (%edx,%ecx,1),%xmm1
addl $32,%ecx
.byte 102,15,56,220,208
.byte 102,15,56,220,216
.byte 102,15,56,220,224
movups -16(%edx,%ecx,1),%xmm0
jnz L004enc3_loop
.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,221,208
.byte 102,15,56,221,216
.byte 102,15,56,221,224
ret
# 3-block interleaved decrypt helper.
.align 4
__aesni_decrypt3:
#ifdef __CET__

.byte 243,15,30,251
#endif

movups (%edx),%xmm0
shll $4,%ecx
movups 16(%edx),%xmm1
xorps %xmm0,%xmm2
pxor %xmm0,%xmm3
pxor %xmm0,%xmm4
movups 32(%edx),%xmm0
leal 32(%edx,%ecx,1),%edx
negl %ecx
addl $16,%ecx
L005dec3_loop:
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
movups (%edx,%ecx,1),%xmm1
addl $32,%ecx
.byte 102,15,56,222,208
.byte 102,15,56,222,216
.byte 102,15,56,222,224
movups -16(%edx,%ecx,1),%xmm0
jnz L005dec3_loop
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
.byte 102,15,56,223,208
.byte 102,15,56,223,216
.byte 102,15,56,223,224
ret
# 4-block interleaved encrypt helper (%xmm2..%xmm5).
.align 4
__aesni_encrypt4:
#ifdef __CET__

.byte 243,15,30,251
#endif

movups (%edx),%xmm0
movups 16(%edx),%xmm1
shll $4,%ecx
xorps %xmm0,%xmm2
pxor %xmm0,%xmm3
pxor %xmm0,%xmm4
pxor %xmm0,%xmm5
movups 32(%edx),%xmm0
leal 32(%edx,%ecx,1),%edx
negl %ecx
.byte 15,31,64,0 # nopl 0(%eax) (alignment padding)
addl $16,%ecx
L006enc4_loop:
.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,220,233
movups (%edx,%ecx,1),%xmm1
addl $32,%ecx
.byte 102,15,56,220,208
.byte 102,15,56,220,216
.byte 102,15,56,220,224
.byte 102,15,56,220,232
movups -16(%edx,%ecx,1),%xmm0
jnz L006enc4_loop
.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,220,233
.byte 102,15,56,221,208
.byte 102,15,56,221,216
.byte 102,15,56,221,224
.byte 102,15,56,221,232
ret
# 4-block interleaved decrypt helper.
.align 4
__aesni_decrypt4:
#ifdef __CET__

.byte 243,15,30,251
#endif

movups (%edx),%xmm0
movups 16(%edx),%xmm1
shll $4,%ecx
xorps %xmm0,%xmm2
pxor %xmm0,%xmm3
pxor %xmm0,%xmm4
pxor %xmm0,%xmm5
movups 32(%edx),%xmm0
leal 32(%edx,%ecx,1),%edx
negl %ecx
.byte 15,31,64,0
addl $16,%ecx
L007dec4_loop:
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
.byte 102,15,56,222,233
movups (%edx,%ecx,1),%xmm1
addl $32,%ecx
.byte 102,15,56,222,208
.byte 102,15,56,222,216
.byte 102,15,56,222,224
.byte 102,15,56,222,232
movups -16(%edx,%ecx,1),%xmm0
jnz L007dec4_loop
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,223,208
.byte 102,15,56,223,216
.byte 102,15,56,223,224
.byte 102,15,56,223,232
ret
# 6-block interleaved encrypt helper (%xmm2..%xmm7); callers may jump in at
# L_aesni_encrypt6_enter after applying the initial rounds themselves.
.align 4
__aesni_encrypt6:
#ifdef __CET__

.byte 243,15,30,251
#endif

movups (%edx),%xmm0
shll $4,%ecx
movups 16(%edx),%xmm1
xorps %xmm0,%xmm2
pxor %xmm0,%xmm3
pxor %xmm0,%xmm4
.byte 102,15,56,220,209
pxor %xmm0,%xmm5
pxor %xmm0,%xmm6
.byte 102,15,56,220,217
leal 32(%edx,%ecx,1),%edx
negl %ecx
.byte 102,15,56,220,225
pxor %xmm0,%xmm7
movups (%edx,%ecx,1),%xmm0
addl $16,%ecx
jmp L008_aesni_encrypt6_inner
.align 4,0x90
L009enc6_loop:
.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
L008_aesni_encrypt6_inner:
.byte 102,15,56,220,233
.byte 102,15,56,220,241
.byte 102,15,56,220,249
L_aesni_encrypt6_enter:
movups (%edx,%ecx,1),%xmm1
addl $32,%ecx
.byte 102,15,56,220,208
.byte 102,15,56,220,216
.byte 102,15,56,220,224
.byte 102,15,56,220,232
.byte 102,15,56,220,240
.byte 102,15,56,220,248
movups -16(%edx,%ecx,1),%xmm0
jnz L009enc6_loop
.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,220,233
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.byte 102,15,56,221,208
.byte 102,15,56,221,216
.byte 102,15,56,221,224
.byte 102,15,56,221,232
.byte 102,15,56,221,240
.byte 102,15,56,221,248
ret
# 6-block interleaved decrypt helper with matching L_aesni_decrypt6_enter entry.
.align 4
__aesni_decrypt6:
#ifdef __CET__

.byte 243,15,30,251
#endif

movups (%edx),%xmm0
shll $4,%ecx
movups 16(%edx),%xmm1
xorps %xmm0,%xmm2
pxor %xmm0,%xmm3
pxor %xmm0,%xmm4
.byte 102,15,56,222,209
pxor %xmm0,%xmm5
pxor %xmm0,%xmm6
.byte 102,15,56,222,217
leal 32(%edx,%ecx,1),%edx
negl %ecx
.byte 102,15,56,222,225
pxor %xmm0,%xmm7
movups (%edx,%ecx,1),%xmm0
addl $16,%ecx
jmp L010_aesni_decrypt6_inner
.align 4,0x90
L011dec6_loop:
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
L010_aesni_decrypt6_inner:
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
L_aesni_decrypt6_enter:
movups (%edx,%ecx,1),%xmm1
addl $32,%ecx
.byte 102,15,56,222,208
.byte 102,15,56,222,216
.byte 102,15,56,222,224
.byte 102,15,56,222,232
.byte 102,15,56,222,240
.byte 102,15,56,222,248
movups -16(%edx,%ecx,1),%xmm0
jnz L011dec6_loop
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
.byte 102,15,56,223,208
.byte 102,15,56,223,216
.byte 102,15,56,223,224
.byte 102,15,56,223,232
.byte 102,15,56,223,240
.byte 102,15,56,223,248
ret
# ECB; cdecl (after the four pushes): 20(%esp)=in, 24(%esp)=out,
# 28(%esp)=len, 32(%esp)=key, 36(%esp)=enc (nonzero=encrypt).
.globl _aesni_ecb_encrypt
.align 4
_aesni_ecb_encrypt:
L_aesni_ecb_encrypt_begin:
#ifdef __CET__

.byte 243,15,30,251
#endif

pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%esi
movl 24(%esp),%edi
movl 28(%esp),%eax
movl 32(%esp),%edx
movl 36(%esp),%ebx
andl $-16,%eax
jz L012ecb_ret
movl 240(%edx),%ecx
testl %ebx,%ebx
jz L013ecb_decrypt
movl %edx,%ebp
movl %ecx,%ebx
cmpl $96,%eax
jb L014ecb_enc_tail
movdqu (%esi),%xmm2
movdqu 16(%esi),%xmm3
movdqu 32(%esi),%xmm4
movdqu 48(%esi),%xmm5
movdqu 64(%esi),%xmm6
movdqu 80(%esi),%xmm7
leal 96(%esi),%esi
subl $96,%eax
jmp L015ecb_enc_loop6_enter
.align 4,0x90
L016ecb_enc_loop6:
movups %xmm2,(%edi)
movdqu (%esi),%xmm2
movups %xmm3,16(%edi)
movdqu 16(%esi),%xmm3
movups %xmm4,32(%edi)
movdqu 32(%esi),%xmm4
movups %xmm5,48(%edi)
movdqu 48(%esi),%xmm5
movups %xmm6,64(%edi)
movdqu 64(%esi),%xmm6
movups %xmm7,80(%edi)
leal 96(%edi),%edi
movdqu 80(%esi),%xmm7
leal 96(%esi),%esi
L015ecb_enc_loop6_enter:
call __aesni_encrypt6
movl %ebp,%edx
movl %ebx,%ecx
subl $96,%eax
jnc L016ecb_enc_loop6
movups %xmm2,(%edi)
movups %xmm3,16(%edi)
movups %xmm4,32(%edi)
movups %xmm5,48(%edi)
movups %xmm6,64(%edi)
movups %xmm7,80(%edi)
leal 96(%edi),%edi
addl $96,%eax
jz L012ecb_ret
L014ecb_enc_tail:
movups (%esi),%xmm2
cmpl $32,%eax
jb L017ecb_enc_one
movups 16(%esi),%xmm3
je L018ecb_enc_two
movups 32(%esi),%xmm4
cmpl $64,%eax
jb L019ecb_enc_three
movups 48(%esi),%xmm5
je L020ecb_enc_four
movups 64(%esi),%xmm6
xorps %xmm7,%xmm7
call __aesni_encrypt6
movups %xmm2,(%edi)
movups %xmm3,16(%edi)
movups %xmm4,32(%edi)
movups %xmm5,48(%edi)
movups %xmm6,64(%edi)
jmp L012ecb_ret
.align 4,0x90
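# A single remaining block is handled by the plain one-block round loop
# below; larger tails fan out to the 2/3/4/6-block helpers above.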
L017ecb_enc_one:
movups (%edx),%xmm0
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
L021enc1_loop_3:
.byte 102,15,56,220,209
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
jnz L021enc1_loop_3
.byte 102,15,56,221,209
movups %xmm2,(%edi)
jmp L012ecb_ret
.align 4,0x90
L018ecb_enc_two:
call __aesni_encrypt2
movups %xmm2,(%edi)
movups %xmm3,16(%edi)
jmp L012ecb_ret
.align 4,0x90
L019ecb_enc_three:
call __aesni_encrypt3
movups %xmm2,(%edi)
movups %xmm3,16(%edi)
movups %xmm4,32(%edi)
jmp L012ecb_ret
.align 4,0x90
L020ecb_enc_four:
call __aesni_encrypt4
movups %xmm2,(%edi)
movups %xmm3,16(%edi)
movups %xmm4,32(%edi)
movups %xmm5,48(%edi)
jmp L012ecb_ret
.align 4,0x90
L013ecb_decrypt:
movl %edx,%ebp
movl %ecx,%ebx
cmpl $96,%eax
jb L022ecb_dec_tail
movdqu (%esi),%xmm2
movdqu 16(%esi),%xmm3
movdqu 32(%esi),%xmm4
movdqu 48(%esi),%xmm5
movdqu 64(%esi),%xmm6
movdqu 80(%esi),%xmm7
leal 96(%esi),%esi
subl $96,%eax
jmp L023ecb_dec_loop6_enter
.align 4,0x90
L024ecb_dec_loop6:
movups %xmm2,(%edi)
movdqu (%esi),%xmm2
movups %xmm3,16(%edi)
movdqu 16(%esi),%xmm3
movups %xmm4,32(%edi)
movdqu 32(%esi),%xmm4
movups %xmm5,48(%edi)
movdqu 48(%esi),%xmm5
movups %xmm6,64(%edi)
movdqu 64(%esi),%xmm6
movups %xmm7,80(%edi)
leal 96(%edi),%edi
movdqu 80(%esi),%xmm7
leal 96(%esi),%esi
L023ecb_dec_loop6_enter:
call __aesni_decrypt6
movl %ebp,%edx
movl %ebx,%ecx
subl $96,%eax
jnc L024ecb_dec_loop6
movups %xmm2,(%edi)
movups %xmm3,16(%edi)
movups %xmm4,32(%edi)
movups %xmm5,48(%edi)
movups %xmm6,64(%edi)
movups %xmm7,80(%edi)
leal 96(%edi),%edi
addl $96,%eax
jz L012ecb_ret
L022ecb_dec_tail:
movups (%esi),%xmm2
cmpl $32,%eax
jb L025ecb_dec_one
movups 16(%esi),%xmm3
je L026ecb_dec_two
movups 32(%esi),%xmm4
cmpl $64,%eax
jb L027ecb_dec_three
movups 48(%esi),%xmm5
je L028ecb_dec_four
movups 64(%esi),%xmm6
xorps %xmm7,%xmm7
call __aesni_decrypt6
movups %xmm2,(%edi)
movups %xmm3,16(%edi)
movups %xmm4,32(%edi)
movups %xmm5,48(%edi)
movups %xmm6,64(%edi)
jmp L012ecb_ret
.align 4,0x90
L025ecb_dec_one:
movups (%edx),%xmm0
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
L029dec1_loop_4:
.byte 102,15,56,222,209
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
jnz L029dec1_loop_4
.byte 102,15,56,223,209
movups %xmm2,(%edi)
jmp L012ecb_ret
.align 4,0x90
L026ecb_dec_two:
call __aesni_decrypt2
movups %xmm2,(%edi)
movups %xmm3,16(%edi)
jmp L012ecb_ret
.align 4,0x90
L027ecb_dec_three:
call __aesni_decrypt3
movups %xmm2,(%edi)
movups %xmm3,16(%edi)
movups %xmm4,32(%edi)
jmp L012ecb_ret
.align 4,0x90
L028ecb_dec_four:
call __aesni_decrypt4
movups %xmm2,(%edi)
movups %xmm3,16(%edi)
movups %xmm4,32(%edi)
movups %xmm5,48(%edi)
L012ecb_ret:
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
pxor %xmm7,%xmm7
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.globl _aesni_ccm64_encrypt_blocks
.align 4
_aesni_ccm64_encrypt_blocks:
L_aesni_ccm64_encrypt_blocks_begin:
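# Presumed cdecl layout, inferred from the stack loads below (after the
# four pushes): 20(%esp)=in, 24(%esp)=out, 28(%esp)=blocks, 32(%esp)=key,
# 36(%esp)=ivec, 40(%esp)=cmac. The CMAC state is carried in %xmm3 and the
# counter block in %xmm7.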
#ifdef __CET__

.byte 243,15,30,251
#endif

pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%esi
movl 24(%esp),%edi
movl 28(%esp),%eax
movl 32(%esp),%edx
movl 36(%esp),%ebx
movl 40(%esp),%ecx
movl %esp,%ebp
subl $60,%esp
andl $-16,%esp
movl %ebp,48(%esp)
movdqu (%ebx),%xmm7
movdqu (%ecx),%xmm3
movl 240(%edx),%ecx
# Build the big-endian byte-swap mask (0x0c0d0e0f...) at (%esp) for pshufb.
movl $202182159,(%esp)
movl $134810123,4(%esp)
movl $67438087,8(%esp)
movl $66051,12(%esp)
movl $1,%ebx
xorl %ebp,%ebp
movl %ebx,16(%esp)
movl %ebp,20(%esp)
movl %ebp,24(%esp)
movl %ebp,28(%esp)
shll $4,%ecx
movl $16,%ebx
leal (%edx),%ebp
movdqa (%esp),%xmm5
movdqa %xmm7,%xmm2
leal 32(%edx,%ecx,1),%edx
subl %ecx,%ebx
.byte 102,15,56,0,253 # pshufb %xmm5,%xmm7
L030ccm64_enc_outer:
movups (%ebp),%xmm0
movl %ebx,%ecx
movups (%esi),%xmm6
xorps %xmm0,%xmm2
movups 16(%ebp),%xmm1
xorps %xmm6,%xmm0
xorps %xmm0,%xmm3
movups 32(%ebp),%xmm0
L031ccm64_enc2_loop:
.byte 102,15,56,220,209
.byte 102,15,56,220,217
movups (%edx,%ecx,1),%xmm1
addl $32,%ecx
.byte 102,15,56,220,208
.byte 102,15,56,220,216
movups -16(%edx,%ecx,1),%xmm0
jnz L031ccm64_enc2_loop
.byte 102,15,56,220,209
.byte 102,15,56,220,217
paddq 16(%esp),%xmm7
decl %eax
.byte 102,15,56,221,208
.byte 102,15,56,221,216
leal 16(%esi),%esi
xorps %xmm2,%xmm6
movdqa %xmm7,%xmm2
movups %xmm6,(%edi)
.byte 102,15,56,0,213 # pshufb %xmm5,%xmm2
leal 16(%edi),%edi
jnz L030ccm64_enc_outer
movl 48(%esp),%esp
movl 40(%esp),%edi
movups %xmm3,(%edi)
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
pxor %xmm7,%xmm7
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
# Same cdecl layout as the encrypt variant above.
.globl _aesni_ccm64_decrypt_blocks
.align 4
_aesni_ccm64_decrypt_blocks:
L_aesni_ccm64_decrypt_blocks_begin:
#ifdef __CET__

.byte 243,15,30,251
#endif

pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%esi
movl 24(%esp),%edi
movl 28(%esp),%eax
movl 32(%esp),%edx
movl 36(%esp),%ebx
movl 40(%esp),%ecx
movl %esp,%ebp
subl $60,%esp
andl $-16,%esp
movl %ebp,48(%esp)
movdqu (%ebx),%xmm7
movdqu (%ecx),%xmm3
movl 240(%edx),%ecx
movl $202182159,(%esp)
movl $134810123,4(%esp)
movl $67438087,8(%esp)
movl $66051,12(%esp)
movl $1,%ebx
xorl %ebp,%ebp
movl %ebx,16(%esp)
movl %ebp,20(%esp)
movl %ebp,24(%esp)
movl %ebp,28(%esp)
movdqa (%esp),%xmm5
movdqa %xmm7,%xmm2
movl %edx,%ebp
movl %ecx,%ebx
.byte 102,15,56,0,253
movups (%edx),%xmm0
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
L032enc1_loop_5:
.byte 102,15,56,220,209
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
jnz L032enc1_loop_5
.byte 102,15,56,221,209
shll $4,%ebx
movl $16,%ecx
movups (%esi),%xmm6
paddq 16(%esp),%xmm7
leal 16(%esi),%esi
subl %ebx,%ecx
leal 32(%ebp,%ebx,1),%edx
movl %ecx,%ebx
jmp L033ccm64_dec_outer
.align 4,0x90
L033ccm64_dec_outer:
xorps %xmm2,%xmm6
movdqa %xmm7,%xmm2
movups %xmm6,(%edi)
leal 16(%edi),%edi
.byte 102,15,56,0,213
subl $1,%eax
jz L034ccm64_dec_break
movups (%ebp),%xmm0
movl %ebx,%ecx
movups 16(%ebp),%xmm1
xorps %xmm0,%xmm6
xorps %xmm0,%xmm2
xorps %xmm6,%xmm3
movups 32(%ebp),%xmm0
L035ccm64_dec2_loop:
.byte 102,15,56,220,209
.byte 102,15,56,220,217
movups (%edx,%ecx,1),%xmm1
addl $32,%ecx
.byte 102,15,56,220,208
.byte 102,15,56,220,216
movups -16(%edx,%ecx,1),%xmm0
jnz L035ccm64_dec2_loop
movups (%esi),%xmm6
paddq 16(%esp),%xmm7
.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,221,208
.byte 102,15,56,221,216
leal 16(%esi),%esi
jmp L033ccm64_dec_outer
.align 4,0x90
L034ccm64_dec_break:
movl 240(%ebp),%ecx
movl %ebp,%edx
movups (%edx),%xmm0
movups 16(%edx),%xmm1
xorps %xmm0,%xmm6
leal 32(%edx),%edx
xorps %xmm6,%xmm3
L036enc1_loop_6:
.byte 102,15,56,220,217
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
jnz L036enc1_loop_6
.byte 102,15,56,221,217
movl 48(%esp),%esp
movl 40(%esp),%edi
movups %xmm3,(%edi)
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
pxor %xmm7,%xmm7
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
# CTR32; cdecl: 20(%esp)=in, 24(%esp)=out, 28(%esp)=blocks, 32(%esp)=key,
# 36(%esp)=ivec.
.globl _aesni_ctr32_encrypt_blocks
.align 4
_aesni_ctr32_encrypt_blocks:
L_aesni_ctr32_encrypt_blocks_begin:
#ifdef __CET__

.byte 243,15,30,251
#endif

pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%esi
movl 24(%esp),%edi
movl 28(%esp),%eax
movl 32(%esp),%edx
movl 36(%esp),%ebx
movl %esp,%ebp
subl $88,%esp
andl $-16,%esp
movl %ebp,80(%esp)
cmpl $1,%eax
je L037ctr32_one_shortcut
movdqu (%ebx),%xmm7
movl $202182159,(%esp)
movl $134810123,4(%esp)
movl $67438087,8(%esp)
movl $66051,12(%esp)
movl $6,%ecx
xorl %ebp,%ebp
movl %ecx,16(%esp)
movl %ecx,20(%esp)
movl %ecx,24(%esp)
movl %ebp,28(%esp)
.byte 102,15,58,22,251,3 # pextrd $3,%xmm7,%ebx
.byte 102,15,58,34,253,3 # pinsrd $3,%ebp,%xmm7
movl 240(%edx),%ecx
bswap %ebx
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
movdqa (%esp),%xmm2
.byte 102,15,58,34,195,0
leal 3(%ebx),%ebp
.byte 102,15,58,34,205,0
incl %ebx
.byte 102,15,58,34,195,1
incl %ebp
.byte 102,15,58,34,205,1
incl %ebx
.byte 102,15,58,34,195,2
incl %ebp
.byte 102,15,58,34,205,2
movdqa %xmm0,48(%esp)
.byte 102,15,56,0,194 # pshufb %xmm2,%xmm0
movdqu (%edx),%xmm6
movdqa %xmm1,64(%esp)
.byte 102,15,56,0,202 # pshufb %xmm2,%xmm1
pshufd $192,%xmm0,%xmm2
pshufd $128,%xmm0,%xmm3
cmpl $6,%eax
jb L038ctr32_tail
pxor %xmm6,%xmm7
shll $4,%ecx
movl $16,%ebx
movdqa %xmm7,32(%esp)
movl %edx,%ebp
subl %ecx,%ebx
leal 32(%edx,%ecx,1),%edx
subl $6,%eax
jmp L039ctr32_loop6
.align 4,0x90
L039ctr32_loop6:
pshufd $64,%xmm0,%xmm4
movdqa 32(%esp),%xmm0
pshufd $192,%xmm1,%xmm5
pxor %xmm0,%xmm2
pshufd $128,%xmm1,%xmm6
pxor %xmm0,%xmm3
pshufd $64,%xmm1,%xmm7
movups 16(%ebp),%xmm1
pxor %xmm0,%xmm4
pxor %xmm0,%xmm5
.byte 102,15,56,220,209
pxor %xmm0,%xmm6
pxor %xmm0,%xmm7
.byte 102,15,56,220,217
movups 32(%ebp),%xmm0
movl %ebx,%ecx
.byte 102,15,56,220,225
.byte 102,15,56,220,233
.byte 102,15,56,220,241
.byte 102,15,56,220,249
call L_aesni_encrypt6_enter
movups (%esi),%xmm1
movups 16(%esi),%xmm0
xorps %xmm1,%xmm2
movups 32(%esi),%xmm1
xorps %xmm0,%xmm3
movups %xmm2,(%edi)
movdqa 16(%esp),%xmm0
xorps %xmm1,%xmm4
movdqa 64(%esp),%xmm1
movups %xmm3,16(%edi)
movups %xmm4,32(%edi)
paddd %xmm0,%xmm1
paddd 48(%esp),%xmm0
movdqa (%esp),%xmm2
movups 48(%esi),%xmm3
movups 64(%esi),%xmm4
xorps %xmm3,%xmm5
movups 80(%esi),%xmm3
leal 96(%esi),%esi
movdqa %xmm0,48(%esp)
.byte 102,15,56,0,194
xorps %xmm4,%xmm6
movups %xmm5,48(%edi)
xorps %xmm3,%xmm7
movdqa %xmm1,64(%esp)
.byte 102,15,56,0,202
movups %xmm6,64(%edi)
pshufd $192,%xmm0,%xmm2
movups %xmm7,80(%edi)
leal 96(%edi),%edi
pshufd $128,%xmm0,%xmm3
subl $6,%eax
jnc L039ctr32_loop6
addl $6,%eax
jz L040ctr32_ret
movdqu (%ebp),%xmm7
movl %ebp,%edx
pxor 32(%esp),%xmm7
movl 240(%ebp),%ecx
L038ctr32_tail:
por %xmm7,%xmm2
cmpl $2,%eax
jb L041ctr32_one
pshufd $64,%xmm0,%xmm4
por %xmm7,%xmm3
je L042ctr32_two
pshufd $192,%xmm1,%xmm5
por %xmm7,%xmm4
cmpl $4,%eax
jb L043ctr32_three
pshufd $128,%xmm1,%xmm6
por %xmm7,%xmm5
je L044ctr32_four
por %xmm7,%xmm6
call __aesni_encrypt6
movups (%esi),%xmm1
movups 16(%esi),%xmm0
xorps %xmm1,%xmm2
movups 32(%esi),%xmm1
xorps %xmm0,%xmm3
movups 48(%esi),%xmm0
xorps %xmm1,%xmm4
movups 64(%esi),%xmm1
xorps %xmm0,%xmm5
movups %xmm2,(%edi)
xorps %xmm1,%xmm6
movups %xmm3,16(%edi)
movups %xmm4,32(%edi)
movups %xmm5,48(%edi)
movups %xmm6,64(%edi)
jmp L040ctr32_ret
.align 4,0x90
L037ctr32_one_shortcut:
movups (%ebx),%xmm2
movl 240(%edx),%ecx
L041ctr32_one:
movups (%edx),%xmm0
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
L045enc1_loop_7:
.byte 102,15,56,220,209
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
jnz L045enc1_loop_7
.byte 102,15,56,221,209
movups (%esi),%xmm6
xorps %xmm2,%xmm6
movups %xmm6,(%edi)
jmp L040ctr32_ret
.align 4,0x90
L042ctr32_two:
call __aesni_encrypt2
movups (%esi),%xmm5
movups 16(%esi),%xmm6
xorps %xmm5,%xmm2
xorps %xmm6,%xmm3
movups %xmm2,(%edi)
movups %xmm3,16(%edi)
jmp L040ctr32_ret
.align 4,0x90
L043ctr32_three:
call __aesni_encrypt3
movups (%esi),%xmm5
movups 16(%esi),%xmm6
xorps %xmm5,%xmm2
movups 32(%esi),%xmm7
xorps %xmm6,%xmm3
movups %xmm2,(%edi)
xorps %xmm7,%xmm4
movups %xmm3,16(%edi)
movups %xmm4,32(%edi)
jmp L040ctr32_ret
.align 4,0x90
L044ctr32_four:
call __aesni_encrypt4
movups (%esi),%xmm6
movups 16(%esi),%xmm7
movups 32(%esi),%xmm1
xorps %xmm6,%xmm2
movups 48(%esi),%xmm0
xorps %xmm7,%xmm3
movups %xmm2,(%edi)
xorps %xmm1,%xmm4
movups %xmm3,16(%edi)
xorps %xmm0,%xmm5
movups %xmm4,32(%edi)
movups %xmm5,48(%edi)
L040ctr32_ret:
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
movdqa %xmm0,32(%esp)
pxor %xmm5,%xmm5
movdqa %xmm0,48(%esp)
pxor %xmm6,%xmm6
movdqa %xmm0,64(%esp)
pxor %xmm7,%xmm7
movl 80(%esp),%esp
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
# XTS encrypt; cdecl: 20(%esp)=in, 24(%esp)=out, 28(%esp)=len,
# 32(%esp)=key1, 36(%esp)=key2, 40(%esp)=iv.
.globl _aesni_xts_encrypt
.align 4
_aesni_xts_encrypt:
L_aesni_xts_encrypt_begin:
#ifdef __CET__

.byte 243,15,30,251
#endif

pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 36(%esp),%edx
movl 40(%esp),%esi
movl 240(%edx),%ecx
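# The one-block loop below encrypts the 16-byte IV (40(%esp)) under the
# second key (36(%esp)) to form the initial XTS tweak in %xmm2.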
movups (%esi),%xmm2
movups (%edx),%xmm0
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
L046enc1_loop_8:
.byte 102,15,56,220,209
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
jnz L046enc1_loop_8
.byte 102,15,56,221,209
movl 20(%esp),%esi
movl 24(%esp),%edi
movl 28(%esp),%eax
movl 32(%esp),%edx
movl %esp,%ebp
subl $120,%esp
movl 240(%edx),%ecx
andl $-16,%esp
movl $135,96(%esp)
movl $0,100(%esp)
movl $1,104(%esp)
movl $0,108(%esp)
movl %eax,112(%esp)
movl %ebp,116(%esp)
movdqa %xmm2,%xmm1
pxor %xmm0,%xmm0
movdqa 96(%esp),%xmm3
pcmpgtd %xmm1,%xmm0
andl $-16,%eax
movl %edx,%ebp
movl %ecx,%ebx
subl $96,%eax
jc L047xts_enc_short
shll $4,%ecx
movl $16,%ebx
subl %ecx,%ebx
leal 32(%edx,%ecx,1),%edx
jmp L048xts_enc_loop6
.align 4,0x90
L048xts_enc_loop6:
pshufd $19,%xmm0,%xmm2
pxor %xmm0,%xmm0
movdqa %xmm1,(%esp)
paddq %xmm1,%xmm1
pand %xmm3,%xmm2
pcmpgtd %xmm1,%xmm0
pxor %xmm2,%xmm1
pshufd $19,%xmm0,%xmm2
pxor %xmm0,%xmm0
movdqa %xmm1,16(%esp)
paddq %xmm1,%xmm1
pand %xmm3,%xmm2
pcmpgtd %xmm1,%xmm0
pxor %xmm2,%xmm1
pshufd $19,%xmm0,%xmm2
pxor %xmm0,%xmm0
movdqa %xmm1,32(%esp)
paddq %xmm1,%xmm1
pand %xmm3,%xmm2
pcmpgtd %xmm1,%xmm0
pxor %xmm2,%xmm1
pshufd $19,%xmm0,%xmm2
pxor %xmm0,%xmm0
movdqa %xmm1,48(%esp)
paddq %xmm1,%xmm1
pand %xmm3,%xmm2
pcmpgtd %xmm1,%xmm0
pxor %xmm2,%xmm1
pshufd $19,%xmm0,%xmm7
movdqa %xmm1,64(%esp)
paddq %xmm1,%xmm1
movups (%ebp),%xmm0
pand %xmm3,%xmm7
movups (%esi),%xmm2
pxor %xmm1,%xmm7
movl %ebx,%ecx
movdqu 16(%esi),%xmm3
xorps %xmm0,%xmm2
movdqu 32(%esi),%xmm4
pxor %xmm0,%xmm3
movdqu 48(%esi),%xmm5
pxor %xmm0,%xmm4
movdqu 64(%esi),%xmm6
pxor %xmm0,%xmm5
movdqu 80(%esi),%xmm1
pxor %xmm0,%xmm6
leal 96(%esi),%esi
pxor (%esp),%xmm2
movdqa %xmm7,80(%esp)
pxor %xmm1,%xmm7
movups 16(%ebp),%xmm1
pxor 16(%esp),%xmm3
pxor 32(%esp),%xmm4
.byte 102,15,56,220,209
pxor 48(%esp),%xmm5
pxor 64(%esp),%xmm6
.byte 102,15,56,220,217
pxor %xmm0,%xmm7
movups 32(%ebp),%xmm0
.byte 102,15,56,220,225
.byte 102,15,56,220,233
.byte 102,15,56,220,241
.byte 102,15,56,220,249
call L_aesni_encrypt6_enter
movdqa 80(%esp),%xmm1
pxor %xmm0,%xmm0
xorps (%esp),%xmm2
pcmpgtd %xmm1,%xmm0
xorps 16(%esp),%xmm3
movups %xmm2,(%edi)
xorps 32(%esp),%xmm4
movups %xmm3,16(%edi)
xorps 48(%esp),%xmm5
movups %xmm4,32(%edi)
xorps 64(%esp),%xmm6
movups %xmm5,48(%edi)
xorps %xmm1,%xmm7
movups %xmm6,64(%edi)
pshufd $19,%xmm0,%xmm2
movups %xmm7,80(%edi)
leal 96(%edi),%edi
movdqa 96(%esp),%xmm3
pxor %xmm0,%xmm0
paddq %xmm1,%xmm1
pand %xmm3,%xmm2
pcmpgtd %xmm1,%xmm0
pxor %xmm2,%xmm1
subl $96,%eax
jnc L048xts_enc_loop6
movl 240(%ebp),%ecx
movl %ebp,%edx
movl %ecx,%ebx
L047xts_enc_short:
addl $96,%eax
jz L049xts_enc_done6x
movdqa %xmm1,%xmm5
cmpl $32,%eax
jb L050xts_enc_one
pshufd $19,%xmm0,%xmm2
pxor %xmm0,%xmm0
paddq %xmm1,%xmm1
pand %xmm3,%xmm2
pcmpgtd %xmm1,%xmm0
pxor %xmm2,%xmm1
je L051xts_enc_two
pshufd $19,%xmm0,%xmm2
pxor %xmm0,%xmm0
movdqa %xmm1,%xmm6
paddq %xmm1,%xmm1
pand %xmm3,%xmm2
pcmpgtd %xmm1,%xmm0
pxor %xmm2,%xmm1
cmpl $64,%eax
jb L052xts_enc_three
pshufd $19,%xmm0,%xmm2
pxor %xmm0,%xmm0
movdqa %xmm1,%xmm7
paddq %xmm1,%xmm1
pand %xmm3,%xmm2
pcmpgtd %xmm1,%xmm0
pxor %xmm2,%xmm1
movdqa %xmm5,(%esp)
movdqa %xmm6,16(%esp)
je L053xts_enc_four
movdqa %xmm7,32(%esp)
pshufd $19,%xmm0,%xmm7
movdqa %xmm1,48(%esp)
paddq %xmm1,%xmm1
pand %xmm3,%xmm7
pxor %xmm1,%xmm7
movdqu (%esi),%xmm2
movdqu 16(%esi),%xmm3
movdqu 32(%esi),%xmm4
pxor (%esp),%xmm2
movdqu 48(%esi),%xmm5
pxor 16(%esp),%xmm3
movdqu 64(%esi),%xmm6
pxor 32(%esp),%xmm4
leal 80(%esi),%esi
pxor 48(%esp),%xmm5
movdqa %xmm7,64(%esp)
pxor %xmm7,%xmm6
call __aesni_encrypt6
movaps 64(%esp),%xmm1
xorps (%esp),%xmm2
xorps 16(%esp),%xmm3
xorps 32(%esp),%xmm4
movups %xmm2,(%edi)
xorps 48(%esp),%xmm5
movups %xmm3,16(%edi)
xorps %xmm1,%xmm6
movups %xmm4,32(%edi)
movups %xmm5,48(%edi)
movups %xmm6,64(%edi)
leal 80(%edi),%edi
jmp L054xts_enc_done
.align 4,0x90
L050xts_enc_one:
movups (%esi),%xmm2
leal 16(%esi),%esi
xorps %xmm5,%xmm2
movups (%edx),%xmm0
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
L055enc1_loop_9:
.byte 102,15,56,220,209
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
jnz L055enc1_loop_9
.byte 102,15,56,221,209
xorps %xmm5,%xmm2
movups %xmm2,(%edi)
leal 16(%edi),%edi
movdqa %xmm5,%xmm1
jmp L054xts_enc_done
.align 4,0x90
L051xts_enc_two:
movaps %xmm1,%xmm6
movups (%esi),%xmm2
movups 16(%esi),%xmm3
leal 32(%esi),%esi
xorps %xmm5,%xmm2
xorps %xmm6,%xmm3
call __aesni_encrypt2
xorps %xmm5,%xmm2
xorps %xmm6,%xmm3
movups %xmm2,(%edi)
movups %xmm3,16(%edi)
leal 32(%edi),%edi
movdqa %xmm6,%xmm1
jmp L054xts_enc_done
.align 4,0x90
L052xts_enc_three:
movaps %xmm1,%xmm7
movups (%esi),%xmm2
movups 16(%esi),%xmm3
movups 32(%esi),%xmm4
leal 48(%esi),%esi
xorps %xmm5,%xmm2
xorps %xmm6,%xmm3
xorps %xmm7,%xmm4
call __aesni_encrypt3
xorps %xmm5,%xmm2
xorps %xmm6,%xmm3
xorps %xmm7,%xmm4
movups %xmm2,(%edi)
movups %xmm3,16(%edi)
movups %xmm4,32(%edi)
leal 48(%edi),%edi
movdqa %xmm7,%xmm1
jmp L054xts_enc_done
.align 4,0x90
L053xts_enc_four:
movaps %xmm1,%xmm6
movups (%esi),%xmm2
movups 16(%esi),%xmm3
movups 32(%esi),%xmm4
xorps (%esp),%xmm2
movups 48(%esi),%xmm5
leal 64(%esi),%esi
xorps 16(%esp),%xmm3
xorps %xmm7,%xmm4
xorps %xmm6,%xmm5
call __aesni_encrypt4
xorps (%esp),%xmm2
xorps 16(%esp),%xmm3
xorps %xmm7,%xmm4
movups %xmm2,(%edi)
xorps %xmm6,%xmm5
movups %xmm3,16(%edi)
movups %xmm4,32(%edi)
movups %xmm5,48(%edi)
leal 64(%edi),%edi
movdqa %xmm6,%xmm1
jmp L054xts_enc_done
.align 4,0x90
L049xts_enc_done6x:
movl 112(%esp),%eax
andl $15,%eax
jz L056xts_enc_ret
movdqa %xmm1,%xmm5
movl %eax,112(%esp)
jmp L057xts_enc_steal
.align 4,0x90
L054xts_enc_done:
movl 112(%esp),%eax
pxor %xmm0,%xmm0
andl $15,%eax
jz L056xts_enc_ret
pcmpgtd %xmm1,%xmm0
movl %eax,112(%esp)
pshufd $19,%xmm0,%xmm5
paddq %xmm1,%xmm1
pand 96(%esp),%xmm5
pxor %xmm1,%xmm5
L057xts_enc_steal:
movzbl (%esi),%ecx
movzbl -16(%edi),%edx
leal 1(%esi),%esi
movb %cl,-16(%edi)
movb %dl,(%edi)
leal 1(%edi),%edi
subl $1,%eax
jnz L057xts_enc_steal
subl 112(%esp),%edi
movl %ebp,%edx
movl %ebx,%ecx
movups -16(%edi),%xmm2
xorps %xmm5,%xmm2
movups (%edx),%xmm0
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
L058enc1_loop_10:
.byte 102,15,56,220,209
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
jnz L058enc1_loop_10
.byte 102,15,56,221,209
xorps %xmm5,%xmm2
movups %xmm2,-16(%edi)
L056xts_enc_ret:
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
movdqa %xmm0,(%esp)
pxor %xmm3,%xmm3
movdqa %xmm0,16(%esp)
pxor %xmm4,%xmm4
movdqa %xmm0,32(%esp)
pxor %xmm5,%xmm5
movdqa %xmm0,48(%esp)
pxor %xmm6,%xmm6
movdqa %xmm0,64(%esp)
pxor %xmm7,%xmm7
movdqa %xmm0,80(%esp)
movl 116(%esp),%esp
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
# XTS decrypt; same cdecl layout as _aesni_xts_encrypt.
.globl _aesni_xts_decrypt
.align 4
_aesni_xts_decrypt:
L_aesni_xts_decrypt_begin:
#ifdef __CET__

.byte 243,15,30,251
#endif

pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 36(%esp),%edx
movl 40(%esp),%esi
movl 240(%edx),%ecx
movups (%esi),%xmm2
movups (%edx),%xmm0
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
L059enc1_loop_11:
.byte 102,15,56,220,209
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
jnz L059enc1_loop_11
.byte 102,15,56,221,209
movl 20(%esp),%esi
movl 24(%esp),%edi
movl 28(%esp),%eax
movl 32(%esp),%edx
movl %esp,%ebp
subl $120,%esp
andl $-16,%esp
xorl %ebx,%ebx
testl $15,%eax
setnz %bl
shll $4,%ebx
subl %ebx,%eax
movl $135,96(%esp)
movl $0,100(%esp)
movl $1,104(%esp)
movl $0,108(%esp)
movl %eax,112(%esp)
movl %ebp,116(%esp)
movl 240(%edx),%ecx
movl %edx,%ebp
movl %ecx,%ebx
movdqa %xmm2,%xmm1
pxor %xmm0,%xmm0
movdqa 96(%esp),%xmm3
pcmpgtd %xmm1,%xmm0
andl $-16,%eax
subl $96,%eax
jc L060xts_dec_short
shll $4,%ecx
movl $16,%ebx
subl %ecx,%ebx
leal 32(%edx,%ecx,1),%edx
jmp L061xts_dec_loop6
.align 4,0x90
L061xts_dec_loop6:
pshufd $19,%xmm0,%xmm2
pxor %xmm0,%xmm0
movdqa %xmm1,(%esp)
paddq %xmm1,%xmm1
pand %xmm3,%xmm2
pcmpgtd %xmm1,%xmm0
pxor %xmm2,%xmm1
pshufd $19,%xmm0,%xmm2
pxor %xmm0,%xmm0
movdqa %xmm1,16(%esp)
paddq %xmm1,%xmm1
pand %xmm3,%xmm2
pcmpgtd %xmm1,%xmm0
pxor %xmm2,%xmm1
pshufd $19,%xmm0,%xmm2
pxor %xmm0,%xmm0
movdqa %xmm1,32(%esp)
paddq %xmm1,%xmm1
pand %xmm3,%xmm2
pcmpgtd %xmm1,%xmm0
pxor %xmm2,%xmm1
pshufd $19,%xmm0,%xmm2
pxor %xmm0,%xmm0
movdqa %xmm1,48(%esp)
paddq %xmm1,%xmm1
pand %xmm3,%xmm2
pcmpgtd %xmm1,%xmm0
pxor %xmm2,%xmm1
pshufd $19,%xmm0,%xmm7
movdqa %xmm1,64(%esp)
paddq %xmm1,%xmm1
movups (%ebp),%xmm0
pand %xmm3,%xmm7
movups (%esi),%xmm2
pxor %xmm1,%xmm7
movl %ebx,%ecx
movdqu 16(%esi),%xmm3
xorps %xmm0,%xmm2
movdqu 32(%esi),%xmm4
pxor %xmm0,%xmm3
movdqu 48(%esi),%xmm5
pxor %xmm0,%xmm4
movdqu 64(%esi),%xmm6
pxor %xmm0,%xmm5
movdqu 80(%esi),%xmm1
pxor %xmm0,%xmm6
leal 96(%esi),%esi
pxor (%esp),%xmm2
movdqa %xmm7,80(%esp)
pxor %xmm1,%xmm7
movups 16(%ebp),%xmm1
pxor 16(%esp),%xmm3
pxor 32(%esp),%xmm4
.byte 102,15,56,222,209
pxor 48(%esp),%xmm5
pxor 64(%esp),%xmm6
.byte 102,15,56,222,217
pxor %xmm0,%xmm7
movups 32(%ebp),%xmm0
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
call L_aesni_decrypt6_enter
movdqa 80(%esp),%xmm1
pxor %xmm0,%xmm0
xorps (%esp),%xmm2
pcmpgtd %xmm1,%xmm0
xorps 16(%esp),%xmm3
movups %xmm2,(%edi)
xorps 32(%esp),%xmm4
movups %xmm3,16(%edi)
xorps 48(%esp),%xmm5
movups %xmm4,32(%edi)
xorps 64(%esp),%xmm6
movups %xmm5,48(%edi)
xorps %xmm1,%xmm7
movups %xmm6,64(%edi)
pshufd $19,%xmm0,%xmm2
movups %xmm7,80(%edi)
leal 96(%edi),%edi
movdqa 96(%esp),%xmm3
pxor %xmm0,%xmm0
paddq %xmm1,%xmm1
pand %xmm3,%xmm2
pcmpgtd %xmm1,%xmm0
pxor %xmm2,%xmm1
subl $96,%eax
jnc L061xts_dec_loop6
movl 240(%ebp),%ecx
movl %ebp,%edx
movl %ecx,%ebx
L060xts_dec_short:
addl $96,%eax
jz L062xts_dec_done6x
movdqa %xmm1,%xmm5
cmpl $32,%eax
jb L063xts_dec_one
pshufd $19,%xmm0,%xmm2
pxor %xmm0,%xmm0
paddq %xmm1,%xmm1
pand %xmm3,%xmm2
pcmpgtd %xmm1,%xmm0
pxor %xmm2,%xmm1
je L064xts_dec_two
pshufd $19,%xmm0,%xmm2
pxor %xmm0,%xmm0
movdqa %xmm1,%xmm6
paddq %xmm1,%xmm1
pand %xmm3,%xmm2
pcmpgtd %xmm1,%xmm0
pxor %xmm2,%xmm1
cmpl $64,%eax
jb L065xts_dec_three
pshufd $19,%xmm0,%xmm2
pxor %xmm0,%xmm0
movdqa %xmm1,%xmm7
paddq %xmm1,%xmm1
pand %xmm3,%xmm2
pcmpgtd %xmm1,%xmm0
pxor %xmm2,%xmm1
movdqa %xmm5,(%esp)
movdqa %xmm6,16(%esp)
je L066xts_dec_four
movdqa %xmm7,32(%esp)
pshufd $19,%xmm0,%xmm7
movdqa %xmm1,48(%esp)
paddq %xmm1,%xmm1
pand %xmm3,%xmm7
pxor %xmm1,%xmm7
movdqu (%esi),%xmm2
movdqu 16(%esi),%xmm3
movdqu 32(%esi),%xmm4
pxor (%esp),%xmm2
movdqu 48(%esi),%xmm5
pxor 16(%esp),%xmm3
movdqu 64(%esi),%xmm6
pxor 32(%esp),%xmm4
leal 80(%esi),%esi
pxor 48(%esp),%xmm5
movdqa %xmm7,64(%esp)
pxor %xmm7,%xmm6
call __aesni_decrypt6
movaps 64(%esp),%xmm1
xorps (%esp),%xmm2
xorps 16(%esp),%xmm3
xorps 32(%esp),%xmm4
movups %xmm2,(%edi)
xorps 48(%esp),%xmm5
movups %xmm3,16(%edi)
xorps %xmm1,%xmm6
movups %xmm4,32(%edi)
movups %xmm5,48(%edi)
movups %xmm6,64(%edi)
leal 80(%edi),%edi
jmp L067xts_dec_done
.align 4,0x90
L063xts_dec_one:
movups (%esi),%xmm2
leal 16(%esi),%esi
xorps %xmm5,%xmm2
movups (%edx),%xmm0
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
L068dec1_loop_12:
.byte 102,15,56,222,209
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
jnz L068dec1_loop_12
.byte 102,15,56,223,209
xorps %xmm5,%xmm2
movups %xmm2,(%edi)
leal 16(%edi),%edi
movdqa %xmm5,%xmm1
jmp L067xts_dec_done
.align 4,0x90
L064xts_dec_two:
movaps %xmm1,%xmm6
movups (%esi),%xmm2
movups 16(%esi),%xmm3
leal 32(%esi),%esi
xorps %xmm5,%xmm2
xorps %xmm6,%xmm3
call __aesni_decrypt2
xorps %xmm5,%xmm2
xorps %xmm6,%xmm3
movups %xmm2,(%edi)
movups %xmm3,16(%edi)
leal 32(%edi),%edi
movdqa %xmm6,%xmm1
jmp L067xts_dec_done
.align 4,0x90
L065xts_dec_three:
movaps %xmm1,%xmm7
movups (%esi),%xmm2
movups 16(%esi),%xmm3
movups 32(%esi),%xmm4
leal 48(%esi),%esi
xorps %xmm5,%xmm2
xorps %xmm6,%xmm3
xorps %xmm7,%xmm4
call __aesni_decrypt3
xorps %xmm5,%xmm2
xorps %xmm6,%xmm3
xorps %xmm7,%xmm4
movups %xmm2,(%edi)
movups %xmm3,16(%edi)
movups %xmm4,32(%edi)
leal 48(%edi),%edi
movdqa %xmm7,%xmm1
jmp L067xts_dec_done
.align 4,0x90
L066xts_dec_four:
movaps %xmm1,%xmm6
movups (%esi),%xmm2
movups 16(%esi),%xmm3
movups 32(%esi),%xmm4
xorps (%esp),%xmm2
movups 48(%esi),%xmm5
leal 64(%esi),%esi
xorps 16(%esp),%xmm3
xorps %xmm7,%xmm4
xorps %xmm6,%xmm5
call __aesni_decrypt4
xorps (%esp),%xmm2
xorps 16(%esp),%xmm3
xorps %xmm7,%xmm4
movups %xmm2,(%edi)
xorps %xmm6,%xmm5
movups %xmm3,16(%edi)
movups %xmm4,32(%edi)
movups %xmm5,48(%edi)
leal 64(%edi),%edi
movdqa %xmm6,%xmm1
jmp L067xts_dec_done
.align 4,0x90
L062xts_dec_done6x:
movl 112(%esp),%eax
andl $15,%eax
jz L069xts_dec_ret
movl %eax,112(%esp)
jmp L070xts_dec_only_one_more
.align 4,0x90
L067xts_dec_done:
movl 112(%esp),%eax
pxor %xmm0,%xmm0
andl $15,%eax
jz L069xts_dec_ret
pcmpgtd %xmm1,%xmm0
movl %eax,112(%esp)
pshufd $19,%xmm0,%xmm2
pxor %xmm0,%xmm0
movdqa 96(%esp),%xmm3
paddq %xmm1,%xmm1
pand %xmm3,%xmm2
pcmpgtd %xmm1,%xmm0
pxor %xmm2,%xmm1
L070xts_dec_only_one_more:
pshufd $19,%xmm0,%xmm5
movdqa %xmm1,%xmm6
paddq %xmm1,%xmm1
pand %xmm3,%xmm5
pxor %xmm1,%xmm5
movl %ebp,%edx
movl %ebx,%ecx
movups (%esi),%xmm2
xorps %xmm5,%xmm2
movups (%edx),%xmm0
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
L071dec1_loop_13:
.byte 102,15,56,222,209
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
jnz L071dec1_loop_13
.byte 102,15,56,223,209
xorps %xmm5,%xmm2
movups %xmm2,(%edi)
L072xts_dec_steal:
movzbl 16(%esi),%ecx
movzbl (%edi),%edx
leal 1(%esi),%esi
movb %cl,(%edi)
movb %dl,16(%edi)
leal 1(%edi),%edi
subl $1,%eax
jnz L072xts_dec_steal
subl 112(%esp),%edi
movl %ebp,%edx
movl %ebx,%ecx
movups (%edi),%xmm2
xorps %xmm6,%xmm2
movups (%edx),%xmm0
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
L073dec1_loop_14:
.byte 102,15,56,222,209
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
jnz L073dec1_loop_14
.byte 102,15,56,223,209
xorps %xmm6,%xmm2
movups %xmm2,(%edi)
L069xts_dec_ret:
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
movdqa %xmm0,(%esp)
pxor %xmm3,%xmm3
movdqa %xmm0,16(%esp)
pxor %xmm4,%xmm4
movdqa %xmm0,32(%esp)
pxor %xmm5,%xmm5
movdqa %xmm0,48(%esp)
pxor %xmm6,%xmm6
movdqa %xmm0,64(%esp)
pxor %xmm7,%xmm7
movdqa %xmm0,80(%esp)
movl 116(%esp),%esp
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.globl _aesni_ocb_encrypt
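# Presumed cdecl layout, inferred from the stack loads below: 20(%esp)=in,
# 24(%esp)=out, 28(%esp)=blocks, 32(%esp)=key, 36(%esp)=start block number,
# 40(%esp)=offset, 44(%esp)=table of L_i values, 48(%esp)=checksum.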
.align 4
_aesni_ocb_encrypt:
L_aesni_ocb_encrypt_begin:
#ifdef __CET__

.byte 243,15,30,251
#endif

pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 40(%esp),%ecx
movl 48(%esp),%ebx
movl 20(%esp),%esi
movl 24(%esp),%edi
movl 28(%esp),%eax
movl 32(%esp),%edx
movdqu (%ecx),%xmm0
movl 36(%esp),%ebp
movdqu (%ebx),%xmm1
movl 44(%esp),%ebx
movl %esp,%ecx
subl $132,%esp
andl $-16,%esp
subl %esi,%edi
shll $4,%eax
leal -96(%esi,%eax,1),%eax
movl %edi,120(%esp)
movl %eax,124(%esp)
movl %ecx,128(%esp)
movl 240(%edx),%ecx
testl $1,%ebp
jnz L074odd
bsfl %ebp,%eax
addl $1,%ebp
shll $4,%eax
movdqu (%ebx,%eax,1),%xmm7
movl %edx,%eax
movdqu (%esi),%xmm2
leal 16(%esi),%esi
pxor %xmm0,%xmm7
pxor %xmm2,%xmm1
pxor %xmm7,%xmm2
movdqa %xmm1,%xmm6
movups (%edx),%xmm0
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
L075enc1_loop_15:
.byte 102,15,56,220,209
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
jnz L075enc1_loop_15
.byte 102,15,56,221,209
xorps %xmm7,%xmm2
movdqa %xmm7,%xmm0
movdqa %xmm6,%xmm1
movups %xmm2,-16(%edi,%esi,1)
movl 240(%eax),%ecx
movl %eax,%edx
movl 124(%esp),%eax
L074odd:
shll $4,%ecx
movl $16,%edi
subl %ecx,%edi
movl %edx,112(%esp)
leal 32(%edx,%ecx,1),%edx
movl %edi,116(%esp)
cmpl %eax,%esi
ja L076short
jmp L077grandloop
.align 5,0x90
L077grandloop:
leal 1(%ebp),%ecx
leal 3(%ebp),%eax
leal 5(%ebp),%edi
addl $6,%ebp
bsfl %ecx,%ecx
bsfl %eax,%eax
bsfl %edi,%edi
shll $4,%ecx
shll $4,%eax
shll $4,%edi
movdqu (%ebx),%xmm2
movdqu (%ebx,%ecx,1),%xmm3
movl 116(%esp),%ecx
movdqa %xmm2,%xmm4
movdqu (%ebx,%eax,1),%xmm5
movdqa %xmm2,%xmm6
movdqu (%ebx,%edi,1),%xmm7
pxor %xmm0,%xmm2
pxor %xmm2,%xmm3
movdqa %xmm2,(%esp)
pxor %xmm3,%xmm4
movdqa %xmm3,16(%esp)
pxor %xmm4,%xmm5
movdqa %xmm4,32(%esp)
pxor %xmm5,%xmm6
movdqa %xmm5,48(%esp)
pxor %xmm6,%xmm7
movdqa %xmm6,64(%esp)
movdqa %xmm7,80(%esp)
movups -48(%edx,%ecx,1),%xmm0
movdqu (%esi),%xmm2
movdqu 16(%esi),%xmm3
movdqu 32(%esi),%xmm4
movdqu 48(%esi),%xmm5
movdqu 64(%esi),%xmm6
movdqu 80(%esi),%xmm7
leal 96(%esi),%esi
pxor %xmm2,%xmm1
pxor %xmm0,%xmm2
pxor %xmm3,%xmm1
pxor %xmm0,%xmm3
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
pxor %xmm5,%xmm1
pxor %xmm0,%xmm5
pxor %xmm6,%xmm1
pxor %xmm0,%xmm6
pxor %xmm7,%xmm1
pxor %xmm0,%xmm7
movdqa %xmm1,96(%esp)
movups -32(%edx,%ecx,1),%xmm1
pxor (%esp),%xmm2
pxor 16(%esp),%xmm3
pxor 32(%esp),%xmm4
pxor 48(%esp),%xmm5
pxor 64(%esp),%xmm6
pxor 80(%esp),%xmm7
movups -16(%edx,%ecx,1),%xmm0
.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,220,233
.byte 102,15,56,220,241
.byte 102,15,56,220,249
movl 120(%esp),%edi
movl 124(%esp),%eax
call L_aesni_encrypt6_enter
movdqa 80(%esp),%xmm0
pxor (%esp),%xmm2
pxor 16(%esp),%xmm3
pxor 32(%esp),%xmm4
pxor 48(%esp),%xmm5
pxor 64(%esp),%xmm6
pxor %xmm0,%xmm7
movdqa 96(%esp),%xmm1
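# Six ciphertext blocks go out below; %edi holds out-in (saved at
# 120(%esp)), so (%edi,%esi,1) mirrors the already-advanced input pointer.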
movdqu %xmm2,-96(%edi,%esi,1)
movdqu %xmm3,-80(%edi,%esi,1)
movdqu %xmm4,-64(%edi,%esi,1)
movdqu %xmm5,-48(%edi,%esi,1)
movdqu %xmm6,-32(%edi,%esi,1)
movdqu %xmm7,-16(%edi,%esi,1)
cmpl %eax,%esi
jbe L077grandloop
L076short:
addl $96,%eax
subl %esi,%eax
jz L078done
cmpl $32,%eax
jb L079one
je L080two
cmpl $64,%eax
jb L081three
je L082four
leal 1(%ebp),%ecx
leal 3(%ebp),%eax
bsfl %ecx,%ecx
bsfl %eax,%eax
shll $4,%ecx
shll $4,%eax
movdqu (%ebx),%xmm2
movdqu (%ebx,%ecx,1),%xmm3
movl 116(%esp),%ecx
movdqa %xmm2,%xmm4
movdqu (%ebx,%eax,1),%xmm5
movdqa %xmm2,%xmm6
pxor %xmm0,%xmm2
pxor %xmm2,%xmm3
movdqa %xmm2,(%esp)
pxor %xmm3,%xmm4
movdqa %xmm3,16(%esp)
pxor %xmm4,%xmm5
movdqa %xmm4,32(%esp)
pxor %xmm5,%xmm6
movdqa %xmm5,48(%esp)
pxor %xmm6,%xmm7
movdqa %xmm6,64(%esp)
movups -48(%edx,%ecx,1),%xmm0
movdqu (%esi),%xmm2
movdqu 16(%esi),%xmm3
movdqu 32(%esi),%xmm4
movdqu 48(%esi),%xmm5
movdqu 64(%esi),%xmm6
pxor %xmm7,%xmm7
pxor %xmm2,%xmm1
pxor %xmm0,%xmm2
pxor %xmm3,%xmm1
pxor %xmm0,%xmm3
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
pxor %xmm5,%xmm1
pxor %xmm0,%xmm5
pxor %xmm6,%xmm1
pxor %xmm0,%xmm6
movdqa %xmm1,96(%esp)
movups -32(%edx,%ecx,1),%xmm1
pxor (%esp),%xmm2
pxor 16(%esp),%xmm3
pxor 32(%esp),%xmm4
pxor 48(%esp),%xmm5
pxor 64(%esp),%xmm6
movups -16(%edx,%ecx,1),%xmm0
.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,220,233
.byte 102,15,56,220,241
.byte 102,15,56,220,249
movl 120(%esp),%edi
call L_aesni_encrypt6_enter
movdqa 64(%esp),%xmm0
pxor (%esp),%xmm2
pxor 16(%esp),%xmm3
pxor 32(%esp),%xmm4
pxor 48(%esp),%xmm5
pxor %xmm0,%xmm6
movdqa 96(%esp),%xmm1
movdqu %xmm2,(%edi,%esi,1)
movdqu %xmm3,16(%edi,%esi,1)
movdqu %xmm4,32(%edi,%esi,1)
movdqu %xmm5,48(%edi,%esi,1)
movdqu %xmm6,64(%edi,%esi,1)
jmp L078done
.align 4,0x90
L079one:
movdqu (%ebx),%xmm7
movl 112(%esp),%edx
movdqu (%esi),%xmm2
movl 240(%edx),%ecx
pxor %xmm0,%xmm7
pxor %xmm2,%xmm1
pxor %xmm7,%xmm2
movdqa %xmm1,%xmm6
movl 120(%esp),%edi
movups (%edx),%xmm0
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
L083enc1_loop_16:
.byte 102,15,56,220,209
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
jnz L083enc1_loop_16
.byte 102,15,56,221,209
xorps %xmm7,%xmm2
movdqa %xmm7,%xmm0
movdqa %xmm6,%xmm1
movups %xmm2,(%edi,%esi,1)
jmp L078done
.align 4,0x90
L080two:
leal 1(%ebp),%ecx
movl 112(%esp),%edx
bsfl %ecx,%ecx
shll $4,%ecx
movdqu (%ebx),%xmm6
movdqu (%ebx,%ecx,1),%xmm7
movdqu (%esi),%xmm2
movdqu 16(%esi),%xmm3
movl 240(%edx),%ecx
pxor %xmm0,%xmm6
pxor %xmm6,%xmm7
pxor %xmm2,%xmm1
pxor %xmm6,%xmm2
pxor %xmm3,%xmm1
pxor %xmm7,%xmm3
movdqa %xmm1,%xmm5
movl 120(%esp),%edi
call __aesni_encrypt2
xorps %xmm6,%xmm2
xorps %xmm7,%xmm3
movdqa %xmm7,%xmm0
movdqa %xmm5,%xmm1
movups %xmm2,(%edi,%esi,1)
movups %xmm3,16(%edi,%esi,1)
jmp L078done
.align 4,0x90
L081three:
leal 1(%ebp),%ecx
movl 112(%esp),%edx
bsfl %ecx,%ecx
shll $4,%ecx
movdqu (%ebx),%xmm5
movdqu (%ebx,%ecx,1),%xmm6
movdqa %xmm5,%xmm7
movdqu (%esi),%xmm2
movdqu 16(%esi),%xmm3
movdqu 32(%esi),%xmm4
movl 240(%edx),%ecx
pxor %xmm0,%xmm5
pxor %xmm5,%xmm6
pxor %xmm6,%xmm7
pxor %xmm2,%xmm1
pxor %xmm5,%xmm2
pxor %xmm3,%xmm1
pxor %xmm6,%xmm3
pxor %xmm4,%xmm1
pxor %xmm7,%xmm4
movdqa %xmm1,96(%esp)
movl 120(%esp),%edi
call __aesni_encrypt3
xorps %xmm5,%xmm2
xorps %xmm6,%xmm3
xorps %xmm7,%xmm4
movdqa %xmm7,%xmm0
movdqa 96(%esp),%xmm1
movups %xmm2,(%edi,%esi,1)
movups %xmm3,16(%edi,%esi,1)
movups %xmm4,32(%edi,%esi,1)
jmp L078done
.align 4,0x90
L082four:
leal 1(%ebp),%ecx
leal 3(%ebp),%eax
bsfl %ecx,%ecx
bsfl %eax,%eax
movl 112(%esp),%edx
shll $4,%ecx
shll $4,%eax
movdqu (%ebx),%xmm4
movdqu (%ebx,%ecx,1),%xmm5
movdqa %xmm4,%xmm6
movdqu (%ebx,%eax,1),%xmm7
pxor %xmm0,%xmm4
movdqu (%esi),%xmm2
pxor %xmm4,%xmm5
movdqu 16(%esi),%xmm3
pxor %xmm5,%xmm6
movdqa %xmm4,(%esp)
pxor %xmm6,%xmm7
movdqa %xmm5,16(%esp)
movdqu 32(%esi),%xmm4
movdqu 48(%esi),%xmm5
movl 240(%edx),%ecx
pxor %xmm2,%xmm1
pxor (%esp),%xmm2
pxor %xmm3,%xmm1
pxor 16(%esp),%xmm3
pxor %xmm4,%xmm1
pxor %xmm6,%xmm4
pxor %xmm5,%xmm1
pxor %xmm7,%xmm5
movdqa %xmm1,96(%esp)
movl 120(%esp),%edi
call __aesni_encrypt4
xorps (%esp),%xmm2
xorps 16(%esp),%xmm3
xorps %xmm6,%xmm4
movups %xmm2,(%edi,%esi,1)
xorps %xmm7,%xmm5
movups %xmm3,16(%edi,%esi,1)
movdqa %xmm7,%xmm0
movups %xmm4,32(%edi,%esi,1)
movdqa 96(%esp),%xmm1
movups %xmm5,48(%edi,%esi,1)
L078done:
movl 128(%esp),%edx
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
movdqa %xmm2,(%esp)
pxor %xmm4,%xmm4
movdqa %xmm2,16(%esp)
pxor %xmm5,%xmm5
movdqa %xmm2,32(%esp)
pxor %xmm6,%xmm6
movdqa %xmm2,48(%esp)
pxor %xmm7,%xmm7
movdqa %xmm2,64(%esp)
movdqa %xmm2,80(%esp)
movdqa %xmm2,96(%esp)
leal (%edx),%esp
movl 40(%esp),%ecx
movl 48(%esp),%ebx
movdqu %xmm0,(%ecx)
pxor %xmm0,%xmm0
movdqu %xmm1,(%ebx)
pxor %xmm1,%xmm1
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
# Same cdecl layout as _aesni_ocb_encrypt.
.globl _aesni_ocb_decrypt
.align 4
_aesni_ocb_decrypt:
L_aesni_ocb_decrypt_begin:
#ifdef __CET__

.byte 243,15,30,251
#endif

pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 40(%esp),%ecx
movl 48(%esp),%ebx
movl 20(%esp),%esi
movl 24(%esp),%edi
movl 28(%esp),%eax
movl 32(%esp),%edx
movdqu (%ecx),%xmm0
movl 36(%esp),%ebp
movdqu (%ebx),%xmm1
movl 44(%esp),%ebx
movl %esp,%ecx
subl $132,%esp
andl $-16,%esp
subl %esi,%edi
shll $4,%eax
leal -96(%esi,%eax,1),%eax
movl %edi,120(%esp)
movl %eax,124(%esp)
movl %ecx,128(%esp)
movl 240(%edx),%ecx
testl $1,%ebp
jnz L084odd
bsfl %ebp,%eax
addl $1,%ebp
shll $4,%eax
movdqu (%ebx,%eax,1),%xmm7
movl %edx,%eax
movdqu (%esi),%xmm2
leal 16(%esi),%esi
pxor %xmm0,%xmm7
pxor %xmm7,%xmm2
movdqa %xmm1,%xmm6
movups (%edx),%xmm0
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
L085dec1_loop_17:
.byte 102,15,56,222,209
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
jnz L085dec1_loop_17
.byte 102,15,56,223,209
xorps %xmm7,%xmm2
movaps %xmm6,%xmm1
movdqa %xmm7,%xmm0
xorps %xmm2,%xmm1
movups %xmm2,-16(%edi,%esi,1)
movl 240(%eax),%ecx
movl %eax,%edx
movl 124(%esp),%eax
L084odd:
shll $4,%ecx
movl $16,%edi
subl %ecx,%edi
movl %edx,112(%esp)
leal 32(%edx,%ecx,1),%edx
movl %edi,116(%esp)
cmpl %eax,%esi
ja L086short
jmp L087grandloop
.align 5,0x90
L087grandloop:
leal 1(%ebp),%ecx
leal 3(%ebp),%eax
leal 5(%ebp),%edi
addl $6,%ebp
bsfl %ecx,%ecx
bsfl %eax,%eax
bsfl %edi,%edi
shll $4,%ecx
shll $4,%eax
shll $4,%edi
movdqu (%ebx),%xmm2
movdqu (%ebx,%ecx,1),%xmm3
movl 116(%esp),%ecx
movdqa %xmm2,%xmm4
movdqu (%ebx,%eax,1),%xmm5
movdqa %xmm2,%xmm6
movdqu (%ebx,%edi,1),%xmm7
pxor %xmm0,%xmm2
pxor %xmm2,%xmm3
movdqa %xmm2,(%esp)
pxor %xmm3,%xmm4
movdqa %xmm3,16(%esp)
pxor %xmm4,%xmm5
movdqa %xmm4,32(%esp)
pxor %xmm5,%xmm6
movdqa %xmm5,48(%esp)
pxor %xmm6,%xmm7
movdqa %xmm6,64(%esp)
movdqa %xmm7,80(%esp)
movups -48(%edx,%ecx,1),%xmm0
movdqu (%esi),%xmm2
movdqu 16(%esi),%xmm3
movdqu 32(%esi),%xmm4
movdqu 48(%esi),%xmm5
movdqu 64(%esi),%xmm6
movdqu 80(%esi),%xmm7
leal 96(%esi),%esi
movdqa %xmm1,96(%esp)
pxor %xmm0,%xmm2
pxor %xmm0,%xmm3
pxor %xmm0,%xmm4
pxor %xmm0,%xmm5
pxor %xmm0,%xmm6
pxor %xmm0,%xmm7
movups -32(%edx,%ecx,1),%xmm1
pxor (%esp),%xmm2
pxor 16(%esp),%xmm3
pxor 32(%esp),%xmm4
pxor 48(%esp),%xmm5
pxor 64(%esp),%xmm6
pxor 80(%esp),%xmm7
movups -16(%edx,%ecx,1),%xmm0
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
movl 120(%esp),%edi
movl 124(%esp),%eax
call L_aesni_decrypt6_enter
movdqa 80(%esp),%xmm0
pxor (%esp),%xmm2
movdqa 96(%esp),%xmm1
pxor 16(%esp),%xmm3
pxor 32(%esp),%xmm4
pxor 48(%esp),%xmm5
pxor 64(%esp),%xmm6
pxor %xmm0,%xmm7
pxor %xmm2,%xmm1
movdqu %xmm2,-96(%edi,%esi,1)
pxor %xmm3,%xmm1
movdqu %xmm3,-80(%edi,%esi,1)
pxor %xmm4,%xmm1
movdqu %xmm4,-64(%edi,%esi,1)
pxor %xmm5,%xmm1
movdqu %xmm5,-48(%edi,%esi,1)
pxor %xmm6,%xmm1
movdqu %xmm6,-32(%edi,%esi,1)
pxor %xmm7,%xmm1
movdqu %xmm7,-16(%edi,%esi,1)
cmpl %eax,%esi
jbe L087grandloop
L086short:
addl $96,%eax
subl %esi,%eax
jz L088done
cmpl $32,%eax
jb L089one
je L090two
cmpl $64,%eax
jb L091three
je L092four
leal 1(%ebp),%ecx
leal 3(%ebp),%eax
bsfl %ecx,%ecx
bsfl %eax,%eax
shll $4,%ecx
shll $4,%eax
movdqu (%ebx),%xmm2
movdqu (%ebx,%ecx,1),%xmm3
movl 116(%esp),%ecx
movdqa %xmm2,%xmm4
movdqu (%ebx,%eax,1),%xmm5
movdqa %xmm2,%xmm6
pxor %xmm0,%xmm2
pxor %xmm2,%xmm3
movdqa %xmm2,(%esp)
pxor %xmm3,%xmm4
movdqa %xmm3,16(%esp)
pxor %xmm4,%xmm5
movdqa %xmm4,32(%esp)
pxor %xmm5,%xmm6
movdqa %xmm5,48(%esp)
pxor %xmm6,%xmm7
movdqa %xmm6,64(%esp)
movups -48(%edx,%ecx,1),%xmm0
movdqu (%esi),%xmm2
movdqu 16(%esi),%xmm3
movdqu 32(%esi),%xmm4
movdqu 48(%esi),%xmm5
movdqu 64(%esi),%xmm6
pxor %xmm7,%xmm7
movdqa %xmm1,96(%esp)
pxor %xmm0,%xmm2
pxor %xmm0,%xmm3
pxor %xmm0,%xmm4
pxor %xmm0,%xmm5
pxor %xmm0,%xmm6
movups -32(%edx,%ecx,1),%xmm1
pxor (%esp),%xmm2
pxor 16(%esp),%xmm3
pxor 32(%esp),%xmm4
pxor 48(%esp),%xmm5
pxor 64(%esp),%xmm6
movups -16(%edx,%ecx,1),%xmm0
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
movl 120(%esp),%edi
call L_aesni_decrypt6_enter
movdqa 64(%esp),%xmm0
pxor (%esp),%xmm2
movdqa 96(%esp),%xmm1
pxor 16(%esp),%xmm3
pxor 32(%esp),%xmm4
pxor 48(%esp),%xmm5
pxor %xmm0,%xmm6
pxor %xmm2,%xmm1
movdqu %xmm2,(%edi,%esi,1)
pxor %xmm3,%xmm1
movdqu %xmm3,16(%edi,%esi,1)
pxor %xmm4,%xmm1
movdqu %xmm4,32(%edi,%esi,1)
pxor %xmm5,%xmm1
movdqu %xmm5,48(%edi,%esi,1)
pxor %xmm6,%xmm1
movdqu %xmm6,64(%edi,%esi,1)
jmp L088done
.align 4,0x90
L089one:
movdqu (%ebx),%xmm7
movl 112(%esp),%edx
movdqu (%esi),%xmm2
movl 240(%edx),%ecx
pxor %xmm0,%xmm7
pxor %xmm7,%xmm2
movdqa %xmm1,%xmm6
movl 120(%esp),%edi
movups (%edx),%xmm0
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
L093dec1_loop_18:
.byte 102,15,56,222,209
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
jnz L093dec1_loop_18
.byte 102,15,56,223,209
xorps %xmm7,%xmm2
movaps %xmm6,%xmm1
movdqa %xmm7,%xmm0
xorps %xmm2,%xmm1
movups %xmm2,(%edi,%esi,1)
jmp L088done
.align 4,0x90
L090two:
leal 1(%ebp),%ecx
movl 112(%esp),%edx
bsfl %ecx,%ecx
shll $4,%ecx
movdqu (%ebx),%xmm6
movdqu (%ebx,%ecx,1),%xmm7
movdqu (%esi),%xmm2
movdqu 16(%esi),%xmm3
movl 240(%edx),%ecx
movdqa %xmm1,%xmm5
pxor %xmm0,%xmm6
pxor %xmm6,%xmm7
pxor %xmm6,%xmm2
pxor %xmm7,%xmm3
movl 120(%esp),%edi
call __aesni_decrypt2
xorps %xmm6,%xmm2
xorps %xmm7,%xmm3
movdqa %xmm7,%xmm0
xorps %xmm2,%xmm5
movups %xmm2,(%edi,%esi,1)
xorps %xmm3,%xmm5
movups %xmm3,16(%edi,%esi,1)
movaps %xmm5,%xmm1
jmp L088done
.align 4,0x90
L091three:
leal 1(%ebp),%ecx
movl 112(%esp),%edx
bsfl %ecx,%ecx
shll $4,%ecx
movdqu (%ebx),%xmm5
movdqu (%ebx,%ecx,1),%xmm6
movdqa %xmm5,%xmm7
movdqu (%esi),%xmm2
movdqu 16(%esi),%xmm3
movdqu 32(%esi),%xmm4
movl 240(%edx),%ecx
movdqa %xmm1,96(%esp)
pxor %xmm0,%xmm5
pxor %xmm5,%xmm6
pxor %xmm6,%xmm7
pxor %xmm5,%xmm2
pxor %xmm6,%xmm3
pxor %xmm7,%xmm4
movl 120(%esp),%edi
call __aesni_decrypt3
movdqa 96(%esp),%xmm1
xorps %xmm5,%xmm2
xorps %xmm6,%xmm3
xorps %xmm7,%xmm4
movups %xmm2,(%edi,%esi,1)
pxor %xmm2,%xmm1
movdqa %xmm7,%xmm0
movups %xmm3,16(%edi,%esi,1)
pxor %xmm3,%xmm1
movups %xmm4,32(%edi,%esi,1)
pxor %xmm4,%xmm1
jmp L088done
.align 4,0x90
L092four:
leal 1(%ebp),%ecx
leal 3(%ebp),%eax
bsfl %ecx,%ecx
bsfl %eax,%eax
movl 112(%esp),%edx
shll $4,%ecx
shll $4,%eax
movdqu (%ebx),%xmm4
movdqu (%ebx,%ecx,1),%xmm5
movdqa %xmm4,%xmm6
movdqu (%ebx,%eax,1),%xmm7
pxor %xmm0,%xmm4
movdqu (%esi),%xmm2
	pxor %xmm4,%xmm5
	movdqu 16(%esi),%xmm3
	pxor %xmm5,%xmm6
	movdqa %xmm4,(%esp)
	pxor %xmm6,%xmm7
	movdqa %xmm5,16(%esp)
	movdqu 32(%esi),%xmm4
	movdqu 48(%esi),%xmm5
	movl 240(%edx),%ecx
	movdqa %xmm1,96(%esp)
	pxor (%esp),%xmm2
	pxor 16(%esp),%xmm3
	pxor %xmm6,%xmm4
	pxor %xmm7,%xmm5
	movl 120(%esp),%edi
	call __aesni_decrypt4
	movdqa 96(%esp),%xmm1
	xorps (%esp),%xmm2
	xorps 16(%esp),%xmm3
	xorps %xmm6,%xmm4
	movups %xmm2,(%edi,%esi,1)
	pxor %xmm2,%xmm1
	xorps %xmm7,%xmm5
	movups %xmm3,16(%edi,%esi,1)
	pxor %xmm3,%xmm1
	movdqa %xmm7,%xmm0
	movups %xmm4,32(%edi,%esi,1)
	pxor %xmm4,%xmm1
	movups %xmm5,48(%edi,%esi,1)
	pxor %xmm5,%xmm1
L088done:
# Wipe the stack scratch area, restore %esp, then write back the final
# offset (%xmm0) and checksum (%xmm1) and clear the XMM registers.
	movl 128(%esp),%edx
	pxor %xmm2,%xmm2
	pxor %xmm3,%xmm3
	movdqa %xmm2,(%esp)
	pxor %xmm4,%xmm4
	movdqa %xmm2,16(%esp)
	pxor %xmm5,%xmm5
	movdqa %xmm2,32(%esp)
	pxor %xmm6,%xmm6
	movdqa %xmm2,48(%esp)
	pxor %xmm7,%xmm7
	movdqa %xmm2,64(%esp)
	movdqa %xmm2,80(%esp)
	movdqa %xmm2,96(%esp)
	leal (%edx),%esp
	movl 40(%esp),%ecx
	movl 48(%esp),%ebx
	movdqu %xmm0,(%ecx)
	pxor %xmm0,%xmm0
	movdqu %xmm1,(%ebx)
	pxor %xmm1,%xmm1
	popl %edi
	popl %esi
	popl %ebx
	popl %ebp
	ret
# void aesni_cbc_encrypt(const unsigned char *in, unsigned char *out,
#                        size_t length, const AES_KEY *key,
#                        unsigned char *ivec, int enc);
# cdecl: after the four pushes below the arguments sit at 20..40(%esp).
.globl _aesni_cbc_encrypt
.align 4
_aesni_cbc_encrypt:
L_aesni_cbc_encrypt_begin:
%ifdef __CET__

	.byte 243,15,30,251	# endbr32
%endif

	pushl %ebp
	pushl %ebx
	pushl %esi
	pushl %edi
	movl 20(%esp),%esi	# in
	movl %esp,%ebx
	movl 24(%esp),%edi	# out
	subl $24,%ebx
	movl 28(%esp),%eax	# length
	andl $-16,%ebx
	movl 32(%esp),%edx	# key
	movl 36(%esp),%ebp	# ivec
	testl %eax,%eax
	jz L094cbc_abort
	cmpl $0,40(%esp)	# enc?
	xchgl %esp,%ebx		# switch to the 16-byte-aligned scratch stack
	movups (%ebp),%xmm7	# load the IV
	movl 240(%edx),%ecx
	movl %edx,%ebp
	movl %ebx,16(%esp)	# save the caller's %esp
	movl %ecx,%ebx
	je L095cbc_decrypt
# ---- CBC encrypt ----
	movaps %xmm7,%xmm2
	cmpl $16,%eax
	jb L096cbc_enc_tail
	subl $16,%eax
	jmp L097cbc_enc_loop
.align 4,0x90
L097cbc_enc_loop:
	movups (%esi),%xmm7
	leal 16(%esi),%esi
	movups (%edx),%xmm0
	movups 16(%edx),%xmm1
	xorps %xmm0,%xmm7
	leal 32(%edx),%edx
	xorps %xmm7,%xmm2
L098enc1_loop_19:
	.byte 102,15,56,220,209	# aesenc %xmm1,%xmm2
	decl %ecx
	movups (%edx),%xmm1
	leal 16(%edx),%edx
	jnz L098enc1_loop_19
	.byte 102,15,56,221,209	# aesenclast %xmm1,%xmm2
	movl %ebx,%ecx
	movl %ebp,%edx
	movups %xmm2,(%edi)
	leal 16(%edi),%edi
	subl $16,%eax
	jnc L097cbc_enc_loop
	addl $16,%eax
	jnz L096cbc_enc_tail
	movaps %xmm2,%xmm7
	pxor %xmm2,%xmm2
	jmp L099cbc_ret
L096cbc_enc_tail:
	movl %eax,%ecx
	.long 2767451785	# 0x89,0xf6,0xf3,0xa4: mov %esi,%esi; rep movsb (copy the ragged input bytes)
	movl $16,%ecx
	subl %eax,%ecx
	xorl %eax,%eax
	.long 2868115081	# 0x89,0xf6,0xf3,0xaa: mov %esi,%esi; rep stosb (zero-pad to a full block)
	leal -16(%edi),%edi
	movl %ebx,%ecx
	movl %edi,%esi
	movl %ebp,%edx
	jmp L097cbc_enc_loop
.align 4,0x90
# ---- CBC decrypt ----
L095cbc_decrypt:
	cmpl $80,%eax
	jbe L100cbc_dec_tail
	movaps %xmm7,(%esp)
	subl $80,%eax
	jmp L101cbc_dec_loop6_enter
.align 4,0x90
L102cbc_dec_loop6:
	movaps %xmm0,(%esp)
	movups %xmm7,(%edi)
	leal 16(%edi),%edi
L101cbc_dec_loop6_enter:
	movdqu (%esi),%xmm2
	movdqu 16(%esi),%xmm3
	movdqu 32(%esi),%xmm4
	movdqu 48(%esi),%xmm5
	movdqu 64(%esi),%xmm6
	movdqu 80(%esi),%xmm7
	call __aesni_decrypt6
	movups (%esi),%xmm1
	movups 16(%esi),%xmm0
	xorps (%esp),%xmm2	# XOR with the IV / previous ciphertext
	xorps %xmm1,%xmm3
	movups 32(%esi),%xmm1
	xorps %xmm0,%xmm4
	movups 48(%esi),%xmm0
	xorps %xmm1,%xmm5
	movups 64(%esi),%xmm1
	xorps %xmm0,%xmm6
	movups 80(%esi),%xmm0
	xorps %xmm1,%xmm7
	movups %xmm2,(%edi)
	movups %xmm3,16(%edi)
	leal 96(%esi),%esi
	movups %xmm4,32(%edi)
	movl %ebx,%ecx
	movups %xmm5,48(%edi)
	movl %ebp,%edx
	movups %xmm6,64(%edi)
	leal 80(%edi),%edi
	subl $96,%eax
	ja L102cbc_dec_loop6
	movaps %xmm7,%xmm2
	movaps %xmm0,%xmm7
	addl $80,%eax
	jle L103cbc_dec_clear_tail_collected
	movups %xmm2,(%edi)
	leal 16(%edi),%edi
L100cbc_dec_tail:
	movups (%esi),%xmm2
	movaps %xmm2,%xmm6
	cmpl $16,%eax
	jbe L104cbc_dec_one
	movups 16(%esi),%xmm3
	movaps %xmm3,%xmm5
	cmpl $32,%eax
	jbe L105cbc_dec_two
	movups 32(%esi),%xmm4
	cmpl $48,%eax
	jbe L106cbc_dec_three
	movups 48(%esi),%xmm5
	cmpl $64,%eax
	jbe L107cbc_dec_four
# Five blocks remain.
	movups 64(%esi),%xmm6
	movaps %xmm7,(%esp)
	movups (%esi),%xmm2
	xorps %xmm7,%xmm7
	call __aesni_decrypt6
	movups (%esi),%xmm1
	movups 16(%esi),%xmm0
	xorps (%esp),%xmm2
	xorps %xmm1,%xmm3
	movups 32(%esi),%xmm1
	xorps %xmm0,%xmm4
	movups 48(%esi),%xmm0
	xorps %xmm1,%xmm5
	movups 64(%esi),%xmm7
	xorps %xmm0,%xmm6
	movups %xmm2,(%edi)
	movups %xmm3,16(%edi)
	pxor %xmm3,%xmm3
	movups %xmm4,32(%edi)
	pxor %xmm4,%xmm4
	movups %xmm5,48(%edi)
	pxor %xmm5,%xmm5
	leal 64(%edi),%edi
	movaps %xmm6,%xmm2
	pxor %xmm6,%xmm6
	subl $80,%eax
	jmp L108cbc_dec_tail_collected
.align 4,0x90
L104cbc_dec_one:
	movups (%edx),%xmm0
	movups 16(%edx),%xmm1
	leal 32(%edx),%edx
	xorps %xmm0,%xmm2
L109dec1_loop_20:
	.byte 102,15,56,222,209	# aesdec %xmm1,%xmm2
	decl %ecx
	movups (%edx),%xmm1
	leal 16(%edx),%edx
	jnz L109dec1_loop_20
	.byte 102,15,56,223,209	# aesdeclast %xmm1,%xmm2
	xorps %xmm7,%xmm2
	movaps %xmm6,%xmm7
	subl $16,%eax
	jmp L108cbc_dec_tail_collected
.align 4,0x90
L105cbc_dec_two:
	call __aesni_decrypt2
	xorps %xmm7,%xmm2
	xorps %xmm6,%xmm3
	movups %xmm2,(%edi)
	movaps %xmm3,%xmm2
	pxor %xmm3,%xmm3
	leal 16(%edi),%edi
	movaps %xmm5,%xmm7
	subl $32,%eax
	jmp L108cbc_dec_tail_collected
.align 4,0x90
L106cbc_dec_three:
	call __aesni_decrypt3
	xorps %xmm7,%xmm2
	xorps %xmm6,%xmm3
	xorps %xmm5,%xmm4
	movups %xmm2,(%edi)
	movaps %xmm4,%xmm2
	pxor %xmm4,%xmm4
	movups %xmm3,16(%edi)
	pxor %xmm3,%xmm3
	leal 32(%edi),%edi
	movups 32(%esi),%xmm7
	subl $48,%eax
	jmp L108cbc_dec_tail_collected
.align 4,0x90
L107cbc_dec_four:
	call __aesni_decrypt4
	movups 16(%esi),%xmm1
	movups 32(%esi),%xmm0
	xorps %xmm7,%xmm2
	movups 48(%esi),%xmm7
	xorps %xmm6,%xmm3
	movups %xmm2,(%edi)
	xorps %xmm1,%xmm4
	movups %xmm3,16(%edi)
	pxor %xmm3,%xmm3
	xorps %xmm0,%xmm5
	movups %xmm4,32(%edi)
	pxor %xmm4,%xmm4
	leal 48(%edi),%edi
	movaps %xmm5,%xmm2
	pxor %xmm5,%xmm5
	subl $64,%eax
	jmp L108cbc_dec_tail_collected
.align 4,0x90
L103cbc_dec_clear_tail_collected:
	pxor %xmm3,%xmm3
	pxor %xmm4,%xmm4
	pxor %xmm5,%xmm5
	pxor %xmm6,%xmm6
L108cbc_dec_tail_collected:
	andl $15,%eax
	jnz L110cbc_dec_tail_partial
	movups %xmm2,(%edi)
	pxor %xmm0,%xmm0
	jmp L099cbc_ret
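#
# The partial-tail path below (L110cbc_dec_tail_partial) avoids issuing a
# full 16-byte store at the destination: the last decrypted block is
# parked in the 16-byte stack slot at (%esp) and the remainder is moved
# out bytewise with an inlined "rep movsb", emitted as raw .long data
# just like the encrypt-side tail above. The byte count in %ecx is
# derived from the residue that the preceding andl $15,%eax left in %eax.
#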
.align 4,0x90
L110cbc_dec_tail_partial:
	movaps %xmm2,(%esp)	# park the decrypted block in the stack slot
	pxor %xmm0,%xmm0
	movl $16,%ecx
	movl %esp,%esi
	subl %eax,%ecx		# %ecx = 16 - (length & 15)
	.long 2767451785	# 0x89,0xf6,0xf3,0xa4: mov %esi,%esi; rep movsb
	movdqa %xmm2,(%esp)
L099cbc_ret:
	movl 16(%esp),%esp	# back to the caller's stack
	movl 36(%esp),%ebp
	pxor %xmm2,%xmm2
	pxor %xmm1,%xmm1
	movups %xmm7,(%ebp)	# store the next IV
	pxor %xmm7,%xmm7
L094cbc_abort:
	popl %edi
	popl %esi
	popl %ebx
	popl %ebp
	ret
# Internal key-schedule worker. Inputs: %eax = user key, %ecx = key size
# in bits, %edx = AES_KEY to fill. Returns 0 in %eax on success, -1 for a
# NULL pointer, -2 for an unsupported key size.
.align 4
__aesni_set_encrypt_key:
%ifdef __CET__

	.byte 243,15,30,251	# endbr32
%endif

	pushl %ebp
	pushl %ebx
	testl %eax,%eax
	jz L111bad_pointer
	testl %edx,%edx
	jz L111bad_pointer
	call L112pic		# PIC trampoline: materialize Lkey_const in %ebx
L112pic:
	popl %ebx
	leal Lkey_const-L112pic(%ebx),%ebx
	movl L_OPENSSL_ia32cap_P$non_lazy_ptr-Lkey_const(%ebx),%ebp
	movups (%eax),%xmm0
	xorps %xmm4,%xmm4
	movl 4(%ebp),%ebp
	leal 16(%edx),%edx
	andl $268437504,%ebp	# isolate capability bits 28 and 11 (AVX, and the bit OpenSSL uses for AMD XOP)
	cmpl $256,%ecx
	je L11314rounds
	cmpl $192,%ecx
	je L11412rounds
	cmpl $128,%ecx
	jne L115bad_keybits
.align 4,0x90
L11610rounds:	# AES-128
	cmpl $268435456,%ebp	# only bit 28 (AVX) set: take the alternative schedule
	je L11710rounds_alt
	movl $9,%ecx		# 9 full rounds plus the final one
	movups %xmm0,-16(%edx)
	.byte 102,15,58,223,200,1	# aeskeygenassist $1,%xmm0,%xmm1
	call L118key_128_cold
	.byte 102,15,58,223,200,2	# aeskeygenassist $2,%xmm0,%xmm1
	call L119key_128
	.byte 102,15,58,223,200,4	# aeskeygenassist $4,%xmm0,%xmm1
	call L119key_128
	.byte 102,15,58,223,200,8	# aeskeygenassist $8,%xmm0,%xmm1
	call L119key_128
	.byte 102,15,58,223,200,16	# aeskeygenassist $16,%xmm0,%xmm1
	call L119key_128
	.byte 102,15,58,223,200,32	# aeskeygenassist $32,%xmm0,%xmm1
	call L119key_128
	.byte 102,15,58,223,200,64	# aeskeygenassist $64,%xmm0,%xmm1
	call L119key_128
	.byte 102,15,58,223,200,128	# aeskeygenassist $128,%xmm0,%xmm1
	call L119key_128
	.byte 102,15,58,223,200,27	# aeskeygenassist $27,%xmm0,%xmm1
	call L119key_128
	.byte 102,15,58,223,200,54	# aeskeygenassist $54,%xmm0,%xmm1
	call L119key_128
	movups %xmm0,(%edx)
	movl %ecx,80(%edx)	# round count at offset 240 from the key start
	jmp L120good_key
.align 4,0x90
L119key_128:
	movups %xmm0,(%edx)
	leal 16(%edx),%edx
L118key_128_cold:
	shufps $16,%xmm0,%xmm4
	xorps %xmm4,%xmm0
	shufps $140,%xmm0,%xmm4
	xorps %xmm4,%xmm0
	shufps $255,%xmm1,%xmm1
	xorps %xmm1,%xmm0
	ret
.align 4,0x90
L11710rounds_alt:
	movdqa (%ebx),%xmm5
	movl $8,%ecx
	movdqa 32(%ebx),%xmm4
	movdqa %xmm0,%xmm2
	movdqu %xmm0,-16(%edx)
L121loop_key128:
	.byte 102,15,56,0,197	# pshufb %xmm5,%xmm0
	.byte 102,15,56,221,196	# aesenclast %xmm4,%xmm0
	pslld $1,%xmm4		# advance the round constant
	leal 16(%edx),%edx
	movdqa %xmm2,%xmm3
	pslldq $4,%xmm2
	pxor %xmm2,%xmm3
	pslldq $4,%xmm2
	pxor %xmm2,%xmm3
	pslldq $4,%xmm2
	pxor %xmm3,%xmm2
	pxor %xmm2,%xmm0
	movdqu %xmm0,-16(%edx)
	movdqa %xmm0,%xmm2
	decl %ecx
	jnz L121loop_key128
	movdqa 48(%ebx),%xmm4
	.byte 102,15,56,0,197	# pshufb %xmm5,%xmm0
	.byte 102,15,56,221,196	# aesenclast %xmm4,%xmm0
	pslld $1,%xmm4
	movdqa %xmm2,%xmm3
	pslldq $4,%xmm2
	pxor %xmm2,%xmm3
	pslldq $4,%xmm2
	pxor %xmm2,%xmm3
	pslldq $4,%xmm2
	pxor %xmm3,%xmm2
	pxor %xmm2,%xmm0
	movdqu %xmm0,(%edx)
	movdqa %xmm0,%xmm2
	.byte 102,15,56,0,197	# pshufb %xmm5,%xmm0
	.byte 102,15,56,221,196	# aesenclast %xmm4,%xmm0
	movdqa %xmm2,%xmm3
	pslldq $4,%xmm2
	pxor %xmm2,%xmm3
	pslldq $4,%xmm2
	pxor %xmm2,%xmm3
	pslldq $4,%xmm2
	pxor %xmm3,%xmm2
	pxor %xmm2,%xmm0
	movdqu %xmm0,16(%edx)
	movl $9,%ecx
	movl %ecx,96(%edx)
	jmp L120good_key
.align 4,0x90
L11412rounds:	# AES-192
	movq 16(%eax),%xmm2
	cmpl $268435456,%ebp
	je L12212rounds_alt
	movl $11,%ecx
	movups %xmm0,-16(%edx)
	.byte 102,15,58,223,202,1	# aeskeygenassist $1,%xmm2,%xmm1
	call L123key_192a_cold
	.byte 102,15,58,223,202,2	# aeskeygenassist $2,%xmm2,%xmm1
	call L124key_192b
	.byte 102,15,58,223,202,4	# aeskeygenassist $4,%xmm2,%xmm1
	call L125key_192a
	.byte 102,15,58,223,202,8	# aeskeygenassist $8,%xmm2,%xmm1
	call L124key_192b
	.byte 102,15,58,223,202,16	# aeskeygenassist $16,%xmm2,%xmm1
	call L125key_192a
	.byte 102,15,58,223,202,32	# aeskeygenassist $32,%xmm2,%xmm1
	call L124key_192b
	.byte 102,15,58,223,202,64	# aeskeygenassist $64,%xmm2,%xmm1
	call L125key_192a
	.byte 102,15,58,223,202,128	# aeskeygenassist $128,%xmm2,%xmm1
	call L124key_192b
	movups %xmm0,(%edx)
	movl %ecx,48(%edx)	# round count at offset 240 from the key start
	jmp L120good_key
.align 4,0x90
L125key_192a:
	movups %xmm0,(%edx)
	leal 16(%edx),%edx
.align 4,0x90
L123key_192a_cold:
	movaps %xmm2,%xmm5
L126key_192b_warm:
	shufps $16,%xmm0,%xmm4
	movdqa %xmm2,%xmm3
	xorps %xmm4,%xmm0
	shufps $140,%xmm0,%xmm4
	pslldq $4,%xmm3
	xorps %xmm4,%xmm0
	pshufd $85,%xmm1,%xmm1
	pxor %xmm3,%xmm2
	pxor %xmm1,%xmm0
	pshufd $255,%xmm0,%xmm3
	pxor %xmm3,%xmm2
	ret
.align 4,0x90
L124key_192b:
	movaps %xmm0,%xmm3
	shufps $68,%xmm0,%xmm5
	movups %xmm5,(%edx)
	shufps $78,%xmm2,%xmm3
	movups %xmm3,16(%edx)
	leal 32(%edx),%edx
	jmp L126key_192b_warm
.align 4,0x90
L12212rounds_alt:
	movdqa 16(%ebx),%xmm5
	movdqa 32(%ebx),%xmm4
	movl $8,%ecx
	movdqu %xmm0,-16(%edx)
L127loop_key192:
	movq %xmm2,(%edx)
	movdqa %xmm2,%xmm1
	.byte 102,15,56,0,213	# pshufb %xmm5,%xmm2
	.byte 102,15,56,221,212	# aesenclast %xmm4,%xmm2
	pslld $1,%xmm4
	leal 24(%edx),%edx
	movdqa %xmm0,%xmm3
	pslldq $4,%xmm0
	pxor %xmm0,%xmm3
	pslldq $4,%xmm0
	pxor %xmm0,%xmm3
	pslldq $4,%xmm0
	pxor %xmm3,%xmm0
	pshufd $255,%xmm0,%xmm3
	pxor %xmm1,%xmm3
	pslldq $4,%xmm1
	pxor %xmm1,%xmm3
	pxor %xmm2,%xmm0
	pxor %xmm3,%xmm2
	movdqu %xmm0,-16(%edx)
	decl %ecx
	jnz L127loop_key192
	movl $11,%ecx
	movl %ecx,32(%edx)
	jmp L120good_key
.align 4,0x90
L11314rounds:	# AES-256
	movups 16(%eax),%xmm2
	leal 16(%edx),%edx
	cmpl $268435456,%ebp
	je L12814rounds_alt
	movl $13,%ecx
	movups %xmm0,-32(%edx)
	movups %xmm2,-16(%edx)
	.byte 102,15,58,223,202,1	# aeskeygenassist $1,%xmm2,%xmm1
	call L129key_256a_cold
	.byte 102,15,58,223,200,1	# aeskeygenassist $1,%xmm0,%xmm1
	call L130key_256b
	.byte 102,15,58,223,202,2	# aeskeygenassist $2,%xmm2,%xmm1
	call L131key_256a
	.byte 102,15,58,223,200,2	# aeskeygenassist $2,%xmm0,%xmm1
	call L130key_256b
	.byte 102,15,58,223,202,4	# aeskeygenassist $4,%xmm2,%xmm1
	call L131key_256a
	.byte 102,15,58,223,200,4	# aeskeygenassist $4,%xmm0,%xmm1
	call L130key_256b
	.byte 102,15,58,223,202,8	# aeskeygenassist $8,%xmm2,%xmm1
	call L131key_256a
	.byte 102,15,58,223,200,8	# aeskeygenassist $8,%xmm0,%xmm1
	call L130key_256b
	.byte 102,15,58,223,202,16	# aeskeygenassist $16,%xmm2,%xmm1
	call L131key_256a
	.byte 102,15,58,223,200,16	# aeskeygenassist $16,%xmm0,%xmm1
	call L130key_256b
	.byte 102,15,58,223,202,32	# aeskeygenassist $32,%xmm2,%xmm1
	call L131key_256a
	.byte 102,15,58,223,200,32	# aeskeygenassist $32,%xmm0,%xmm1
	call L130key_256b
	.byte 102,15,58,223,202,64	# aeskeygenassist $64,%xmm2,%xmm1
	call L131key_256a
	movups %xmm0,(%edx)
	movl %ecx,16(%edx)	# round count at offset 240 from the key start
	xorl %eax,%eax
	jmp L120good_key
.align 4,0x90
L131key_256a:
	movups %xmm2,(%edx)
	leal 16(%edx),%edx
L129key_256a_cold:
	shufps $16,%xmm0,%xmm4
	xorps %xmm4,%xmm0
	shufps $140,%xmm0,%xmm4
	xorps %xmm4,%xmm0
	shufps $255,%xmm1,%xmm1
	xorps %xmm1,%xmm0
	ret
.align 4,0x90
L130key_256b:
	movups %xmm0,(%edx)
	leal 16(%edx),%edx
	shufps $16,%xmm2,%xmm4
	xorps %xmm4,%xmm2
	shufps $140,%xmm2,%xmm4
	xorps %xmm4,%xmm2
	shufps $170,%xmm1,%xmm1
	xorps %xmm1,%xmm2
	ret
.align 4,0x90
L12814rounds_alt:
	movdqa (%ebx),%xmm5
	movdqa 32(%ebx),%xmm4
	movl $7,%ecx
	movdqu %xmm0,-32(%edx)
	movdqa %xmm2,%xmm1
	movdqu %xmm2,-16(%edx)
L132loop_key256:
	.byte 102,15,56,0,213	# pshufb %xmm5,%xmm2
	.byte 102,15,56,221,212	# aesenclast %xmm4,%xmm2
	movdqa %xmm0,%xmm3
	pslldq $4,%xmm0
	pxor %xmm0,%xmm3
	pslldq $4,%xmm0
	pxor %xmm0,%xmm3
	pslldq $4,%xmm0
	pxor %xmm3,%xmm0
	pslld $1,%xmm4
	pxor %xmm2,%xmm0
	movdqu %xmm0,(%edx)
	decl %ecx
	jz L133done_key256
	pshufd $255,%xmm0,%xmm2
	pxor %xmm3,%xmm3
	.byte 102,15,56,221,211	# aesenclast %xmm3,%xmm2 (zero round key: SubWord only)
	movdqa %xmm1,%xmm3
	pslldq $4,%xmm1
	pxor %xmm1,%xmm3
	pslldq $4,%xmm1
	pxor %xmm1,%xmm3
	pslldq $4,%xmm1
	pxor %xmm3,%xmm1
	pxor %xmm1,%xmm2
	movdqu %xmm2,16(%edx)
	leal 32(%edx),%edx
	movdqa %xmm2,%xmm1
	jmp L132loop_key256
L133done_key256:
	movl $13,%ecx
	movl %ecx,16(%edx)
L120good_key:
	pxor %xmm0,%xmm0
	pxor %xmm1,%xmm1
	pxor %xmm2,%xmm2
	pxor %xmm3,%xmm3
	pxor %xmm4,%xmm4
	pxor %xmm5,%xmm5
	xorl %eax,%eax		# return 0: success
	popl %ebx
	popl %ebp
	ret
.align 2,0x90
L111bad_pointer:
	movl $-1,%eax		# return -1: NULL key or schedule pointer
	popl %ebx
	popl %ebp
	ret
.align 2,0x90
L115bad_keybits:
	pxor %xmm0,%xmm0
	movl $-2,%eax		# return -2: unsupported key size
	popl %ebx
	popl %ebp
	ret
.globl _aesni_set_encrypt_key
.align 4
_aesni_set_encrypt_key:
L_aesni_set_encrypt_key_begin:
%ifdef __CET__

	.byte 243,15,30,251	# endbr32
%endif

	movl 4(%esp),%eax	# userKey
	movl 8(%esp),%ecx	# bits
	movl 12(%esp),%edx	# key
	call __aesni_set_encrypt_key
	ret
# Builds the encrypt schedule, then swaps the round keys end-for-end and
# runs aesimc over the inner ones to produce the decrypt schedule
# (equal-inverse-cipher layout).
.globl _aesni_set_decrypt_key
.align 4
_aesni_set_decrypt_key:
L_aesni_set_decrypt_key_begin:
%ifdef __CET__

	.byte 243,15,30,251	# endbr32
%endif

	movl 4(%esp),%eax	# userKey
	movl 8(%esp),%ecx	# bits
	movl 12(%esp),%edx	# key
	call __aesni_set_encrypt_key
	movl 12(%esp),%edx
	shll $4,%ecx
	testl %eax,%eax
	jnz L134dec_key_ret
	leal 16(%edx,%ecx,1),%eax	# %eax = last round key
	movups (%edx),%xmm0
	movups (%eax),%xmm1
	movups %xmm0,(%eax)	# swap the first and last round keys
	movups %xmm1,(%edx)
	leal 16(%edx),%edx
	leal -16(%eax),%eax
L135dec_key_inverse:
	movups (%edx),%xmm0
	movups (%eax),%xmm1
	.byte 102,15,56,219,192	# aesimc %xmm0,%xmm0
	.byte 102,15,56,219,201	# aesimc %xmm1,%xmm1
	leal 16(%edx),%edx
	leal -16(%eax),%eax
	movups %xmm0,16(%eax)
	movups %xmm1,-16(%edx)
	cmpl %edx,%eax
	ja L135dec_key_inverse
	movups (%edx),%xmm0
	.byte 102,15,56,219,192	# aesimc %xmm0,%xmm0
	movups %xmm0,(%edx)
	pxor %xmm0,%xmm0
	pxor %xmm1,%xmm1
	xorl %eax,%eax
L134dec_key_ret:
	ret
.align 6,0x90
Lkey_const:
	.long 202313229,202313229,202313229,202313229	# 0x0c0f0e0d: pshufb mask for the _alt schedules
	.long 67569157,67569157,67569157,67569157	# 0x04070605: pshufb mask for the _alt schedules
	.long 1,1,1,1					# initial round constant
	.long 27,27,27,27				# round constant 0x1b
	.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69
	.byte 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
	.byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
	.byte 115,108,46,111,114,103,62,0
# The .byte strings above spell "AES for Intel AES-NI, CRYPTOGAMS by
# <appro@openssl.org>".
.section __IMPORT,__pointers,non_lazy_symbol_pointers
L_OPENSSL_ia32cap_P$non_lazy_ptr:
.indirect_symbol _OPENSSL_ia32cap_P
.long 0
.comm _OPENSSL_ia32cap_P,16,2
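# ----------------------------------------------------------------------
# The entry points defined in this section are reached through the
# following C prototypes on the OpenSSL side (a sketch for orientation;
# the authoritative declarations live in OpenSSL's private headers):
#
#   int  aesni_set_encrypt_key(const unsigned char *userKey, int bits,
#                              AES_KEY *key);
#   int  aesni_set_decrypt_key(const unsigned char *userKey, int bits,
#                              AES_KEY *key);
#   void aesni_cbc_encrypt(const unsigned char *in, unsigned char *out,
#                          size_t length, const AES_KEY *key,
#                          unsigned char *ivec, int enc);
#
# Both set-key routines return 0 on success, -1 on a NULL pointer and -2
# on an unsupported key size (see L111bad_pointer and L115bad_keybits).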