/*
 * GF(2^m) (carry-less) multiplication helpers for 32-bit x86.
 *
 * Syntax : GAS / AT&T (source operand first, %-prefixed registers).
 * Target : Mach-O (underscore-prefixed globals, non-lazy symbol pointer
 *          in __IMPORT,__pointers for position-independent data access).
 * Build  : the __CET__ guards below are C-preprocessor conditionals, so
 *          this file has to be assembled through the C preprocessor.
 *
 * Contents:
 *   __mul_1x1_mmx    32x32 -> 64 bit carry-less multiply, result in %mm0
 *   __mul_1x1_ialu   32x32 -> 64 bit carry-less multiply, result in %edx:%eax
 *   _bn_GF2m_mul_2x2 64x64 -> 128 bit carry-less multiply (exported)
 */

.text

/*
 * __mul_1x1_mmx -- carry-less multiply of two 32-bit words (MMX flavour).
 *
 * In:    %eax = a, %ebx = b
 * Out:   %mm0 = 64-bit carry-less product a (x) b
 * Clobb: %ebx, %ecx, %edx, %ebp, %esi, %edi, %mm0-%mm5, flags
 *        (%eax is only read)
 * Stack: 36 bytes are reserved; offsets 0..28 hold an 8-entry table of
 *        32-bit words, tab[i] = (a & 0x3fffffff) (x) i  for i = 0..7.
 *
 * The two top bits of a do not fit the 32-bit table entries, so their
 * contribution is added separately: %mm5 = (a bit 31 ? b : 0) << 31 and
 * %mm4 = (a bit 30 ? b : 0) << 30.  b is then consumed in eleven 3-bit
 * windows; window k selects tab[] and the entry is shifted left by 3*k.
 * Integer and MMX instructions are interleaved; the order is kept as is.
 */
.align	4
__mul_1x1_mmx:
#ifdef __CET__

.byte	243,15,30,251			/* endbr32 */
#endif

	subl	$36,%esp
	movl	%eax,%ecx
	leal	(%eax,%eax,1),%edx		/* edx = a << 1 */
	andl	$0x3fffffff,%ecx		/* ecx = a1 = a with bits 31,30 cleared */
	leal	(%edx,%edx,1),%ebp		/* ebp = a4 = a << 2 (mod 2^32) */
	movl	$0,(%esp)			/* tab[0] = 0 */
	andl	$0x7fffffff,%edx		/* edx = a2 = (a << 1) with bit 31 cleared */
	movd	%eax,%mm2			/* mm2 = a */
	movd	%ebx,%mm3			/* mm3 = b */
	movl	%ecx,4(%esp)			/* tab[1] = a1 */
	xorl	%edx,%ecx			/* ecx = a1^a2 */
	pxor	%mm5,%mm5
	pxor	%mm4,%mm4
	movl	%edx,8(%esp)			/* tab[2] = a2 */
	xorl	%ebp,%edx			/* edx = a2^a4 */
	movl	%ecx,12(%esp)			/* tab[3] = a1^a2 */
	pcmpgtd	%mm2,%mm5			/* low dword of mm5 = all ones iff a bit 31 set */
	paddd	%mm2,%mm2			/* mm2 = a << 1 */
	xorl	%edx,%ecx			/* ecx = a1^a4 */
	movl	%ebp,16(%esp)			/* tab[4] = a4 */
	xorl	%edx,%ebp			/* ebp = a2 */
	pand	%mm3,%mm5			/* mm5 = a bit 31 ? b : 0 */
	pcmpgtd	%mm2,%mm4			/* low dword of mm4 = all ones iff a bit 30 set */
	movl	%ecx,20(%esp)			/* tab[5] = a1^a4 */
	xorl	%ecx,%ebp			/* ebp = a1^a2^a4 */
	psllq	$31,%mm5			/* mm5 = contribution of a bit 31 */
	pand	%mm3,%mm4			/* mm4 = a bit 30 ? b : 0 */
	movl	%edx,24(%esp)			/* tab[6] = a2^a4 */
	movl	$7,%esi
	movl	%ebp,28(%esp)			/* tab[7] = a1^a2^a4 */
	movl	%esi,%ebp			/* ebp = 7, the 3-bit window mask from here on */
	andl	%ebx,%esi			/* esi = window 0 of b */
	shrl	$3,%ebx
	movl	%ebp,%edi
	psllq	$30,%mm4			/* mm4 = contribution of a bit 30 */
	andl	%ebx,%edi			/* edi = window 1 */
	shrl	$3,%ebx
	movd	(%esp,%esi,4),%mm0		/* mm0 = tab[window 0] (accumulator) */
	movl	%ebp,%esi
	andl	%ebx,%esi			/* esi = window 2 */
	shrl	$3,%ebx
	movd	(%esp,%edi,4),%mm2		/* window 1 */
	movl	%ebp,%edi
	psllq	$3,%mm2
	andl	%ebx,%edi			/* edi = window 3 */
	shrl	$3,%ebx
	pxor	%mm2,%mm0
	movd	(%esp,%esi,4),%mm1		/* window 2 */
	movl	%ebp,%esi
	psllq	$6,%mm1
	andl	%ebx,%esi			/* esi = window 4 */
	shrl	$3,%ebx
	pxor	%mm1,%mm0
	movd	(%esp,%edi,4),%mm2		/* window 3 */
	movl	%ebp,%edi
	psllq	$9,%mm2
	andl	%ebx,%edi			/* edi = window 5 */
	shrl	$3,%ebx
	pxor	%mm2,%mm0
	movd	(%esp,%esi,4),%mm1		/* window 4 */
	movl	%ebp,%esi
	psllq	$12,%mm1
	andl	%ebx,%esi			/* esi = window 6 */
	shrl	$3,%ebx
	pxor	%mm1,%mm0
	movd	(%esp,%edi,4),%mm2		/* window 5 */
	movl	%ebp,%edi
	psllq	$15,%mm2
	andl	%ebx,%edi			/* edi = window 7 */
	shrl	$3,%ebx
	pxor	%mm2,%mm0
	movd	(%esp,%esi,4),%mm1		/* window 6 */
	movl	%ebp,%esi
	psllq	$18,%mm1
	andl	%ebx,%esi			/* esi = window 8 */
	shrl	$3,%ebx
	pxor	%mm1,%mm0
	movd	(%esp,%edi,4),%mm2		/* window 7 */
	movl	%ebp,%edi
	psllq	$21,%mm2
	andl	%ebx,%edi			/* edi = window 9 */
	shrl	$3,%ebx
	pxor	%mm2,%mm0
	movd	(%esp,%esi,4),%mm1		/* window 8 */
	movl	%ebp,%esi
	psllq	$24,%mm1
	andl	%ebx,%esi			/* esi = window 10 (only two bits of b remain) */
	shrl	$3,%ebx
	pxor	%mm1,%mm0
	movd	(%esp,%edi,4),%mm2		/* window 9 */
	pxor	%mm4,%mm0			/* fold in a bit 30 contribution */
	psllq	$27,%mm2
	pxor	%mm2,%mm0
	movd	(%esp,%esi,4),%mm1		/* window 10 */
	pxor	%mm5,%mm0			/* fold in a bit 31 contribution */
	psllq	$30,%mm1
	addl	$36,%esp
	pxor	%mm1,%mm0
	ret

/*
 * __mul_1x1_ialu -- carry-less multiply of two 32-bit words (integer only).
 *
 * In:    %eax = a, %ebx = b
 * Out:   %edx:%eax = 64-bit carry-less product (high:low)
 * Clobb: %ebx, %ecx, %ebp, %esi, %edi, flags
 * Stack: 36 bytes are reserved; offsets 0..28 hold the same 8-entry table
 *        as in __mul_1x1_mmx, tab[i] = (a & 0x3fffffff) (x) i.
 *
 * Same algorithm as the MMX flavour.  Each table entry is split into a
 * low part (shll by 3*k, XORed into %eax) and a high part (shrl by
 * 32-3*k, XORed into %edx).
 */
.align	4
__mul_1x1_ialu:
#ifdef __CET__

.byte	243,15,30,251			/* endbr32 */
#endif

	subl	$36,%esp
	movl	%eax,%ecx
	leal	(%eax,%eax,1),%edx		/* edx = a << 1 */
	leal	(,%eax,4),%ebp			/* ebp = a4 = a << 2 (mod 2^32) */
	andl	$0x3fffffff,%ecx		/* ecx = a1 = a with bits 31,30 cleared */
	leal	(%eax,%eax,1),%edi		/* edi = a << 1, so its sign bit is a bit 30 */
	sarl	$31,%eax			/* eax = all ones iff a bit 31 set */
	movl	$0,(%esp)			/* tab[0] = 0 */
	andl	$0x7fffffff,%edx		/* edx = a2 = (a << 1) with bit 31 cleared */
	movl	%ecx,4(%esp)			/* tab[1] = a1 */
	xorl	%edx,%ecx			/* ecx = a1^a2 */
	movl	%edx,8(%esp)			/* tab[2] = a2 */
	xorl	%ebp,%edx			/* edx = a2^a4 */
	movl	%ecx,12(%esp)			/* tab[3] = a1^a2 */
	xorl	%edx,%ecx			/* ecx = a1^a4 */
	movl	%ebp,16(%esp)			/* tab[4] = a4 */
	xorl	%edx,%ebp			/* ebp = a2 */
	movl	%ecx,20(%esp)			/* tab[5] = a1^a4 */
	xorl	%ecx,%ebp			/* ebp = a1^a2^a4 */
	sarl	$31,%edi			/* edi = all ones iff a bit 30 set */
	andl	%ebx,%eax			/* eax = a bit 31 ? b : 0 */
	movl	%edx,24(%esp)			/* tab[6] = a2^a4 */
	andl	%ebx,%edi			/* edi = a bit 30 ? b : 0 */
	movl	%ebp,28(%esp)			/* tab[7] = a1^a2^a4 */
	movl	%eax,%edx
	shll	$31,%eax			/* low  word of (b << 31) */
	movl	%edi,%ecx
	shrl	$1,%edx				/* high word of (b << 31) */
	movl	$7,%esi
	shll	$30,%edi			/* low  word of (b << 30) */
	andl	%ebx,%esi			/* esi = window 0 of b */
	shrl	$2,%ecx				/* high word of (b << 30) */
	xorl	%edi,%eax
	shrl	$3,%ebx
	movl	$7,%edi
	andl	%ebx,%edi			/* edi = window 1 */
	shrl	$3,%ebx
	xorl	%ecx,%edx
	xorl	(%esp,%esi,4),%eax		/* window 0: unshifted, low word only */
	movl	$7,%esi
	andl	%ebx,%esi			/* esi = window 2 */
	shrl	$3,%ebx
	movl	(%esp,%edi,4),%ebp		/* window 1 */
	movl	$7,%edi
	movl	%ebp,%ecx
	shll	$3,%ebp
	andl	%ebx,%edi			/* edi = window 3 */
	shrl	$29,%ecx
	xorl	%ebp,%eax
	shrl	$3,%ebx
	xorl	%ecx,%edx
	movl	(%esp,%esi,4),%ecx		/* window 2 */
	movl	$7,%esi
	movl	%ecx,%ebp
	shll	$6,%ecx
	andl	%ebx,%esi			/* esi = window 4 */
	shrl	$26,%ebp
	xorl	%ecx,%eax
	shrl	$3,%ebx
	xorl	%ebp,%edx
	movl	(%esp,%edi,4),%ebp		/* window 3 */
	movl	$7,%edi
	movl	%ebp,%ecx
	shll	$9,%ebp
	andl	%ebx,%edi			/* edi = window 5 */
	shrl	$23,%ecx
	xorl	%ebp,%eax
	shrl	$3,%ebx
	xorl	%ecx,%edx
	movl	(%esp,%esi,4),%ecx		/* window 4 */
	movl	$7,%esi
	movl	%ecx,%ebp
	shll	$12,%ecx
	andl	%ebx,%esi			/* esi = window 6 */
	shrl	$20,%ebp
	xorl	%ecx,%eax
	shrl	$3,%ebx
	xorl	%ebp,%edx
	movl	(%esp,%edi,4),%ebp		/* window 5 */
	movl	$7,%edi
	movl	%ebp,%ecx
	shll	$15,%ebp
	andl	%ebx,%edi			/* edi = window 7 */
	shrl	$17,%ecx
	xorl	%ebp,%eax
	shrl	$3,%ebx
	xorl	%ecx,%edx
	movl	(%esp,%esi,4),%ecx		/* window 6 */
	movl	$7,%esi
	movl	%ecx,%ebp
	shll	$18,%ecx
	andl	%ebx,%esi			/* esi = window 8 */
	shrl	$14,%ebp
	xorl	%ecx,%eax
	shrl	$3,%ebx
	xorl	%ebp,%edx
	movl	(%esp,%edi,4),%ebp		/* window 7 */
	movl	$7,%edi
	movl	%ebp,%ecx
	shll	$21,%ebp
	andl	%ebx,%edi			/* edi = window 9 */
	shrl	$11,%ecx
	xorl	%ebp,%eax
	shrl	$3,%ebx
	xorl	%ecx,%edx
	movl	(%esp,%esi,4),%ecx		/* window 8 */
	movl	$7,%esi
	movl	%ecx,%ebp
	shll	$24,%ecx
	andl	%ebx,%esi			/* esi = window 10 (only two bits of b remain) */
	shrl	$8,%ebp
	xorl	%ecx,%eax
	shrl	$3,%ebx
	xorl	%ebp,%edx
	movl	(%esp,%edi,4),%ebp		/* window 9 */
	movl	%ebp,%ecx
	shll	$27,%ebp
	movl	(%esp,%esi,4),%edi		/* window 10 */
	shrl	$5,%ecx
	movl	%edi,%esi
	xorl	%ebp,%eax
	shll	$30,%edi
	xorl	%ecx,%edx
	shrl	$2,%esi
	xorl	%edi,%eax
	xorl	%esi,%edx
	addl	$36,%esp
	ret

/*
 * _bn_GF2m_mul_2x2 -- 64x64 -> 128 bit carry-less multiplication.
 *
 * Stack arguments on entry (32-bit words):
 *    4(%esp)  r     pointer to four 32-bit result words, r[0] least significant
 *    8(%esp)  a_hi  high word of the first operand
 *   12(%esp)  a_lo  low  word of the first operand
 *   16(%esp)  b_hi  high word of the second operand
 *   20(%esp)  b_lo  low  word of the second operand
 * (hi/lo naming follows from where the partial products are stored below;
 *  NOTE(review): confirm against the C prototype used by the callers.)
 *
 * Dispatches on the first two 32-bit words of OPENSSL_ia32cap_P:
 *   word 0 bit 23 clear                 -> integer path   (L001ialu)
 *   word 0 bit 24 or word 1 bit 1 clear -> MMX path       (L002mmx)
 *   otherwise                           -> single pclmulqdq
 * NOTE(review): these bits presumably mirror CPUID leaf 1 (EDX.MMX,
 * EDX.FXSR, ECX.PCLMULQDQ) -- confirm against the OPENSSL_ia32cap docs.
 *
 * The MMX and integer paths use Karatsuba with three 32x32 multiplies:
 *   hi  = a_hi (x) b_hi
 *   lo  = a_lo (x) b_lo
 *   mid = (a_hi ^ a_lo) (x) (b_hi ^ b_lo) ^ hi ^ lo
 *   r   = (hi << 64) ^ (mid << 32) ^ lo
 *
 * The pclmulqdq path touches only %eax, %edx, %xmm0.  The other two paths
 * save and restore %ebp, %ebx, %esi, %edi; the MMX path ends with emms.
 */
.globl	_bn_GF2m_mul_2x2
.align	4
_bn_GF2m_mul_2x2:
L_bn_GF2m_mul_2x2_begin:
#ifdef __CET__

.byte	243,15,30,251			/* endbr32 */
#endif

	call	L000PIC_me_up			/* PIC: obtain own address ... */
L000PIC_me_up:
	popl	%edx				/* ... edx = address of L000PIC_me_up */
	movl	L_OPENSSL_ia32cap_P$non_lazy_ptr-L000PIC_me_up(%edx),%edx
	movl	(%edx),%eax			/* eax = capability word 0 */
	movl	4(%edx),%edx			/* edx = capability word 1 */
	testl	$0x00800000,%eax		/* word 0, bit 23 */
	jz	L001ialu
	testl	$0x01000000,%eax		/* word 0, bit 24 */
	jz	L002mmx
	testl	$2,%edx				/* word 1, bit 1 */
	jz	L002mmx
	movups	8(%esp),%xmm0			/* xmm0 dwords = a_hi, a_lo, b_hi, b_lo */
	shufps	$0xb1,%xmm0,%xmm0		/* swap within pairs: a_lo, a_hi, b_lo, b_hi */
.byte	102,15,58,68,192,1		/* pclmulqdq $1,%xmm0,%xmm0 : high qword (x) low qword */
	movl	4(%esp),%eax			/* eax = r */
	movups	%xmm0,(%eax)			/* store the 128-bit product */
	ret

.align	4,0x90
L002mmx:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi				/* 16 bytes pushed: r is now at 20(%esp) */
	movl	24(%esp),%eax			/* a_hi */
	movl	32(%esp),%ebx			/* b_hi */
	call	__mul_1x1_mmx
	movq	%mm0,%mm7			/* mm7 = hi */
	movl	28(%esp),%eax			/* a_lo */
	movl	36(%esp),%ebx			/* b_lo */
	call	__mul_1x1_mmx
	movq	%mm0,%mm6			/* mm6 = lo */
	movl	24(%esp),%eax
	movl	32(%esp),%ebx
	xorl	28(%esp),%eax			/* a_hi ^ a_lo */
	xorl	36(%esp),%ebx			/* b_hi ^ b_lo */
	call	__mul_1x1_mmx
	pxor	%mm7,%mm0
	movl	20(%esp),%eax			/* eax = r */
	pxor	%mm6,%mm0			/* mm0 = mid */
	movq	%mm0,%mm2
	psllq	$32,%mm0			/* low  half of (mid << 32) */
	popl	%edi
	psrlq	$32,%mm2			/* high half of (mid << 32) */
	popl	%esi
	pxor	%mm6,%mm0			/* low 64 bits of the result */
	popl	%ebx
	pxor	%mm7,%mm2			/* high 64 bits of the result */
	movq	%mm0,(%eax)
	popl	%ebp
	movq	%mm2,8(%eax)
	emms
	ret

.align	4,0x90
L001ialu:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	subl	$20,%esp			/* 36 bytes below entry %esp: r is now at 40(%esp) */
	movl	44(%esp),%eax			/* a_hi */
	movl	52(%esp),%ebx			/* b_hi */
	call	__mul_1x1_ialu
	movl	%eax,8(%esp)			/* hi, low word  */
	movl	%edx,12(%esp)			/* hi, high word */
	movl	48(%esp),%eax			/* a_lo */
	movl	56(%esp),%ebx			/* b_lo */
	call	__mul_1x1_ialu
	movl	%eax,(%esp)			/* lo, low word  */
	movl	%edx,4(%esp)			/* lo, high word */
	movl	44(%esp),%eax
	movl	52(%esp),%ebx
	xorl	48(%esp),%eax			/* a_hi ^ a_lo */
	xorl	56(%esp),%ebx			/* b_hi ^ b_lo */
	call	__mul_1x1_ialu			/* edx:eax = (a_hi^a_lo) (x) (b_hi^b_lo) */
	movl	40(%esp),%ebp			/* ebp = r */
	movl	(%esp),%ebx			/* ebx = lo.l */
	movl	4(%esp),%ecx			/* ecx = lo.h */
	movl	8(%esp),%edi			/* edi = hi.l */
	movl	12(%esp),%esi			/* esi = hi.h */
	xorl	%edx,%eax
	xorl	%ecx,%edx
	xorl	%ebx,%eax
	movl	%ebx,(%ebp)			/* r[0] = lo.l */
	xorl	%edi,%edx
	movl	%esi,12(%ebp)			/* r[3] = hi.h */
	xorl	%esi,%eax
	addl	$20,%esp
	xorl	%esi,%edx			/* edx = r[2] */
	popl	%edi
	xorl	%edx,%eax			/* eax = r[1] */
	popl	%esi
	movl	%edx,8(%ebp)
	popl	%ebx
	movl	%eax,4(%ebp)
	popl	%ebp
	ret

/* NUL-terminated ASCII: "GF(2^m) Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>" */
.byte	71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105
.byte	99,97,116,105,111,110,32,102,111,114,32,120,56,54,44,32
.byte	67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
.byte	112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
.byte	62,0

/* Non-lazy pointer through which _bn_GF2m_mul_2x2 reaches OPENSSL_ia32cap_P. */
.section __IMPORT,__pointers,non_lazy_symbol_pointers
L_OPENSSL_ia32cap_P$non_lazy_ptr:
.indirect_symbol	_OPENSSL_ia32cap_P
.long	0
.comm	_OPENSSL_ia32cap_P,16,2