Lines Matching refs:__m128i
87 const __m128i *rk = (const __m128i *) (ctx->buf + ctx->rk_offset);
91 __m128i state;
127 static void gcm_clmul(const __m128i aa, const __m128i bb,
128 __m128i *cc, __m128i *dd)
136 __m128i ee = _mm_clmulepi64_si128(aa, bb, 0x10); // a0*b1 = e1:e0
137 __m128i ff = _mm_clmulepi64_si128(aa, bb, 0x01); // a1*b0 = f1:f0
146 static void gcm_shift(__m128i *cc, __m128i *dd)
152 __m128i cc_lo = _mm_slli_epi64(*cc, 1); // r1<<1:r0<<1
153 __m128i dd_lo = _mm_slli_epi64(*dd, 1); // r3<<1:r2<<1
154 __m128i cc_hi = _mm_srli_epi64(*cc, 63); // r1>>63:r0>>63
155 __m128i dd_hi = _mm_srli_epi64(*dd, 63); // r3>>63:r2>>63
156 __m128i xmm5 = _mm_srli_si128(cc_hi, 8); // 0:r1>>63
164 static __m128i gcm_reduce(__m128i xx)
168 __m128i aa = _mm_slli_epi64(xx, 63); // x1<<63:x0<<63 = stuff:a
169 __m128i bb = _mm_slli_epi64(xx, 62); // x1<<62:x0<<62 = stuff:b
170 __m128i cc = _mm_slli_epi64(xx, 57); // x1<<57:x0<<57 = stuff:c
171 __m128i dd = _mm_slli_si128(_mm_xor_si128(_mm_xor_si128(aa, bb), cc), 8); // a+b+c:0
175 static __m128i gcm_mix(__m128i dx)
178 __m128i ee = _mm_srli_epi64(dx, 1); // e1:x0>>1 = e1:e0'
179 __m128i ff = _mm_srli_epi64(dx, 2); // f1:x0>>2 = f1:f0'
180 __m128i gg = _mm_srli_epi64(dx, 7); // g1:x0>>7 = g1:g0'
184 __m128i eh = _mm_slli_epi64(dx, 63); // d<<63:stuff
185 __m128i fh = _mm_slli_epi64(dx, 62); // d<<62:stuff
186 __m128i gh = _mm_slli_epi64(dx, 57); // d<<57:stuff
187 __m128i hh = _mm_srli_si128(_mm_xor_si128(_mm_xor_si128(eh, fh), gh), 8); // 0:missing bits of d
196 __m128i aa = { 0 }, bb = { 0 }, cc, dd;
211 __m128i dx = gcm_reduce(cc);
212 __m128i xh = gcm_mix(dx);
230 __m128i *ik = (__m128i *) invkey;
231 const __m128i *fk = (const __m128i *) fwdkey + nr;
234 for (--fk, ++ik; fk > (const __m128i *) fwdkey; --fk, ++ik) {
244 static __m128i aesni_set_rk_128(__m128i state, __m128i xword)
270 __m128i *rk = (__m128i *) rk_bytes;
289 static void aesni_set_rk_192(__m128i *state0, __m128i *state1, __m128i xword,
331 __m128i state0 = ((__m128i *) rk)[0];
332 __m128i state1 = _mm_loadl_epi64(((__m128i *) rk) + 1);
349 static void aesni_set_rk_256(__m128i state0, __m128i state1, __m128i xword,
350 __m128i *rk0, __m128i *rk1)
388 __m128i *rk = (__m128i *) rk_bytes;