/*
   BLAKE2 reference source code package - optimized C implementations

   Written in 2012 by Samuel Neves <sneves@dei.uc.pt>

   To the extent possible under law, the author(s) have dedicated all copyright
   and related and neighboring rights to this software to the public domain
   worldwide. This software is distributed without any warranty.

   You should have received a copy of the CC0 Public Domain Dedication along with
   this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
137db96d56Sopenharmony_ci#pragma once
147db96d56Sopenharmony_ci#ifndef __BLAKE2B_LOAD_SSE41_H__
157db96d56Sopenharmony_ci#define __BLAKE2B_LOAD_SSE41_H__
167db96d56Sopenharmony_ci
/*
 * LOAD_MSG_0_1(b0, b1): assign two 128-bit vectors built from m0..m3.
 *   b0 = { lo(m0), lo(m1) }
 *   b1 = { lo(m2), lo(m3) }
 * lo()/hi() are the low/high 64-bit lanes; pairs are listed low lane first.
 * m0..m3 are not arguments: they must be __m128i values in scope at the
 * expansion site.  b0 and b1 must be assignable.
 */
#define LOAD_MSG_0_1(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m0, m1); \
        (b1) = _mm_unpacklo_epi64(m2, m3); \
    } while (0)
237db96d56Sopenharmony_ci
247db96d56Sopenharmony_ci
/*
 * LOAD_MSG_0_2(b0, b1): assign two 128-bit vectors built from m0..m3.
 *   b0 = { hi(m0), hi(m1) }
 *   b1 = { hi(m2), hi(m3) }
 * lo()/hi() are the low/high 64-bit lanes; pairs are listed low lane first.
 * m0..m3 are not arguments: they must be __m128i values in scope at the
 * expansion site.  b0 and b1 must be assignable.
 */
#define LOAD_MSG_0_2(b0, b1) \
    do { \
        (b0) = _mm_unpackhi_epi64(m0, m1); \
        (b1) = _mm_unpackhi_epi64(m2, m3); \
    } while (0)
317db96d56Sopenharmony_ci
327db96d56Sopenharmony_ci
/*
 * LOAD_MSG_0_3(b0, b1): assign two 128-bit vectors built from m4..m7.
 *   b0 = { lo(m4), lo(m5) }
 *   b1 = { lo(m6), lo(m7) }
 * lo()/hi() are the low/high 64-bit lanes; pairs are listed low lane first.
 * m4..m7 are not arguments: they must be __m128i values in scope at the
 * expansion site.  b0 and b1 must be assignable.
 */
#define LOAD_MSG_0_3(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m4, m5); \
        (b1) = _mm_unpacklo_epi64(m6, m7); \
    } while (0)
397db96d56Sopenharmony_ci
407db96d56Sopenharmony_ci
/*
 * LOAD_MSG_0_4(b0, b1): assign two 128-bit vectors built from m4..m7.
 *   b0 = { hi(m4), hi(m5) }
 *   b1 = { hi(m6), hi(m7) }
 * lo()/hi() are the low/high 64-bit lanes; pairs are listed low lane first.
 * m4..m7 are not arguments: they must be __m128i values in scope at the
 * expansion site.  b0 and b1 must be assignable.
 */
#define LOAD_MSG_0_4(b0, b1) \
    do { \
        (b0) = _mm_unpackhi_epi64(m4, m5); \
        (b1) = _mm_unpackhi_epi64(m6, m7); \
    } while (0)
477db96d56Sopenharmony_ci
487db96d56Sopenharmony_ci
/*
 * LOAD_MSG_1_1(b0, b1): assign two 128-bit vectors built from m2, m4, m6, m7.
 *   b0 = { lo(m7), lo(m2) }
 *   b1 = { hi(m4), hi(m6) }
 * lo()/hi() are the low/high 64-bit lanes; pairs are listed low lane first.
 * The m* names are not arguments: they must be __m128i values in scope at
 * the expansion site.  b0 and b1 must be assignable.
 */
#define LOAD_MSG_1_1(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m7, m2); \
        (b1) = _mm_unpackhi_epi64(m4, m6); \
    } while (0)
557db96d56Sopenharmony_ci
567db96d56Sopenharmony_ci
/*
 * LOAD_MSG_1_2(b0, b1): assign two 128-bit vectors built from m3, m4, m5, m7.
 *   b0 = { lo(m5), lo(m4) }
 *   b1 = { hi(m7), lo(m3) }   (8-byte alignr of m3:m7)
 * lo()/hi() are the low/high 64-bit lanes; pairs are listed low lane first.
 * The m* names are not arguments: they must be __m128i values in scope at
 * the expansion site.  b0 and b1 must be assignable.
 */
#define LOAD_MSG_1_2(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m5, m4); \
        (b1) = _mm_alignr_epi8(m3, m7, 8); \
    } while (0)
637db96d56Sopenharmony_ci
647db96d56Sopenharmony_ci
/*
 * LOAD_MSG_1_3(b0, b1): assign two 128-bit vectors built from m0, m2, m5.
 *   b0 = { hi(m0), lo(m0) }   (m0 with its 64-bit lanes swapped)
 *   b1 = { hi(m5), hi(m2) }
 * lo()/hi() are the low/high 64-bit lanes; pairs are listed low lane first.
 * The m* names are not arguments: they must be __m128i values in scope at
 * the expansion site.  b0 and b1 must be assignable.
 */
#define LOAD_MSG_1_3(b0, b1) \
    do { \
        (b0) = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1, 0, 3, 2)); \
        (b1) = _mm_unpackhi_epi64(m5, m2); \
    } while (0)
717db96d56Sopenharmony_ci
727db96d56Sopenharmony_ci
/*
 * LOAD_MSG_1_4(b0, b1): assign two 128-bit vectors built from m1, m3, m6.
 *   b0 = { lo(m6), lo(m1) }
 *   b1 = { hi(m3), hi(m1) }
 * lo()/hi() are the low/high 64-bit lanes; pairs are listed low lane first.
 * The m* names are not arguments: they must be __m128i values in scope at
 * the expansion site.  b0 and b1 must be assignable.
 */
#define LOAD_MSG_1_4(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m6, m1); \
        (b1) = _mm_unpackhi_epi64(m3, m1); \
    } while (0)
797db96d56Sopenharmony_ci
807db96d56Sopenharmony_ci
/*
 * LOAD_MSG_2_1(b0, b1): assign two 128-bit vectors built from m2, m5, m6, m7.
 *   b0 = { hi(m5), lo(m6) }   (8-byte alignr of m6:m5)
 *   b1 = { hi(m2), hi(m7) }
 * lo()/hi() are the low/high 64-bit lanes; pairs are listed low lane first.
 * The m* names are not arguments: they must be __m128i values in scope at
 * the expansion site.  b0 and b1 must be assignable.
 */
#define LOAD_MSG_2_1(b0, b1) \
    do { \
        (b0) = _mm_alignr_epi8(m6, m5, 8); \
        (b1) = _mm_unpackhi_epi64(m2, m7); \
    } while (0)
877db96d56Sopenharmony_ci
887db96d56Sopenharmony_ci
/*
 * LOAD_MSG_2_2(b0, b1): assign two 128-bit vectors built from m0, m1, m4, m6.
 *   b0 = { lo(m4), lo(m0) }
 *   b1 = { lo(m1), hi(m6) }   (blend mask 0xF0: upper four words from m6)
 * lo()/hi() are the low/high 64-bit lanes; pairs are listed low lane first.
 * The m* names are not arguments: they must be __m128i values in scope at
 * the expansion site.  b0 and b1 must be assignable.
 */
#define LOAD_MSG_2_2(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m4, m0); \
        (b1) = _mm_blend_epi16(m1, m6, 0xF0); \
    } while (0)
957db96d56Sopenharmony_ci
967db96d56Sopenharmony_ci
/*
 * LOAD_MSG_2_3(b0, b1): assign two 128-bit vectors built from m1, m3, m4, m5.
 *   b0 = { lo(m5), hi(m1) }   (blend mask 0xF0: upper four words from m1)
 *   b1 = { hi(m3), hi(m4) }
 * lo()/hi() are the low/high 64-bit lanes; pairs are listed low lane first.
 * The m* names are not arguments: they must be __m128i values in scope at
 * the expansion site.  b0 and b1 must be assignable.
 */
#define LOAD_MSG_2_3(b0, b1) \
    do { \
        (b0) = _mm_blend_epi16(m5, m1, 0xF0); \
        (b1) = _mm_unpackhi_epi64(m3, m4); \
    } while (0)
1037db96d56Sopenharmony_ci
1047db96d56Sopenharmony_ci
/*
 * LOAD_MSG_2_4(b0, b1): assign two 128-bit vectors built from m0, m2, m3, m7.
 *   b0 = { lo(m7), lo(m3) }
 *   b1 = { hi(m0), lo(m2) }   (8-byte alignr of m2:m0)
 * lo()/hi() are the low/high 64-bit lanes; pairs are listed low lane first.
 * The m* names are not arguments: they must be __m128i values in scope at
 * the expansion site.  b0 and b1 must be assignable.
 */
#define LOAD_MSG_2_4(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m7, m3); \
        (b1) = _mm_alignr_epi8(m2, m0, 8); \
    } while (0)
1117db96d56Sopenharmony_ci
1127db96d56Sopenharmony_ci
/*
 * LOAD_MSG_3_1(b0, b1): assign two 128-bit vectors built from m1, m3, m5, m6.
 *   b0 = { hi(m3), hi(m1) }
 *   b1 = { hi(m6), hi(m5) }
 * lo()/hi() are the low/high 64-bit lanes; pairs are listed low lane first.
 * The m* names are not arguments: they must be __m128i values in scope at
 * the expansion site.  b0 and b1 must be assignable.
 */
#define LOAD_MSG_3_1(b0, b1) \
    do { \
        (b0) = _mm_unpackhi_epi64(m3, m1); \
        (b1) = _mm_unpackhi_epi64(m6, m5); \
    } while (0)
1197db96d56Sopenharmony_ci
1207db96d56Sopenharmony_ci
/*
 * LOAD_MSG_3_2(b0, b1): assign two 128-bit vectors built from m0, m4, m6, m7.
 *   b0 = { hi(m4), hi(m0) }
 *   b1 = { lo(m6), lo(m7) }
 * lo()/hi() are the low/high 64-bit lanes; pairs are listed low lane first.
 * The m* names are not arguments: they must be __m128i values in scope at
 * the expansion site.  b0 and b1 must be assignable.
 */
#define LOAD_MSG_3_2(b0, b1) \
    do { \
        (b0) = _mm_unpackhi_epi64(m4, m0); \
        (b1) = _mm_unpacklo_epi64(m6, m7); \
    } while (0)
1277db96d56Sopenharmony_ci
1287db96d56Sopenharmony_ci
/*
 * LOAD_MSG_3_3(b0, b1): assign two 128-bit vectors built from m1, m2, m7.
 *   b0 = { lo(m1), hi(m2) }
 *   b1 = { lo(m2), hi(m7) }
 * Blend mask 0xF0 takes the upper four 16-bit words from the second operand.
 * lo()/hi() are the low/high 64-bit lanes; pairs are listed low lane first.
 * The m* names are not arguments: they must be __m128i values in scope at
 * the expansion site.  b0 and b1 must be assignable.
 */
#define LOAD_MSG_3_3(b0, b1) \
    do { \
        (b0) = _mm_blend_epi16(m1, m2, 0xF0); \
        (b1) = _mm_blend_epi16(m2, m7, 0xF0); \
    } while (0)
1357db96d56Sopenharmony_ci
1367db96d56Sopenharmony_ci
/*
 * LOAD_MSG_3_4(b0, b1): assign two 128-bit vectors built from m0, m3, m4, m5.
 *   b0 = { lo(m3), lo(m5) }
 *   b1 = { lo(m0), lo(m4) }
 * lo()/hi() are the low/high 64-bit lanes; pairs are listed low lane first.
 * The m* names are not arguments: they must be __m128i values in scope at
 * the expansion site.  b0 and b1 must be assignable.
 */
#define LOAD_MSG_3_4(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m3, m5); \
        (b1) = _mm_unpacklo_epi64(m0, m4); \
    } while (0)
1437db96d56Sopenharmony_ci
1447db96d56Sopenharmony_ci
/*
 * LOAD_MSG_4_1(b0, b1): assign two 128-bit vectors built from m1, m2, m4, m5.
 *   b0 = { hi(m4), hi(m2) }
 *   b1 = { lo(m1), lo(m5) }
 * lo()/hi() are the low/high 64-bit lanes; pairs are listed low lane first.
 * The m* names are not arguments: they must be __m128i values in scope at
 * the expansion site.  b0 and b1 must be assignable.
 */
#define LOAD_MSG_4_1(b0, b1) \
    do { \
        (b0) = _mm_unpackhi_epi64(m4, m2); \
        (b1) = _mm_unpacklo_epi64(m1, m5); \
    } while (0)
1517db96d56Sopenharmony_ci
1527db96d56Sopenharmony_ci
/*
 * LOAD_MSG_4_2(b0, b1): assign two 128-bit vectors built from m0, m2, m3, m7.
 *   b0 = { lo(m0), hi(m3) }
 *   b1 = { lo(m2), hi(m7) }
 * Blend mask 0xF0 takes the upper four 16-bit words from the second operand.
 * lo()/hi() are the low/high 64-bit lanes; pairs are listed low lane first.
 * The m* names are not arguments: they must be __m128i values in scope at
 * the expansion site.  b0 and b1 must be assignable.
 */
#define LOAD_MSG_4_2(b0, b1) \
    do { \
        (b0) = _mm_blend_epi16(m0, m3, 0xF0); \
        (b1) = _mm_blend_epi16(m2, m7, 0xF0); \
    } while (0)
1597db96d56Sopenharmony_ci
1607db96d56Sopenharmony_ci
/*
 * LOAD_MSG_4_3(b0, b1): assign two 128-bit vectors built from m1, m3, m5, m7.
 *   b0 = { lo(m7), hi(m5) }
 *   b1 = { lo(m3), hi(m1) }
 * Blend mask 0xF0 takes the upper four 16-bit words from the second operand.
 * lo()/hi() are the low/high 64-bit lanes; pairs are listed low lane first.
 * The m* names are not arguments: they must be __m128i values in scope at
 * the expansion site.  b0 and b1 must be assignable.
 */
#define LOAD_MSG_4_3(b0, b1) \
    do { \
        (b0) = _mm_blend_epi16(m7, m5, 0xF0); \
        (b1) = _mm_blend_epi16(m3, m1, 0xF0); \
    } while (0)
1677db96d56Sopenharmony_ci
1687db96d56Sopenharmony_ci
/*
 * LOAD_MSG_4_4(b0, b1): assign two 128-bit vectors built from m0, m4, m6.
 *   b0 = { hi(m0), lo(m6) }   (8-byte alignr of m6:m0)
 *   b1 = { lo(m4), hi(m6) }   (blend mask 0xF0: upper four words from m6)
 * lo()/hi() are the low/high 64-bit lanes; pairs are listed low lane first.
 * The m* names are not arguments: they must be __m128i values in scope at
 * the expansion site.  b0 and b1 must be assignable.
 */
#define LOAD_MSG_4_4(b0, b1) \
    do { \
        (b0) = _mm_alignr_epi8(m6, m0, 8); \
        (b1) = _mm_blend_epi16(m4, m6, 0xF0); \
    } while (0)
1757db96d56Sopenharmony_ci
1767db96d56Sopenharmony_ci
/*
 * LOAD_MSG_5_1(b0, b1): assign two 128-bit vectors built from m0, m1, m3, m4.
 *   b0 = { lo(m1), lo(m3) }
 *   b1 = { lo(m0), lo(m4) }
 * lo()/hi() are the low/high 64-bit lanes; pairs are listed low lane first.
 * The m* names are not arguments: they must be __m128i values in scope at
 * the expansion site.  b0 and b1 must be assignable.
 */
#define LOAD_MSG_5_1(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m1, m3); \
        (b1) = _mm_unpacklo_epi64(m0, m4); \
    } while (0)
1837db96d56Sopenharmony_ci
1847db96d56Sopenharmony_ci
/*
 * LOAD_MSG_5_2(b0, b1): assign two 128-bit vectors built from m1, m5, m6.
 *   b0 = { lo(m6), lo(m5) }
 *   b1 = { hi(m5), hi(m1) }
 * lo()/hi() are the low/high 64-bit lanes; pairs are listed low lane first.
 * The m* names are not arguments: they must be __m128i values in scope at
 * the expansion site.  b0 and b1 must be assignable.
 */
#define LOAD_MSG_5_2(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m6, m5); \
        (b1) = _mm_unpackhi_epi64(m5, m1); \
    } while (0)
1917db96d56Sopenharmony_ci
1927db96d56Sopenharmony_ci
/*
 * LOAD_MSG_5_3(b0, b1): assign two 128-bit vectors built from m0, m2, m3, m7.
 *   b0 = { lo(m2), hi(m3) }   (blend mask 0xF0: upper four words from m3)
 *   b1 = { hi(m7), hi(m0) }
 * lo()/hi() are the low/high 64-bit lanes; pairs are listed low lane first.
 * The m* names are not arguments: they must be __m128i values in scope at
 * the expansion site.  b0 and b1 must be assignable.
 */
#define LOAD_MSG_5_3(b0, b1) \
    do { \
        (b0) = _mm_blend_epi16(m2, m3, 0xF0); \
        (b1) = _mm_unpackhi_epi64(m7, m0); \
    } while (0)
1997db96d56Sopenharmony_ci
2007db96d56Sopenharmony_ci
/*
 * LOAD_MSG_5_4(b0, b1): assign two 128-bit vectors built from m2, m4, m6, m7.
 *   b0 = { hi(m6), hi(m2) }
 *   b1 = { lo(m7), hi(m4) }   (blend mask 0xF0: upper four words from m4)
 * lo()/hi() are the low/high 64-bit lanes; pairs are listed low lane first.
 * The m* names are not arguments: they must be __m128i values in scope at
 * the expansion site.  b0 and b1 must be assignable.
 */
#define LOAD_MSG_5_4(b0, b1) \
    do { \
        (b0) = _mm_unpackhi_epi64(m6, m2); \
        (b1) = _mm_blend_epi16(m7, m4, 0xF0); \
    } while (0)
2077db96d56Sopenharmony_ci
2087db96d56Sopenharmony_ci
/*
 * LOAD_MSG_6_1(b0, b1): assign two 128-bit vectors built from m0, m2, m6, m7.
 *   b0 = { lo(m6), hi(m0) }   (blend mask 0xF0: upper four words from m0)
 *   b1 = { lo(m7), lo(m2) }
 * lo()/hi() are the low/high 64-bit lanes; pairs are listed low lane first.
 * The m* names are not arguments: they must be __m128i values in scope at
 * the expansion site.  b0 and b1 must be assignable.
 */
#define LOAD_MSG_6_1(b0, b1) \
    do { \
        (b0) = _mm_blend_epi16(m6, m0, 0xF0); \
        (b1) = _mm_unpacklo_epi64(m7, m2); \
    } while (0)
2157db96d56Sopenharmony_ci
2167db96d56Sopenharmony_ci
/*
 * LOAD_MSG_6_2(b0, b1): assign two 128-bit vectors built from m2, m5, m6, m7.
 *   b0 = { hi(m2), hi(m7) }
 *   b1 = { hi(m6), lo(m5) }   (8-byte alignr of m5:m6)
 * lo()/hi() are the low/high 64-bit lanes; pairs are listed low lane first.
 * The m* names are not arguments: they must be __m128i values in scope at
 * the expansion site.  b0 and b1 must be assignable.
 */
#define LOAD_MSG_6_2(b0, b1) \
    do { \
        (b0) = _mm_unpackhi_epi64(m2, m7); \
        (b1) = _mm_alignr_epi8(m5, m6, 8); \
    } while (0)
2237db96d56Sopenharmony_ci
2247db96d56Sopenharmony_ci
/*
 * LOAD_MSG_6_3(b0, b1): assign two 128-bit vectors built from m0, m3, m4.
 *   b0 = { lo(m0), lo(m3) }
 *   b1 = { hi(m4), lo(m4) }   (m4 with its 64-bit lanes swapped)
 * lo()/hi() are the low/high 64-bit lanes; pairs are listed low lane first.
 * The m* names are not arguments: they must be __m128i values in scope at
 * the expansion site.  b0 and b1 must be assignable.
 */
#define LOAD_MSG_6_3(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m0, m3); \
        (b1) = _mm_shuffle_epi32(m4, _MM_SHUFFLE(1, 0, 3, 2)); \
    } while (0)
2317db96d56Sopenharmony_ci
2327db96d56Sopenharmony_ci
/*
 * LOAD_MSG_6_4(b0, b1): assign two 128-bit vectors built from m1, m3, m5.
 *   b0 = { hi(m3), hi(m1) }
 *   b1 = { lo(m1), hi(m5) }   (blend mask 0xF0: upper four words from m5)
 * lo()/hi() are the low/high 64-bit lanes; pairs are listed low lane first.
 * The m* names are not arguments: they must be __m128i values in scope at
 * the expansion site.  b0 and b1 must be assignable.
 */
#define LOAD_MSG_6_4(b0, b1) \
    do { \
        (b0) = _mm_unpackhi_epi64(m3, m1); \
        (b1) = _mm_blend_epi16(m1, m5, 0xF0); \
    } while (0)
2397db96d56Sopenharmony_ci
2407db96d56Sopenharmony_ci
/*
 * LOAD_MSG_7_1(b0, b1): assign two 128-bit vectors built from m1, m3, m6.
 *   b0 = { hi(m6), hi(m3) }
 *   b1 = { lo(m6), hi(m1) }   (blend mask 0xF0: upper four words from m1)
 * lo()/hi() are the low/high 64-bit lanes; pairs are listed low lane first.
 * The m* names are not arguments: they must be __m128i values in scope at
 * the expansion site.  b0 and b1 must be assignable.
 */
#define LOAD_MSG_7_1(b0, b1) \
    do { \
        (b0) = _mm_unpackhi_epi64(m6, m3); \
        (b1) = _mm_blend_epi16(m6, m1, 0xF0); \
    } while (0)
2477db96d56Sopenharmony_ci
2487db96d56Sopenharmony_ci
/*
 * LOAD_MSG_7_2(b0, b1): assign two 128-bit vectors built from m0, m4, m5, m7.
 *   b0 = { hi(m5), lo(m7) }   (8-byte alignr of m7:m5)
 *   b1 = { hi(m0), hi(m4) }
 * lo()/hi() are the low/high 64-bit lanes; pairs are listed low lane first.
 * The m* names are not arguments: they must be __m128i values in scope at
 * the expansion site.  b0 and b1 must be assignable.
 */
#define LOAD_MSG_7_2(b0, b1) \
    do { \
        (b0) = _mm_alignr_epi8(m7, m5, 8); \
        (b1) = _mm_unpackhi_epi64(m0, m4); \
    } while (0)
2557db96d56Sopenharmony_ci
2567db96d56Sopenharmony_ci
/*
 * LOAD_MSG_7_3(b0, b1): assign two 128-bit vectors built from m1, m2, m4, m7.
 *   b0 = { hi(m2), hi(m7) }
 *   b1 = { lo(m4), lo(m1) }
 * lo()/hi() are the low/high 64-bit lanes; pairs are listed low lane first.
 * The m* names are not arguments: they must be __m128i values in scope at
 * the expansion site.  b0 and b1 must be assignable.
 */
#define LOAD_MSG_7_3(b0, b1) \
    do { \
        (b0) = _mm_unpackhi_epi64(m2, m7); \
        (b1) = _mm_unpacklo_epi64(m4, m1); \
    } while (0)
2637db96d56Sopenharmony_ci
2647db96d56Sopenharmony_ci
/*
 * LOAD_MSG_7_4(b0, b1): assign two 128-bit vectors built from m0, m2, m3, m5.
 *   b0 = { lo(m0), lo(m2) }
 *   b1 = { lo(m3), lo(m5) }
 * lo()/hi() are the low/high 64-bit lanes; pairs are listed low lane first.
 * The m* names are not arguments: they must be __m128i values in scope at
 * the expansion site.  b0 and b1 must be assignable.
 */
#define LOAD_MSG_7_4(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m0, m2); \
        (b1) = _mm_unpacklo_epi64(m3, m5); \
    } while (0)
2717db96d56Sopenharmony_ci
2727db96d56Sopenharmony_ci
/*
 * LOAD_MSG_8_1(b0, b1): assign two 128-bit vectors built from m0, m3, m5, m7.
 *   b0 = { lo(m3), lo(m7) }
 *   b1 = { hi(m5), lo(m0) }   (8-byte alignr of m0:m5)
 * lo()/hi() are the low/high 64-bit lanes; pairs are listed low lane first.
 * The m* names are not arguments: they must be __m128i values in scope at
 * the expansion site.  b0 and b1 must be assignable.
 */
#define LOAD_MSG_8_1(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m3, m7); \
        (b1) = _mm_alignr_epi8(m0, m5, 8); \
    } while (0)
2797db96d56Sopenharmony_ci
2807db96d56Sopenharmony_ci
/*
 * LOAD_MSG_8_2(b0, b1): assign two 128-bit vectors built from m1, m4, m7.
 *   b0 = { hi(m7), hi(m4) }
 *   b1 = { hi(m1), lo(m4) }   (8-byte alignr of m4:m1)
 * lo()/hi() are the low/high 64-bit lanes; pairs are listed low lane first.
 * The m* names are not arguments: they must be __m128i values in scope at
 * the expansion site.  b0 and b1 must be assignable.
 */
#define LOAD_MSG_8_2(b0, b1) \
    do { \
        (b0) = _mm_unpackhi_epi64(m7, m4); \
        (b1) = _mm_alignr_epi8(m4, m1, 8); \
    } while (0)
2877db96d56Sopenharmony_ci
2887db96d56Sopenharmony_ci
/*
 * LOAD_MSG_8_3(b0, b1): assign two 128-bit vectors built from m0, m5, m6.
 *   b0 = m6, copied unchanged
 *   b1 = { hi(m0), lo(m5) }   (8-byte alignr of m5:m0)
 * lo()/hi() are the low/high 64-bit lanes; pairs are listed low lane first.
 * The m* names are not arguments: they must be __m128i values in scope at
 * the expansion site.  b0 and b1 must be assignable.
 */
#define LOAD_MSG_8_3(b0, b1) \
    do { \
        (b0) = m6; \
        (b1) = _mm_alignr_epi8(m5, m0, 8); \
    } while (0)
2957db96d56Sopenharmony_ci
2967db96d56Sopenharmony_ci
/*
 * LOAD_MSG_8_4(b0, b1): assign two 128-bit vectors built from m1, m2, m3.
 *   b0 = { lo(m1), hi(m3) }   (blend mask 0xF0: upper four words from m3)
 *   b1 = m2, copied unchanged
 * lo()/hi() are the low/high 64-bit lanes; pairs are listed low lane first.
 * The m* names are not arguments: they must be __m128i values in scope at
 * the expansion site.  b0 and b1 must be assignable.
 */
#define LOAD_MSG_8_4(b0, b1) \
    do { \
        (b0) = _mm_blend_epi16(m1, m3, 0xF0); \
        (b1) = m2; \
    } while (0)
3037db96d56Sopenharmony_ci
3047db96d56Sopenharmony_ci
/*
 * LOAD_MSG_9_1(b0, b1): assign two 128-bit vectors built from m0, m3, m4, m5.
 *   b0 = { lo(m5), lo(m4) }
 *   b1 = { hi(m3), hi(m0) }
 * lo()/hi() are the low/high 64-bit lanes; pairs are listed low lane first.
 * The m* names are not arguments: they must be __m128i values in scope at
 * the expansion site.  b0 and b1 must be assignable.
 */
#define LOAD_MSG_9_1(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m5, m4); \
        (b1) = _mm_unpackhi_epi64(m3, m0); \
    } while (0)
3117db96d56Sopenharmony_ci
3127db96d56Sopenharmony_ci
/*
 * LOAD_MSG_9_2(b0, b1): assign two 128-bit vectors built from m1, m2, m3.
 *   b0 = { lo(m1), lo(m2) }
 *   b1 = { lo(m3), hi(m2) }   (blend mask 0xF0: upper four words from m2)
 * lo()/hi() are the low/high 64-bit lanes; pairs are listed low lane first.
 * The m* names are not arguments: they must be __m128i values in scope at
 * the expansion site.  b0 and b1 must be assignable.
 */
#define LOAD_MSG_9_2(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m1, m2); \
        (b1) = _mm_blend_epi16(m3, m2, 0xF0); \
    } while (0)
3197db96d56Sopenharmony_ci
3207db96d56Sopenharmony_ci
/*
 * LOAD_MSG_9_3(b0, b1): assign two 128-bit vectors built from m1, m4, m6, m7.
 *   b0 = { hi(m7), hi(m4) }
 *   b1 = { hi(m1), hi(m6) }
 * lo()/hi() are the low/high 64-bit lanes; pairs are listed low lane first.
 * The m* names are not arguments: they must be __m128i values in scope at
 * the expansion site.  b0 and b1 must be assignable.
 */
#define LOAD_MSG_9_3(b0, b1) \
    do { \
        (b0) = _mm_unpackhi_epi64(m7, m4); \
        (b1) = _mm_unpackhi_epi64(m1, m6); \
    } while (0)
3277db96d56Sopenharmony_ci
3287db96d56Sopenharmony_ci
/*
 * LOAD_MSG_9_4(b0, b1): assign two 128-bit vectors built from m0, m5, m6, m7.
 *   b0 = { hi(m5), lo(m7) }   (8-byte alignr of m7:m5)
 *   b1 = { lo(m6), lo(m0) }
 * lo()/hi() are the low/high 64-bit lanes; pairs are listed low lane first.
 * The m* names are not arguments: they must be __m128i values in scope at
 * the expansion site.  b0 and b1 must be assignable.
 */
#define LOAD_MSG_9_4(b0, b1) \
    do { \
        (b0) = _mm_alignr_epi8(m7, m5, 8); \
        (b1) = _mm_unpacklo_epi64(m6, m0); \
    } while (0)
3357db96d56Sopenharmony_ci
3367db96d56Sopenharmony_ci
/*
 * LOAD_MSG_10_1(b0, b1): assign two 128-bit vectors built from m0..m3.
 *   b0 = { lo(m0), lo(m1) }
 *   b1 = { lo(m2), lo(m3) }
 * Performs the same selection as LOAD_MSG_0_1.
 * lo()/hi() are the low/high 64-bit lanes; pairs are listed low lane first.
 * m0..m3 are not arguments: they must be __m128i values in scope at the
 * expansion site.  b0 and b1 must be assignable.
 */
#define LOAD_MSG_10_1(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m0, m1); \
        (b1) = _mm_unpacklo_epi64(m2, m3); \
    } while (0)
3437db96d56Sopenharmony_ci
3447db96d56Sopenharmony_ci
/*
 * LOAD_MSG_10_2(b0, b1): assign two 128-bit vectors built from m0..m3.
 *   b0 = { hi(m0), hi(m1) }
 *   b1 = { hi(m2), hi(m3) }
 * Performs the same selection as LOAD_MSG_0_2.
 * lo()/hi() are the low/high 64-bit lanes; pairs are listed low lane first.
 * m0..m3 are not arguments: they must be __m128i values in scope at the
 * expansion site.  b0 and b1 must be assignable.
 */
#define LOAD_MSG_10_2(b0, b1) \
    do { \
        (b0) = _mm_unpackhi_epi64(m0, m1); \
        (b1) = _mm_unpackhi_epi64(m2, m3); \
    } while (0)
3517db96d56Sopenharmony_ci
3527db96d56Sopenharmony_ci
/*
 * LOAD_MSG_10_3(b0, b1): assign two 128-bit vectors built from m4..m7.
 *   b0 = { lo(m4), lo(m5) }
 *   b1 = { lo(m6), lo(m7) }
 * Performs the same selection as LOAD_MSG_0_3.
 * lo()/hi() are the low/high 64-bit lanes; pairs are listed low lane first.
 * m4..m7 are not arguments: they must be __m128i values in scope at the
 * expansion site.  b0 and b1 must be assignable.
 */
#define LOAD_MSG_10_3(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m4, m5); \
        (b1) = _mm_unpacklo_epi64(m6, m7); \
    } while (0)
3597db96d56Sopenharmony_ci
3607db96d56Sopenharmony_ci
/*
 * LOAD_MSG_10_4(b0, b1): assign two 128-bit vectors built from m4..m7.
 *   b0 = { hi(m4), hi(m5) }
 *   b1 = { hi(m6), hi(m7) }
 * Performs the same selection as LOAD_MSG_0_4.
 * lo()/hi() are the low/high 64-bit lanes; pairs are listed low lane first.
 * m4..m7 are not arguments: they must be __m128i values in scope at the
 * expansion site.  b0 and b1 must be assignable.
 */
#define LOAD_MSG_10_4(b0, b1) \
    do { \
        (b0) = _mm_unpackhi_epi64(m4, m5); \
        (b1) = _mm_unpackhi_epi64(m6, m7); \
    } while (0)
3677db96d56Sopenharmony_ci
3687db96d56Sopenharmony_ci
/*
 * LOAD_MSG_11_1(b0, b1): assign two 128-bit vectors built from m2, m4, m6, m7.
 *   b0 = { lo(m7), lo(m2) }
 *   b1 = { hi(m4), hi(m6) }
 * Performs the same selection as LOAD_MSG_1_1.
 * lo()/hi() are the low/high 64-bit lanes; pairs are listed low lane first.
 * The m* names are not arguments: they must be __m128i values in scope at
 * the expansion site.  b0 and b1 must be assignable.
 */
#define LOAD_MSG_11_1(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m7, m2); \
        (b1) = _mm_unpackhi_epi64(m4, m6); \
    } while (0)
3757db96d56Sopenharmony_ci
3767db96d56Sopenharmony_ci
/*
 * LOAD_MSG_11_2(b0, b1): assign two 128-bit vectors built from m3, m4, m5, m7.
 *   b0 = { lo(m5), lo(m4) }
 *   b1 = { hi(m7), lo(m3) }   (8-byte alignr of m3:m7)
 * Performs the same selection as LOAD_MSG_1_2.
 * lo()/hi() are the low/high 64-bit lanes; pairs are listed low lane first.
 * The m* names are not arguments: they must be __m128i values in scope at
 * the expansion site.  b0 and b1 must be assignable.
 */
#define LOAD_MSG_11_2(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m5, m4); \
        (b1) = _mm_alignr_epi8(m3, m7, 8); \
    } while (0)
3837db96d56Sopenharmony_ci
3847db96d56Sopenharmony_ci
/*
 * LOAD_MSG_11_3(b0, b1): assign two 128-bit vectors built from m0, m2, m5.
 *   b0 = { hi(m0), lo(m0) }   (m0 with its 64-bit lanes swapped)
 *   b1 = { hi(m5), hi(m2) }
 * Performs the same selection as LOAD_MSG_1_3.
 * lo()/hi() are the low/high 64-bit lanes; pairs are listed low lane first.
 * The m* names are not arguments: they must be __m128i values in scope at
 * the expansion site.  b0 and b1 must be assignable.
 */
#define LOAD_MSG_11_3(b0, b1) \
    do { \
        (b0) = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1, 0, 3, 2)); \
        (b1) = _mm_unpackhi_epi64(m5, m2); \
    } while (0)
3917db96d56Sopenharmony_ci
3927db96d56Sopenharmony_ci
/*
 * LOAD_MSG_11_4(b0, b1): assign two 128-bit vectors built from m1, m3, m6.
 *   b0 = { lo(m6), lo(m1) }
 *   b1 = { hi(m3), hi(m1) }
 * Performs the same selection as LOAD_MSG_1_4.
 * lo()/hi() are the low/high 64-bit lanes; pairs are listed low lane first.
 * The m* names are not arguments: they must be __m128i values in scope at
 * the expansion site.  b0 and b1 must be assignable.
 */
#define LOAD_MSG_11_4(b0, b1) \
    do { \
        (b0) = _mm_unpacklo_epi64(m6, m1); \
        (b1) = _mm_unpackhi_epi64(m3, m1); \
    } while (0)
3997db96d56Sopenharmony_ci
4007db96d56Sopenharmony_ci
4017db96d56Sopenharmony_ci#endif
4027db96d56Sopenharmony_ci
403