/*
   BLAKE2 reference source code package - optimized C implementations

   Written in 2012 by Samuel Neves <sneves@dei.uc.pt>

   To the extent possible under law, the author(s) have dedicated all copyright
   and related and neighboring rights to this software to the public domain
   worldwide. This software is distributed without any warranty.

   You should have received a copy of the CC0 Public Domain Dedication along with
   this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#pragma once
#ifndef __BLAKE2B_LOAD_SSE41_H__
#define __BLAKE2B_LOAD_SSE41_H__

/*
   Message-load macros.

   Every LOAD_MSG_<r>_<k>(b0, b1) macro assigns one value to b0 and one to b1.
   Both values are built from the variables m0 .. m7, which are NOT defined
   here: they must already be in scope wherever a macro is expanded.
   NOTE(review): <r> looks like the round index (0..11) and <k> the step
   within that round (1..4), with m<i> holding 64-bit message words 2i and
   2i+1 -- confirm against the file that includes this header.

   This header includes nothing itself. The includer has to provide the
   intrinsics used below:
     _mm_unpacklo_epi64, _mm_unpackhi_epi64, _mm_shuffle_epi32   (SSE2)
     _mm_alignr_epi8                                             (SSSE3)
     _mm_blend_epi16                                             (SSE4.1)

   Lane legend, written { low 64 bits, high 64 bits }:
     _mm_unpacklo_epi64(a, b)                      -> { lo(a), lo(b) }
     _mm_unpackhi_epi64(a, b)                      -> { hi(a), hi(b) }
     _mm_alignr_epi8(a, b, 8)                      -> { hi(b), lo(a) }
     _mm_blend_epi16(a, b, 0xF0)                   -> { lo(a), hi(b) }
     _mm_shuffle_epi32(a, _MM_SHUFFLE(1,0,3,2))    -> { hi(a), lo(a) }

   Each body is wrapped in do { ... } while(0) so that an expansion followed
   by a semicolon behaves as a single statement.
*/

/* ---- <r> = 0 ---- */

#define LOAD_MSG_0_1(b0, b1) \
    do { \
        b0 = _mm_unpacklo_epi64(m0, m1); \
        b1 = _mm_unpacklo_epi64(m2, m3); \
    } while(0)

#define LOAD_MSG_0_2(b0, b1) \
    do { \
        b0 = _mm_unpackhi_epi64(m0, m1); \
        b1 = _mm_unpackhi_epi64(m2, m3); \
    } while(0)

#define LOAD_MSG_0_3(b0, b1) \
    do { \
        b0 = _mm_unpacklo_epi64(m4, m5); \
        b1 = _mm_unpacklo_epi64(m6, m7); \
    } while(0)

#define LOAD_MSG_0_4(b0, b1) \
    do { \
        b0 = _mm_unpackhi_epi64(m4, m5); \
        b1 = _mm_unpackhi_epi64(m6, m7); \
    } while(0)

/* ---- <r> = 1 ---- */

#define LOAD_MSG_1_1(b0, b1) \
    do { \
        b0 = _mm_unpacklo_epi64(m7, m2); \
        b1 = _mm_unpackhi_epi64(m4, m6); \
    } while(0)

#define LOAD_MSG_1_2(b0, b1) \
    do { \
        b0 = _mm_unpacklo_epi64(m5, m4); \
        b1 = _mm_alignr_epi8(m3, m7, 8); \
    } while(0)

#define LOAD_MSG_1_3(b0, b1) \
    do { \
        b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)); \
        b1 = _mm_unpackhi_epi64(m5, m2); \
    } while(0)

#define LOAD_MSG_1_4(b0, b1) \
    do { \
        b0 = _mm_unpacklo_epi64(m6, m1); \
        b1 = _mm_unpackhi_epi64(m3, m1); \
    } while(0)

/* ---- <r> = 2 ---- */

#define LOAD_MSG_2_1(b0, b1) \
    do { \
        b0 = _mm_alignr_epi8(m6, m5, 8); \
        b1 = _mm_unpackhi_epi64(m2, m7); \
    } while(0)

#define LOAD_MSG_2_2(b0, b1) \
    do { \
        b0 = _mm_unpacklo_epi64(m4, m0); \
        b1 = _mm_blend_epi16(m1, m6, 0xF0); \
    } while(0)

#define LOAD_MSG_2_3(b0, b1) \
    do { \
        b0 = _mm_blend_epi16(m5, m1, 0xF0); \
        b1 = _mm_unpackhi_epi64(m3, m4); \
    } while(0)

#define LOAD_MSG_2_4(b0, b1) \
    do { \
        b0 = _mm_unpacklo_epi64(m7, m3); \
        b1 = _mm_alignr_epi8(m2, m0, 8); \
    } while(0)

/* ---- <r> = 3 ---- */

#define LOAD_MSG_3_1(b0, b1) \
    do { \
        b0 = _mm_unpackhi_epi64(m3, m1); \
        b1 = _mm_unpackhi_epi64(m6, m5); \
    } while(0)

#define LOAD_MSG_3_2(b0, b1) \
    do { \
        b0 = _mm_unpackhi_epi64(m4, m0); \
        b1 = _mm_unpacklo_epi64(m6, m7); \
    } while(0)

#define LOAD_MSG_3_3(b0, b1) \
    do { \
        b0 = _mm_blend_epi16(m1, m2, 0xF0); \
        b1 = _mm_blend_epi16(m2, m7, 0xF0); \
    } while(0)

#define LOAD_MSG_3_4(b0, b1) \
    do { \
        b0 = _mm_unpacklo_epi64(m3, m5); \
        b1 = _mm_unpacklo_epi64(m0, m4); \
    } while(0)

/* ---- <r> = 4 ---- */

#define LOAD_MSG_4_1(b0, b1) \
    do { \
        b0 = _mm_unpackhi_epi64(m4, m2); \
        b1 = _mm_unpacklo_epi64(m1, m5); \
    } while(0)

#define LOAD_MSG_4_2(b0, b1) \
    do { \
        b0 = _mm_blend_epi16(m0, m3, 0xF0); \
        b1 = _mm_blend_epi16(m2, m7, 0xF0); \
    } while(0)

#define LOAD_MSG_4_3(b0, b1) \
    do { \
        b0 = _mm_blend_epi16(m7, m5, 0xF0); \
        b1 = _mm_blend_epi16(m3, m1, 0xF0); \
    } while(0)

#define LOAD_MSG_4_4(b0, b1) \
    do { \
        b0 = _mm_alignr_epi8(m6, m0, 8); \
        b1 = _mm_blend_epi16(m4, m6, 0xF0); \
    } while(0)

/* ---- <r> = 5 ---- */

#define LOAD_MSG_5_1(b0, b1) \
    do { \
        b0 = _mm_unpacklo_epi64(m1, m3); \
        b1 = _mm_unpacklo_epi64(m0, m4); \
    } while(0)

#define LOAD_MSG_5_2(b0, b1) \
    do { \
        b0 = _mm_unpacklo_epi64(m6, m5); \
        b1 = _mm_unpackhi_epi64(m5, m1); \
    } while(0)

#define LOAD_MSG_5_3(b0, b1) \
    do { \
        b0 = _mm_blend_epi16(m2, m3, 0xF0); \
        b1 = _mm_unpackhi_epi64(m7, m0); \
    } while(0)

#define LOAD_MSG_5_4(b0, b1) \
    do { \
        b0 = _mm_unpackhi_epi64(m6, m2); \
        b1 = _mm_blend_epi16(m7, m4, 0xF0); \
    } while(0)

/* ---- <r> = 6 ---- */

#define LOAD_MSG_6_1(b0, b1) \
    do { \
        b0 = _mm_blend_epi16(m6, m0, 0xF0); \
        b1 = _mm_unpacklo_epi64(m7, m2); \
    } while(0)

#define LOAD_MSG_6_2(b0, b1) \
    do { \
        b0 = _mm_unpackhi_epi64(m2, m7); \
        b1 = _mm_alignr_epi8(m5, m6, 8); \
    } while(0)

#define LOAD_MSG_6_3(b0, b1) \
    do { \
        b0 = _mm_unpacklo_epi64(m0, m3); \
        b1 = _mm_shuffle_epi32(m4, _MM_SHUFFLE(1,0,3,2)); \
    } while(0)

#define LOAD_MSG_6_4(b0, b1) \
    do { \
        b0 = _mm_unpackhi_epi64(m3, m1); \
        b1 = _mm_blend_epi16(m1, m5, 0xF0); \
    } while(0)

/* ---- <r> = 7 ---- */

#define LOAD_MSG_7_1(b0, b1) \
    do { \
        b0 = _mm_unpackhi_epi64(m6, m3); \
        b1 = _mm_blend_epi16(m6, m1, 0xF0); \
    } while(0)

#define LOAD_MSG_7_2(b0, b1) \
    do { \
        b0 = _mm_alignr_epi8(m7, m5, 8); \
        b1 = _mm_unpackhi_epi64(m0, m4); \
    } while(0)

#define LOAD_MSG_7_3(b0, b1) \
    do { \
        b0 = _mm_unpackhi_epi64(m2, m7); \
        b1 = _mm_unpacklo_epi64(m4, m1); \
    } while(0)

#define LOAD_MSG_7_4(b0, b1) \
    do { \
        b0 = _mm_unpacklo_epi64(m0, m2); \
        b1 = _mm_unpacklo_epi64(m3, m5); \
    } while(0)

/* ---- <r> = 8 ---- */

#define LOAD_MSG_8_1(b0, b1) \
    do { \
        b0 = _mm_unpacklo_epi64(m3, m7); \
        b1 = _mm_alignr_epi8(m0, m5, 8); \
    } while(0)

#define LOAD_MSG_8_2(b0, b1) \
    do { \
        b0 = _mm_unpackhi_epi64(m7, m4); \
        b1 = _mm_alignr_epi8(m4, m1, 8); \
    } while(0)

/* b0 needs no shuffle here: m6 is taken as-is. */
#define LOAD_MSG_8_3(b0, b1) \
    do { \
        b0 = m6; \
        b1 = _mm_alignr_epi8(m5, m0, 8); \
    } while(0)

/* b1 needs no shuffle here: m2 is taken as-is. */
#define LOAD_MSG_8_4(b0, b1) \
    do { \
        b0 = _mm_blend_epi16(m1, m3, 0xF0); \
        b1 = m2; \
    } while(0)

/* ---- <r> = 9 ---- */

#define LOAD_MSG_9_1(b0, b1) \
    do { \
        b0 = _mm_unpacklo_epi64(m5, m4); \
        b1 = _mm_unpackhi_epi64(m3, m0); \
    } while(0)

#define LOAD_MSG_9_2(b0, b1) \
    do { \
        b0 = _mm_unpacklo_epi64(m1, m2); \
        b1 = _mm_blend_epi16(m3, m2, 0xF0); \
    } while(0)

#define LOAD_MSG_9_3(b0, b1) \
    do { \
        b0 = _mm_unpackhi_epi64(m7, m4); \
        b1 = _mm_unpackhi_epi64(m1, m6); \
    } while(0)

#define LOAD_MSG_9_4(b0, b1) \
    do { \
        b0 = _mm_alignr_epi8(m7, m5, 8); \
        b1 = _mm_unpacklo_epi64(m6, m0); \
    } while(0)

/* ---- <r> = 10 (bodies are identical to LOAD_MSG_0_1 .. LOAD_MSG_0_4) ---- */

#define LOAD_MSG_10_1(b0, b1) \
    do { \
        b0 = _mm_unpacklo_epi64(m0, m1); \
        b1 = _mm_unpacklo_epi64(m2, m3); \
    } while(0)

#define LOAD_MSG_10_2(b0, b1) \
    do { \
        b0 = _mm_unpackhi_epi64(m0, m1); \
        b1 = _mm_unpackhi_epi64(m2, m3); \
    } while(0)

#define LOAD_MSG_10_3(b0, b1) \
    do { \
        b0 = _mm_unpacklo_epi64(m4, m5); \
        b1 = _mm_unpacklo_epi64(m6, m7); \
    } while(0)

#define LOAD_MSG_10_4(b0, b1) \
    do { \
        b0 = _mm_unpackhi_epi64(m4, m5); \
        b1 = _mm_unpackhi_epi64(m6, m7); \
    } while(0)

/* ---- <r> = 11 (bodies are identical to LOAD_MSG_1_1 .. LOAD_MSG_1_4) ---- */

#define LOAD_MSG_11_1(b0, b1) \
    do { \
        b0 = _mm_unpacklo_epi64(m7, m2); \
        b1 = _mm_unpackhi_epi64(m4, m6); \
    } while(0)

#define LOAD_MSG_11_2(b0, b1) \
    do { \
        b0 = _mm_unpacklo_epi64(m5, m4); \
        b1 = _mm_alignr_epi8(m3, m7, 8); \
    } while(0)

#define LOAD_MSG_11_3(b0, b1) \
    do { \
        b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)); \
        b1 = _mm_unpackhi_epi64(m5, m2); \
    } while(0)

#define LOAD_MSG_11_4(b0, b1) \
    do { \
        b0 = _mm_unpacklo_epi64(m6, m1); \
        b1 = _mm_unpackhi_epi64(m3, m1); \
    } while(0)

#endif