Lines Matching refs:in1
235 Arguments : Inputs - in0, in1, in2, in3, pdst, stride
237 Stores word from 'in1' to (pdst + stride)
241 #define SW4(in0, in1, in2, in3, pdst, stride) \
244 SW(in1, (pdst) + stride); \
250 Arguments : Inputs - in0, in1, in2, in3, pdst, stride
252 Stores double word from 'in1' to (pdst + stride)
256 #define SD4(in0, in1, in2, in3, pdst, stride) \
259 SD(in1, (pdst) + stride); \
353 Arguments : Inputs - in0, in1, stride
356 Stores elements from 'in1' to (pdst + stride)
358 #define ST_V2(RTYPE, in0, in1, pdst, stride) \
361 ST_V(RTYPE, in1, (pdst) + stride); \
369 #define ST_V4(RTYPE, in0, in1, in2, in3, pdst, stride) \
371 ST_V2(RTYPE, in0, in1, (pdst), stride); \
379 #define ST_V6(RTYPE, in0, in1, in2, in3, in4, in5, pdst, stride) \
381 ST_V4(RTYPE, in0, in1, in2, in3, (pdst), stride); \
386 #define ST_V8(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, pdst, stride) \
388 ST_V4(RTYPE, in0, in1, in2, in3, (pdst), stride); \
470 #define ST_W8(in0, in1, idx0, idx1, idx2, idx3, \
474 ST_W4(in1, idx4, idx5, idx6, idx7, pdst + 4*stride, stride) \
499 #define ST_D4(in0, in1, idx0, idx1, idx2, idx3, pdst, stride) \
504 out2_m = __msa_copy_u_d((v2i64) in1, idx2); \
505 out3_m = __msa_copy_u_d((v2i64) in1, idx3); \
511 #define ST_D8(in0, in1, in2, in3, idx0, idx1, idx2, idx3, \
514 ST_D4(in0, in1, idx0, idx1, idx2, idx3, pdst, stride) \
520 Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7, pdst, stride
527 #define ST12x8_UB(in0, in1, in2, in3, in4, in5, in6, in7, pdst, stride) \
536 out1_m = __msa_copy_u_d((v2i64) in1, 0); \
545 out9_m = __msa_copy_u_w((v4i32) in1, 2); \
578 /* Description : average with rounding (in0 + in1 + 1) / 2.
579 Arguments : Inputs - in0, in1, in2, in3,
583 element from 'in1' vector. The addition of the elements plus 1
590 #define AVER_UB2(RTYPE, in0, in1, in2, in3, out0, out1) \
592 out0 = (RTYPE) __msa_aver_u_b((v16u8) in0, (v16u8) in1); \
597 #define AVER_UB4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
600 AVER_UB2(RTYPE, in0, in1, in2, in3, out0, out1) \
648 Arguments : Inputs - in0, in1, in2, in3, mask0, mask1
651 Details : Selective byte elements from in0 & in1 are copied to out0 as
656 #define VSHF_B2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) \
658 out0 = (RTYPE) __msa_vshf_b((v16i8) mask0, (v16i8) in1, (v16i8) in0); \
666 #define VSHF_B3(RTYPE, in0, in1, in2, in3, in4, in5, mask0, mask1, mask2, \
669 VSHF_B2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1); \
674 #define VSHF_B4(RTYPE, in0, in1, mask0, mask1, mask2, mask3, \
677 VSHF_B2(RTYPE, in0, in1, in0, in1, mask0, mask1, out0, out1); \
678 VSHF_B2(RTYPE, in0, in1, in0, in1, mask2, mask3, out2, out3); \
684 Arguments : Inputs - in0, in1, in2, in3, mask0, mask1
687 Details : Selective halfword elements from in0 & in1 are copied to out0
692 #define VSHF_H2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) \
694 out0 = (RTYPE) __msa_vshf_h((v8i16) mask0, (v8i16) in1, (v8i16) in0); \
699 #define VSHF_H3(RTYPE, in0, in1, in2, in3, in4, in5, mask0, mask1, mask2, \
702 VSHF_H2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1); \
708 Arguments : Inputs - in0, in1, in2, in3, mask0, mask1
711 Details : Selective word elements from in0 & in1 are copied to out0 as
716 #define VSHF_W2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) \
718 out0 = (RTYPE) __msa_vshf_w((v4i32) mask0, (v4i32) in1, (v4i32) in0); \
895 Arguments : Inputs - in0, in1, min_vec
896 Outputs - in0, in1, (in place)
901 #define MIN_UH2(RTYPE, in0, in1, min_vec) \
904 in1 = (RTYPE) __msa_min_u_h((v8u16) in1, min_vec); \
908 #define MIN_UH4(RTYPE, in0, in1, in2, in3, min_vec) \
910 MIN_UH2(RTYPE, in0, in1, min_vec); \
941 #define CLIP_SH2_0_255(in0, in1) \
944 CLIP_SH_0_255(in1); \
947 #define CLIP_SH4_0_255(in0, in1, in2, in3) \
949 CLIP_SH2_0_255(in0, in1); \
953 #define CLIP_SH8_0_255(in0, in1, in2, in3, \
956 CLIP_SH4_0_255(in0, in1, in2, in3); \
972 #define CLIP_SW2_0_255(in0, in1) \
975 CLIP_SW_0_255(in1); \
978 #define CLIP_SW4_0_255(in0, in1, in2, in3) \
980 CLIP_SW2_0_255(in0, in1); \
984 #define CLIP_SW8_0_255(in0, in1, in2, in3, \
987 CLIP_SW4_0_255(in0, in1, in2, in3); \
1032 Arguments : Inputs - in0, in1
1039 #define HADD_SB2(RTYPE, in0, in1, out0, out1) \
1042 out1 = (RTYPE) __msa_hadd_s_h((v16i8) in1, (v16i8) in1); \
1046 #define HADD_SB4(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3) \
1048 HADD_SB2(RTYPE, in0, in1, out0, out1); \
1055 Arguments : Inputs - in0, in1
1062 #define HADD_UB2(RTYPE, in0, in1, out0, out1) \
1065 out1 = (RTYPE) __msa_hadd_u_h((v16u8) in1, (v16u8) in1); \
1069 #define HADD_UB3(RTYPE, in0, in1, in2, out0, out1, out2) \
1071 HADD_UB2(RTYPE, in0, in1, out0, out1); \
1076 #define HADD_UB4(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3) \
1078 HADD_UB2(RTYPE, in0, in1, out0, out1); \
1086 Arguments : Inputs - in0, in1
1093 #define HSUB_UB2(RTYPE, in0, in1, out0, out1) \
1096 out1 = (RTYPE) __msa_hsub_u_h((v16u8) in1, (v16u8) in1); \
1101 #define HSUB_UB4(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3) \
1103 HSUB_UB2(RTYPE, in0, in1, out0, out1); \
1110 Arguments : Inputs - in0, in1, ref0, ref1 (unsigned byte src & ref)
1118 #define SAD_UB2_UH(in0, in1, ref0, ref1) \
1124 diff1_m = __msa_asub_u_b((v16u8) in1, (v16u8) ref1); \
1134 Arguments : Inputs - in0, in1, in2, in3 (4 input vectors)
1138 #define INSERT_W2(RTYPE, in0, in1, out) \
1141 out = (RTYPE) __msa_insert_w((v4i32) out, 1, in1); \
1146 #define INSERT_W4(RTYPE, in0, in1, in2, in3, out) \
1149 out = (RTYPE) __msa_insert_w((v4i32) out, 1, in1); \
1160 Arguments : Inputs - in0, in1 (2 input vectors)
1164 #define INSERT_D2(RTYPE, in0, in1, out) \
1167 out = (RTYPE) __msa_insert_d((v2i64) out, 1, in1); \
1175 Arguments : Inputs - in0, in1, in2, in3
1179 elements of 'in1' are interleaved and copied to 'out0'
1183 #define ILVEV_B2(RTYPE, in0, in1, in2, in3, out0, out1) \
1185 out0 = (RTYPE) __msa_ilvev_b((v16i8) in1, (v16i8) in0); \
1194 Arguments : Inputs - in0, in1, in2, in3
1198 elements of 'in1' are interleaved and copied to 'out0'
1202 #define ILVEV_H2(RTYPE, in0, in1, in2, in3, out0, out1) \
1204 out0 = (RTYPE) __msa_ilvev_h((v8i16) in1, (v8i16) in0); \
1212 Arguments : Inputs - in0, in1, in2, in3
1216 elements of 'in1' are interleaved and copied to 'out0'
1220 #define ILVEV_W2(RTYPE, in0, in1, in2, in3, out0, out1) \
1222 out0 = (RTYPE) __msa_ilvev_w((v4i32) in1, (v4i32) in0); \
1231 Arguments : Inputs - in0, in1, in2, in3
1235 elements of 'in1' are interleaved and copied to 'out0'
1239 #define ILVEV_D2(RTYPE, in0, in1, in2, in3, out0, out1) \
1241 out0 = (RTYPE) __msa_ilvev_d((v2i64) in1, (v2i64) in0); \
1249 Arguments : Inputs - in0, in1, in2, in3
1253 elements of in1 are interleaved and copied to out0.
1257 #define ILVL_B2(RTYPE, in0, in1, in2, in3, out0, out1) \
1259 out0 = (RTYPE) __msa_ilvl_b((v16i8) in0, (v16i8) in1); \
1267 #define ILVL_B4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
1270 ILVL_B2(RTYPE, in0, in1, in2, in3, out0, out1); \
1279 Arguments : Inputs - in0, in1, in2, in3
1283 elements of in1 are interleaved and copied to out0.
1287 #define ILVL_H2(RTYPE, in0, in1, in2, in3, out0, out1) \
1289 out0 = (RTYPE) __msa_ilvl_h((v8i16) in0, (v8i16) in1); \
1295 #define ILVL_H4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
1298 ILVL_H2(RTYPE, in0, in1, in2, in3, out0, out1); \
1305 Arguments : Inputs - in0, in1, in2, in3
1309 elements of in1 are interleaved and copied to out0.
1313 #define ILVL_W2(RTYPE, in0, in1, in2, in3, out0, out1) \
1315 out0 = (RTYPE) __msa_ilvl_w((v4i32) in0, (v4i32) in1); \
1323 Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7
1327 elements of in1 are interleaved and copied to out0.
1332 #define ILVR_B2(RTYPE, in0, in1, in2, in3, out0, out1) \
1334 out0 = (RTYPE) __msa_ilvr_b((v16i8) in0, (v16i8) in1); \
1343 #define ILVR_B3(RTYPE, in0, in1, in2, in3, in4, in5, out0, out1, out2) \
1345 ILVR_B2(RTYPE, in0, in1, in2, in3, out0, out1); \
1353 #define ILVR_B4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
1356 ILVR_B2(RTYPE, in0, in1, in2, in3, out0, out1); \
1365 #define ILVR_B8(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
1369 ILVR_B4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
1378 Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7
1382 halfword elements of in1 are interleaved and copied to out0.
1387 #define ILVR_H2(RTYPE, in0, in1, in2, in3, out0, out1) \
1389 out0 = (RTYPE) __msa_ilvr_h((v8i16) in0, (v8i16) in1); \
1395 #define ILVR_H3(RTYPE, in0, in1, in2, in3, in4, in5, out0, out1, out2) \
1397 ILVR_H2(RTYPE, in0, in1, in2, in3, out0, out1); \
1402 #define ILVR_H4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
1405 ILVR_H2(RTYPE, in0, in1, in2, in3, out0, out1); \
1411 #define ILVR_W2(RTYPE, in0, in1, in2, in3, out0, out1) \
1413 out0 = (RTYPE) __msa_ilvr_w((v4i32) in0, (v4i32) in1); \
1420 #define ILVR_W4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
1423 ILVR_W2(RTYPE, in0, in1, in2, in3, out0, out1); \
1430 Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7
1434 double word elements of in1 are interleaved and copied to out0.
1438 #define ILVR_D2(RTYPE, in0, in1, in2, in3, out0, out1) \
1440 out0 = (RTYPE) __msa_ilvr_d((v2i64) in0, (v2i64) in1); \
1447 #define ILVR_D3(RTYPE, in0, in1, in2, in3, in4, in5, out0, out1, out2) \
1449 ILVR_D2(RTYPE, in0, in1, in2, in3, out0, out1); \
1454 #define ILVR_D4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
1457 ILVR_D2(RTYPE, in0, in1, in2, in3, out0, out1); \
1464 Arguments : Inputs - in0, in1, in2, in3
1468 double word elements of in1 are interleaved and copied to out0.
1472 #define ILVL_D2(RTYPE, in0, in1, in2, in3, out0, out1) \
1474 out0 = (RTYPE) __msa_ilvl_d((v2i64) in0, (v2i64) in1); \
1482 Arguments : Inputs - in0, in1
1485 Details : Right half of byte elements from 'in0' and 'in1' are
1487 Left half of byte elements from 'in0' and 'in1' are
1490 #define ILVRL_B2(RTYPE, in0, in1, out0, out1) \
1492 out0 = (RTYPE) __msa_ilvr_b((v16i8) in0, (v16i8) in1); \
1493 out1 = (RTYPE) __msa_ilvl_b((v16i8) in0, (v16i8) in1); \
1501 #define ILVRL_H2(RTYPE, in0, in1, out0, out1) \
1503 out0 = (RTYPE) __msa_ilvr_h((v8i16) in0, (v8i16) in1); \
1504 out1 = (RTYPE) __msa_ilvl_h((v8i16) in0, (v8i16) in1); \
1511 #define ILVRL_W2(RTYPE, in0, in1, out0, out1) \
1513 out0 = (RTYPE) __msa_ilvr_w((v4i32) in0, (v4i32) in1); \
1514 out1 = (RTYPE) __msa_ilvl_w((v4i32) in0, (v4i32) in1); \
1522 Arguments : Inputs - in0, in1, in2, in3, max_val
1523 Outputs - in0, in1, in2, in3 (in place)
1528 #define MAXI_SH2(RTYPE, in0, in1, max_val) \
1531 in1 = (RTYPE) __msa_maxi_s_h((v8i16) in1, max_val); \
1536 #define MAXI_SH4(RTYPE, in0, in1, in2, in3, max_val) \
1538 MAXI_SH2(RTYPE, in0, in1, max_val); \
1544 #define MAXI_SH8(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, max_val) \
1546 MAXI_SH4(RTYPE, in0, in1, in2, in3, max_val); \
1555 Arguments : Inputs - in0, in1, in2, in3, sat_val
1556 Outputs - in0, in1, in2, in3 (in place)
1562 #define SAT_UH2(RTYPE, in0, in1, sat_val) \
1565 in1 = (RTYPE) __msa_sat_u_h((v8u16) in1, sat_val); \
1570 #define SAT_UH4(RTYPE, in0, in1, in2, in3, sat_val) \
1572 SAT_UH2(RTYPE, in0, in1, sat_val); \
1578 #define SAT_UH8(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, sat_val) \
1580 SAT_UH4(RTYPE, in0, in1, in2, in3, sat_val); \
1589 Arguments : Inputs - in0, in1, in2, in3, sat_val
1590 Outputs - in0, in1, in2, in3 (in place)
1596 #define SAT_SH2(RTYPE, in0, in1, sat_val) \
1599 in1 = (RTYPE) __msa_sat_s_h((v8i16) in1, sat_val); \
1603 #define SAT_SH3(RTYPE, in0, in1, in2, sat_val) \
1605 SAT_SH2(RTYPE, in0, in1, sat_val); \
1610 #define SAT_SH4(RTYPE, in0, in1, in2, in3, sat_val) \
1612 SAT_SH2(RTYPE, in0, in1, sat_val); \
1620 Arguments : Inputs - in0, in1, in2, in3, sat_val
1621 Outputs - in0, in1, in2, in3 (in place)
1627 #define SAT_SW2(RTYPE, in0, in1, sat_val) \
1630 in1 = (RTYPE) __msa_sat_s_w((v4i32) in1, sat_val); \
1634 #define SAT_SW4(RTYPE, in0, in1, in2, in3, sat_val) \
1636 SAT_SW2(RTYPE, in0, in1, sat_val); \
1704 Arguments : Inputs - in0, in1, in2, in3
1708 out0 & even byte elements of in1 are copied to the right
1714 #define PCKEV_B2(RTYPE, in0, in1, in2, in3, out0, out1) \
1716 out0 = (RTYPE) __msa_pckev_b((v16i8) in0, (v16i8) in1); \
1724 #define PCKEV_B3(RTYPE, in0, in1, in2, in3, in4, in5, out0, out1, out2) \
1726 PCKEV_B2(RTYPE, in0, in1, in2, in3, out0, out1); \
1732 #define PCKEV_B4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
1735 PCKEV_B2(RTYPE, in0, in1, in2, in3, out0, out1); \
1744 Arguments : Inputs - in0, in1, in2, in3
1748 out0 & even halfword elements of in1 are copied to the right
1754 #define PCKEV_H2(RTYPE, in0, in1, in2, in3, out0, out1) \
1756 out0 = (RTYPE) __msa_pckev_h((v8i16) in0, (v8i16) in1); \
1762 #define PCKEV_H4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
1765 PCKEV_H2(RTYPE, in0, in1, in2, in3, out0, out1); \
1772 Arguments : Inputs - in0, in1, in2, in3
1776 out0 & even double elements of in1 are copied to the right
1782 #define PCKEV_D2(RTYPE, in0, in1, in2, in3, out0, out1) \
1784 out0 = (RTYPE) __msa_pckev_d((v2i64) in0, (v2i64) in1); \
1791 #define PCKEV_D4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
1794 PCKEV_D2(RTYPE, in0, in1, in2, in3, out0, out1); \
1800 Arguments : Inputs - in0, in1, in2, in3
1805 As operation is on same input 'in1' vector, index 1 double word
1808 #define PCKOD_D2(RTYPE, in0, in1, in2, in3, out0, out1) \
1810 out0 = (RTYPE) __msa_pckod_d((v2i64) in0, (v2i64) in1); \
1818 Arguments : Inputs - in0, in1
1819 Outputs - in0, in1 (in-place)
1824 Each unsigned byte element from input vector 'in1' is
1826 'in1' vector
1829 #define XORI_B2_128(RTYPE, in0, in1) \
1832 in1 = (RTYPE) __msa_xori_b((v16u8) in1, 128); \
1838 #define XORI_B3_128(RTYPE, in0, in1, in2) \
1840 XORI_B2_128(RTYPE, in0, in1); \
1845 #define XORI_B4_128(RTYPE, in0, in1, in2, in3) \
1847 XORI_B2_128(RTYPE, in0, in1); \
1854 #define XORI_B5_128(RTYPE, in0, in1, in2, in3, in4) \
1856 XORI_B3_128(RTYPE, in0, in1, in2); \
1861 #define XORI_B6_128(RTYPE, in0, in1, in2, in3, in4, in5) \
1863 XORI_B4_128(RTYPE, in0, in1, in2, in3); \
1868 #define XORI_B7_128(RTYPE, in0, in1, in2, in3, in4, in5, in6) \
1870 XORI_B4_128(RTYPE, in0, in1, in2, in3); \
1875 #define XORI_B8_128(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7) \
1877 XORI_B4_128(RTYPE, in0, in1, in2, in3); \
1884 Arguments : Inputs - in0, in1, in2, in3
1888 halfword elements of 'in1'. The result is then signed saturated
1892 #define ADDS_SH2(RTYPE, in0, in1, in2, in3, out0, out1) \
1894 out0 = (RTYPE) __msa_adds_s_h((v8i16) in0, (v8i16) in1); \
1899 #define ADDS_SH4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
1902 ADDS_SH2(RTYPE, in0, in1, in2, in3, out0, out1); \
1909 Arguments : Inputs - in0, in1, in2, in3, shift
1910 Outputs - in0, in1, in2, in3 (in place)
1916 #define SLLI_2V(in0, in1, shift) \
1919 in1 = in1 << shift; \
1921 #define SLLI_4V(in0, in1, in2, in3, shift) \
1924 in1 = in1 << shift; \
1931 Arguments : Inputs - in0, in1, in2, in3, shift
1932 Outputs - in0, in1, in2, in3 (in place)
1939 #define SRA_4V(in0, in1, in2, in3, shift) \
1942 in1 = in1 >> shift; \
1948 Arguments : Inputs - in0, in1, in2, in3, shift
1949 Outputs - in0, in1, in2, in3 (in place)
1957 #define SRL_H4(RTYPE, in0, in1, in2, in3, shift) \
1960 in1 = (RTYPE) __msa_srl_h((v8i16) in1, (v8i16) shift); \
1966 #define SRLR_H4(RTYPE, in0, in1, in2, in3, shift) \
1969 in1 = (RTYPE) __msa_srlr_h((v8i16) in1, (v8i16) shift); \
1976 #define SRLR_H8(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, shift) \
1978 SRLR_H4(RTYPE, in0, in1, in2, in3, shift); \
1985 Arguments : Inputs - in0, in1, shift
1986 Outputs - in0, in1, (in place)
1995 #define SRAR_H2(RTYPE, in0, in1, shift) \
1998 in1 = (RTYPE) __msa_srar_h((v8i16) in1, (v8i16) shift); \
2003 #define SRAR_H3(RTYPE, in0, in1, in2, shift) \
2005 SRAR_H2(RTYPE, in0, in1, shift) \
2010 #define SRAR_H4(RTYPE, in0, in1, in2, in3, shift) \
2012 SRAR_H2(RTYPE, in0, in1, shift) \
2019 Arguments : Inputs - in0, in1, shift
2020 Outputs - in0, in1, (in place)
2029 #define SRAR_W2(RTYPE, in0, in1, shift) \
2032 in1 = (RTYPE) __msa_srar_w((v4i32) in1, (v4i32) shift); \
2036 #define SRAR_W4(RTYPE, in0, in1, in2, in3, shift) \
2038 SRAR_W2(RTYPE, in0, in1, shift) \
2044 Arguments : Inputs - in0, in1, in2, in3, shift
2045 Outputs - in0, in1, in2, in3 (in place)
2053 #define SRARI_H2(RTYPE, in0, in1, shift) \
2056 in1 = (RTYPE) __msa_srari_h((v8i16) in1, shift); \
2061 #define SRARI_H4(RTYPE, in0, in1, in2, in3, shift) \
2063 SRARI_H2(RTYPE, in0, in1, shift); \
2070 Arguments : Inputs - in0, in1, shift
2071 Outputs - in0, in1 (in place)
2079 #define SRARI_W2(RTYPE, in0, in1, shift) \
2082 in1 = (RTYPE) __msa_srari_w((v4i32) in1, shift); \
2086 #define SRARI_W4(RTYPE, in0, in1, in2, in3, shift) \
2088 SRARI_W2(RTYPE, in0, in1, shift); \
2095 Arguments : Inputs - in0, in1, in2, in3
2097 Details : Each element from 'in0' is multiplied with elements from 'in1'
2101 #define MUL2(in0, in1, in2, in3, out0, out1) \
2103 out0 = in0 * in1; \
2106 #define MUL4(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3) \
2108 MUL2(in0, in1, in2, in3, out0, out1); \
2113 Arguments : Inputs - in0, in1, in2, in3
2118 #define ADD2(in0, in1, in2, in3, out0, out1) \
2120 out0 = in0 + in1; \
2123 #define ADD4(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3) \
2125 ADD2(in0, in1, in2, in3, out0, out1); \
2130 Arguments : Inputs - in0, in1, in2, in3
2135 #define SUB2(in0, in1, in2, in3, out0, out1) \
2137 out0 = in0 - in1; \
2140 #define SUB4(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3) \
2142 out0 = in0 - in1; \
2233 Arguments : Inputs - in0, in1
2234 Outputs - in0, in1 (in-place)
2237 #define SWAP(in0, in1) \
2239 in0 = in0 ^ in1; \
2240 in1 = in0 ^ in1; \
2241 in0 = in0 ^ in1; \
2245 Arguments : Inputs - in0, in1, in2, in3
2249 #define BUTTERFLY_4(in0, in1, in2, in3, out0, out1, out2, out3) \
2252 out1 = in1 + in2; \
2254 out2 = in1 - in2; \
2263 #define BUTTERFLY_8(in0, in1, in2, in3, in4, in5, in6, in7, \
2267 out1 = in1 + in6; \
2273 out6 = in1 - in6; \
2282 #define BUTTERFLY_16(in0, in1, in2, in3, in4, in5, in6, in7, \
2288 out1 = in1 + in14; \
2302 out14 = in1 - in14; \
2307 Arguments : Inputs - in0, in1, in2, in3 (input 4x4 byte block)
2312 #define TRANSPOSE4x4_UB_UB(in0, in1, in2, in3, out0, out1, out2, out3) \
2317 ILVR_D2_SB(in1, in0, in3, in2, s0_m, s1_m); \
2327 Arguments : Inputs - in0, in1, in2, in3 (input 8x4 byte block)
2332 #define TRANSPOSE8x4_UB(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
2337 ILVEV_W2_SB(in0, in4, in1, in5, tmp0_m, tmp1_m); \
2352 Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7
2359 #define TRANSPOSE8x8_UB(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
2366 ILVR_B4_SB(in2, in0, in3, in1, in6, in4, in7, in5, \
2379 Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7,
2385 #define TRANSPOSE16x4_UB_UB(in0, in1, in2, in3, in4, in5, in6, in7, \
2394 ILVEV_W2_SD(in1, in5, in9, in13, tmp0_m, tmp1_m); \
2414 Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7,
2420 #define TRANSPOSE16x8_UB_UB(in0, in1, in2, in3, in4, in5, in6, in7, \
2427 ILVEV_D2_UB(in0, in8, in1, in9, out7, out6); \
2461 Arguments : Inputs - in0, in1, in2, in3
2466 #define TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, out0, out1, out2, out3) \
2470 ILVR_H2_SH(in1, in0, in3, in2, s0_m, s1_m); \
2477 Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7
2482 #define TRANSPOSE8x8_H(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
2493 ILVR_H2_SH(in2, in0, in3, in1, s0_m, s1_m); \
2495 ILVL_H2_SH(in2, in0, in3, in1, s0_m, s1_m); \
2508 Arguments : Inputs - in0, in1, in2, in3
2513 #define TRANSPOSE4x4_SW_SW(in0, in1, in2, in3, out0, out1, out2, out3) \
2517 ILVRL_W2_SW(in1, in0, s0_m, s1_m); \
2528 Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7, pdst, stride
2529 Details : Each byte element from input vector pair 'in0' and 'in1' are
2540 #define AVE_ST8x4_UB(in0, in1, in2, in3, in4, in5, in6, in7, pdst, stride) \
2545 tmp0_m = __msa_ave_u_b((v16u8) in0, (v16u8) in1); \
2559 Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7, pdst, stride
2560 Details : Each byte element from input vector pair 'in0' and 'in1' are
2571 #define AVE_ST16x4_UB(in0, in1, in2, in3, in4, in5, in6, in7, pdst, stride) \
2575 tmp0_m = __msa_ave_u_b((v16u8) in0, (v16u8) in1); \
2585 Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7, pdst, stride
2586 Details : Each byte element from input vector pair 'in0' and 'in1' are
2597 #define AVER_ST8x4_UB(in0, in1, in2, in3, in4, in5, in6, in7, pdst, stride) \
2602 AVER_UB4_UB(in0, in1, in2, in3, in4, in5, in6, in7, \
2614 Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7, pdst, stride
2615 Details : Each byte element from input vector pair 'in0' and 'in1' are
2626 #define AVER_ST16x4_UB(in0, in1, in2, in3, in4, in5, in6, in7, pdst, stride) \
2630 AVER_UB4_UB(in0, in1, in2, in3, in4, in5, in6, in7, \
2638 Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7, pdst, stride
2639 Details : Each byte element from input vector pair 'in0' and 'in1' are
2650 #define AVER_DST_ST8x4_UB(in0, in1, in2, in3, in4, in5, in6, in7, \
2657 AVER_UB4_UB(in0, in1, in2, in3, in4, in5, in6, in7, \
2666 Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7, pdst, stride
2667 Details : Each byte element from input vector pair 'in0' and 'in1' are
2678 #define AVER_DST_ST16x4_UB(in0, in1, in2, in3, in4, in5, in6, in7, \
2685 AVER_UB4_UB(in0, in1, in2, in3, in4, in5, in6, in7, \
2692 Arguments : Inputs - in0, in1, in2, in3, pdst, stride
2696 #define ADDBLK_ST4x4_UB(in0, in1, in2, in3, pdst, stride) \
2705 ILVR_D2_SH(in1, in0, in3, in2, inp0_m, inp1_m) \
2722 Arguments : Inputs - in0, in1, in2, coeff0, coeff1, coeff2
2726 Dot product of 'in1' with 'coeff1'
2730 out0_m = (in0 * coeff0) + (in1 * coeff1) + (in2 * coeff2)
2732 #define DPADD_SH3_SH(in0, in1, in2, coeff0, coeff1, coeff2) \
2737 out0_m = __msa_dpadd_s_h(out0_m, (v16i8) in1, (v16i8) coeff1); \
2744 Arguments : Inputs - in0, in1
2747 Details : Signed byte even elements from 'in0' and 'in1' are packed
2751 #define PCKEV_XORI128_UB(in0, in1) \
2754 out_m = (v16u8) __msa_pckev_b((v16i8) in1, (v16i8) in0); \
2761 Arguments : Inputs - in0, in1, in2, in3, dst0, dst1, pdst, stride
2763 #define CONVERT_UB_AVG_ST8x4_UB(in0, in1, in2, in3, \
2769 tmp0_m = PCKEV_XORI128_UB(in0, in1); \
2778 Arguments : Inputs - in0, in1, in2, in3, pdst, stride
2780 #define PCKEV_ST4x4_UB(in0, in1, in2, in3, pdst, stride) \
2785 PCKEV_B2_SB(in1, in0, in3, in2, tmp0_m, tmp1_m); \
2797 Arguments : Inputs - in0, in1, pdst
2799 #define PCKEV_ST_SB(in0, in1, pdst) \
2802 tmp_m = __msa_pckev_b((v16i8) in1, (v16i8) in0); \
2807 Arguments : Inputs - in0, in1, mask, coeff, shift
2809 #define HORIZ_2TAP_FILT_UH(in0, in1, mask, coeff, shift) \
2814 tmp0_m = __msa_vshf_b((v16i8) mask, (v16i8) in1, (v16i8) in0); \