Lines Matching refs:in0
235 Arguments : Inputs - in0, in1, in2, in3, pdst, stride
236 Details : Stores word from 'in0' to (pdst)
241 #define SW4(in0, in1, in2, in3, pdst, stride) \
243 SW(in0, (pdst)) \
250 Arguments : Inputs - in0, in1, in2, in3, pdst, stride
251 Details : Stores double word from 'in0' to (pdst)
256 #define SD4(in0, in1, in2, in3, pdst, stride) \
258 SD(in0, (pdst)) \
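The SW4/SD4 fragments above all follow the same pattern: the n-th input is written n strides past pdst (SW stores a 32-bit word, SD a 64-bit doubleword). A minimal scalar sketch of that access pattern in plain C; the helper names are illustrative, not from the header:

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    /* Illustrative only: write one 32-bit word to a possibly unaligned address. */
    static inline void store_u32(uint8_t *dst, uint32_t val)
    {
        memcpy(dst, &val, sizeof(val));
    }

    /* Same layout as SW4: word i lands at pdst + i * stride. */
    static inline void sw4_scalar(uint32_t in0, uint32_t in1, uint32_t in2,
                                  uint32_t in3, uint8_t *pdst, ptrdiff_t stride)
    {
        store_u32(pdst,              in0);
        store_u32(pdst + stride,     in1);
        store_u32(pdst + 2 * stride, in2);
        store_u32(pdst + 3 * stride, in3);
    }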
353 Arguments : Inputs - in0, in1, stride
355 Details : Stores elements from 'in0' to (pdst)
358 #define ST_V2(RTYPE, in0, in1, pdst, stride) \
360 ST_V(RTYPE, in0, (pdst)); \
369 #define ST_V4(RTYPE, in0, in1, in2, in3, pdst, stride) \
371 ST_V2(RTYPE, in0, in1, (pdst), stride); \
379 #define ST_V6(RTYPE, in0, in1, in2, in3, in4, in5, pdst, stride) \
381 ST_V4(RTYPE, in0, in1, in2, in3, (pdst), stride); \
386 #define ST_V8(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, pdst, stride) \
388 ST_V4(RTYPE, in0, in1, in2, in3, (pdst), stride); \
470 #define ST_W8(in0, in1, idx0, idx1, idx2, idx3, \
473 ST_W4(in0, idx0, idx1, idx2, idx3, pdst, stride) \
499 #define ST_D4(in0, in1, idx0, idx1, idx2, idx3, pdst, stride) \
502 out0_m = __msa_copy_u_d((v2i64) in0, idx0); \
503 out1_m = __msa_copy_u_d((v2i64) in0, idx1); \
511 #define ST_D8(in0, in1, in2, in3, idx0, idx1, idx2, idx3, \
514 ST_D4(in0, in1, idx0, idx1, idx2, idx3, pdst, stride) \
520 Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7, pdst, stride
521 Details : Index 0 double word element from input vector 'in0' is copied
523 index 2 word element from the same input vector 'in0' at
527 #define ST12x8_UB(in0, in1, in2, in3, in4, in5, in6, in7, pdst, stride) \
535 out0_m = __msa_copy_u_d((v2i64) in0, 0); \
544 out8_m = __msa_copy_u_w((v4i32) in0, 2); \
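The ST_W*/ST_D* and ST12x8_UB fragments extract scalar lanes with __msa_copy_u_w / __msa_copy_u_d and then write them out row by row, which is how blocks narrower than 16 bytes are stored from full vectors. A hedged sketch of that idea for two 8-byte rows taken from one vector (not the exact macro body; assumes a MIPS64 build where __msa_copy_u_d is available):

    #include <msa.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    /* Store doubleword lanes 0 and 1 of 'in0' to two rows spaced 'stride' apart. */
    static inline void store_two_rows_sketch(v2i64 in0, uint8_t *pdst, ptrdiff_t stride)
    {
        uint64_t row0 = __msa_copy_u_d(in0, 0);   /* lane 0 */
        uint64_t row1 = __msa_copy_u_d(in0, 1);   /* lane 1 */

        memcpy(pdst,          &row0, 8);
        memcpy(pdst + stride, &row1, 8);
    }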
578 /* Description : Average with rounding (in0 + in1 + 1) / 2.
579 Arguments : Inputs - in0, in1, in2, in3,
582 Details : Each byte element from 'in0' vector is added with each byte
590 #define AVER_UB2(RTYPE, in0, in1, in2, in3, out0, out1) \
592 out0 = (RTYPE) __msa_aver_u_b((v16u8) in0, (v16u8) in1); \
597 #define AVER_UB4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
600 AVER_UB2(RTYPE, in0, in1, in2, in3, out0, out1) \
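__msa_aver_u_b computes the rounded average (a + b + 1) >> 1 in every byte lane, so the AVER_UB2/AVER_UB4 wrappers are just repeated calls on successive vector pairs. A minimal sketch of the two-pair case (function name illustrative):

    #include <msa.h>

    /* Rounded per-byte average of two vector pairs, as the AVER_UB2 fragment describes. */
    static inline void aver_ub2_sketch(v16u8 in0, v16u8 in1, v16u8 in2, v16u8 in3,
                                       v16u8 *out0, v16u8 *out1)
    {
        *out0 = __msa_aver_u_b(in0, in1);   /* (in0 + in1 + 1) >> 1 per lane */
        *out1 = __msa_aver_u_b(in2, in3);
    }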
648 Arguments : Inputs - in0, in1, in2, in3, mask0, mask1
651 Details : Selective byte elements from in0 & in1 are copied to out0 as
656 #define VSHF_B2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) \
658 out0 = (RTYPE) __msa_vshf_b((v16i8) mask0, (v16i8) in1, (v16i8) in0); \
666 #define VSHF_B3(RTYPE, in0, in1, in2, in3, in4, in5, mask0, mask1, mask2, \
669 VSHF_B2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1); \
674 #define VSHF_B4(RTYPE, in0, in1, mask0, mask1, mask2, mask3, \
677 VSHF_B2(RTYPE, in0, in1, in0, in1, mask0, mask1, out0, out1); \
678 VSHF_B2(RTYPE, in0, in1, in0, in1, mask2, mask3, out2, out3); \
684 Arguments : Inputs - in0, in1, in2, in3, mask0, mask1
687 Details : Selective halfword elements from in0 & in1 are copied to out0
692 #define VSHF_H2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) \
694 out0 = (RTYPE) __msa_vshf_h((v8i16) mask0, (v8i16) in1, (v8i16) in0); \
699 #define VSHF_H3(RTYPE, in0, in1, in2, in3, in4, in5, mask0, mask1, mask2, \
702 VSHF_H2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1); \
708 Arguments : Inputs - in0, in1, in2, in3, mask0, mask1
711 Details : Selective word elements from in0 & in1 are copied to out0 as
716 #define VSHF_W2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) \
718 out0 = (RTYPE) __msa_vshf_w((v4i32) mask0, (v4i32) in1, (v4i32) in0); \
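In every VSHF_* variant the mask vector selects elements out of the two source operands, using the operand order shown in the fragments (mask, second source, first source). A small sketch that byte-reverses one vector with __msa_vshf_b; the mask values are illustrative, not taken from the header:

    #include <msa.h>

    /* Byte-reverse a vector: mask entry i names the source byte that lands in lane i. */
    static inline v16i8 byte_reverse_sketch(v16i8 a)
    {
        const v16i8 mask = { 15, 14, 13, 12, 11, 10, 9, 8,
                              7,  6,  5,  4,  3,  2, 1, 0 };

        return __msa_vshf_b(mask, a, a);
    }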
895 Arguments : Inputs - in0, in1, min_vec
896 Outputs - in0, in1 (in place)
898 Details : Minimum of unsigned halfword element values from 'in0' and
899 'min_vec' are written to output vector 'in0'
901 #define MIN_UH2(RTYPE, in0, in1, min_vec) \
903 in0 = (RTYPE) __msa_min_u_h((v8u16) in0, min_vec); \
908 #define MIN_UH4(RTYPE, in0, in1, in2, in3, min_vec) \
910 MIN_UH2(RTYPE, in0, in1, min_vec); \
941 #define CLIP_SH2_0_255(in0, in1) \
943 CLIP_SH_0_255(in0); \
947 #define CLIP_SH4_0_255(in0, in1, in2, in3) \
949 CLIP_SH2_0_255(in0, in1); \
953 #define CLIP_SH8_0_255(in0, in1, in2, in3, \
956 CLIP_SH4_0_255(in0, in1, in2, in3); \
972 #define CLIP_SW2_0_255(in0, in1) \
974 CLIP_SW_0_255(in0); \
978 #define CLIP_SW4_0_255(in0, in1, in2, in3) \
980 CLIP_SW2_0_255(in0, in1); \
984 #define CLIP_SW8_0_255(in0, in1, in2, in3, \
987 CLIP_SW4_0_255(in0, in1, in2, in3); \
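The CLIP_SH*_0_255 and CLIP_SW*_0_255 wrappers clamp every element into the unsigned 8-bit range used for pixels. One way to express that clamp for a single v8i16, shown as a sketch (not necessarily how CLIP_SH_0_255 itself is implemented):

    #include <msa.h>

    /* Clamp each signed halfword element into [0, 255]. */
    static inline v8i16 clip_sh_0_255_sketch(v8i16 in)
    {
        const v8i16 max255 = __msa_fill_h(255);

        in = __msa_maxi_s_h(in, 0);       /* lower bound: 0   */
        in = __msa_min_s_h(in, max255);   /* upper bound: 255 */
        return in;
    }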
1032 Arguments : Inputs - in0, in1
1035 Details : Each signed odd byte element from 'in0' is added to
1036 even signed byte element from 'in0' (pairwise) and the
1039 #define HADD_SB2(RTYPE, in0, in1, out0, out1) \
1041 out0 = (RTYPE) __msa_hadd_s_h((v16i8) in0, (v16i8) in0); \
1046 #define HADD_SB4(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3) \
1048 HADD_SB2(RTYPE, in0, in1, out0, out1); \
1055 Arguments : Inputs - in0, in1
1058 Details : Each unsigned odd byte element from 'in0' is added to
1059 even unsigned byte element from 'in0' (pairwise) and the
1062 #define HADD_UB2(RTYPE, in0, in1, out0, out1) \
1064 out0 = (RTYPE) __msa_hadd_u_h((v16u8) in0, (v16u8) in0); \
1069 #define HADD_UB3(RTYPE, in0, in1, in2, out0, out1, out2) \
1071 HADD_UB2(RTYPE, in0, in1, out0, out1); \
1076 #define HADD_UB4(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3) \
1078 HADD_UB2(RTYPE, in0, in1, out0, out1); \
1086 Arguments : Inputs - in0, in1
1089 Details : Each unsigned odd byte element from 'in0' is subtracted from
1090 even unsigned byte element from 'in0' (pairwise) and the
1093 #define HSUB_UB2(RTYPE, in0, in1, out0, out1) \
1095 out0 = (RTYPE) __msa_hsub_u_h((v16u8) in0, (v16u8) in0); \
1101 #define HSUB_UB4(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3) \
1103 HSUB_UB2(RTYPE, in0, in1, out0, out1); \
1110 Arguments : Inputs - in0, in1, ref0, ref1 (unsigned byte src & ref)
1113 Details : Absolute difference of all the byte elements from 'in0' with
1118 #define SAD_UB2_UH(in0, in1, ref0, ref1) \
1123 diff0_m = __msa_asub_u_b((v16u8) in0, (v16u8) ref0); \
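SAD_UB2_UH combines two of the primitives documented above: __msa_asub_u_b yields per-byte absolute differences, and __msa_hadd_u_h widens them by adding adjacent byte pairs into halfwords, which can then be accumulated across rows. A hedged sketch of one such accumulation step (helper name illustrative):

    #include <msa.h>

    /* Add |src - ref|, reduced pairwise to halfwords, into a running SAD accumulator. */
    static inline v8u16 sad_step_sketch(v8u16 acc, v16u8 src, v16u8 ref)
    {
        v16u8 diff = __msa_asub_u_b(src, ref);    /* per-byte |src - ref|     */
        return acc + __msa_hadd_u_h(diff, diff);  /* pairwise widen, then add */
    }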
1134 Arguments : Inputs - in0, in1, in2, in3 (4 input vectors)
1138 #define INSERT_W2(RTYPE, in0, in1, out) \
1140 out = (RTYPE) __msa_insert_w((v4i32) out, 0, in0); \
1146 #define INSERT_W4(RTYPE, in0, in1, in2, in3, out) \
1148 out = (RTYPE) __msa_insert_w((v4i32) out, 0, in0); \
1160 Arguments : Inputs - in0, in1 (2 input vectors)
1164 #define INSERT_D2(RTYPE, in0, in1, out) \
1166 out = (RTYPE) __msa_insert_d((v2i64) out, 0, in0); \
1175 Arguments : Inputs - in0, in1, in2, in3
1178 Details : Even byte elements of 'in0' and even byte
1183 #define ILVEV_B2(RTYPE, in0, in1, in2, in3, out0, out1) \
1185 out0 = (RTYPE) __msa_ilvev_b((v16i8) in1, (v16i8) in0); \
1194 Arguments : Inputs - in0, in1, in2, in3
1197 Details : Even halfword elements of 'in0' and even halfword
1202 #define ILVEV_H2(RTYPE, in0, in1, in2, in3, out0, out1) \
1204 out0 = (RTYPE) __msa_ilvev_h((v8i16) in1, (v8i16) in0); \
1212 Arguments : Inputs - in0, in1, in2, in3
1215 Details : Even word elements of 'in0' and even word
1220 #define ILVEV_W2(RTYPE, in0, in1, in2, in3, out0, out1) \
1222 out0 = (RTYPE) __msa_ilvev_w((v4i32) in1, (v4i32) in0); \
1231 Arguments : Inputs - in0, in1, in2, in3
1234 Details : Even double word elements of 'in0' and even double word
1239 #define ILVEV_D2(RTYPE, in0, in1, in2, in3, out0, out1) \
1241 out0 = (RTYPE) __msa_ilvev_d((v2i64) in1, (v2i64) in0); \
1249 Arguments : Inputs - in0, in1, in2, in3
1252 Details : Left half of byte elements of in0 and left half of byte
1257 #define ILVL_B2(RTYPE, in0, in1, in2, in3, out0, out1) \
1259 out0 = (RTYPE) __msa_ilvl_b((v16i8) in0, (v16i8) in1); \
1267 #define ILVL_B4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
1270 ILVL_B2(RTYPE, in0, in1, in2, in3, out0, out1); \
1279 Arguments : Inputs - in0, in1, in2, in3
1282 Details : Left half of halfword elements of in0 and left half of halfword
1287 #define ILVL_H2(RTYPE, in0, in1, in2, in3, out0, out1) \
1289 out0 = (RTYPE) __msa_ilvl_h((v8i16) in0, (v8i16) in1); \
1295 #define ILVL_H4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
1298 ILVL_H2(RTYPE, in0, in1, in2, in3, out0, out1); \
1305 Arguments : Inputs - in0, in1, in2, in3
1308 Details : Left half of word elements of in0 and left half of word
1313 #define ILVL_W2(RTYPE, in0, in1, in2, in3, out0, out1) \
1315 out0 = (RTYPE) __msa_ilvl_w((v4i32) in0, (v4i32) in1); \
1323 Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7
1326 Details : Right half of byte elements of in0 and right half of byte
1332 #define ILVR_B2(RTYPE, in0, in1, in2, in3, out0, out1) \
1334 out0 = (RTYPE) __msa_ilvr_b((v16i8) in0, (v16i8) in1); \
1343 #define ILVR_B3(RTYPE, in0, in1, in2, in3, in4, in5, out0, out1, out2) \
1345 ILVR_B2(RTYPE, in0, in1, in2, in3, out0, out1); \
1353 #define ILVR_B4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
1356 ILVR_B2(RTYPE, in0, in1, in2, in3, out0, out1); \
1365 #define ILVR_B8(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
1369 ILVR_B4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
1378 Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7
1381 Details : Right half of halfword elements of in0 and right half of
1387 #define ILVR_H2(RTYPE, in0, in1, in2, in3, out0, out1) \
1389 out0 = (RTYPE) __msa_ilvr_h((v8i16) in0, (v8i16) in1); \
1395 #define ILVR_H3(RTYPE, in0, in1, in2, in3, in4, in5, out0, out1, out2) \
1397 ILVR_H2(RTYPE, in0, in1, in2, in3, out0, out1); \
1402 #define ILVR_H4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
1405 ILVR_H2(RTYPE, in0, in1, in2, in3, out0, out1); \
1411 #define ILVR_W2(RTYPE, in0, in1, in2, in3, out0, out1) \
1413 out0 = (RTYPE) __msa_ilvr_w((v4i32) in0, (v4i32) in1); \
1420 #define ILVR_W4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
1423 ILVR_W2(RTYPE, in0, in1, in2, in3, out0, out1); \
1430 Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7
1433 Details : Right half of double word elements of in0 and right half of
1438 #define ILVR_D2(RTYPE, in0, in1, in2, in3, out0, out1) \
1440 out0 = (RTYPE) __msa_ilvr_d((v2i64) in0, (v2i64) in1); \
1447 #define ILVR_D3(RTYPE, in0, in1, in2, in3, in4, in5, out0, out1, out2) \
1449 ILVR_D2(RTYPE, in0, in1, in2, in3, out0, out1); \
1454 #define ILVR_D4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
1457 ILVR_D2(RTYPE, in0, in1, in2, in3, out0, out1); \
1464 Arguments : Inputs - in0, in1, in2, in3
1467 Details : Left half of double word elements of in0 and left half of
1472 #define ILVL_D2(RTYPE, in0, in1, in2, in3, out0, out1) \
1474 out0 = (RTYPE) __msa_ilvl_d((v2i64) in0, (v2i64) in1); \
1482 Arguments : Inputs - in0, in1
1485 Details : Right half of byte elements from 'in0' and 'in1' are
1487 Left half of byte elements from 'in0' and 'in1' are
1490 #define ILVRL_B2(RTYPE, in0, in1, out0, out1) \
1492 out0 = (RTYPE) __msa_ilvr_b((v16i8) in0, (v16i8) in1); \
1493 out1 = (RTYPE) __msa_ilvl_b((v16i8) in0, (v16i8) in1); \
1501 #define ILVRL_H2(RTYPE, in0, in1, out0, out1) \
1503 out0 = (RTYPE) __msa_ilvr_h((v8i16) in0, (v8i16) in1); \
1504 out1 = (RTYPE) __msa_ilvl_h((v8i16) in0, (v8i16) in1); \
1511 #define ILVRL_W2(RTYPE, in0, in1, out0, out1) \
1513 out0 = (RTYPE) __msa_ilvr_w((v4i32) in0, (v4i32) in1); \
1514 out1 = (RTYPE) __msa_ilvl_w((v4i32) in0, (v4i32) in1); \
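All of the ILV* fragments are variants of one idea: zip together elements taken from the right (low) halves or left (high) halves of two vectors, with ILVRL_* producing both results at once. A common use is widening unsigned bytes to halfwords by interleaving with a zero vector; a sketch, assuming little-endian lane order:

    #include <msa.h>

    /* Zero-extend the 16 bytes of 'pix' into two halfword vectors. */
    static inline void unpack_u8_to_u16_sketch(v16u8 pix, v8u16 *lo, v8u16 *hi)
    {
        v16i8 zero = { 0 };

        /* Interleaving with zero leaves each pixel in the low byte of a halfword. */
        *lo = (v8u16) __msa_ilvr_b(zero, (v16i8) pix);   /* pixels 0..7  */
        *hi = (v8u16) __msa_ilvl_b(zero, (v16i8) pix);   /* pixels 8..15 */
    }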
1522 Arguments : Inputs - in0, in1, in2, in3, max_val
1523 Outputs - in0, in1, in2, in3 (in place)
1525 Details : Maximum of signed halfword element values from 'in0' and
1526 'max_val' are written to output vector 'in0'
1528 #define MAXI_SH2(RTYPE, in0, in1, max_val) \
1530 in0 = (RTYPE) __msa_maxi_s_h((v8i16) in0, max_val); \
1536 #define MAXI_SH4(RTYPE, in0, in1, in2, in3, max_val) \
1538 MAXI_SH2(RTYPE, in0, in1, max_val); \
1544 #define MAXI_SH8(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, max_val) \
1546 MAXI_SH4(RTYPE, in0, in1, in2, in3, max_val); \
1555 Arguments : Inputs - in0, in1, in2, in3, sat_val
1556 Outputs - in0, in1, in2, in3 (in place)
1558 Details : Each unsigned halfword element from 'in0' is saturated to the
1562 #define SAT_UH2(RTYPE, in0, in1, sat_val) \
1564 in0 = (RTYPE) __msa_sat_u_h((v8u16) in0, sat_val); \
1570 #define SAT_UH4(RTYPE, in0, in1, in2, in3, sat_val) \
1572 SAT_UH2(RTYPE, in0, in1, sat_val); \
1578 #define SAT_UH8(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, sat_val) \
1580 SAT_UH4(RTYPE, in0, in1, in2, in3, sat_val); \
1589 Arguments : Inputs - in0, in1, in2, in3, sat_val
1590 Outputs - in0, in1, in2, in3 (in place)
1592 Details : Each signed halfword element from 'in0' is saturated to the
1596 #define SAT_SH2(RTYPE, in0, in1, sat_val) \
1598 in0 = (RTYPE) __msa_sat_s_h((v8i16) in0, sat_val); \
1603 #define SAT_SH3(RTYPE, in0, in1, in2, sat_val) \
1605 SAT_SH2(RTYPE, in0, in1, sat_val); \
1610 #define SAT_SH4(RTYPE, in0, in1, in2, in3, sat_val) \
1612 SAT_SH2(RTYPE, in0, in1, sat_val); \
1620 Arguments : Inputs - in0, in1, in2, in3, sat_val
1621 Outputs - in0, in1, in2, in3 (in place)
1623 Details : Each signed word element from 'in0' is saturated to the
1627 #define SAT_SW2(RTYPE, in0, in1, sat_val) \
1629 in0 = (RTYPE) __msa_sat_s_w((v4i32) in0, sat_val); \
1634 #define SAT_SW4(RTYPE, in0, in1, in2, in3, sat_val) \
1636 SAT_SW2(RTYPE, in0, in1, sat_val); \
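MAXI_SH* clamps each lane from below against an immediate, and the SAT_* families saturate each lane to a (sat_val + 1)-bit range. Chaining the two is a typical way to bring signed filter sums into the unsigned 8-bit range before packing; a sketch (helper name illustrative):

    #include <msa.h>

    /* Clamp negative sums to zero, then saturate to 8 significant bits (0..255). */
    static inline v8u16 sat_to_u8_sketch(v8i16 sum)
    {
        sum = __msa_maxi_s_h(sum, 0);           /* MAXI_SH-style lower bound of 0 */
        return __msa_sat_u_h((v8u16) sum, 7);   /* SAT_UH-style, sat_val = 7      */
    }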
1704 Arguments : Inputs - in0, in1, in2, in3
1707 Details : Even byte elements of in0 are copied to the left half of
1714 #define PCKEV_B2(RTYPE, in0, in1, in2, in3, out0, out1) \
1716 out0 = (RTYPE) __msa_pckev_b((v16i8) in0, (v16i8) in1); \
1724 #define PCKEV_B3(RTYPE, in0, in1, in2, in3, in4, in5, out0, out1, out2) \
1726 PCKEV_B2(RTYPE, in0, in1, in2, in3, out0, out1); \
1732 #define PCKEV_B4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
1735 PCKEV_B2(RTYPE, in0, in1, in2, in3, out0, out1); \
1744 Arguments : Inputs - in0, in1, in2, in3
1747 Details : Even halfword elements of in0 are copied to the left half of
1754 #define PCKEV_H2(RTYPE, in0, in1, in2, in3, out0, out1) \
1756 out0 = (RTYPE) __msa_pckev_h((v8i16) in0, (v8i16) in1); \
1762 #define PCKEV_H4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
1765 PCKEV_H2(RTYPE, in0, in1, in2, in3, out0, out1); \
1772 Arguments : Inputs - in0, in1, in2, in3
1775 Details : Even double word elements of in0 are copied to the left half of
1782 #define PCKEV_D2(RTYPE, in0, in1, in2, in3, out0, out1) \
1784 out0 = (RTYPE) __msa_pckev_d((v2i64) in0, (v2i64) in1); \
1791 #define PCKEV_D4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
1794 PCKEV_D2(RTYPE, in0, in1, in2, in3, out0, out1); \
1800 Arguments : Inputs - in0, in1
1803 Details : As the operation is on the same input 'in0' vector, the index 1 double word
1808 #define PCKOD_D2(RTYPE, in0, in1, in2, in3, out0, out1) \
1810 out0 = (RTYPE) __msa_pckod_d((v2i64) in0, (v2i64) in1); \
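PCKEV_* gathers the even-indexed elements of its two operands into one vector, which is the usual way to narrow halfword results back down to bytes; PCKOD_* does the same with the odd-indexed elements. A sketch that packs two halfword vectors into one byte vector, keeping the operand order of the PCKEV_B2 fragment (little-endian lane order assumed, so the even byte lanes are the low bytes of each halfword):

    #include <msa.h>

    /* Pack the low bytes of 16 halfwords: 'hi' fills the left half of the result,
       'lo' the right half. */
    static inline v16u8 pack_u16_to_u8_sketch(v8u16 hi, v8u16 lo)
    {
        return (v16u8) __msa_pckev_b((v16i8) hi, (v16i8) lo);
    }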
1818 Arguments : Inputs - in0, in1
1819 Outputs - in0, in1 (in-place)
1821 Details : Each unsigned byte element from input vector 'in0' is
1823 'in0' vector
1829 #define XORI_B2_128(RTYPE, in0, in1) \
1831 in0 = (RTYPE) __msa_xori_b((v16u8) in0, 128); \
1838 #define XORI_B3_128(RTYPE, in0, in1, in2) \
1840 XORI_B2_128(RTYPE, in0, in1); \
1845 #define XORI_B4_128(RTYPE, in0, in1, in2, in3) \
1847 XORI_B2_128(RTYPE, in0, in1); \
1854 #define XORI_B5_128(RTYPE, in0, in1, in2, in3, in4) \
1856 XORI_B3_128(RTYPE, in0, in1, in2); \
1861 #define XORI_B6_128(RTYPE, in0, in1, in2, in3, in4, in5) \
1863 XORI_B4_128(RTYPE, in0, in1, in2, in3); \
1868 #define XORI_B7_128(RTYPE, in0, in1, in2, in3, in4, in5, in6) \
1870 XORI_B4_128(RTYPE, in0, in1, in2, in3); \
1875 #define XORI_B8_128(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7) \
1877 XORI_B4_128(RTYPE, in0, in1, in2, in3); \
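The XORI_B*_128 family flips the sign bit of every byte: XORing an unsigned pixel with 128 maps [0, 255] onto the signed range [-128, 127], which is what the signed multiply and dot-product instructions expect, and applying it a second time restores the original values. A minimal sketch:

    #include <msa.h>

    /* Re-bias unsigned pixels into signed byte range; the operation is self-inverse. */
    static inline v16i8 to_signed_pixels_sketch(v16u8 pix)
    {
        return (v16i8) __msa_xori_b(pix, 128);
    }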
1884 Arguments : Inputs - in0, in1, in2, in3
1887 Details : Signed halfword elements from 'in0' are added to signed
1892 #define ADDS_SH2(RTYPE, in0, in1, in2, in3, out0, out1) \
1894 out0 = (RTYPE) __msa_adds_s_h((v8i16) in0, (v8i16) in1); \
1899 #define ADDS_SH4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
1902 ADDS_SH2(RTYPE, in0, in1, in2, in3, out0, out1); \
1909 Arguments : Inputs - in0, in1, in2, in3, shift
1910 Outputs - in0, in1, in2, in3 (in place)
1912 Details : Each element of vector 'in0' is shifted left by 'shift' and the
1913 result is written in place to 'in0'
1916 #define SLLI_2V(in0, in1, shift) \
1918 in0 = in0 << shift; \
1921 #define SLLI_4V(in0, in1, in2, in3, shift) \
1923 in0 = in0 << shift; \
1931 Arguments : Inputs - in0, in1, in2, in3, shift
1932 Outputs - in0, in1, in2, in3 (in place)
1934 Details : Each element of vector 'in0' is shifted right by 'shift' and the
1935 result is written in place to 'in0'
1939 #define SRA_4V(in0, in1, in2, in3, shift) \
1941 in0 = in0 >> shift; \
1948 Arguments : Inputs - in0, in1, in2, in3, shift
1949 Outputs - in0, in1, in2, in3 (in place)
1951 Details : Each element of vector 'in0' is logically shifted right by
1953 result is written in place to 'in0'
1957 #define SRL_H4(RTYPE, in0, in1, in2, in3, shift) \
1959 in0 = (RTYPE) __msa_srl_h((v8i16) in0, (v8i16) shift); \
1966 #define SRLR_H4(RTYPE, in0, in1, in2, in3, shift) \
1968 in0 = (RTYPE) __msa_srlr_h((v8i16) in0, (v8i16) shift); \
1976 #define SRLR_H8(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, shift) \
1978 SRLR_H4(RTYPE, in0, in1, in2, in3, shift); \
1985 Arguments : Inputs - in0, in1, shift
1986 Outputs - in0, in1 (in place)
1988 Details : Each element of vector 'in0' is arithmetically shifted right by
1991 and the result is written in place to 'in0'
1995 #define SRAR_H2(RTYPE, in0, in1, shift) \
1997 in0 = (RTYPE) __msa_srar_h((v8i16) in0, (v8i16) shift); \
2003 #define SRAR_H3(RTYPE, in0, in1, in2, shift) \
2005 SRAR_H2(RTYPE, in0, in1, shift) \
2010 #define SRAR_H4(RTYPE, in0, in1, in2, in3, shift) \
2012 SRAR_H2(RTYPE, in0, in1, shift) \
2019 Arguments : Inputs - in0, in1, shift
2020 Outputs - in0, in1 (in place)
2022 Details : Each element of vector 'in0' is arithmetically shifted right by
2025 and the result is written in place to 'in0'
2029 #define SRAR_W2(RTYPE, in0, in1, shift) \
2031 in0 = (RTYPE) __msa_srar_w((v4i32) in0, (v4i32) shift); \
2036 #define SRAR_W4(RTYPE, in0, in1, in2, in3, shift) \
2038 SRAR_W2(RTYPE, in0, in1, shift) \
2044 Arguments : Inputs - in0, in1, in2, in3, shift
2045 Outputs - in0, in1, in2, in3 (in place)
2047 Details : Each element of vector 'in0' is arithmetically shifted right by
2050 and the result is written in place to 'in0'
2053 #define SRARI_H2(RTYPE, in0, in1, shift) \
2055 in0 = (RTYPE) __msa_srari_h((v8i16) in0, shift); \
2061 #define SRARI_H4(RTYPE, in0, in1, in2, in3, shift) \
2063 SRARI_H2(RTYPE, in0, in1, shift); \
2070 Arguments : Inputs - in0, in1, shift
2071 Outputs - in0, in1 (in place)
2073 Details : Each element of vector 'in0' is arithmetically shifted right by
2076 and the result is written in place to 'in0'
2079 #define SRARI_W2(RTYPE, in0, in1, shift) \
2081 in0 = (RTYPE) __msa_srari_w((v4i32) in0, shift); \
2086 #define SRARI_W4(RTYPE, in0, in1, in2, in3, shift) \
2088 SRARI_W2(RTYPE, in0, in1, shift); \
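The SRAR*/SRARI* fragments are rounding right shifts: each lane is shifted right arithmetically and the last bit shifted out is added back, i.e. (x + (1 << (shift - 1))) >> shift, which is how filter sums are normalized. A sketch using the immediate form; the shift amount 6 is illustrative:

    #include <msa.h>

    /* Round-to-nearest normalization of halfword sums: (sum + 32) >> 6 per lane. */
    static inline v8i16 round_shift6_sketch(v8i16 sum)
    {
        return __msa_srari_h(sum, 6);   /* srari needs a compile-time immediate */
    }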
2095 Arguments : Inputs - in0, in1, in2, in3
2097 Details : Each element of 'in0' is multiplied by the corresponding element of 'in1'
2101 #define MUL2(in0, in1, in2, in3, out0, out1) \
2103 out0 = in0 * in1; \
2106 #define MUL4(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3) \
2108 MUL2(in0, in1, in2, in3, out0, out1); \
2113 Arguments : Inputs - in0, in1, in2, in3
2118 #define ADD2(in0, in1, in2, in3, out0, out1) \
2120 out0 = in0 + in1; \
2123 #define ADD4(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3) \
2125 ADD2(in0, in1, in2, in3, out0, out1); \
2130 Arguments : Inputs - in0, in1, in2, in3
2135 #define SUB2(in0, in1, in2, in3, out0, out1) \
2137 out0 = in0 - in1; \
2140 #define SUB4(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3) \
2142 out0 = in0 - in1; \
2169 extracted and interleaved with the same vector 'in0' to generate
2186 extracted and interleaved right with the same vector 'in0' to
2188 Then interleaved left with the same vector 'in0' to
2219 extracted and interleaved right with the same vector 'in0' to
2221 Then interleaved left with the same vector 'in0' to
2233 Arguments : Inputs - in0, in1
2234 Outputs - in0, in1 (in-place)
2237 #define SWAP(in0, in1) \
2239 in0 = in0 ^ in1; \
2240 in1 = in0 ^ in1; \
2241 in0 = in0 ^ in1; \
2245 Arguments : Inputs - in0, in1, in2, in3
2249 #define BUTTERFLY_4(in0, in1, in2, in3, out0, out1, out2, out3) \
2251 out0 = in0 + in3; \
2255 out3 = in0 - in3; \
2259 Arguments : Inputs - in0 ... in7
2263 #define BUTTERFLY_8(in0, in1, in2, in3, in4, in5, in6, in7, \
2266 out0 = in0 + in7; \
2274 out7 = in0 - in7; \
2278 Arguments : Inputs - in0 ... in15
2282 #define BUTTERFLY_16(in0, in1, in2, in3, in4, in5, in6, in7, \
2287 out0 = in0 + in15; \
2303 out15 = in0 - in15; \
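The BUTTERFLY_* fragments build the mirrored sum/difference pairs used by DCT-style transforms: out0/out3 are shown above as in0 + in3 and in0 - in3, and the middle outputs follow the same mirrored pattern. A sketch of BUTTERFLY_4 on halfword vectors (out1/out2 written from that symmetry; element-wise +/- on MSA vector types is the GNU vector extension the header itself relies on):

    #include <msa.h>

    /* 4-point butterfly: mirrored sums in the first half, differences in the second. */
    static inline void butterfly4_sketch(v8i16 in0, v8i16 in1, v8i16 in2, v8i16 in3,
                                         v8i16 *out0, v8i16 *out1,
                                         v8i16 *out2, v8i16 *out3)
    {
        *out0 = in0 + in3;
        *out1 = in1 + in2;
        *out2 = in1 - in2;
        *out3 = in0 - in3;
    }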
2307 Arguments : Inputs - in0, in1, in2, in3 (input 4x4 byte block)
2312 #define TRANSPOSE4x4_UB_UB(in0, in1, in2, in3, out0, out1, out2, out3) \
2317 ILVR_D2_SB(in1, in0, in3, in2, s0_m, s1_m); \
2327 Arguments : Inputs - in0, in1, in2, in3 (input 8x4 byte block)
2332 #define TRANSPOSE8x4_UB(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
2337 ILVEV_W2_SB(in0, in4, in1, in5, tmp0_m, tmp1_m); \
2352 Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7
2359 #define TRANSPOSE8x8_UB(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
2366 ILVR_B4_SB(in2, in0, in3, in1, in6, in4, in7, in5, \
2379 Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7,
2385 #define TRANSPOSE16x4_UB_UB(in0, in1, in2, in3, in4, in5, in6, in7, \
2391 ILVEV_W2_SD(in0, in4, in8, in12, tmp0_m, tmp1_m); \
2414 Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7,
2420 #define TRANSPOSE16x8_UB_UB(in0, in1, in2, in3, in4, in5, in6, in7, \
2427 ILVEV_D2_UB(in0, in8, in1, in9, out7, out6); \
2461 Arguments : Inputs - in0, in1, in2, in3
2466 #define TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, out0, out1, out2, out3) \
2470 ILVR_H2_SH(in1, in0, in3, in2, s0_m, s1_m); \
2477 Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7
2482 #define TRANSPOSE8x8_H(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
2493 ILVR_H2_SH(in2, in0, in3, in1, s0_m, s1_m); \
2495 ILVL_H2_SH(in2, in0, in3, in1, s0_m, s1_m); \
2508 Arguments : Inputs - in0, in1, in2, in3
2513 #define TRANSPOSE4x4_SW_SW(in0, in1, in2, in3, out0, out1, out2, out3) \
2517 ILVRL_W2_SW(in1, in0, s0_m, s1_m); \
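The transpose macros are composed from the interleave primitives above: one interleave pass pairs up matching columns of adjacent rows, and a second pass over doublewords assembles the full columns. A hedged 4x4 word-transpose sketch in the spirit of the TRANSPOSE4x4_SW_SW fragment (lane comments assume little-endian order):

    #include <msa.h>

    /* Transpose a 4x4 block of 32-bit words held as four row vectors. */
    static inline void transpose4x4_w_sketch(v4i32 r0, v4i32 r1, v4i32 r2, v4i32 r3,
                                             v4i32 *c0, v4i32 *c1,
                                             v4i32 *c2, v4i32 *c3)
    {
        /* Pass 1: interleave rows 0/1 and 2/3. */
        v4i32 t0 = __msa_ilvr_w(r1, r0);   /* r0[0] r1[0] r0[1] r1[1] */
        v4i32 t1 = __msa_ilvl_w(r1, r0);   /* r0[2] r1[2] r0[3] r1[3] */
        v4i32 t2 = __msa_ilvr_w(r3, r2);
        v4i32 t3 = __msa_ilvl_w(r3, r2);

        /* Pass 2: interleave doublewords to form the output columns. */
        *c0 = (v4i32) __msa_ilvr_d((v2i64) t2, (v2i64) t0);
        *c1 = (v4i32) __msa_ilvl_d((v2i64) t2, (v2i64) t0);
        *c2 = (v4i32) __msa_ilvr_d((v2i64) t3, (v2i64) t1);
        *c3 = (v4i32) __msa_ilvl_d((v2i64) t3, (v2i64) t1);
    }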
2528 Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7, pdst, stride
2529 Details : Each byte element from the input vector pair 'in0' and 'in1' is
2540 #define AVE_ST8x4_UB(in0, in1, in2, in3, in4, in5, in6, in7, pdst, stride) \
2545 tmp0_m = __msa_ave_u_b((v16u8) in0, (v16u8) in1); \
2559 Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7, pdst, stride
2560 Details : Each byte element from the input vector pair 'in0' and 'in1' is
2571 #define AVE_ST16x4_UB(in0, in1, in2, in3, in4, in5, in6, in7, pdst, stride) \
2575 tmp0_m = __msa_ave_u_b((v16u8) in0, (v16u8) in1); \
2585 Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7, pdst, stride
2586 Details : Each byte element from the input vector pair 'in0' and 'in1' is
2597 #define AVER_ST8x4_UB(in0, in1, in2, in3, in4, in5, in6, in7, pdst, stride) \
2602 AVER_UB4_UB(in0, in1, in2, in3, in4, in5, in6, in7, \
2614 Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7, pdst, stride
2615 Details : Each byte element from the input vector pair 'in0' and 'in1' is
2626 #define AVER_ST16x4_UB(in0, in1, in2, in3, in4, in5, in6, in7, pdst, stride) \
2630 AVER_UB4_UB(in0, in1, in2, in3, in4, in5, in6, in7, \
2638 Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7, pdst, stride
2639 Details : Each byte element from the input vector pair 'in0' and 'in1' is
2650 #define AVER_DST_ST8x4_UB(in0, in1, in2, in3, in4, in5, in6, in7, \
2657 AVER_UB4_UB(in0, in1, in2, in3, in4, in5, in6, in7, \
2666 Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7, pdst, stride
2667 Details : Each byte element from the input vector pair 'in0' and 'in1' is
2678 #define AVER_DST_ST16x4_UB(in0, in1, in2, in3, in4, in5, in6, in7, \
2685 AVER_UB4_UB(in0, in1, in2, in3, in4, in5, in6, in7, \
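The AVE_ST* and AVER_ST* families differ only in rounding: __msa_ave_u_b truncates ((a + b) >> 1) while __msa_aver_u_b rounds ((a + b + 1) >> 1). A sketch of one averaged 16-byte row store in the AVE (truncating) style, using the plain vector store intrinsic rather than the header's own store wrappers:

    #include <msa.h>

    /* Average two rows without rounding and store the 16 result bytes. */
    static inline void ave_store_row_sketch(v16u8 a, v16u8 b, void *pdst)
    {
        v16u8 avg = __msa_ave_u_b(a, b);    /* (a + b) >> 1 per byte */
        __msa_st_b((v16i8) avg, pdst, 0);   /* 16-byte vector store  */
    }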
2692 Arguments : Inputs - in0, in1, in2, in3, pdst, stride
2696 #define ADDBLK_ST4x4_UB(in0, in1, in2, in3, pdst, stride) \
2705 ILVR_D2_SH(in1, in0, in3, in2, inp0_m, inp1_m) \
2722 Arguments : Inputs - in0, in1, in2, coeff0, coeff1, coeff2
2725 Details : Dot product of 'in0' with 'coeff0'
2730 out0_m = (in0 * coeff0) + (in1 * coeff1) + (in2 * coeff2)
2732 #define DPADD_SH3_SH(in0, in1, in2, coeff0, coeff1, coeff2) \
2736 out0_m = __msa_dotp_s_h((v16i8) in0, (v16i8) coeff0); \
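DPADD_SH3_SH evaluates a 3-tap filter by chaining a dot product with dot-product-accumulate steps, matching the out0_m formula quoted above. A sketch with the dotp/dpadd intrinsics; the two accumulate calls are an assumption based on that formula rather than a copy of the macro body:

    #include <msa.h>

    /* out = in0 * coeff0 + in1 * coeff1 + in2 * coeff2, per the formula above
       (each halfword lane holds the sum of a pair of byte products). */
    static inline v8i16 dpadd_3tap_sketch(v16i8 in0, v16i8 in1, v16i8 in2,
                                          v16i8 coeff0, v16i8 coeff1, v16i8 coeff2)
    {
        v8i16 out = __msa_dotp_s_h(in0, coeff0);
        out = __msa_dpadd_s_h(out, in1, coeff1);
        out = __msa_dpadd_s_h(out, in2, coeff2);
        return out;
    }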
2744 Arguments : Inputs - in0, in1
2747 Details : Even signed byte elements from 'in0' and 'in1' are packed
2751 #define PCKEV_XORI128_UB(in0, in1) \
2754 out_m = (v16u8) __msa_pckev_b((v16i8) in1, (v16i8) in0); \
2761 Arguments : Inputs - in0, in1, in2, in3, dst0, dst1, pdst, stride
2763 #define CONVERT_UB_AVG_ST8x4_UB(in0, in1, in2, in3, \
2769 tmp0_m = PCKEV_XORI128_UB(in0, in1); \
2778 Arguments : Inputs - in0, in1, in2, in3, pdst, stride
2780 #define PCKEV_ST4x4_UB(in0, in1, in2, in3, pdst, stride) \
2785 PCKEV_B2_SB(in1, in0, in3, in2, tmp0_m, tmp1_m); \
2797 Arguments : Inputs - in0, in1, pdst
2799 #define PCKEV_ST_SB(in0, in1, pdst) \
2802 tmp_m = __msa_pckev_b((v16i8) in1, (v16i8) in0); \
2807 Arguments : Inputs - in0, in1, mask, coeff, shift
2809 #define HORIZ_2TAP_FILT_UH(in0, in1, mask, coeff, shift) \
2814 tmp0_m = __msa_vshf_b((v16i8) mask, (v16i8) in1, (v16i8) in0); \