Lines Matching defs:inp1
24 #define APPLY_HORIZ_QPEL_FILTER(inp0, inp1, mask, coef0, coef1, coef2) \
32 VSHF_B2_UB(inp0, inp0, inp1, inp1, mask, mask, tmp0, tmp1); \
33 ILVRL_B2_UH(inp1, inp0, sum0_r, sum0_l); \
35 data3 = (v16u8) __msa_sldi_b((v16i8) tmp1, (v16i8) inp1, 1); \
39 data4 = (v16u8) __msa_sldi_b((v16i8) tmp1, (v16i8) inp1, 2); \
44 data5 = (v16u8) __msa_sldi_b((v16i8) tmp1, (v16i8) inp1, 3); \
58 #define APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, \
67 VSHF_B2_UH(inp0, inp0, inp1, inp1, mask0, mask0, sum0_r, sum4_r); \
68 VSHF_B2_UH(inp0, inp0, inp1, inp1, mask3, mask3, sum3_r, sum7_r); \
71 VSHF_B2_UH(inp0, inp0, inp1, inp1, mask2, mask2, sum2_r, sum6_r); \
72 VSHF_B2_UH(inp0, inp0, inp1, inp1, mask1, mask1, sum1_r, sum5_r); \
127 #define APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp0, inp1, mask, \
136 VSHF_B2_UB(inp0, inp0, inp1, inp1, mask, mask, tmp0, tmp1); \
137 ILVRL_B2_UH(inp1, inp0, sum0_r, sum0_l); \
139 data3 = (v16u8) __msa_sldi_b((v16i8) tmp1, (v16i8) inp1, 1); \
143 data4 = (v16u8) __msa_sldi_b((v16i8) tmp1, (v16i8) inp1, 2); \
148 data5 = (v16u8) __msa_sldi_b((v16i8) tmp1, (v16i8) inp1, 3); \
165 #define APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, \
174 VSHF_B2_UH(inp0, inp0, inp1, inp1, mask0, mask0, sum0_r, sum4_r); \
175 VSHF_B2_UH(inp0, inp0, inp1, inp1, mask3, mask3, sum3_r, sum7_r); \
178 VSHF_B2_UH(inp0, inp0, inp1, inp1, mask2, mask2, sum2_r, sum6_r); \
179 VSHF_B2_UH(inp0, inp0, inp1, inp1, mask1, mask1, sum1_r, sum5_r); \
194 #define APPLY_VERT_QPEL_FILTER(inp0, inp1, inp2, inp3, \
208 ILVRL_B2_UH(inp5, inp1, sum1_r, sum1_l); \
248 #define APPLY_VERT_QPEL_NO_ROUND_FILTER(inp0, inp1, inp2, inp3, \
262 ILVRL_B2_UH(inp5, inp1, sum1_r, sum1_l); \
315 v16u8 inp0, inp1, inp2, inp3;
326 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
328 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
334 inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
349 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
358 LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7);
360 res = APPLY_HORIZ_QPEL_FILTER(inp0, inp1, mask,
393 v16u8 inp0, inp1, inp2, inp3;
404 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
406 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
424 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
433 LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7);
435 res = APPLY_HORIZ_QPEL_FILTER(inp0, inp1, mask,
464 v16u8 inp0, inp1, inp2, inp3;
475 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
477 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
483 SLDI_B4_UB(inp0, inp0, inp1, inp1, inp2, inp2, inp3, inp3, 1,
484 inp0, inp1, inp2, inp3);
485 inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
500 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
509 LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7);
511 res = APPLY_HORIZ_QPEL_FILTER(inp0, inp1, mask,
513 res = __msa_aver_u_b(res, inp1);
544 v16u8 inp0, inp1, inp2, inp3;
555 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
557 res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
563 inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
579 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
588 LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7);
590 res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp0, inp1, mask,
623 v16u8 inp0, inp1, inp2, inp3;
634 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
636 res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
654 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
663 LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7);
665 res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp0, inp1, mask,
694 v16u8 inp0, inp1, inp2, inp3;
705 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
707 res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
713 SLDI_B4_UB(inp0, inp0, inp1, inp1, inp2, inp2, inp3, inp3, 1,
714 inp0, inp1, inp2, inp3);
715 inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
731 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
740 LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7);
742 res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp0, inp1, mask,
744 res = __msa_ave_u_b(res, inp1);
775 v16u8 inp0, inp1, inp2, inp3;
787 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
789 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
796 inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
814 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
824 LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7);
826 res0 = APPLY_HORIZ_QPEL_FILTER(inp0, inp1, mask,
855 v16u8 inp0, inp1, inp2, inp3;
867 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
869 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
891 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
901 LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7);
903 res0 = APPLY_HORIZ_QPEL_FILTER(inp0, inp1, mask,
930 v16u8 inp0, inp1, inp2, inp3;
942 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
944 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
951 SLDI_B4_UB(inp0, inp0, inp1, inp1, inp2, inp2, inp3, inp3, 1,
952 inp0, inp1, inp2, inp3);
953 inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
971 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
980 LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7);
982 res0 = APPLY_HORIZ_QPEL_FILTER(inp0, inp1, mask,
987 AVER_UB2_UB(res0, inp1, res1, inp3, res0, res1);
1009 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
1015 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
1019 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp0, inp0, inp1, inp2,
1020 inp1, inp2, inp3, inp4,
1021 inp1, inp0, inp0, inp1,
1026 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp2, inp1, inp0, inp0,
1028 inp3, inp2, inp1, inp0,
1031 tmp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
1037 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp4, inp3, inp2, inp1,
1058 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
1065 LD_UB5(src, src_stride, inp0, inp1, inp2, inp3, inp4);
1067 res0 = APPLY_VERT_QPEL_FILTER(inp0, inp0, inp1, inp2,
1068 inp1, inp2, inp3, inp4,
1076 res0 = APPLY_VERT_QPEL_FILTER(inp1, inp0, inp0, inp1,
1079 res0 = __msa_aver_u_b(res0, inp1);
1085 res0 = APPLY_VERT_QPEL_FILTER(inp2, inp1, inp0, inp0,
1094 res0 = APPLY_VERT_QPEL_FILTER(inp3, inp2, inp1, inp0,
1103 res0 = APPLY_VERT_QPEL_FILTER(inp4, inp3, inp2, inp1,
1199 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
1205 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
1209 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp0, inp0, inp1, inp2,
1210 inp1, inp2, inp3, inp4,
1211 inp1, inp0, inp0, inp1,
1216 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp2, inp1, inp0, inp0,
1218 inp3, inp2, inp1, inp0,
1224 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp4, inp3, inp2, inp1,
1242 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
1249 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
1253 res0 = APPLY_VERT_QPEL_FILTER(inp0, inp0, inp1, inp2,
1254 inp1, inp2, inp3, inp4,
1261 res0 = APPLY_VERT_QPEL_FILTER(inp1, inp0, inp0, inp1,
1269 res0 = APPLY_VERT_QPEL_FILTER(inp2, inp1, inp0, inp0,
1277 res0 = APPLY_VERT_QPEL_FILTER(inp3, inp2, inp1, inp0,
1285 res0 = APPLY_VERT_QPEL_FILTER(inp4, inp3, inp2, inp1,
1378 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
1384 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
1388 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp0, inp0, inp1, inp2,
1389 inp1, inp2, inp3, inp4,
1390 inp1, inp0, inp0, inp1,
1396 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp2, inp1, inp0, inp0,
1398 inp3, inp2, inp1, inp0,
1401 tmp0 = (v16u8) __msa_insve_d((v2i64) inp1, 1, (v2i64) inp2);
1407 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp4, inp3, inp2, inp1,
1428 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
1435 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
1439 res0 = APPLY_VERT_QPEL_FILTER(inp0, inp0, inp1, inp2,
1440 inp1, inp2, inp3, inp4,
1442 res0 = __msa_aver_u_b(res0, inp1);
1448 res0 = APPLY_VERT_QPEL_FILTER(inp1, inp0, inp0, inp1,
1457 res0 = APPLY_VERT_QPEL_FILTER(inp2, inp1, inp0, inp0,
1466 res0 = APPLY_VERT_QPEL_FILTER(inp3, inp2, inp1, inp0,
1475 res0 = APPLY_VERT_QPEL_FILTER(inp4, inp3, inp2, inp1,
1579 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
1585 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
1589 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp0, inp1, inp2,
1590 inp1, inp2, inp3, inp4,
1591 inp1, inp0, inp0, inp1,
1596 res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp1, inp0, inp0,
1598 inp3, inp2, inp1, inp0,
1601 tmp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
1608 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp4, inp3, inp2, inp1,
1630 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
1637 LD_UB5(src, src_stride, inp0, inp1, inp2, inp3, inp4);
1639 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp0, inp0, inp1, inp2,
1640 inp1, inp2, inp3, inp4,
1648 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp1, inp0, inp0, inp1,
1651 res0 = __msa_ave_u_b(res0, inp1);
1657 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp2, inp1, inp0, inp0,
1666 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp3, inp2, inp1, inp0,
1675 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp4, inp3, inp2, inp1,
1780 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
1786 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
1790 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp0, inp1, inp2,
1791 inp1, inp2, inp3, inp4,
1792 inp1, inp0, inp0, inp1,
1797 res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp1, inp0, inp0,
1799 inp3, inp2, inp1, inp0,
1805 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp4, inp3, inp2, inp1,
1823 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
1830 LD_UB5(src, src_stride, inp0, inp1, inp2, inp3, inp4);
1832 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp0, inp0, inp1, inp2,
1833 inp1, inp2, inp3, inp4,
1840 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp1, inp0, inp0, inp1,
1848 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp2, inp1, inp0, inp0,
1856 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp3, inp2, inp1, inp0,
1864 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp4, inp3, inp2, inp1,
1956 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
1962 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
1966 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp0, inp1, inp2,
1967 inp1, inp2, inp3, inp4,
1968 inp1, inp0, inp0, inp1,
1973 res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp1, inp0, inp0,
1975 inp3, inp2, inp1, inp0,
1978 tmp0 = (v16u8) __msa_insve_d((v2i64) inp1, 1, (v2i64) inp2);
1985 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp4, inp3, inp2, inp1,
2007 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
2014 LD_UB5(src, src_stride, inp0, inp1, inp2, inp3, inp4);
2016 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp0, inp0, inp1, inp2,
2017 inp1, inp2, inp3, inp4,
2019 res0 = __msa_ave_u_b(res0, inp1);
2025 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp1, inp0, inp0, inp1,
2034 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp2, inp1, inp0, inp0,
2043 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp3, inp2, inp1, inp0,
2052 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp4, inp3, inp2, inp1,
2156 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
2163 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
2167 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp0, inp0, inp1, inp2,
2168 inp1, inp2, inp3, inp4,
2169 inp1, inp0, inp0, inp1,
2175 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp2, inp1, inp0, inp0,
2177 inp3, inp2, inp1, inp0,
2182 tmp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
2192 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp4, inp3, inp2, inp1,
2218 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
2225 LD_UB5(src, src_stride, inp0, inp1, inp2, inp3, inp4);
2227 res0 = APPLY_VERT_QPEL_FILTER(inp0, inp0, inp1, inp2,
2228 inp1, inp2, inp3, inp4,
2233 res1 = APPLY_VERT_QPEL_FILTER(inp1, inp0, inp0, inp1,
2238 AVER_UB2_UB(res0, inp0, res1, inp1, res0, res1);
2245 res0 = APPLY_VERT_QPEL_FILTER(inp2, inp1, inp0, inp0,
2251 res1 = APPLY_VERT_QPEL_FILTER(inp3, inp2, inp1, inp0,
2263 res0 = APPLY_VERT_QPEL_FILTER(inp4, inp3, inp2, inp1,
2350 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
2357 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
2361 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp0, inp0, inp1, inp2,
2362 inp1, inp2, inp3, inp4,
2363 inp1, inp0, inp0, inp1,
2368 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp2, inp1, inp0, inp0,
2370 inp3, inp2, inp1, inp0,
2381 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp4, inp3, inp2, inp1,
2403 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
2410 LD_UB5(src, src_stride, inp0, inp1, inp2, inp3, inp4);
2412 res0 = APPLY_VERT_QPEL_FILTER(inp0, inp0, inp1, inp2,
2413 inp1, inp2, inp3, inp4,
2417 res1 = APPLY_VERT_QPEL_FILTER(inp1, inp0, inp0, inp1,
2427 res0 = APPLY_VERT_QPEL_FILTER(inp2, inp1, inp0, inp0,
2432 res1 = APPLY_VERT_QPEL_FILTER(inp3, inp2, inp1, inp0,
2442 res0 = APPLY_VERT_QPEL_FILTER(inp4, inp3, inp2, inp1,
2528 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
2535 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
2539 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp0, inp0, inp1, inp2,
2540 inp1, inp2, inp3, inp4,
2541 inp1, inp0, inp0, inp1,
2546 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp2, inp1, inp0, inp0,
2548 inp3, inp2, inp1, inp0,
2552 tmp0 = (v16u8) __msa_insve_d((v2i64) inp1, 1, (v2i64) inp2);
2562 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp4, inp3, inp2, inp1,
2587 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
2594 LD_UB5(src, src_stride, inp0, inp1, inp2, inp3, inp4);
2596 res0 = APPLY_VERT_QPEL_FILTER(inp0, inp0, inp1, inp2,
2597 inp1, inp2, inp3, inp4,
2601 res1 = APPLY_VERT_QPEL_FILTER(inp1, inp0, inp0, inp1,
2605 AVER_UB2_UB(res0, inp1, res1, inp2, res0, res1);
2612 res0 = APPLY_VERT_QPEL_FILTER(inp2, inp1, inp0, inp0,
2617 res1 = APPLY_VERT_QPEL_FILTER(inp3, inp2, inp1, inp0,
2628 res0 = APPLY_VERT_QPEL_FILTER(inp4, inp3, inp2, inp1,
2722 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
2731 LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7);
2733 res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp0, inp1, mask,
2758 LD_UB2(src, 1, inp0, inp1);
2759 res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp0, inp1, mask,
2781 v16u8 inp0, inp1, inp2, inp3;
2793 LD_UB2(src, src_stride, inp0, inp1);
2795 res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
2798 inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
2809 LD_UB2(src, src_stride, inp0, inp1);
2811 res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
2814 inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
2877 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
2886 LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7);
2888 res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp0, inp1, mask,
2909 LD_UB2(src, 1, inp0, inp1);
2910 res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp0, inp1, mask,
2931 v16u8 inp0, inp1, inp2, inp3;
2943 LD_UB2(src, src_stride, inp0, inp1);
2945 horiz0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
2956 LD_UB2(src, src_stride, inp0, inp1);
2958 horiz4 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
3021 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
3030 LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7);
3032 res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp0, inp1, mask,
3034 res = __msa_ave_u_b(res, inp1);
3057 LD_UB2(src, 1, inp0, inp1);
3058 res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp0, inp1, mask,
3060 res = __msa_ave_u_b(inp1, res);
3080 v16u8 inp0, inp1, inp2, inp3;
3092 LD_UB2(src, src_stride, inp0, inp1);
3094 res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
3097 SLDI_B2_UB(inp0, inp0, inp1, inp1, 1, inp0, inp1);
3099 inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
3112 LD_UB2(src, src_stride, inp0, inp1);
3114 res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
3117 SLDI_B2_UB(inp0, inp0, inp1, inp1, 1, inp0, inp1);
3119 inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
3194 v16u8 inp0, inp1, inp2, inp3;
3206 LD_UB2(src, src_stride, inp0, inp1);
3208 res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
3211 inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
3222 LD_UB2(src, src_stride, inp0, inp1);
3224 res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
3227 inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
3289 v16u8 inp0, inp1, inp2, inp3;
3301 LD_UB2(src, src_stride, inp0, inp1);
3303 horiz0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
3313 LD_UB2(src, src_stride, inp0, inp1);
3315 horiz4 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
3375 v16u8 inp0, inp1, inp2, inp3;
3387 LD_UB2(src, src_stride, inp0, inp1);
3389 res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
3392 SLDI_B2_UB(inp0, inp0, inp1, inp1, 1, inp0, inp1);
3394 inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
3407 LD_UB2(src, src_stride, inp0, inp1);
3409 res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
3412 SLDI_B2_UB(inp0, inp0, inp1, inp1, 1, inp0, inp1);
3414 inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
3478 v16u8 inp0, inp1, inp2, inp3;
3490 LD_UB2(src, src_stride, inp0, inp1);
3492 res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
3495 inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
3506 LD_UB2(src, src_stride, inp0, inp1);
3508 res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
3511 inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
3584 v16u8 inp0, inp1, inp2, inp3;
3596 LD_UB2(src, src_stride, inp0, inp1);
3598 horiz0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
3608 LD_UB2(src, src_stride, inp0, inp1);
3610 horiz4 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
3677 v16u8 inp0, inp1, inp2, inp3;
3689 LD_UB2(src, src_stride, inp0, inp1);
3691 res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
3694 SLDI_B2_UB(inp0, inp0, inp1, inp1, 1, inp0, inp1);
3696 inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
3709 LD_UB2(src, src_stride, inp0, inp1);
3711 res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
3715 SLDI_B2_UB(inp0, inp0, inp1, inp1, 1, inp0, inp1);
3716 inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
3779 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
3788 LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7);
3790 res = APPLY_HORIZ_QPEL_FILTER(inp0, inp1, mask,
3815 LD_UB2(src, 1, inp0, inp1);
3816 res = APPLY_HORIZ_QPEL_FILTER(inp0, inp1, mask, const20, const6, const3);
3837 v16u8 inp0, inp1, inp2, inp3;
3849 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
3851 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
3855 inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
3861 LD_UB2(src, src_stride, inp0, inp1);
3863 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
3865 inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
3924 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
3933 LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7);
3935 res = APPLY_HORIZ_QPEL_FILTER(inp0, inp1, mask,
3956 LD_UB2(src, 1, inp0, inp1);
3957 res = APPLY_HORIZ_QPEL_FILTER(inp0, inp1, mask, const20, const6, const3);
3977 v16u8 inp0, inp1, inp2, inp3;
3989 LD_UB2(src, src_stride, inp0, inp1);
3991 horiz0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
4001 LD_UB2(src, src_stride, inp0, inp1);
4003 horiz4 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
4061 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
4070 LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7);
4072 res = APPLY_HORIZ_QPEL_FILTER(inp0, inp1, mask,
4074 res = __msa_aver_u_b(res, inp1);
4097 LD_UB2(src, 1, inp0, inp1);
4098 res = APPLY_HORIZ_QPEL_FILTER(inp0, inp1, mask, const20, const6, const3);
4099 res = __msa_aver_u_b(inp1, res);
4119 v16u8 inp0, inp1, inp2, inp3;
4131 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
4133 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
4137 SLDI_B2_UB(inp0, inp0, inp1, inp1, 1, inp0, inp1);
4139 inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
4147 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
4149 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
4153 SLDI_B2_UB(inp0, inp0, inp1, inp1, 1, inp0, inp1);
4155 inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
4221 v16u8 inp0, inp1, inp2, inp3;
4233 LD_UB2(src, src_stride, inp0, inp1);
4235 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
4237 inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
4248 LD_UB2(src, src_stride, inp0, inp1);
4250 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
4252 inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
4309 v16u8 inp0, inp1, inp2, inp3;
4321 LD_UB2(src, src_stride, inp0, inp1);
4323 horiz0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
4333 LD_UB2(src, src_stride, inp0, inp1);
4335 horiz4 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
4394 v16u8 inp0, inp1, inp2, inp3;
4406 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
4409 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
4413 SLDI_B2_UB(inp0, inp0, inp1, inp1, 1, inp0, inp1);
4415 inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
4423 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
4425 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
4429 SLDI_B2_UB(inp0, inp0, inp1, inp1, 1, inp0, inp1);
4431 inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
4486 v16u8 inp0, inp1, inp2, inp3;
4498 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
4501 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
4505 inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
4511 LD_UB2(src, src_stride, inp0, inp1);
4514 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
4516 inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
4583 v16u8 inp0, inp1, inp2, inp3;
4595 LD_UB2(src, src_stride, inp0, inp1);
4597 horiz0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
4607 LD_UB2(src, src_stride, inp0, inp1);
4609 horiz4 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
4676 v16u8 inp0, inp1, inp2, inp3;
4688 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
4690 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
4693 SLDI_B2_UB(inp0, inp0, inp1, inp1, 1, inp0, inp1);
4695 inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
4705 LD_UB2(src, src_stride, inp0, inp1);
4707 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
4709 SLDI_B2_UB(inp0, inp0, inp1, inp1, 1, inp0, inp1);
4711 inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
4781 v16u8 inp0, inp1, inp2, inp3;
4794 LD_UB2(src, src_stride, inp0, inp1);
4796 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
4800 inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
4805 LD_UB2(src, src_stride, inp0, inp1);
4810 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
4812 inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
4894 v16u8 inp0, inp1, inp2, inp3;
4907 LD_UB2(src, src_stride, inp0, inp1);
4909 horiz0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
4918 LD_UB2(src, src_stride, inp0, inp1);
4921 horiz4 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
5003 v16u8 inp0, inp1, inp2, inp3;
5016 LD_UB2(src, src_stride, inp0, inp1);
5018 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
5023 SLDI_B2_UB(inp0, inp0, inp1, inp1, 1, inp0, inp1);
5025 inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
5030 LD_UB2(src, src_stride, inp0, inp1);
5037 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
5040 SLDI_B2_UB(inp0, inp0, inp1, inp1, 1, inp0, inp1);
5042 inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
5128 v16u8 inp0, inp1, inp2, inp3;
5141 LD_UB2(src, src_stride, inp0, inp1);
5143 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
5147 inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
5152 LD_UB2(src, src_stride, inp0, inp1);
5157 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
5159 inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
5230 v16u8 inp0, inp1, inp2, inp3;
5243 LD_UB2(src, src_stride, inp0, inp1);
5245 horiz0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
5254 LD_UB2(src, src_stride, inp0, inp1);
5257 horiz4 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
5331 v16u8 inp0, inp1, inp2, inp3;
5344 LD_UB2(src, src_stride, inp0, inp1);
5346 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
5350 SLDI_B2_UB(inp0, inp0, inp1, inp1, 1, inp0, inp1);
5352 inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
5357 LD_UB2(src, src_stride, inp0, inp1);
5364 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
5367 SLDI_B2_UB(inp0, inp0, inp1, inp1, 1, inp0, inp1);
5369 inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
5447 v16u8 inp0, inp1, inp2, inp3;
5460 LD_UB2(src, src_stride, inp0, inp1);
5463 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
5465 inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
5476 LD_UB2(src, src_stride, inp0, inp1);
5478 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
5480 inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
5561 v16u8 inp0, inp1, inp2, inp3;
5574 LD_UB2(src, src_stride, inp0, inp1);
5576 horiz0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
5587 LD_UB2(src, src_stride, inp0, inp1);
5589 horiz4 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
5668 v16u8 inp0, inp1, inp2, inp3;
5681 LD_UB2(src, src_stride, inp0, inp1);
5683 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
5687 SLDI_B2_UB(inp0, inp0, inp1, inp1, 1, inp0, inp1);
5689 inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
5694 LD_UB2(src, src_stride, inp0, inp1);
5701 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
5703 SLDI_B2_UB(inp0, inp0, inp1, inp1, 1, inp0, inp1);
5705 inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);