Lines Matching defs:inp3
194 #define APPLY_VERT_QPEL_FILTER(inp0, inp1, inp2, inp3, \
204 ILVRL_B2_UH(inp7, inp3, sum3_r, sum3_l); \
248 #define APPLY_VERT_QPEL_NO_ROUND_FILTER(inp0, inp1, inp2, inp3, \
258 ILVRL_B2_UH(inp7, inp3, sum3_r, sum3_l); \
315 v16u8 inp0, inp1, inp2, inp3;
326 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
331 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
335 inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
349 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
358 LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7);
366 res = APPLY_HORIZ_QPEL_FILTER(inp2, inp3, mask,
393 v16u8 inp0, inp1, inp2, inp3;
404 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
409 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
424 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
433 LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7);
440 res = APPLY_HORIZ_QPEL_FILTER(inp2, inp3, mask,
464 v16u8 inp0, inp1, inp2, inp3;
475 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
480 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
483 SLDI_B4_UB(inp0, inp0, inp1, inp1, inp2, inp2, inp3, inp3, 1,
484 inp0, inp1, inp2, inp3);
486 inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
500 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
509 LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7);
517 res = APPLY_HORIZ_QPEL_FILTER(inp2, inp3, mask,
519 res = __msa_aver_u_b(res, inp3);
544 v16u8 inp0, inp1, inp2, inp3;
555 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
560 res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
564 inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
579 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
588 LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7);
596 res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp2, inp3, mask,
623 v16u8 inp0, inp1, inp2, inp3;
634 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
639 res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
654 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
663 LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7);
670 res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp2, inp3, mask,
694 v16u8 inp0, inp1, inp2, inp3;
705 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
710 res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
713 SLDI_B4_UB(inp0, inp0, inp1, inp1, inp2, inp2, inp3, inp3, 1,
714 inp0, inp1, inp2, inp3);
716 inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
731 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
740 LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7);
748 res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp2, inp3, mask,
750 res = __msa_ave_u_b(res, inp3);
775 v16u8 inp0, inp1, inp2, inp3;
787 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
792 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
797 inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
814 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
824 LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7);
828 res1 = APPLY_HORIZ_QPEL_FILTER(inp2, inp3, mask,
855 v16u8 inp0, inp1, inp2, inp3;
867 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
872 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
891 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
901 LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7);
905 res1 = APPLY_HORIZ_QPEL_FILTER(inp2, inp3, mask,
930 v16u8 inp0, inp1, inp2, inp3;
942 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
947 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
951 SLDI_B4_UB(inp0, inp0, inp1, inp1, inp2, inp2, inp3, inp3, 1,
952 inp0, inp1, inp2, inp3);
954 inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
971 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
980 LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7);
984 res1 = APPLY_HORIZ_QPEL_FILTER(inp2, inp3, mask,
987 AVER_UB2_UB(res0, inp1, res1, inp3, res0, res1);
1009 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
1015 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
1020 inp1, inp2, inp3, inp4,
1022 inp2, inp3, inp4, inp5,
1027 inp3, inp4, inp5, inp6,
1028 inp3, inp2, inp1, inp0,
1032 tmp1 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
1037 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp4, inp3, inp2, inp1,
1039 inp5, inp4, inp3, inp2,
1042 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp6, inp5, inp4, inp3,
1058 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
1065 LD_UB5(src, src_stride, inp0, inp1, inp2, inp3, inp4);
1068 inp1, inp2, inp3, inp4,
1077 inp2, inp3, inp4, inp5,
1086 inp3, inp4, inp5, inp6,
1094 res0 = APPLY_VERT_QPEL_FILTER(inp3, inp2, inp1, inp0,
1097 res0 = __msa_aver_u_b(res0, inp3);
1103 res0 = APPLY_VERT_QPEL_FILTER(inp4, inp3, inp2, inp1,
1110 res0 = APPLY_VERT_QPEL_FILTER(inp5, inp4, inp3, inp2,
1119 res0 = APPLY_VERT_QPEL_FILTER(inp6, inp5, inp4, inp3,
1199 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
1205 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
1210 inp1, inp2, inp3, inp4,
1212 inp2, inp3, inp4, inp5,
1217 inp3, inp4, inp5, inp6,
1218 inp3, inp2, inp1, inp0,
1224 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp4, inp3, inp2, inp1,
1226 inp5, inp4, inp3, inp2,
1229 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp6, inp5, inp4, inp3,
1242 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
1249 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
1254 inp1, inp2, inp3, inp4,
1262 inp2, inp3, inp4, inp5,
1270 inp3, inp4, inp5, inp6,
1277 res0 = APPLY_VERT_QPEL_FILTER(inp3, inp2, inp1, inp0,
1285 res0 = APPLY_VERT_QPEL_FILTER(inp4, inp3, inp2, inp1,
1293 res0 = APPLY_VERT_QPEL_FILTER(inp5, inp4, inp3, inp2,
1301 res0 = APPLY_VERT_QPEL_FILTER(inp6, inp5, inp4, inp3,
1378 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
1384 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
1389 inp1, inp2, inp3, inp4,
1391 inp2, inp3, inp4, inp5,
1397 inp3, inp4, inp5, inp6,
1398 inp3, inp2, inp1, inp0,
1402 tmp1 = (v16u8) __msa_insve_d((v2i64) inp3, 1, (v2i64) inp4);
1407 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp4, inp3, inp2, inp1,
1409 inp5, inp4, inp3, inp2,
1412 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp6, inp5, inp4, inp3,
1428 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
1435 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
1440 inp1, inp2, inp3, inp4,
1449 inp2, inp3, inp4, inp5,
1458 inp3, inp4, inp5, inp6,
1460 res0 = __msa_aver_u_b(res0, inp3);
1466 res0 = APPLY_VERT_QPEL_FILTER(inp3, inp2, inp1, inp0,
1475 res0 = APPLY_VERT_QPEL_FILTER(inp4, inp3, inp2, inp1,
1484 res0 = APPLY_VERT_QPEL_FILTER(inp5, inp4, inp3, inp2,
1493 res0 = APPLY_VERT_QPEL_FILTER(inp6, inp5, inp4, inp3,
1579 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
1585 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
1590 inp1, inp2, inp3, inp4,
1592 inp2, inp3, inp4, inp5,
1597 inp3, inp4, inp5, inp6,
1598 inp3, inp2, inp1, inp0,
1602 tmp1 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
1608 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp4, inp3, inp2, inp1,
1610 inp5, inp4, inp3, inp2,
1613 res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp6, inp5, inp4, inp3,
1630 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
1637 LD_UB5(src, src_stride, inp0, inp1, inp2, inp3, inp4);
1640 inp1, inp2, inp3, inp4,
1649 inp2, inp3, inp4, inp5,
1658 inp3, inp4, inp5, inp6,
1666 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp3, inp2, inp1, inp0,
1669 res0 = __msa_ave_u_b(res0, inp3);
1675 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp4, inp3, inp2, inp1,
1684 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp5, inp4, inp3, inp2,
1693 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp6, inp5, inp4, inp3,
1780 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
1786 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
1791 inp1, inp2, inp3, inp4,
1793 inp2, inp3, inp4, inp5,
1798 inp3, inp4, inp5, inp6,
1799 inp3, inp2, inp1, inp0,
1805 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp4, inp3, inp2, inp1,
1807 inp5, inp4, inp3, inp2,
1810 res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp6, inp5, inp4, inp3,
1823 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
1830 LD_UB5(src, src_stride, inp0, inp1, inp2, inp3, inp4);
1833 inp1, inp2, inp3, inp4,
1841 inp2, inp3, inp4, inp5,
1849 inp3, inp4, inp5, inp6,
1856 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp3, inp2, inp1, inp0,
1864 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp4, inp3, inp2, inp1,
1872 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp5, inp4, inp3, inp2,
1880 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp6, inp5, inp4, inp3,
1956 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
1962 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
1967 inp1, inp2, inp3, inp4,
1969 inp2, inp3, inp4, inp5,
1974 inp3, inp4, inp5, inp6,
1975 inp3, inp2, inp1, inp0,
1979 tmp1 = (v16u8) __msa_insve_d((v2i64) inp3, 1, (v2i64) inp4);
1985 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp4, inp3, inp2, inp1,
1987 inp5, inp4, inp3, inp2,
1990 res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp6, inp5, inp4, inp3,
2007 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
2014 LD_UB5(src, src_stride, inp0, inp1, inp2, inp3, inp4);
2017 inp1, inp2, inp3, inp4,
2026 inp2, inp3, inp4, inp5,
2035 inp3, inp4, inp5, inp6,
2037 res0 = __msa_ave_u_b(res0, inp3);
2043 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp3, inp2, inp1, inp0,
2052 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp4, inp3, inp2, inp1,
2061 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp5, inp4, inp3, inp2,
2070 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp6, inp5, inp4, inp3,
2156 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
2163 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
2168 inp1, inp2, inp3, inp4,
2170 inp2, inp3, inp4, inp5,
2176 inp3, inp4, inp5, inp6,
2177 inp3, inp2, inp1, inp0,
2183 tmp1 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
2192 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp4, inp3, inp2, inp1,
2194 inp5, inp4, inp3, inp2,
2197 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp6, inp5, inp4, inp3,
2218 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
2225 LD_UB5(src, src_stride, inp0, inp1, inp2, inp3, inp4);
2228 inp1, inp2, inp3, inp4,
2234 inp2, inp3, inp4, inp5,
2246 inp3, inp4, inp5, inp6,
2251 res1 = APPLY_VERT_QPEL_FILTER(inp3, inp2, inp1, inp0,
2256 AVER_UB2_UB(res0, inp2, res1, inp3, res0, res1);
2263 res0 = APPLY_VERT_QPEL_FILTER(inp4, inp3, inp2, inp1,
2266 res1 = APPLY_VERT_QPEL_FILTER(inp5, inp4, inp3, inp2,
2278 res0 = APPLY_VERT_QPEL_FILTER(inp6, inp5, inp4, inp3,
2350 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
2357 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
2362 inp1, inp2, inp3, inp4,
2364 inp2, inp3, inp4, inp5,
2369 inp3, inp4, inp5, inp6,
2370 inp3, inp2, inp1, inp0,
2381 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp4, inp3, inp2, inp1,
2383 inp5, inp4, inp3, inp2,
2386 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp6, inp5, inp4, inp3,
2403 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
2410 LD_UB5(src, src_stride, inp0, inp1, inp2, inp3, inp4);
2413 inp1, inp2, inp3, inp4,
2418 inp2, inp3, inp4, inp5,
2428 inp3, inp4, inp5, inp6,
2432 res1 = APPLY_VERT_QPEL_FILTER(inp3, inp2, inp1, inp0,
2442 res0 = APPLY_VERT_QPEL_FILTER(inp4, inp3, inp2, inp1,
2447 res1 = APPLY_VERT_QPEL_FILTER(inp5, inp4, inp3, inp2,
2457 res0 = APPLY_VERT_QPEL_FILTER(inp6, inp5, inp4, inp3,
2528 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
2535 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
2540 inp1, inp2, inp3, inp4,
2542 inp2, inp3, inp4, inp5,
2547 inp3, inp4, inp5, inp6,
2548 inp3, inp2, inp1, inp0,
2553 tmp1 = (v16u8) __msa_insve_d((v2i64) inp3, 1, (v2i64) inp4);
2562 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp4, inp3, inp2, inp1,
2564 inp5, inp4, inp3, inp2,
2567 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp6, inp5, inp4, inp3,
2587 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
2594 LD_UB5(src, src_stride, inp0, inp1, inp2, inp3, inp4);
2597 inp1, inp2, inp3, inp4,
2602 inp2, inp3, inp4, inp5,
2613 inp3, inp4, inp5, inp6,
2617 res1 = APPLY_VERT_QPEL_FILTER(inp3, inp2, inp1, inp0,
2621 AVER_UB2_UB(res0, inp3, res1, inp4, res0, res1);
2628 res0 = APPLY_VERT_QPEL_FILTER(inp4, inp3, inp2, inp1,
2633 res1 = APPLY_VERT_QPEL_FILTER(inp5, inp4, inp3, inp2,
2644 res0 = APPLY_VERT_QPEL_FILTER(inp6, inp5, inp4, inp3,
2722 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
2731 LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7);
2739 res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp2, inp3, mask,
2781 v16u8 inp0, inp1, inp2, inp3;
2801 LD_UB2(src, src_stride, inp2, inp3);
2803 res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
2806 inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
2827 LD_UB2(src, src_stride, inp2, inp3);
2829 res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
2832 inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
2877 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
2886 LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7);
2893 res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp2, inp3, mask,
2931 v16u8 inp0, inp1, inp2, inp3;
2950 LD_UB2(src, src_stride, inp2, inp3);
2952 horiz2 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
2972 LD_UB2(src, src_stride, inp2, inp3);
2974 horiz6 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
3021 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
3030 LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7);
3038 res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp2, inp3, mask,
3040 res = __msa_ave_u_b(res, inp3);
3080 v16u8 inp0, inp1, inp2, inp3;
3102 LD_UB2(src, src_stride, inp2, inp3);
3104 res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
3107 SLDI_B2_UB(inp2, inp2, inp3, inp3, 1, inp2, inp3);
3109 inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
3132 LD_UB2(src, src_stride, inp2, inp3);
3134 res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
3137 SLDI_B2_UB(inp2, inp2, inp3, inp3, 1, inp2, inp3);
3139 inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
3194 v16u8 inp0, inp1, inp2, inp3;
3214 LD_UB2(src, src_stride, inp2, inp3);
3216 res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
3219 inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
3236 LD_UB2(src, src_stride, inp2, inp3);
3241 res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
3244 inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
3289 v16u8 inp0, inp1, inp2, inp3;
3307 LD_UB2(src, src_stride, inp2, inp3);
3309 horiz2 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
3324 LD_UB2(src, src_stride, inp2, inp3);
3329 horiz6 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
3375 v16u8 inp0, inp1, inp2, inp3;
3397 LD_UB2(src, src_stride, inp2, inp3);
3399 res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
3402 SLDI_B2_UB(inp2, inp2, inp3, inp3, 1, inp2, inp3);
3404 inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
3422 LD_UB2(src, src_stride, inp2, inp3);
3427 res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
3430 SLDI_B2_UB(inp2, inp2, inp3, inp3, 1, inp2, inp3);
3432 inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
3478 v16u8 inp0, inp1, inp2, inp3;
3498 LD_UB2(src, src_stride, inp2, inp3);
3500 res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
3503 inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
3524 LD_UB2(src, src_stride, inp2, inp3);
3526 res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
3529 inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
3584 v16u8 inp0, inp1, inp2, inp3;
3602 LD_UB2(src, src_stride, inp2, inp3);
3604 horiz2 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
3621 LD_UB2(src, src_stride, inp2, inp3);
3626 horiz6 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
3677 v16u8 inp0, inp1, inp2, inp3;
3699 LD_UB2(src, src_stride, inp2, inp3);
3701 res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
3704 SLDI_B2_UB(inp2, inp2, inp3, inp3, 1, inp2, inp3);
3706 inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
3729 LD_UB2(src, src_stride, inp2, inp3);
3731 res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
3734 SLDI_B2_UB(inp2, inp2, inp3, inp3, 1, inp2, inp3);
3736 inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
3779 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
3788 LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7);
3796 res = APPLY_HORIZ_QPEL_FILTER(inp2, inp3, mask,
3837 v16u8 inp0, inp1, inp2, inp3;
3849 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
3853 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
3858 inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
3878 LD_UB2(src, src_stride, inp2, inp3);
3880 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
3882 inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
3924 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
3933 LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7);
3940 res = APPLY_HORIZ_QPEL_FILTER(inp2, inp3, mask,
3977 v16u8 inp0, inp1, inp2, inp3;
3995 LD_UB2(src, src_stride, inp2, inp3);
3997 horiz2 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
4017 LD_UB2(src, src_stride, inp2, inp3);
4019 horiz6 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
4061 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
4070 LD_UB4((src + 1), src_stride, inp1, inp3, inp5, inp7);
4078 res = APPLY_HORIZ_QPEL_FILTER(inp2, inp3, mask,
4080 res = __msa_aver_u_b(res, inp3);
4119 v16u8 inp0, inp1, inp2, inp3;
4131 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
4135 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
4142 SLDI_B2_UB(inp2, inp2, inp3, inp3, 1, inp2, inp3);
4144 inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
4147 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
4151 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
4158 SLDI_B2_UB(inp2, inp2, inp3, inp3, 1, inp2, inp3);
4160 inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
4221 v16u8 inp0, inp1, inp2, inp3;
4241 LD_UB2(src, src_stride, inp2, inp3);
4243 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
4245 inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
4263 LD_UB2(src, src_stride, inp2, inp3);
4265 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
4267 inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
4309 v16u8 inp0, inp1, inp2, inp3;
4327 LD_UB2(src, src_stride, inp2, inp3);
4329 horiz2 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
4347 LD_UB2(src, src_stride, inp2, inp3);
4349 horiz6 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
4394 v16u8 inp0, inp1, inp2, inp3;
4406 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
4411 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
4418 SLDI_B2_UB(inp2, inp2, inp3, inp3, 1, inp2, inp3);
4420 inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
4423 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
4427 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
4434 SLDI_B2_UB(inp2, inp2, inp3, inp3, 1, inp2, inp3);
4436 inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
4486 v16u8 inp0, inp1, inp2, inp3;
4498 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
4503 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
4508 inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
4529 LD_UB2(src, src_stride, inp2, inp3);
4531 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
4533 inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
4583 v16u8 inp0, inp1, inp2, inp3;
4601 LD_UB2(src, src_stride, inp2, inp3);
4603 horiz2 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
4624 LD_UB2(src, src_stride, inp2, inp3);
4626 horiz6 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
4676 v16u8 inp0, inp1, inp2, inp3;
4688 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
4698 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
4700 SLDI_B2_UB(inp2, inp2, inp3, inp3, 1, inp2, inp3);
4702 inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
4721 LD_UB2(src, src_stride, inp2, inp3);
4726 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
4728 SLDI_B2_UB(inp2, inp2, inp3, inp3, 1, inp2, inp3);
4730 inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
4781 v16u8 inp0, inp1, inp2, inp3;
4798 LD_UB2(src, src_stride, inp2, inp3);
4803 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
4807 inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
4828 LD_UB2(src, src_stride, inp2, inp3);
4830 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
4832 inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
4894 v16u8 inp0, inp1, inp2, inp3;
4912 LD_UB2(src, src_stride, inp2, inp3);
4915 horiz2 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
4938 LD_UB2(src, src_stride, inp2, inp3);
4940 horiz6 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
5003 v16u8 inp0, inp1, inp2, inp3;
5021 LD_UB2(src, src_stride, inp2, inp3);
5028 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
5032 SLDI_B2_UB(inp2, inp2, inp3, inp3, 1, inp2, inp3);
5034 inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
5058 LD_UB2(src, src_stride, inp2, inp3);
5060 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
5063 SLDI_B2_UB(inp2, inp2, inp3, inp3, 1, inp2, inp3);
5065 inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
5128 v16u8 inp0, inp1, inp2, inp3;
5145 LD_UB2(src, src_stride, inp2, inp3);
5150 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
5154 inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
5173 LD_UB2(src, src_stride, inp2, inp3);
5175 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
5177 inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
5230 v16u8 inp0, inp1, inp2, inp3;
5248 LD_UB2(src, src_stride, inp2, inp3);
5251 horiz2 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
5261 LD_UB2(src, src_stride, inp2, inp3);
5263 horiz6 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
5331 v16u8 inp0, inp1, inp2, inp3;
5348 LD_UB2(src, src_stride, inp2, inp3);
5355 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
5359 SLDI_B2_UB(inp2, inp2, inp3, inp3, 1, inp2, inp3);
5361 inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
5383 LD_UB2(src, src_stride, inp2, inp3);
5385 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
5388 SLDI_B2_UB(inp2, inp2, inp3, inp3, 1, inp2, inp3);
5390 inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
5447 v16u8 inp0, inp1, inp2, inp3;
5468 LD_UB2(src, src_stride, inp2, inp3);
5470 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
5472 inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
5496 LD_UB2(src, src_stride, inp2, inp3);
5498 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
5500 inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
5561 v16u8 inp0, inp1, inp2, inp3;
5580 LD_UB2(src, src_stride, inp2, inp3);
5582 horiz2 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
5606 LD_UB2(src, src_stride, inp2, inp3);
5608 horiz6 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
5668 v16u8 inp0, inp1, inp2, inp3;
5685 LD_UB2(src, src_stride, inp2, inp3);
5692 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
5696 SLDI_B2_UB(inp2, inp2, inp3, inp3, 1, inp2, inp3);
5698 inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
5720 LD_UB2(src, src_stride, inp2, inp3);
5722 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
5724 SLDI_B2_UB(inp2, inp2, inp3, inp3, 1, inp2, inp3);
5726 inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);