Lines Matching refs:inp2

194 #define APPLY_VERT_QPEL_FILTER(inp0, inp1, inp2, inp3,                  \
207 ILVRL_B2_UH(inp6, inp2, sum2_r, sum2_l); \
248 #define APPLY_VERT_QPEL_NO_ROUND_FILTER(inp0, inp1, inp2, inp3, \
261 ILVRL_B2_UH(inp6, inp2, sum2_r, sum2_l); \
315 v16u8 inp0, inp1, inp2, inp3;
326 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
331 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
335 inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
336 AVER_UB2_UB(inp0, res0, inp2, res1, res0, res1);
349 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
357 LD_UB4(src, src_stride, inp0, inp2, inp4, inp6);
366 res = APPLY_HORIZ_QPEL_FILTER(inp2, inp3, mask,
368 res = __msa_aver_u_b(inp2, res);
393 v16u8 inp0, inp1, inp2, inp3;
404 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
409 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
424 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
432 LD_UB4(src, src_stride, inp0, inp2, inp4, inp6);
440 res = APPLY_HORIZ_QPEL_FILTER(inp2, inp3, mask,
464 v16u8 inp0, inp1, inp2, inp3;
475 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
480 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
483 SLDI_B4_UB(inp0, inp0, inp1, inp1, inp2, inp2, inp3, inp3, 1,
484 inp0, inp1, inp2, inp3);
486 inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
487 AVER_UB2_UB(inp0, res0, inp2, res1, res0, res1);
500 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
508 LD_UB4(src, src_stride, inp0, inp2, inp4, inp6);
517 res = APPLY_HORIZ_QPEL_FILTER(inp2, inp3, mask,
544 v16u8 inp0, inp1, inp2, inp3;
555 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
560 res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
564 inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
566 res1 = __msa_ave_u_b(inp2, res1);
579 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
587 LD_UB4(src, src_stride, inp0, inp2, inp4, inp6);
596 res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp2, inp3, mask,
598 res = __msa_ave_u_b(inp2, res);
623 v16u8 inp0, inp1, inp2, inp3;
634 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
639 res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
654 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
662 LD_UB4(src, src_stride, inp0, inp2, inp4, inp6);
670 res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp2, inp3, mask,
694 v16u8 inp0, inp1, inp2, inp3;
705 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
710 res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
713 SLDI_B4_UB(inp0, inp0, inp1, inp1, inp2, inp2, inp3, inp3, 1,
714 inp0, inp1, inp2, inp3);
716 inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
718 res1 = __msa_ave_u_b(inp2, res1);
731 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
739 LD_UB4(src, src_stride, inp0, inp2, inp4, inp6);
748 res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp2, inp3, mask,
775 v16u8 inp0, inp1, inp2, inp3;
787 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
792 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
797 inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
800 AVER_UB2_UB(inp0, res0, inp2, res1, res0, res1);
814 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
823 LD_UB4(src, src_stride, inp0, inp2, inp4, inp6);
828 res1 = APPLY_HORIZ_QPEL_FILTER(inp2, inp3, mask,
831 AVER_UB2_UB(inp0, res0, inp2, res1, res0, res1);
855 v16u8 inp0, inp1, inp2, inp3;
867 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
872 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
891 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
900 LD_UB4(src, src_stride, inp0, inp2, inp4, inp6);
905 res1 = APPLY_HORIZ_QPEL_FILTER(inp2, inp3, mask,
930 v16u8 inp0, inp1, inp2, inp3;
942 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
947 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
951 SLDI_B4_UB(inp0, inp0, inp1, inp1, inp2, inp2, inp3, inp3, 1,
952 inp0, inp1, inp2, inp3);
954 inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
957 AVER_UB2_UB(inp0, res0, inp2, res1, res0, res1);
971 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
979 LD_UB4(src, src_stride, inp0, inp2, inp4, inp6);
984 res1 = APPLY_HORIZ_QPEL_FILTER(inp2, inp3, mask,
1009 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
1015 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
1019 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp0, inp0, inp1, inp2,
1020 inp1, inp2, inp3, inp4,
1022 inp2, inp3, inp4, inp5,
1026 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp2, inp1, inp0, inp0,
1028 inp3, inp2, inp1, inp0,
1032 tmp1 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
1037 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp4, inp3, inp2, inp1,
1039 inp5, inp4, inp3, inp2,
1058 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
1065 LD_UB5(src, src_stride, inp0, inp1, inp2, inp3, inp4);
1067 res0 = APPLY_VERT_QPEL_FILTER(inp0, inp0, inp1, inp2,
1068 inp1, inp2, inp3, inp4,
1077 inp2, inp3, inp4, inp5,
1085 res0 = APPLY_VERT_QPEL_FILTER(inp2, inp1, inp0, inp0,
1088 res0 = __msa_aver_u_b(res0, inp2);
1094 res0 = APPLY_VERT_QPEL_FILTER(inp3, inp2, inp1, inp0,
1103 res0 = APPLY_VERT_QPEL_FILTER(inp4, inp3, inp2, inp1,
1110 res0 = APPLY_VERT_QPEL_FILTER(inp5, inp4, inp3, inp2,
1199 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
1205 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
1209 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp0, inp0, inp1, inp2,
1210 inp1, inp2, inp3, inp4,
1212 inp2, inp3, inp4, inp5,
1216 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp2, inp1, inp0, inp0,
1218 inp3, inp2, inp1, inp0,
1224 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp4, inp3, inp2, inp1,
1226 inp5, inp4, inp3, inp2,
1242 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
1249 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
1253 res0 = APPLY_VERT_QPEL_FILTER(inp0, inp0, inp1, inp2,
1254 inp1, inp2, inp3, inp4,
1262 inp2, inp3, inp4, inp5,
1269 res0 = APPLY_VERT_QPEL_FILTER(inp2, inp1, inp0, inp0,
1277 res0 = APPLY_VERT_QPEL_FILTER(inp3, inp2, inp1, inp0,
1285 res0 = APPLY_VERT_QPEL_FILTER(inp4, inp3, inp2, inp1,
1293 res0 = APPLY_VERT_QPEL_FILTER(inp5, inp4, inp3, inp2,
1378 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
1384 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
1388 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp0, inp0, inp1, inp2,
1389 inp1, inp2, inp3, inp4,
1391 inp2, inp3, inp4, inp5,
1396 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp2, inp1, inp0, inp0,
1398 inp3, inp2, inp1, inp0,
1401 tmp0 = (v16u8) __msa_insve_d((v2i64) inp1, 1, (v2i64) inp2);
1407 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp4, inp3, inp2, inp1,
1409 inp5, inp4, inp3, inp2,
1428 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
1435 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
1439 res0 = APPLY_VERT_QPEL_FILTER(inp0, inp0, inp1, inp2,
1440 inp1, inp2, inp3, inp4,
1449 inp2, inp3, inp4, inp5,
1451 res0 = __msa_aver_u_b(res0, inp2);
1457 res0 = APPLY_VERT_QPEL_FILTER(inp2, inp1, inp0, inp0,
1466 res0 = APPLY_VERT_QPEL_FILTER(inp3, inp2, inp1, inp0,
1475 res0 = APPLY_VERT_QPEL_FILTER(inp4, inp3, inp2, inp1,
1484 res0 = APPLY_VERT_QPEL_FILTER(inp5, inp4, inp3, inp2,
1579 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
1585 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
1589 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp0, inp1, inp2,
1590 inp1, inp2, inp3, inp4,
1592 inp2, inp3, inp4, inp5,
1596 res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp1, inp0, inp0,
1598 inp3, inp2, inp1, inp0,
1602 tmp1 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
1608 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp4, inp3, inp2, inp1,
1610 inp5, inp4, inp3, inp2,
1630 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
1637 LD_UB5(src, src_stride, inp0, inp1, inp2, inp3, inp4);
1639 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp0, inp0, inp1, inp2,
1640 inp1, inp2, inp3, inp4,
1649 inp2, inp3, inp4, inp5,
1657 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp2, inp1, inp0, inp0,
1660 res0 = __msa_ave_u_b(res0, inp2);
1666 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp3, inp2, inp1, inp0,
1675 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp4, inp3, inp2, inp1,
1684 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp5, inp4, inp3, inp2,
1780 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
1786 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
1790 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp0, inp1, inp2,
1791 inp1, inp2, inp3, inp4,
1793 inp2, inp3, inp4, inp5,
1797 res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp1, inp0, inp0,
1799 inp3, inp2, inp1, inp0,
1805 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp4, inp3, inp2, inp1,
1807 inp5, inp4, inp3, inp2,
1823 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
1830 LD_UB5(src, src_stride, inp0, inp1, inp2, inp3, inp4);
1832 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp0, inp0, inp1, inp2,
1833 inp1, inp2, inp3, inp4,
1841 inp2, inp3, inp4, inp5,
1848 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp2, inp1, inp0, inp0,
1856 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp3, inp2, inp1, inp0,
1864 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp4, inp3, inp2, inp1,
1872 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp5, inp4, inp3, inp2,
1956 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
1962 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
1966 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp0, inp1, inp2,
1967 inp1, inp2, inp3, inp4,
1969 inp2, inp3, inp4, inp5,
1973 res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp1, inp0, inp0,
1975 inp3, inp2, inp1, inp0,
1978 tmp0 = (v16u8) __msa_insve_d((v2i64) inp1, 1, (v2i64) inp2);
1985 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp4, inp3, inp2, inp1,
1987 inp5, inp4, inp3, inp2,
2007 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
2014 LD_UB5(src, src_stride, inp0, inp1, inp2, inp3, inp4);
2016 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp0, inp0, inp1, inp2,
2017 inp1, inp2, inp3, inp4,
2026 inp2, inp3, inp4, inp5,
2028 res0 = __msa_ave_u_b(res0, inp2);
2034 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp2, inp1, inp0, inp0,
2043 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp3, inp2, inp1, inp0,
2052 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp4, inp3, inp2, inp1,
2061 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp5, inp4, inp3, inp2,
2156 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
2163 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
2167 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp0, inp0, inp1, inp2,
2168 inp1, inp2, inp3, inp4,
2170 inp2, inp3, inp4, inp5,
2175 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp2, inp1, inp0, inp0,
2177 inp3, inp2, inp1, inp0,
2183 tmp1 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
2192 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp4, inp3, inp2, inp1,
2194 inp5, inp4, inp3, inp2,
2218 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
2225 LD_UB5(src, src_stride, inp0, inp1, inp2, inp3, inp4);
2227 res0 = APPLY_VERT_QPEL_FILTER(inp0, inp0, inp1, inp2,
2228 inp1, inp2, inp3, inp4,
2234 inp2, inp3, inp4, inp5,
2245 res0 = APPLY_VERT_QPEL_FILTER(inp2, inp1, inp0, inp0,
2251 res1 = APPLY_VERT_QPEL_FILTER(inp3, inp2, inp1, inp0,
2256 AVER_UB2_UB(res0, inp2, res1, inp3, res0, res1);
2263 res0 = APPLY_VERT_QPEL_FILTER(inp4, inp3, inp2, inp1,
2266 res1 = APPLY_VERT_QPEL_FILTER(inp5, inp4, inp3, inp2,
2350 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
2357 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
2361 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp0, inp0, inp1, inp2,
2362 inp1, inp2, inp3, inp4,
2364 inp2, inp3, inp4, inp5,
2368 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp2, inp1, inp0, inp0,
2370 inp3, inp2, inp1, inp0,
2381 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp4, inp3, inp2, inp1,
2383 inp5, inp4, inp3, inp2,
2403 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
2410 LD_UB5(src, src_stride, inp0, inp1, inp2, inp3, inp4);
2412 res0 = APPLY_VERT_QPEL_FILTER(inp0, inp0, inp1, inp2,
2413 inp1, inp2, inp3, inp4,
2418 inp2, inp3, inp4, inp5,
2427 res0 = APPLY_VERT_QPEL_FILTER(inp2, inp1, inp0, inp0,
2432 res1 = APPLY_VERT_QPEL_FILTER(inp3, inp2, inp1, inp0,
2442 res0 = APPLY_VERT_QPEL_FILTER(inp4, inp3, inp2, inp1,
2447 res1 = APPLY_VERT_QPEL_FILTER(inp5, inp4, inp3, inp2,
2528 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
2535 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
2539 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp0, inp0, inp1, inp2,
2540 inp1, inp2, inp3, inp4,
2542 inp2, inp3, inp4, inp5,
2546 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp2, inp1, inp0, inp0,
2548 inp3, inp2, inp1, inp0,
2552 tmp0 = (v16u8) __msa_insve_d((v2i64) inp1, 1, (v2i64) inp2);
2562 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp4, inp3, inp2, inp1,
2564 inp5, inp4, inp3, inp2,
2587 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
2594 LD_UB5(src, src_stride, inp0, inp1, inp2, inp3, inp4);
2596 res0 = APPLY_VERT_QPEL_FILTER(inp0, inp0, inp1, inp2,
2597 inp1, inp2, inp3, inp4,
2602 inp2, inp3, inp4, inp5,
2605 AVER_UB2_UB(res0, inp1, res1, inp2, res0, res1);
2612 res0 = APPLY_VERT_QPEL_FILTER(inp2, inp1, inp0, inp0,
2617 res1 = APPLY_VERT_QPEL_FILTER(inp3, inp2, inp1, inp0,
2628 res0 = APPLY_VERT_QPEL_FILTER(inp4, inp3, inp2, inp1,
2633 res1 = APPLY_VERT_QPEL_FILTER(inp5, inp4, inp3, inp2,
2722 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
2730 LD_UB4(src, src_stride, inp0, inp2, inp4, inp6);
2739 res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp2, inp3, mask,
2741 res = __msa_ave_u_b(inp2, res);
2781 v16u8 inp0, inp1, inp2, inp3;
2801 LD_UB2(src, src_stride, inp2, inp3);
2803 res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
2806 inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
2807 horiz2 = __msa_ave_u_b(inp2, res1);
2827 LD_UB2(src, src_stride, inp2, inp3);
2829 res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
2832 inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
2833 horiz6 = __msa_ave_u_b(inp2, res1);
2877 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
2885 LD_UB4(src, src_stride, inp0, inp2, inp4, inp6);
2893 res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp2, inp3, mask,
2931 v16u8 inp0, inp1, inp2, inp3;
2950 LD_UB2(src, src_stride, inp2, inp3);
2952 horiz2 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
2972 LD_UB2(src, src_stride, inp2, inp3);
2974 horiz6 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
3021 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
3029 LD_UB4(src, src_stride, inp0, inp2, inp4, inp6);
3038 res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp2, inp3, mask,
3080 v16u8 inp0, inp1, inp2, inp3;
3102 LD_UB2(src, src_stride, inp2, inp3);
3104 res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
3107 SLDI_B2_UB(inp2, inp2, inp3, inp3, 1, inp2, inp3);
3109 inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
3110 horiz2 = __msa_ave_u_b(inp2, res1);
3132 LD_UB2(src, src_stride, inp2, inp3);
3134 res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
3137 SLDI_B2_UB(inp2, inp2, inp3, inp3, 1, inp2, inp3);
3139 inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
3140 horiz6 = __msa_ave_u_b(inp2, res1);
3194 v16u8 inp0, inp1, inp2, inp3;
3214 LD_UB2(src, src_stride, inp2, inp3);
3216 res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
3219 inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
3220 horiz2 = __msa_ave_u_b(inp2, res1);
3236 LD_UB2(src, src_stride, inp2, inp3);
3241 res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
3244 inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
3245 horiz6 = __msa_ave_u_b(inp2, res1);
3289 v16u8 inp0, inp1, inp2, inp3;
3307 LD_UB2(src, src_stride, inp2, inp3);
3309 horiz2 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
3324 LD_UB2(src, src_stride, inp2, inp3);
3329 horiz6 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
3375 v16u8 inp0, inp1, inp2, inp3;
3397 LD_UB2(src, src_stride, inp2, inp3);
3399 res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
3402 SLDI_B2_UB(inp2, inp2, inp3, inp3, 1, inp2, inp3);
3404 inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
3405 horiz2 = __msa_ave_u_b(inp2, res1);
3422 LD_UB2(src, src_stride, inp2, inp3);
3427 res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
3430 SLDI_B2_UB(inp2, inp2, inp3, inp3, 1, inp2, inp3);
3432 inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
3433 horiz6 = __msa_ave_u_b(inp2, res1);
3478 v16u8 inp0, inp1, inp2, inp3;
3498 LD_UB2(src, src_stride, inp2, inp3);
3500 res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
3503 inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
3504 horiz2 = __msa_ave_u_b(inp2, res1);
3524 LD_UB2(src, src_stride, inp2, inp3);
3526 res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
3529 inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
3530 horiz6 = __msa_ave_u_b(inp2, res1);
3584 v16u8 inp0, inp1, inp2, inp3;
3602 LD_UB2(src, src_stride, inp2, inp3);
3604 horiz2 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
3621 LD_UB2(src, src_stride, inp2, inp3);
3626 horiz6 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
3677 v16u8 inp0, inp1, inp2, inp3;
3699 LD_UB2(src, src_stride, inp2, inp3);
3701 res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
3704 SLDI_B2_UB(inp2, inp2, inp3, inp3, 1, inp2, inp3);
3706 inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
3707 horiz2 = __msa_ave_u_b(inp2, res1);
3729 LD_UB2(src, src_stride, inp2, inp3);
3731 res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
3734 SLDI_B2_UB(inp2, inp2, inp3, inp3, 1, inp2, inp3);
3736 inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
3737 horiz6 = __msa_ave_u_b(inp2, res1);
3779 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
3787 LD_UB4(src, src_stride, inp0, inp2, inp4, inp6);
3796 res = APPLY_HORIZ_QPEL_FILTER(inp2, inp3, mask,
3798 res = __msa_aver_u_b(inp2, res);
3837 v16u8 inp0, inp1, inp2, inp3;
3849 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
3853 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
3858 inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
3859 horiz2 = __msa_aver_u_b(inp2, res1);
3878 LD_UB2(src, src_stride, inp2, inp3);
3880 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
3882 inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
3883 horiz6 = __msa_aver_u_b(inp2, res1);
3924 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
3932 LD_UB4(src, src_stride, inp0, inp2, inp4, inp6);
3940 res = APPLY_HORIZ_QPEL_FILTER(inp2, inp3, mask,
3977 v16u8 inp0, inp1, inp2, inp3;
3995 LD_UB2(src, src_stride, inp2, inp3);
3997 horiz2 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
4017 LD_UB2(src, src_stride, inp2, inp3);
4019 horiz6 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
4061 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
4069 LD_UB4(src, src_stride, inp0, inp2, inp4, inp6);
4078 res = APPLY_HORIZ_QPEL_FILTER(inp2, inp3, mask,
4119 v16u8 inp0, inp1, inp2, inp3;
4131 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
4135 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
4142 SLDI_B2_UB(inp2, inp2, inp3, inp3, 1, inp2, inp3);
4144 inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
4145 horiz2 = __msa_aver_u_b(inp2, res1);
4147 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
4151 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
4158 SLDI_B2_UB(inp2, inp2, inp3, inp3, 1, inp2, inp3);
4160 inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
4161 horiz6 = __msa_aver_u_b(inp2, res1);
4221 v16u8 inp0, inp1, inp2, inp3;
4241 LD_UB2(src, src_stride, inp2, inp3);
4243 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
4245 inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
4246 horiz2 = __msa_aver_u_b(inp2, res1);
4263 LD_UB2(src, src_stride, inp2, inp3);
4265 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
4267 inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
4268 horiz6 = __msa_aver_u_b(inp2, res1);
4309 v16u8 inp0, inp1, inp2, inp3;
4327 LD_UB2(src, src_stride, inp2, inp3);
4329 horiz2 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
4347 LD_UB2(src, src_stride, inp2, inp3);
4349 horiz6 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
4394 v16u8 inp0, inp1, inp2, inp3;
4406 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
4411 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
4418 SLDI_B2_UB(inp2, inp2, inp3, inp3, 1, inp2, inp3);
4420 inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
4421 horiz2 = __msa_aver_u_b(inp2, res1);
4423 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
4427 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
4434 SLDI_B2_UB(inp2, inp2, inp3, inp3, 1, inp2, inp3);
4436 inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
4437 horiz6 = __msa_aver_u_b(inp2, res1);
4486 v16u8 inp0, inp1, inp2, inp3;
4498 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
4503 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
4508 inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
4509 horiz2 = __msa_aver_u_b(inp2, res1);
4529 LD_UB2(src, src_stride, inp2, inp3);
4531 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
4533 inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
4534 horiz6 = __msa_aver_u_b(inp2, res1);
4583 v16u8 inp0, inp1, inp2, inp3;
4601 LD_UB2(src, src_stride, inp2, inp3);
4603 horiz2 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
4624 LD_UB2(src, src_stride, inp2, inp3);
4626 horiz6 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
4676 v16u8 inp0, inp1, inp2, inp3;
4688 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
4698 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
4700 SLDI_B2_UB(inp2, inp2, inp3, inp3, 1, inp2, inp3);
4702 inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
4703 horiz2 = __msa_aver_u_b(inp2, res1);
4721 LD_UB2(src, src_stride, inp2, inp3);
4726 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
4728 SLDI_B2_UB(inp2, inp2, inp3, inp3, 1, inp2, inp3);
4730 inp2 = (v16u8) __msa_insve_d((v2i64) inp2, 1, (v2i64) inp3);
4731 horiz6 = __msa_aver_u_b(inp2, res1);
4781 v16u8 inp0, inp1, inp2, inp3;
4798 LD_UB2(src, src_stride, inp2, inp3);
4803 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
4807 inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
4808 horiz2 = __msa_aver_u_b(inp2, res1);
4828 LD_UB2(src, src_stride, inp2, inp3);
4830 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
4832 inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
4833 horiz6 = __msa_aver_u_b(inp2, res1);
4894 v16u8 inp0, inp1, inp2, inp3;
4912 LD_UB2(src, src_stride, inp2, inp3);
4915 horiz2 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
4938 LD_UB2(src, src_stride, inp2, inp3);
4940 horiz6 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
5003 v16u8 inp0, inp1, inp2, inp3;
5021 LD_UB2(src, src_stride, inp2, inp3);
5028 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
5032 SLDI_B2_UB(inp2, inp2, inp3, inp3, 1, inp2, inp3);
5034 inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
5035 horiz2 = __msa_aver_u_b(inp2, res1);
5058 LD_UB2(src, src_stride, inp2, inp3);
5060 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
5063 SLDI_B2_UB(inp2, inp2, inp3, inp3, 1, inp2, inp3);
5065 inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
5066 horiz6 = __msa_aver_u_b(inp2, res1);
5128 v16u8 inp0, inp1, inp2, inp3;
5145 LD_UB2(src, src_stride, inp2, inp3);
5150 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
5154 inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
5155 horiz2 = __msa_aver_u_b(inp2, res1);
5173 LD_UB2(src, src_stride, inp2, inp3);
5175 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
5177 inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
5178 horiz6 = __msa_aver_u_b(inp2, res1);
5230 v16u8 inp0, inp1, inp2, inp3;
5248 LD_UB2(src, src_stride, inp2, inp3);
5251 horiz2 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
5261 LD_UB2(src, src_stride, inp2, inp3);
5263 horiz6 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
5331 v16u8 inp0, inp1, inp2, inp3;
5348 LD_UB2(src, src_stride, inp2, inp3);
5355 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
5359 SLDI_B2_UB(inp2, inp2, inp3, inp3, 1, inp2, inp3);
5361 inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
5362 horiz2 = __msa_aver_u_b(inp2, res1);
5383 LD_UB2(src, src_stride, inp2, inp3);
5385 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
5388 SLDI_B2_UB(inp2, inp2, inp3, inp3, 1, inp2, inp3);
5390 inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
5391 horiz6 = __msa_aver_u_b(inp2, res1);
5447 v16u8 inp0, inp1, inp2, inp3;
5468 LD_UB2(src, src_stride, inp2, inp3);
5470 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
5472 inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
5473 horiz2 = __msa_aver_u_b(inp2, res1);
5496 LD_UB2(src, src_stride, inp2, inp3);
5498 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
5500 inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
5501 horiz6 = __msa_aver_u_b(inp2, res1);
5561 v16u8 inp0, inp1, inp2, inp3;
5580 LD_UB2(src, src_stride, inp2, inp3);
5582 horiz2 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
5606 LD_UB2(src, src_stride, inp2, inp3);
5608 horiz6 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
5668 v16u8 inp0, inp1, inp2, inp3;
5685 LD_UB2(src, src_stride, inp2, inp3);
5692 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
5696 SLDI_B2_UB(inp2, inp2, inp3, inp3, 1, inp2, inp3);
5698 inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
5699 horiz2 = __msa_aver_u_b(inp2, res1);
5720 LD_UB2(src, src_stride, inp2, inp3);
5722 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
5724 SLDI_B2_UB(inp2, inp2, inp3, inp3, 1, inp2, inp3);
5726 inp2 = (v16u8) __msa_ilvr_d((v2i64) inp3, (v2i64) inp2);
5727 horiz6 = __msa_aver_u_b(inp2, res1);