Lines Matching refs:inp0
24 #define APPLY_HORIZ_QPEL_FILTER(inp0, inp1, mask, coef0, coef1, coef2) \
32 VSHF_B2_UB(inp0, inp0, inp1, inp1, mask, mask, tmp0, tmp1); \
33 ILVRL_B2_UH(inp1, inp0, sum0_r, sum0_l); \
34 data0 = (v16u8) __msa_sldi_b((v16i8) inp0, (v16i8) tmp0, 15); \
38 data1 = (v16u8) __msa_sldi_b((v16i8) inp0, (v16i8) tmp0, 14); \
43 data2 = (v16u8) __msa_sldi_b((v16i8) inp0, (v16i8) tmp0, 13); \
58 #define APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, \
67 VSHF_B2_UH(inp0, inp0, inp1, inp1, mask0, mask0, sum0_r, sum4_r); \
68 VSHF_B2_UH(inp0, inp0, inp1, inp1, mask3, mask3, sum3_r, sum7_r); \
71 VSHF_B2_UH(inp0, inp0, inp1, inp1, mask2, mask2, sum2_r, sum6_r); \
72 VSHF_B2_UH(inp0, inp0, inp1, inp1, mask1, mask1, sum1_r, sum5_r); \
84 #define APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0, \
92 VSHF_B2_UH(inp0, inp0, inp0, inp0, mask0, mask3, sum0_r, sum3_r); \
95 VSHF_B2_UH(inp0, inp0, inp0, inp0, mask2, mask1, sum2_r, sum1_r); \
105 #define APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE_1ROW(inp0, mask0, mask1, \
113 VSHF_B2_UH(inp0, inp0, inp0, inp0, mask0, mask3, sum0_r, sum3_r); \
116 VSHF_B2_UH(inp0, inp0, inp0, inp0, mask2, mask1, sum2_r, sum1_r); \
127 #define APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp0, inp1, mask, \
136 VSHF_B2_UB(inp0, inp0, inp1, inp1, mask, mask, tmp0, tmp1); \
137 ILVRL_B2_UH(inp1, inp0, sum0_r, sum0_l); \
138 data0 = (v16u8) __msa_sldi_b((v16i8) inp0, (v16i8) tmp0, 15); \
142 data1 = (v16u8) __msa_sldi_b((v16i8) inp0, (v16i8) tmp0, 14); \
147 data2 = (v16u8) __msa_sldi_b((v16i8) inp0, (v16i8) tmp0, 13); \
165 #define APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, \
174 VSHF_B2_UH(inp0, inp0, inp1, inp1, mask0, mask0, sum0_r, sum4_r); \
175 VSHF_B2_UH(inp0, inp0, inp1, inp1, mask3, mask3, sum3_r, sum7_r); \
178 VSHF_B2_UH(inp0, inp0, inp1, inp1, mask2, mask2, sum2_r, sum6_r); \
179 VSHF_B2_UH(inp0, inp0, inp1, inp1, mask1, mask1, sum1_r, sum5_r); \
194 #define APPLY_VERT_QPEL_FILTER(inp0, inp1, inp2, inp3, \
203 ILVRL_B2_UH(inp4, inp0, sum0_r, sum0_l); \
248 #define APPLY_VERT_QPEL_NO_ROUND_FILTER(inp0, inp1, inp2, inp3, \
257 ILVRL_B2_UH(inp4, inp0, sum0_r, sum0_l); \
315 v16u8 inp0, inp1, inp2, inp3;
326 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
328 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
334 inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
336 AVER_UB2_UB(inp0, res0, inp2, res1, res0, res1);
349 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
357 LD_UB4(src, src_stride, inp0, inp2, inp4, inp6);
360 res = APPLY_HORIZ_QPEL_FILTER(inp0, inp1, mask,
362 res = __msa_aver_u_b(inp0, res);
393 v16u8 inp0, inp1, inp2, inp3;
404 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
406 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
424 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
432 LD_UB4(src, src_stride, inp0, inp2, inp4, inp6);
435 res = APPLY_HORIZ_QPEL_FILTER(inp0, inp1, mask,
464 v16u8 inp0, inp1, inp2, inp3;
475 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
477 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
483 SLDI_B4_UB(inp0, inp0, inp1, inp1, inp2, inp2, inp3, inp3, 1,
484 inp0, inp1, inp2, inp3);
485 inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
487 AVER_UB2_UB(inp0, res0, inp2, res1, res0, res1);
500 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
508 LD_UB4(src, src_stride, inp0, inp2, inp4, inp6);
511 res = APPLY_HORIZ_QPEL_FILTER(inp0, inp1, mask,
544 v16u8 inp0, inp1, inp2, inp3;
555 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
557 res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
563 inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
565 res0 = __msa_ave_u_b(inp0, res0);
579 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
587 LD_UB4(src, src_stride, inp0, inp2, inp4, inp6);
590 res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp0, inp1, mask,
592 res = __msa_ave_u_b(inp0, res);
623 v16u8 inp0, inp1, inp2, inp3;
634 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
636 res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
654 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
662 LD_UB4(src, src_stride, inp0, inp2, inp4, inp6);
665 res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp0, inp1, mask,
694 v16u8 inp0, inp1, inp2, inp3;
705 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
707 res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
713 SLDI_B4_UB(inp0, inp0, inp1, inp1, inp2, inp2, inp3, inp3, 1,
714 inp0, inp1, inp2, inp3);
715 inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
717 res0 = __msa_ave_u_b(inp0, res0);
731 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
739 LD_UB4(src, src_stride, inp0, inp2, inp4, inp6);
742 res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp0, inp1, mask,
775 v16u8 inp0, inp1, inp2, inp3;
787 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
789 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
796 inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
800 AVER_UB2_UB(inp0, res0, inp2, res1, res0, res1);
814 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
823 LD_UB4(src, src_stride, inp0, inp2, inp4, inp6);
826 res0 = APPLY_HORIZ_QPEL_FILTER(inp0, inp1, mask,
831 AVER_UB2_UB(inp0, res0, inp2, res1, res0, res1);
855 v16u8 inp0, inp1, inp2, inp3;
867 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
869 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
891 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
900 LD_UB4(src, src_stride, inp0, inp2, inp4, inp6);
903 res0 = APPLY_HORIZ_QPEL_FILTER(inp0, inp1, mask,
930 v16u8 inp0, inp1, inp2, inp3;
942 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
944 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
951 SLDI_B4_UB(inp0, inp0, inp1, inp1, inp2, inp2, inp3, inp3, 1,
952 inp0, inp1, inp2, inp3);
953 inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
957 AVER_UB2_UB(inp0, res0, inp2, res1, res0, res1);
971 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
979 LD_UB4(src, src_stride, inp0, inp2, inp4, inp6);
982 res0 = APPLY_HORIZ_QPEL_FILTER(inp0, inp1, mask,
1009 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
1015 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
1019 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp0, inp0, inp1, inp2,
1021 inp1, inp0, inp0, inp1,
1026 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp2, inp1, inp0, inp0,
1028 inp3, inp2, inp1, inp0,
1031 tmp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
1058 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
1065 LD_UB5(src, src_stride, inp0, inp1, inp2, inp3, inp4);
1067 res0 = APPLY_VERT_QPEL_FILTER(inp0, inp0, inp1, inp2,
1070 res0 = __msa_aver_u_b(res0, inp0);
1076 res0 = APPLY_VERT_QPEL_FILTER(inp1, inp0, inp0, inp1,
1085 res0 = APPLY_VERT_QPEL_FILTER(inp2, inp1, inp0, inp0,
1094 res0 = APPLY_VERT_QPEL_FILTER(inp3, inp2, inp1, inp0,
1199 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
1205 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
1209 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp0, inp0, inp1, inp2,
1211 inp1, inp0, inp0, inp1,
1216 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp2, inp1, inp0, inp0,
1218 inp3, inp2, inp1, inp0,
1242 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
1249 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
1253 res0 = APPLY_VERT_QPEL_FILTER(inp0, inp0, inp1, inp2,
1261 res0 = APPLY_VERT_QPEL_FILTER(inp1, inp0, inp0, inp1,
1269 res0 = APPLY_VERT_QPEL_FILTER(inp2, inp1, inp0, inp0,
1277 res0 = APPLY_VERT_QPEL_FILTER(inp3, inp2, inp1, inp0,
1378 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
1384 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
1388 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp0, inp0, inp1, inp2,
1390 inp1, inp0, inp0, inp1,
1396 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp2, inp1, inp0, inp0,
1398 inp3, inp2, inp1, inp0,
1428 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
1435 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
1439 res0 = APPLY_VERT_QPEL_FILTER(inp0, inp0, inp1, inp2,
1448 res0 = APPLY_VERT_QPEL_FILTER(inp1, inp0, inp0, inp1,
1457 res0 = APPLY_VERT_QPEL_FILTER(inp2, inp1, inp0, inp0,
1466 res0 = APPLY_VERT_QPEL_FILTER(inp3, inp2, inp1, inp0,
1579 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
1585 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
1589 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp0, inp1, inp2,
1591 inp1, inp0, inp0, inp1,
1596 res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp1, inp0, inp0,
1598 inp3, inp2, inp1, inp0,
1601 tmp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
1630 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
1637 LD_UB5(src, src_stride, inp0, inp1, inp2, inp3, inp4);
1639 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp0, inp0, inp1, inp2,
1642 res0 = __msa_ave_u_b(res0, inp0);
1648 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp1, inp0, inp0, inp1,
1657 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp2, inp1, inp0, inp0,
1666 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp3, inp2, inp1, inp0,
1780 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
1786 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
1790 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp0, inp1, inp2,
1792 inp1, inp0, inp0, inp1,
1797 res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp1, inp0, inp0,
1799 inp3, inp2, inp1, inp0,
1823 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
1830 LD_UB5(src, src_stride, inp0, inp1, inp2, inp3, inp4);
1832 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp0, inp0, inp1, inp2,
1840 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp1, inp0, inp0, inp1,
1848 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp2, inp1, inp0, inp0,
1856 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp3, inp2, inp1, inp0,
1956 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
1962 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
1966 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp0, inp1, inp2,
1968 inp1, inp0, inp0, inp1,
1973 res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp1, inp0, inp0,
1975 inp3, inp2, inp1, inp0,
2007 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
2014 LD_UB5(src, src_stride, inp0, inp1, inp2, inp3, inp4);
2016 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp0, inp0, inp1, inp2,
2025 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp1, inp0, inp0, inp1,
2034 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp2, inp1, inp0, inp0,
2043 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp3, inp2, inp1, inp0,
2156 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
2163 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
2167 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp0, inp0, inp1, inp2,
2169 inp1, inp0, inp0, inp1,
2175 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp2, inp1, inp0, inp0,
2177 inp3, inp2, inp1, inp0,
2182 tmp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
2218 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
2225 LD_UB5(src, src_stride, inp0, inp1, inp2, inp3, inp4);
2227 res0 = APPLY_VERT_QPEL_FILTER(inp0, inp0, inp1, inp2,
2233 res1 = APPLY_VERT_QPEL_FILTER(inp1, inp0, inp0, inp1,
2238 AVER_UB2_UB(res0, inp0, res1, inp1, res0, res1);
2245 res0 = APPLY_VERT_QPEL_FILTER(inp2, inp1, inp0, inp0,
2251 res1 = APPLY_VERT_QPEL_FILTER(inp3, inp2, inp1, inp0,
2350 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
2357 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
2361 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp0, inp0, inp1, inp2,
2363 inp1, inp0, inp0, inp1,
2368 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp2, inp1, inp0, inp0,
2370 inp3, inp2, inp1, inp0,
2403 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
2410 LD_UB5(src, src_stride, inp0, inp1, inp2, inp3, inp4);
2412 res0 = APPLY_VERT_QPEL_FILTER(inp0, inp0, inp1, inp2,
2417 res1 = APPLY_VERT_QPEL_FILTER(inp1, inp0, inp0, inp1,
2427 res0 = APPLY_VERT_QPEL_FILTER(inp2, inp1, inp0, inp0,
2432 res1 = APPLY_VERT_QPEL_FILTER(inp3, inp2, inp1, inp0,
2528 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
2535 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
2539 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp0, inp0, inp1, inp2,
2541 inp1, inp0, inp0, inp1,
2546 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp2, inp1, inp0, inp0,
2548 inp3, inp2, inp1, inp0,
2587 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7, inp8;
2594 LD_UB5(src, src_stride, inp0, inp1, inp2, inp3, inp4);
2596 res0 = APPLY_VERT_QPEL_FILTER(inp0, inp0, inp1, inp2,
2601 res1 = APPLY_VERT_QPEL_FILTER(inp1, inp0, inp0, inp1,
2612 res0 = APPLY_VERT_QPEL_FILTER(inp2, inp1, inp0, inp0,
2617 res1 = APPLY_VERT_QPEL_FILTER(inp3, inp2, inp1, inp0,
2722 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
2730 LD_UB4(src, src_stride, inp0, inp2, inp4, inp6);
2733 res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp0, inp1, mask,
2735 res = __msa_ave_u_b(inp0, res);
2758 LD_UB2(src, 1, inp0, inp1);
2759 res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp0, inp1, mask,
2761 res = __msa_ave_u_b(inp0, res);
2781 v16u8 inp0, inp1, inp2, inp3;
2793 LD_UB2(src, src_stride, inp0, inp1);
2795 res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
2798 inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
2799 horiz0 = __msa_ave_u_b(inp0, res0);
2809 LD_UB2(src, src_stride, inp0, inp1);
2811 res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
2814 inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
2815 horiz4 = __msa_ave_u_b(inp0, res0);
2835 inp0 = LD_UB(src);
2836 res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE_1ROW(inp0, mask0, mask1,
2839 horiz8 = __msa_ave_u_b(inp0, res0);
2877 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
2885 LD_UB4(src, src_stride, inp0, inp2, inp4, inp6);
2888 res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp0, inp1, mask,
2909 LD_UB2(src, 1, inp0, inp1);
2910 res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp0, inp1, mask,
2931 v16u8 inp0, inp1, inp2, inp3;
2943 LD_UB2(src, src_stride, inp0, inp1);
2945 horiz0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
2956 LD_UB2(src, src_stride, inp0, inp1);
2958 horiz4 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
2978 inp0 = LD_UB(src);
2979 horiz8 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE_1ROW(inp0, mask0, mask1,
3021 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
3029 LD_UB4(src, src_stride, inp0, inp2, inp4, inp6);
3032 res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp0, inp1, mask,
3057 LD_UB2(src, 1, inp0, inp1);
3058 res = APPLY_HORIZ_QPEL_NO_ROUND_FILTER(inp0, inp1, mask,
3080 v16u8 inp0, inp1, inp2, inp3;
3092 LD_UB2(src, src_stride, inp0, inp1);
3094 res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
3097 SLDI_B2_UB(inp0, inp0, inp1, inp1, 1, inp0, inp1);
3099 inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
3100 horiz0 = __msa_ave_u_b(inp0, res0);
3112 LD_UB2(src, src_stride, inp0, inp1);
3114 res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
3117 SLDI_B2_UB(inp0, inp0, inp1, inp1, 1, inp0, inp1);
3119 inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
3120 horiz4 = __msa_ave_u_b(inp0, res0);
3142 inp0 = LD_UB(src);
3143 res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE_1ROW(inp0, mask0, mask1,
3146 inp0 = (v16u8) __msa_sldi_b((v16i8) inp0, (v16i8) inp0, 1);
3147 horiz8 = __msa_ave_u_b(inp0, res0);
3194 v16u8 inp0, inp1, inp2, inp3;
3206 LD_UB2(src, src_stride, inp0, inp1);
3208 res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
3211 inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
3212 horiz0 = __msa_ave_u_b(inp0, res0);
3222 LD_UB2(src, src_stride, inp0, inp1);
3224 res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
3227 inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
3228 horiz4 = __msa_ave_u_b(inp0, res0);
3247 inp0 = LD_UB(src);
3248 res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE_1ROW(inp0, mask0, mask1,
3251 horiz8 = __msa_ave_u_b(inp0, res0);
3289 v16u8 inp0, inp1, inp2, inp3;
3301 LD_UB2(src, src_stride, inp0, inp1);
3303 horiz0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
3313 LD_UB2(src, src_stride, inp0, inp1);
3315 horiz4 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
3333 inp0 = LD_UB(src);
3334 horiz8 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE_1ROW(inp0, mask0, mask1,
3375 v16u8 inp0, inp1, inp2, inp3;
3387 LD_UB2(src, src_stride, inp0, inp1);
3389 res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
3392 SLDI_B2_UB(inp0, inp0, inp1, inp1, 1, inp0, inp1);
3394 inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
3395 horiz0 = __msa_ave_u_b(inp0, res0);
3407 LD_UB2(src, src_stride, inp0, inp1);
3409 res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
3412 SLDI_B2_UB(inp0, inp0, inp1, inp1, 1, inp0, inp1);
3414 inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
3415 horiz4 = __msa_ave_u_b(inp0, res0);
3435 inp0 = LD_UB(src);
3436 res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE_1ROW(inp0, mask0, mask1,
3439 inp0 = (v16u8) __msa_sldi_b((v16i8) inp0, (v16i8) inp0, 1);
3440 horiz8 = __msa_ave_u_b(inp0, res0);
3478 v16u8 inp0, inp1, inp2, inp3;
3490 LD_UB2(src, src_stride, inp0, inp1);
3492 res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
3495 inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
3496 horiz0 = __msa_ave_u_b(inp0, res0);
3506 LD_UB2(src, src_stride, inp0, inp1);
3508 res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
3511 inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
3512 horiz4 = __msa_ave_u_b(inp0, res0);
3532 inp0 = LD_UB(src);
3533 res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE_1ROW(inp0, mask0, mask1,
3536 horiz8 = __msa_ave_u_b(inp0, res0);
3584 v16u8 inp0, inp1, inp2, inp3;
3596 LD_UB2(src, src_stride, inp0, inp1);
3598 horiz0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
3608 LD_UB2(src, src_stride, inp0, inp1);
3610 horiz4 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
3637 inp0 = LD_UB(src);
3638 horiz8 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE_1ROW(inp0, mask0, mask1,
3677 v16u8 inp0, inp1, inp2, inp3;
3689 LD_UB2(src, src_stride, inp0, inp1);
3691 res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
3694 SLDI_B2_UB(inp0, inp0, inp1, inp1, 1, inp0, inp1);
3696 inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
3697 horiz0 = __msa_ave_u_b(inp0, res0);
3709 LD_UB2(src, src_stride, inp0, inp1);
3711 res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
3715 SLDI_B2_UB(inp0, inp0, inp1, inp1, 1, inp0, inp1);
3716 inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
3717 horiz4 = __msa_ave_u_b(inp0, res0);
3749 inp0 = LD_UB(src);
3750 res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE_1ROW(inp0, mask0, mask1,
3753 inp0 = (v16u8) __msa_sldi_b((v16i8) inp0, (v16i8) inp0, 1);
3754 horiz8 = __msa_ave_u_b(inp0, res0);
3779 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
3787 LD_UB4(src, src_stride, inp0, inp2, inp4, inp6);
3790 res = APPLY_HORIZ_QPEL_FILTER(inp0, inp1, mask,
3792 res = __msa_aver_u_b(inp0, res);
3815 LD_UB2(src, 1, inp0, inp1);
3816 res = APPLY_HORIZ_QPEL_FILTER(inp0, inp1, mask, const20, const6, const3);
3817 res = __msa_aver_u_b(inp0, res);
3837 v16u8 inp0, inp1, inp2, inp3;
3849 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
3851 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
3855 inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
3856 horiz0 = __msa_aver_u_b(inp0, res0);
3861 LD_UB2(src, src_stride, inp0, inp1);
3863 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
3865 inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
3866 horiz4 = __msa_aver_u_b(inp0, res0);
3893 inp0 = LD_UB(src);
3894 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0, mask0, mask1, mask2, mask3,
3896 horiz8 = __msa_aver_u_b(inp0, res0);
3924 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
3932 LD_UB4(src, src_stride, inp0, inp2, inp4, inp6);
3935 res = APPLY_HORIZ_QPEL_FILTER(inp0, inp1, mask,
3956 LD_UB2(src, 1, inp0, inp1);
3957 res = APPLY_HORIZ_QPEL_FILTER(inp0, inp1, mask, const20, const6, const3);
3977 v16u8 inp0, inp1, inp2, inp3;
3989 LD_UB2(src, src_stride, inp0, inp1);
3991 horiz0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
4001 LD_UB2(src, src_stride, inp0, inp1);
4003 horiz4 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
4028 inp0 = LD_UB(src);
4029 horiz8 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0,
4061 v16u8 inp0, inp1, inp2, inp3, inp4, inp5, inp6, inp7;
4069 LD_UB4(src, src_stride, inp0, inp2, inp4, inp6);
4072 res = APPLY_HORIZ_QPEL_FILTER(inp0, inp1, mask,
4097 LD_UB2(src, 1, inp0, inp1);
4098 res = APPLY_HORIZ_QPEL_FILTER(inp0, inp1, mask, const20, const6, const3);
4119 v16u8 inp0, inp1, inp2, inp3;
4131 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
4133 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
4137 SLDI_B2_UB(inp0, inp0, inp1, inp1, 1, inp0, inp1);
4139 inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
4140 horiz0 = __msa_aver_u_b(inp0, res0);
4147 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
4149 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
4153 SLDI_B2_UB(inp0, inp0, inp1, inp1, 1, inp0, inp1);
4155 inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
4156 horiz4 = __msa_aver_u_b(inp0, res0);
4178 inp0 = LD_UB(src);
4179 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0, mask0, mask1, mask2, mask3,
4183 inp0 = (v16u8) __msa_sldi_b((v16i8) inp0, (v16i8) inp0, 1);
4184 horiz8 = __msa_aver_u_b(inp0, res0);
4221 v16u8 inp0, inp1, inp2, inp3;
4233 LD_UB2(src, src_stride, inp0, inp1);
4235 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
4237 inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
4238 horiz0 = __msa_aver_u_b(inp0, res0);
4248 LD_UB2(src, src_stride, inp0, inp1);
4250 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
4252 inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
4253 horiz4 = __msa_aver_u_b(inp0, res0);
4275 inp0 = LD_UB(src);
4276 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0, mask0, mask1, mask2, mask3,
4278 horiz8 = __msa_aver_u_b(inp0, res0);
4309 v16u8 inp0, inp1, inp2, inp3;
4321 LD_UB2(src, src_stride, inp0, inp1);
4323 horiz0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
4333 LD_UB2(src, src_stride, inp0, inp1);
4335 horiz4 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
4358 inp0 = LD_UB(src);
4359 horiz8 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0,
4394 v16u8 inp0, inp1, inp2, inp3;
4406 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
4409 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
4413 SLDI_B2_UB(inp0, inp0, inp1, inp1, 1, inp0, inp1);
4415 inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
4416 horiz0 = __msa_aver_u_b(inp0, res0);
4423 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
4425 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
4429 SLDI_B2_UB(inp0, inp0, inp1, inp1, 1, inp0, inp1);
4431 inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
4432 horiz4 = __msa_aver_u_b(inp0, res0);
4439 inp0 = LD_UB(src);
4440 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0, mask0, mask1, mask2, mask3,
4442 inp0 = (v16u8) __msa_sldi_b((v16i8) inp0, (v16i8) inp0, 1);
4443 horiz8 = __msa_aver_u_b(inp0, res0);
4486 v16u8 inp0, inp1, inp2, inp3;
4498 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
4501 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
4505 inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
4506 horiz0 = __msa_aver_u_b(inp0, res0);
4511 LD_UB2(src, src_stride, inp0, inp1);
4514 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
4516 inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
4517 horiz4 = __msa_aver_u_b(inp0, res0);
4536 inp0 = LD_UB(src);
4537 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0, mask0, mask1, mask2, mask3,
4539 horiz8 = __msa_aver_u_b(inp0, res0);
4583 v16u8 inp0, inp1, inp2, inp3;
4595 LD_UB2(src, src_stride, inp0, inp1);
4597 horiz0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
4607 LD_UB2(src, src_stride, inp0, inp1);
4609 horiz4 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
4635 inp0 = LD_UB(src);
4636 horiz8 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0,
4676 v16u8 inp0, inp1, inp2, inp3;
4688 LD_UB4(src, src_stride, inp0, inp1, inp2, inp3);
4690 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
4693 SLDI_B2_UB(inp0, inp0, inp1, inp1, 1, inp0, inp1);
4695 inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
4696 horiz0 = __msa_aver_u_b(inp0, res0);
4705 LD_UB2(src, src_stride, inp0, inp1);
4707 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
4709 SLDI_B2_UB(inp0, inp0, inp1, inp1, 1, inp0, inp1);
4711 inp0 = (v16u8) __msa_insve_d((v2i64) inp0, 1, (v2i64) inp1);
4712 horiz4 = __msa_aver_u_b(inp0, res0);
4740 inp0 = LD_UB(src);
4741 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0, mask0, mask1, mask2, mask3,
4743 inp0 = (v16u8) __msa_sldi_b((v16i8) inp0, (v16i8) inp0, 1);
4744 horiz8 = __msa_aver_u_b(inp0, res0);
4781 v16u8 inp0, inp1, inp2, inp3;
4794 LD_UB2(src, src_stride, inp0, inp1);
4796 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
4800 inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
4801 horiz0 = __msa_aver_u_b(inp0, res0);
4805 LD_UB2(src, src_stride, inp0, inp1);
4810 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
4812 inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
4813 horiz4 = __msa_aver_u_b(inp0, res0);
4848 inp0 = LD_UB(src);
4849 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0, mask0, mask1, mask2, mask3,
4851 horiz8 = __msa_aver_u_b(inp0, res0);
4894 v16u8 inp0, inp1, inp2, inp3;
4907 LD_UB2(src, src_stride, inp0, inp1);
4909 horiz0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
4918 LD_UB2(src, src_stride, inp0, inp1);
4921 horiz4 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
4957 inp0 = LD_UB(src);
4958 horiz8 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0,
5003 v16u8 inp0, inp1, inp2, inp3;
5016 LD_UB2(src, src_stride, inp0, inp1);
5018 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
5023 SLDI_B2_UB(inp0, inp0, inp1, inp1, 1, inp0, inp1);
5025 inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
5026 horiz0 = __msa_aver_u_b(inp0, res0);
5030 LD_UB2(src, src_stride, inp0, inp1);
5037 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
5040 SLDI_B2_UB(inp0, inp0, inp1, inp1, 1, inp0, inp1);
5042 inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
5043 horiz4 = __msa_aver_u_b(inp0, res0);
5081 inp0 = LD_UB(src);
5082 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0, mask0, mask1, mask2, mask3,
5084 inp0 = (v16u8) __msa_sldi_b((v16i8) inp0, (v16i8) inp0, 1);
5085 horiz8 = __msa_aver_u_b(inp0, res0);
5128 v16u8 inp0, inp1, inp2, inp3;
5141 LD_UB2(src, src_stride, inp0, inp1);
5143 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
5147 inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
5148 horiz0 = __msa_aver_u_b(inp0, res0);
5152 LD_UB2(src, src_stride, inp0, inp1);
5157 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
5159 inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
5160 horiz4 = __msa_aver_u_b(inp0, res0);
5191 inp0 = LD_UB(src);
5192 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0, mask0, mask1, mask2, mask3,
5194 horiz8 = __msa_aver_u_b(inp0, res0);
5230 v16u8 inp0, inp1, inp2, inp3;
5243 LD_UB2(src, src_stride, inp0, inp1);
5245 horiz0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
5254 LD_UB2(src, src_stride, inp0, inp1);
5257 horiz4 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
5267 inp0 = LD_UB(src);
5268 horiz8 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0,
5331 v16u8 inp0, inp1, inp2, inp3;
5344 LD_UB2(src, src_stride, inp0, inp1);
5346 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
5350 SLDI_B2_UB(inp0, inp0, inp1, inp1, 1, inp0, inp1);
5352 inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
5353 horiz0 = __msa_aver_u_b(inp0, res0);
5357 LD_UB2(src, src_stride, inp0, inp1);
5364 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
5367 SLDI_B2_UB(inp0, inp0, inp1, inp1, 1, inp0, inp1);
5369 inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
5370 horiz4 = __msa_aver_u_b(inp0, res0);
5404 inp0 = LD_UB(src);
5405 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0, mask0, mask1, mask2, mask3,
5407 inp0 = (v16u8) __msa_sldi_b((v16i8) inp0, (v16i8) inp0, 1);
5408 horiz8 = __msa_aver_u_b(inp0, res0);
5447 v16u8 inp0, inp1, inp2, inp3;
5460 LD_UB2(src, src_stride, inp0, inp1);
5463 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
5465 inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
5466 horiz0 = __msa_aver_u_b(inp0, res0);
5476 LD_UB2(src, src_stride, inp0, inp1);
5478 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
5480 inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
5481 horiz4 = __msa_aver_u_b(inp0, res0);
5515 inp0 = LD_UB(src);
5516 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0, mask0, mask1, mask2, mask3,
5518 horiz8 = __msa_aver_u_b(inp0, res0);
5561 v16u8 inp0, inp1, inp2, inp3;
5574 LD_UB2(src, src_stride, inp0, inp1);
5576 horiz0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
5587 LD_UB2(src, src_stride, inp0, inp1);
5589 horiz4 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
5624 inp0 = LD_UB(src);
5625 horiz8 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0,
5668 v16u8 inp0, inp1, inp2, inp3;
5681 LD_UB2(src, src_stride, inp0, inp1);
5683 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
5687 SLDI_B2_UB(inp0, inp0, inp1, inp1, 1, inp0, inp1);
5689 inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
5690 horiz0 = __msa_aver_u_b(inp0, res0);
5694 LD_UB2(src, src_stride, inp0, inp1);
5701 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
5703 SLDI_B2_UB(inp0, inp0, inp1, inp1, 1, inp0, inp1);
5705 inp0 = (v16u8) __msa_ilvr_d((v2i64) inp1, (v2i64) inp0);
5706 horiz4 = __msa_aver_u_b(inp0, res0);
5741 inp0 = LD_UB(src);
5742 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0, mask0, mask1, mask2, mask3,
5744 inp0 = (v16u8) __msa_sldi_b((v16i8) inp0, (v16i8) inp0, 1);
5745 horiz8 = __msa_aver_u_b(inp0, res0);