Lines Matching refs:res1
316 v16u8 res0, res1;
331 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
336 AVER_UB2_UB(inp0, res0, inp2, res1, res0, res1);
337 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
394 v16u8 res0, res1;
409 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
412 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
465 v16u8 res0, res1;
480 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
487 AVER_UB2_UB(inp0, res0, inp2, res1, res0, res1);
488 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
545 v16u8 res0, res1;
560 res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
566 res1 = __msa_ave_u_b(inp2, res1);
567 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
624 v16u8 res0, res1;
639 res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
642 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
695 v16u8 res0, res1;
710 res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
718 res1 = __msa_ave_u_b(inp2, res1);
719 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
777 v16u8 res0, res1;
792 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
800 AVER_UB2_UB(inp0, res0, inp2, res1, res0, res1);
801 AVER_UB2_UB(dst0, res0, dst2, res1, res0, res1);
802 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
815 v16u8 res0, res1;
828 res1 = APPLY_HORIZ_QPEL_FILTER(inp2, inp3, mask,
831 AVER_UB2_UB(inp0, res0, inp2, res1, res0, res1);
832 AVER_UB2_UB(dst0, res0, dst1, res1, res0, res1);
833 ST_UB2(res0, res1, dst, dst_stride);
838 res1 = APPLY_HORIZ_QPEL_FILTER(inp6, inp7, mask,
841 AVER_UB2_UB(inp4, res0, inp6, res1, res0, res1);
842 AVER_UB2_UB(dst0, res0, dst1, res1, res0, res1);
843 ST_UB2(res0, res1, dst, dst_stride);
857 v16u8 res0, res1;
872 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
878 AVER_UB2_UB(dst0, res0, dst2, res1, res0, res1);
879 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
892 v16u8 res0, res1;
905 res1 = APPLY_HORIZ_QPEL_FILTER(inp2, inp3, mask,
908 AVER_UB2_UB(dst0, res0, dst1, res1, res0, res1);
909 ST_UB2(res0, res1, dst, dst_stride);
914 res1 = APPLY_HORIZ_QPEL_FILTER(inp6, inp7, mask,
917 AVER_UB2_UB(dst0, res0, dst1, res1, res0, res1);
918 ST_UB2(res0, res1, dst, dst_stride);
932 v16u8 res0, res1;
947 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3,
957 AVER_UB2_UB(inp0, res0, inp2, res1, res0, res1);
958 AVER_UB2_UB(dst0, res0, dst2, res1, res0, res1);
959 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
972 v16u8 res0, res1, dst0, dst1;
984 res1 = APPLY_HORIZ_QPEL_FILTER(inp2, inp3, mask,
987 AVER_UB2_UB(res0, inp1, res1, inp3, res0, res1);
988 AVER_UB2_UB(dst0, res0, dst1, res1, res0, res1);
989 ST_UB2(res0, res1, dst, dst_stride);
993 res1 = APPLY_HORIZ_QPEL_FILTER(inp6, inp7, mask,
996 AVER_UB2_UB(res0, inp5, res1, inp7, res0, res1);
997 AVER_UB2_UB(dst0, res0, dst1, res1, res0, res1);
998 ST_UB2(res0, res1, dst, dst_stride);
1010 v16u8 tmp0, tmp1, res0, res1;
1026 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp2, inp1, inp0, inp0,
1033 AVER_UB2_UB(res0, tmp0, res1, tmp1, res0, res1);
1034 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
1042 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp6, inp5, inp4, inp3,
1049 AVER_UB2_UB(res0, tmp0, res1, tmp1, res0, res1);
1050 ST_D4(res0, res1, 0, 1, 0, 1, dst + 4 * dst_stride, dst_stride);
1200 v16u8 res0, res1;
1216 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp2, inp1, inp0, inp0,
1221 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
1229 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp6, inp5, inp4, inp3,
1234 ST_D4(res0, res1, 0, 1, 0, 1, dst + 4 * dst_stride, dst_stride);
1379 v16u8 tmp0, tmp1, res0, res1;
1396 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp2, inp1, inp0, inp0,
1403 AVER_UB2_UB(res0, tmp0, res1, tmp1, res0, res1);
1404 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
1412 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp6, inp5, inp4, inp3,
1419 AVER_UB2_UB(res0, tmp0, res1, tmp1, res0, res1);
1420 ST_D4(res0, res1, 0, 1, 0, 1, dst + 4 * dst_stride, dst_stride);
1580 v16u8 tmp0, tmp1, res0, res1;
1596 res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp1, inp0, inp0,
1604 res1 = __msa_ave_u_b(res1, tmp1);
1605 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
1613 res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp6, inp5, inp4, inp3,
1621 res1 = __msa_ave_u_b(res1, tmp1);
1622 ST_D4(res0, res1, 0, 1, 0, 1, dst + 4 * dst_stride, dst_stride);
1781 v16u8 res0, res1;
1797 res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp1, inp0, inp0,
1802 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
1810 res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp6, inp5, inp4, inp3,
1815 ST_D4(res0, res1, 0, 1, 0, 1, dst + 4 * dst_stride, dst_stride);
1957 v16u8 tmp0, tmp1, res0, res1;
1973 res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp1, inp0, inp0,
1981 res1 = __msa_ave_u_b(res1, tmp1);
1982 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
1990 res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp6, inp5, inp4, inp3,
1998 res1 = __msa_ave_u_b(res1, tmp1);
1999 ST_D4(res0, res1, 0, 1, 0, 1, dst + 4 * dst_stride, dst_stride);
2158 v16u8 tmp0, tmp1, res0, res1;
2175 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp2, inp1, inp0, inp0,
2186 AVER_UB2_UB(res0, tmp0, res1, tmp1, res0, res1);
2187 AVER_UB2_UB(dst0, res0, dst2, res1, res0, res1);
2188 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
2197 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp6, inp5, inp4, inp3,
2208 AVER_UB2_UB(res0, tmp0, res1, tmp1, res0, res1);
2209 AVER_UB2_UB(dst0, res0, dst2, res1, res0, res1);
2210 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
2220 v16u8 res0, res1, dst0, dst1;
2233 res1 = APPLY_VERT_QPEL_FILTER(inp1, inp0, inp0, inp1,
2238 AVER_UB2_UB(res0, inp0, res1, inp1, res0, res1);
2239 AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2240 ST_UB2(res0, res1, dst, dst_stride);
2251 res1 = APPLY_VERT_QPEL_FILTER(inp3, inp2, inp1, inp0,
2256 AVER_UB2_UB(res0, inp2, res1, inp3, res0, res1);
2257 AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2258 ST_UB2(res0, res1, dst, dst_stride);
2266 res1 = APPLY_VERT_QPEL_FILTER(inp5, inp4, inp3, inp2,
2271 AVER_UB2_UB(res0, inp4, res1, inp5, res0, res1);
2272 AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2273 ST_UB2(res0, res1, dst, dst_stride);
2281 res1 = APPLY_VERT_QPEL_FILTER(inp7, inp6, inp5, inp4,
2286 AVER_UB2_UB(res0, inp6, res1, inp7, res0, res1);
2287 AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2288 ST_UB2(res0, res1, dst, dst_stride);
2296 res1 = APPLY_VERT_QPEL_FILTER(inp9, inp8, inp7, inp6,
2300 AVER_UB2_UB(res0, inp8, res1, inp9, res0, res1);
2301 AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2302 ST_UB2(res0, res1, dst, dst_stride);
2310 res1 = APPLY_VERT_QPEL_FILTER(inp11, inp10, inp9, inp8,
2315 AVER_UB2_UB(res0, inp10, res1, inp11, res0, res1);
2316 AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2317 ST_UB2(res0, res1, dst, dst_stride);
2324 res1 = APPLY_VERT_QPEL_FILTER(inp13, inp12, inp11, inp10,
2328 AVER_UB2_UB(res0, inp12, res1, inp13, res0, res1);
2329 AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2330 ST_UB2(res0, res1, dst, dst_stride);
2336 res1 = APPLY_VERT_QPEL_FILTER(inp15, inp14, inp13, inp12,
2340 AVER_UB2_UB(res0, inp14, res1, inp15, res0, res1);
2341 AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2342 ST_UB2(res0, res1, dst, dst_stride);
2352 v16u8 res0, res1;
2368 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp2, inp1, inp0, inp0,
2376 AVER_UB2_UB(dst0, res0, dst2, res1, res0, res1);
2377 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
2386 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp6, inp5, inp4, inp3,
2394 AVER_UB2_UB(dst0, res0, dst2, res1, res0, res1);
2395 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
2405 v16u8 res0, res1, dst0, dst1;
2417 res1 = APPLY_VERT_QPEL_FILTER(inp1, inp0, inp0, inp1,
2421 AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2422 ST_UB2(res0, res1, dst, dst_stride);
2432 res1 = APPLY_VERT_QPEL_FILTER(inp3, inp2, inp1, inp0,
2436 AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2437 ST_UB2(res0, res1, dst, dst_stride);
2447 res1 = APPLY_VERT_QPEL_FILTER(inp5, inp4, inp3, inp2,
2451 AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2452 ST_UB2(res0, res1, dst, dst_stride);
2462 res1 = APPLY_VERT_QPEL_FILTER(inp7, inp6, inp5, inp4,
2466 AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2467 ST_UB2(res0, res1, dst, dst_stride);
2477 res1 = APPLY_VERT_QPEL_FILTER(inp9, inp8, inp7, inp6,
2481 AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2482 ST_UB2(res0, res1, dst, dst_stride);
2492 res1 = APPLY_VERT_QPEL_FILTER(inp11, inp10, inp9, inp8,
2496 AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2497 ST_UB2(res0, res1, dst, dst_stride);
2504 res1 = APPLY_VERT_QPEL_FILTER(inp13, inp12, inp11, inp10,
2508 AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2509 ST_UB2(res0, res1, dst, dst_stride);
2515 res1 = APPLY_VERT_QPEL_FILTER(inp15, inp14, inp13, inp12,
2519 AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2520 ST_UB2(res0, res1, dst, dst_stride);
2530 v16u8 tmp0, tmp1, res0, res1;
2546 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp2, inp1, inp0, inp0,
2556 AVER_UB2_UB(res0, tmp0, res1, tmp1, res0, res1);
2557 AVER_UB2_UB(dst0, res0, dst2, res1, res0, res1);
2558 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
2567 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(inp6, inp5, inp4, inp3,
2577 AVER_UB2_UB(res0, tmp0, res1, tmp1, res0, res1);
2578 AVER_UB2_UB(dst0, res0, dst2, res1, res0, res1);
2579 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
2589 v16u8 res0, res1, dst0, dst1;
2601 res1 = APPLY_VERT_QPEL_FILTER(inp1, inp0, inp0, inp1,
2605 AVER_UB2_UB(res0, inp1, res1, inp2, res0, res1);
2606 AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2607 ST_UB2(res0, res1, dst, dst_stride);
2617 res1 = APPLY_VERT_QPEL_FILTER(inp3, inp2, inp1, inp0,
2621 AVER_UB2_UB(res0, inp3, res1, inp4, res0, res1);
2622 AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2623 ST_UB2(res0, res1, dst, dst_stride);
2633 res1 = APPLY_VERT_QPEL_FILTER(inp5, inp4, inp3, inp2,
2637 AVER_UB2_UB(res0, inp5, res1, inp6, res0, res1);
2638 AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2639 ST_UB2(res0, res1, dst, dst_stride);
2649 res1 = APPLY_VERT_QPEL_FILTER(inp7, inp6, inp5, inp4,
2653 AVER_UB2_UB(res0, inp7, res1, inp8, res0, res1);
2654 AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2655 ST_UB2(res0, res1, dst, dst_stride);
2665 res1 = APPLY_VERT_QPEL_FILTER(inp9, inp8, inp7, inp6,
2669 AVER_UB2_UB(res0, inp9, res1, inp10, res0, res1);
2670 AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2671 ST_UB2(res0, res1, dst, dst_stride);
2681 res1 = APPLY_VERT_QPEL_FILTER(inp11, inp10, inp9, inp8,
2685 AVER_UB2_UB(res0, inp11, res1, inp12, res0, res1);
2686 AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2687 ST_UB2(res0, res1, dst, dst_stride);
2694 res1 = APPLY_VERT_QPEL_FILTER(inp13, inp12, inp11, inp10,
2698 AVER_UB2_UB(res0, inp13, res1, inp14, res0, res1);
2699 AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2700 ST_UB2(res0, res1, dst, dst_stride);
2706 res1 = APPLY_VERT_QPEL_FILTER(inp15, inp14, inp13, inp12,
2710 AVER_UB2_UB(res0, inp15, res1, inp16, res0, res1);
2711 AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2712 ST_UB2(res0, res1, dst, dst_stride);
2782 v16u8 res0, res1, avg0, avg1;
2803 res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
2807 horiz2 = __msa_ave_u_b(inp2, res1);
2829 res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
2833 horiz6 = __msa_ave_u_b(inp2, res1);
2840 res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
2846 res1 = __msa_ave_u_b(avg1, res1);
2852 ST_D2(res1, 0, 1, dst, dst_stride);
2857 res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
2866 res1 = __msa_ave_u_b(avg1, res1);
2867 ST_D2(res1, 0, 1, dst, dst_stride);
2932 v16u8 res0, res1, avg0, avg1;
2982 res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
2988 res1 = __msa_ave_u_b(avg1, res1);
2991 ST_D2(res1, 0, 1, dst, dst_stride);
3004 res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
3010 res1 = __msa_ave_u_b(avg1, res1);
3011 ST_D2(res1, 0, 1, dst, dst_stride);
3081 v16u8 res0, res1, avg0, avg1;
3104 res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
3110 horiz2 = __msa_ave_u_b(inp2, res1);
3134 res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
3140 horiz6 = __msa_ave_u_b(inp2, res1);
3148 res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
3154 res1 = __msa_ave_u_b(avg1, res1);
3155 ST_D2(res1, 0, 1, dst, dst_stride);
3168 res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
3174 res1 = __msa_ave_u_b(avg1, res1);
3175 ST_D2(res1, 0, 1, dst, dst_stride);
3195 v16u8 res0, res1;
3216 res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
3220 horiz2 = __msa_ave_u_b(inp2, res1);
3241 res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
3245 horiz6 = __msa_ave_u_b(inp2, res1);
3252 res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
3262 ST_D4(res1, res0, 0, 1, 0, 1, dst, dst_stride);
3265 res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
3270 ST_D2(res1, 0, 1, dst, dst_stride);
3290 v16u8 res0, res1;
3337 res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
3347 ST_D2(res1, 0, 1, dst, dst_stride);
3351 res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
3356 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
3376 v16u8 res0, res1;
3399 res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
3405 horiz2 = __msa_ave_u_b(inp2, res1);
3427 res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
3433 horiz6 = __msa_ave_u_b(inp2, res1);
3441 res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
3451 ST_D2(res1, 0, 1, dst, dst_stride);
3454 res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
3459 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
3479 v16u8 res0, res1, avg0, avg1;
3500 res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
3504 horiz2 = __msa_ave_u_b(inp2, res1);
3526 res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
3530 horiz6 = __msa_ave_u_b(inp2, res1);
3537 res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
3543 res1 = __msa_ave_u_b(avg1, res1);
3549 ST_D2(res1, 0, 1, dst, dst_stride);
3555 res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
3564 res1 = __msa_ave_u_b(avg1, res1);
3565 ST_D2(res1, 0, 1, dst, dst_stride);
3585 v16u8 res0, res1, avg0, avg1;
3630 res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
3636 res1 = __msa_ave_u_b(avg1, res1);
3641 ST_D2(res1, 0, 1, dst, dst_stride);
3651 res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
3657 res1 = __msa_ave_u_b(avg1, res1);
3658 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
3678 v16u8 res0, res1, avg0, avg1;
3701 res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
3707 horiz2 = __msa_ave_u_b(inp2, res1);
3731 res1 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp2, inp3, mask0, mask1,
3737 horiz6 = __msa_ave_u_b(inp2, res1);
3739 res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
3745 res1 = __msa_ave_u_b(avg1, res1);
3746 ST_D2(res1, 0, 1, dst, dst_stride);
3760 res1 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
3768 res1 = __msa_ave_u_b(avg1, res1);
3769 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
3838 v16u8 res0, res1, avg0, avg1;
3853 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
3859 horiz2 = __msa_aver_u_b(inp2, res1);
3880 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
3883 horiz6 = __msa_aver_u_b(inp2, res1);
3885 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
3891 res1 = __msa_aver_u_b(avg1, res1);
3897 ST_D2(res1, 0, 1, dst, dst_stride);
3907 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
3913 res1 = __msa_aver_u_b(avg1, res1);
3914 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
3978 v16u8 res0, res1, avg0, avg1;
4023 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
4033 res1 = __msa_aver_u_b(avg1, res1);
4039 ST_D2(res1, 0, 1, dst, dst_stride);
4044 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
4050 res1 = __msa_aver_u_b(avg1, res1);
4051 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
4120 v16u8 res0, res1, avg0, avg1;
4135 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
4145 horiz2 = __msa_aver_u_b(inp2, res1);
4151 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
4161 horiz6 = __msa_aver_u_b(inp2, res1);
4170 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
4182 res1 = __msa_aver_u_b(avg1, res1);
4190 ST_D2(res1, 0, 1, dst, dst_stride);
4195 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
4201 res1 = __msa_aver_u_b(avg1, res1);
4202 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
4222 v16u8 res0, res1;
4243 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
4246 horiz2 = __msa_aver_u_b(inp2, res1);
4265 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
4268 horiz6 = __msa_aver_u_b(inp2, res1);
4270 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
4284 ST_D2(res1, 0, 1, dst, dst_stride);
4287 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
4292 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
4310 v16u8 res0, res1;
4353 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
4362 ST_D2(res1, 0, 1, dst, dst_stride);
4370 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
4375 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
4395 v16u8 res0, res1;
4411 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
4421 horiz2 = __msa_aver_u_b(inp2, res1);
4427 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
4437 horiz6 = __msa_aver_u_b(inp2, res1);
4449 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
4454 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
4462 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
4467 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
4487 v16u8 res0, res1, avg0, avg1;
4503 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
4509 horiz2 = __msa_aver_u_b(inp2, res1);
4531 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
4534 horiz6 = __msa_aver_u_b(inp2, res1);
4540 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
4546 res1 = __msa_aver_u_b(avg1, res1);
4552 ST_D2(res1, 0, 1, dst, dst_stride);
4557 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
4563 res1 = __msa_aver_u_b(avg1, res1);
4564 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
4584 v16u8 res0, res1, avg0, avg1;
4630 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
4640 res1 = __msa_aver_u_b(avg1, res1);
4646 ST_D2(res1, 0, 1, dst, dst_stride);
4651 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
4657 res1 = __msa_aver_u_b(avg1, res1);
4658 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
4677 v16u8 res0, res1, avg0, avg1;
4698 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
4703 horiz2 = __msa_aver_u_b(inp2, res1);
4726 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
4731 horiz6 = __msa_aver_u_b(inp2, res1);
4733 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
4739 res1 = __msa_aver_u_b(avg1, res1);
4750 ST_D2(res1, 0, 1, dst, dst_stride);
4755 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
4761 res1 = __msa_aver_u_b(avg1, res1);
4762 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
4782 v16u8 res0, res1, avg0, avg1;
4803 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
4808 horiz2 = __msa_aver_u_b(inp2, res1);
4830 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
4833 horiz6 = __msa_aver_u_b(inp2, res1);
4837 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
4842 res1 = __msa_aver_u_b(avg1, res1);
4844 res1 = __msa_aver_u_b(avg1, res1);
4845 ST_D2(res1, 0, 1, dst, dst_stride);
4867 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
4872 res1 = __msa_aver_u_b(avg1, res1);
4874 res1 = __msa_aver_u_b(avg1, res1);
4875 ST_D2(res1, 0, 1, dst, dst_stride);
4895 v16u8 res0, res1, avg0, avg1;
4946 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
4951 res1 = __msa_aver_u_b(avg1, res1);
4953 res1 = __msa_aver_u_b(avg1, res1);
4954 ST_D2(res1, 0, 1, dst, dst_stride);
4976 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
4981 res1 = __msa_aver_u_b(avg1, res1);
4983 res1 = __msa_aver_u_b(avg1, res1);
4984 ST_D2(res1, 0, 1, dst, dst_stride);
5004 v16u8 res0, res1, avg0, avg1;
5028 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
5035 horiz2 = __msa_aver_u_b(inp2, res1);
5060 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
5066 horiz6 = __msa_aver_u_b(inp2, res1);
5070 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
5075 res1 = __msa_aver_u_b(avg1, res1);
5077 res1 = __msa_aver_u_b(avg1, res1);
5078 ST_D2(res1, 0, 1, dst, dst_stride);
5101 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
5106 res1 = __msa_aver_u_b(avg1, res1);
5108 res1 = __msa_aver_u_b(avg1, res1);
5109 ST_D2(res1, 0, 1, dst, dst_stride);
5129 v16u8 res0, res1, avg0, avg1;
5150 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
5155 horiz2 = __msa_aver_u_b(inp2, res1);
5175 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
5178 horiz6 = __msa_aver_u_b(inp2, res1);
5181 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
5187 res1 = __msa_aver_u_b(avg1, res1);
5188 ST_D2(res1, 0, 1, dst, dst_stride);
5207 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
5213 res1 = __msa_aver_u_b(avg1, res1);
5214 ST_D2(res1, 0, 1, dst, dst_stride);
5231 v16u8 res0, res1, avg0, avg1;
5283 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
5289 res1 = __msa_aver_u_b(avg1, res1);
5290 ST_D2(res1, 0, 1, dst, dst_stride);
5305 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
5311 res1 = __msa_aver_u_b(avg1, res1);
5312 ST_D2(res1, 0, 1, dst, dst_stride);
5332 v16u8 res0, res1, avg0, avg1;
5355 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
5362 horiz2 = __msa_aver_u_b(inp2, res1);
5385 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
5391 horiz6 = __msa_aver_u_b(inp2, res1);
5394 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
5400 res1 = __msa_aver_u_b(avg1, res1);
5401 ST_D2(res1, 0, 1, dst, dst_stride);
5421 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
5427 res1 = __msa_aver_u_b(avg1, res1);
5428 ST_D2(res1, 0, 1, dst, dst_stride);
5448 v16u8 res0, res1, avg0, avg1;
5470 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
5473 horiz2 = __msa_aver_u_b(inp2, res1);
5498 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
5501 horiz6 = __msa_aver_u_b(inp2, res1);
5503 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
5509 res1 = __msa_aver_u_b(avg1, res1);
5511 res1 = __msa_aver_u_b(avg1, res1);
5512 ST_D2(res1, 0, 1, dst, dst_stride);
5524 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3,
5538 res1 = __msa_aver_u_b(avg1, res1);
5541 res1 = __msa_aver_u_b(avg1, res1);
5542 ST_D2(res1, 0, 1, dst, dst_stride);
5562 v16u8 res0, res1, avg0, avg1;
5612 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0,
5618 res1 = __msa_aver_u_b(avg1, res1);
5620 res1 = __msa_aver_u_b(avg1, res1);
5621 ST_D2(res1, 0, 1, dst, dst_stride);
5632 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3, horiz7,
5645 res1 = __msa_aver_u_b(avg1, res1);
5648 res1 = __msa_aver_u_b(avg1, res1);
5649 ST_D2(res1, 0, 1, dst, dst_stride);
5669 v16u8 res0, res1, avg0, avg1;
5692 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
5699 horiz2 = __msa_aver_u_b(inp2, res1);
5722 res1 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp2, inp3, mask0, mask1, mask2, mask3,
5727 horiz6 = __msa_aver_u_b(inp2, res1);
5731 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz2, horiz1, horiz0, horiz0, horiz3,
5735 res1 = __msa_aver_u_b(avg1, res1);
5737 res1 = __msa_aver_u_b(avg1, res1);
5738 ST_D2(res1, 0, 1, dst, dst_stride);
5760 res1 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz6, horiz5, horiz4, horiz3, horiz7,
5764 res1 = __msa_aver_u_b(avg1, res1);
5766 res1 = __msa_aver_u_b(avg1, res1);
5767 ST_D2(res1, 0, 1, dst, dst_stride);