Lines Matching defs:res0

316     v16u8 res0, res1;
328 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
336 AVER_UB2_UB(inp0, res0, inp2, res1, res0, res1);
337 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
394 v16u8 res0, res1;
406 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
412 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
465 v16u8 res0, res1;
477 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
487 AVER_UB2_UB(inp0, res0, inp2, res1, res0, res1);
488 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
545 v16u8 res0, res1;
557 res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
565 res0 = __msa_ave_u_b(inp0, res0);
567 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
624 v16u8 res0, res1;
636 res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
642 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
695 v16u8 res0, res1;
707 res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
717 res0 = __msa_ave_u_b(inp0, res0);
719 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
777 v16u8 res0, res1;
789 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
800 AVER_UB2_UB(inp0, res0, inp2, res1, res0, res1);
801 AVER_UB2_UB(dst0, res0, dst2, res1, res0, res1);
802 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
815 v16u8 res0, res1;
826 res0 = APPLY_HORIZ_QPEL_FILTER(inp0, inp1, mask,
831 AVER_UB2_UB(inp0, res0, inp2, res1, res0, res1);
832 AVER_UB2_UB(dst0, res0, dst1, res1, res0, res1);
833 ST_UB2(res0, res1, dst, dst_stride);
836 res0 = APPLY_HORIZ_QPEL_FILTER(inp4, inp5, mask,
841 AVER_UB2_UB(inp4, res0, inp6, res1, res0, res1);
842 AVER_UB2_UB(dst0, res0, dst1, res1, res0, res1);
843 ST_UB2(res0, res1, dst, dst_stride);
857 v16u8 res0, res1;
869 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
878 AVER_UB2_UB(dst0, res0, dst2, res1, res0, res1);
879 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
892 v16u8 res0, res1;
903 res0 = APPLY_HORIZ_QPEL_FILTER(inp0, inp1, mask,
908 AVER_UB2_UB(dst0, res0, dst1, res1, res0, res1);
909 ST_UB2(res0, res1, dst, dst_stride);
912 res0 = APPLY_HORIZ_QPEL_FILTER(inp4, inp5, mask,
917 AVER_UB2_UB(dst0, res0, dst1, res1, res0, res1);
918 ST_UB2(res0, res1, dst, dst_stride);
932 v16u8 res0, res1;
944 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
957 AVER_UB2_UB(inp0, res0, inp2, res1, res0, res1);
958 AVER_UB2_UB(dst0, res0, dst2, res1, res0, res1);
959 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
972 v16u8 res0, res1, dst0, dst1;
982 res0 = APPLY_HORIZ_QPEL_FILTER(inp0, inp1, mask,
987 AVER_UB2_UB(res0, inp1, res1, inp3, res0, res1);
988 AVER_UB2_UB(dst0, res0, dst1, res1, res0, res1);
989 ST_UB2(res0, res1, dst, dst_stride);
991 res0 = APPLY_HORIZ_QPEL_FILTER(inp4, inp5, mask,
996 AVER_UB2_UB(res0, inp5, res1, inp7, res0, res1);
997 AVER_UB2_UB(dst0, res0, dst1, res1, res0, res1);
998 ST_UB2(res0, res1, dst, dst_stride);
1010 v16u8 tmp0, tmp1, res0, res1;
1019 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp0, inp0, inp1, inp2,
1033 AVER_UB2_UB(res0, tmp0, res1, tmp1, res0, res1);
1034 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
1037 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp4, inp3, inp2, inp1,
1049 AVER_UB2_UB(res0, tmp0, res1, tmp1, res0, res1);
1050 ST_D4(res0, res1, 0, 1, 0, 1, dst + 4 * dst_stride, dst_stride);
1060 v16u8 res0;
1067 res0 = APPLY_VERT_QPEL_FILTER(inp0, inp0, inp1, inp2,
1070 res0 = __msa_aver_u_b(res0, inp0);
1071 ST_UB(res0, dst);
1076 res0 = APPLY_VERT_QPEL_FILTER(inp1, inp0, inp0, inp1,
1079 res0 = __msa_aver_u_b(res0, inp1);
1080 ST_UB(res0, dst);
1085 res0 = APPLY_VERT_QPEL_FILTER(inp2, inp1, inp0, inp0,
1088 res0 = __msa_aver_u_b(res0, inp2);
1089 ST_UB(res0, dst);
1094 res0 = APPLY_VERT_QPEL_FILTER(inp3, inp2, inp1, inp0,
1097 res0 = __msa_aver_u_b(res0, inp3);
1098 ST_UB(res0, dst);
1103 res0 = APPLY_VERT_QPEL_FILTER(inp4, inp3, inp2, inp1,
1106 res0 = __msa_aver_u_b(res0, inp4);
1107 ST_UB(res0, dst);
1110 res0 = APPLY_VERT_QPEL_FILTER(inp5, inp4, inp3, inp2,
1113 res0 = __msa_aver_u_b(res0, inp5);
1114 ST_UB(res0, dst);
1119 res0 = APPLY_VERT_QPEL_FILTER(inp6, inp5, inp4, inp3,
1122 res0 = __msa_aver_u_b(res0, inp6);
1123 ST_UB(res0, dst);
1126 res0 = APPLY_VERT_QPEL_FILTER(inp7, inp6, inp5, inp4,
1129 res0 = __msa_aver_u_b(res0, inp7);
1130 ST_UB(res0, dst);
1135 res0 = APPLY_VERT_QPEL_FILTER(inp8, inp7, inp6, inp5,
1138 res0 = __msa_aver_u_b(res0, inp8);
1139 ST_UB(res0, dst);
1142 res0 = APPLY_VERT_QPEL_FILTER(inp9, inp8, inp7, inp6,
1145 res0 = __msa_aver_u_b(res0, inp9);
1146 ST_UB(res0, dst);
1151 res0 = APPLY_VERT_QPEL_FILTER(inp10, inp9, inp8, inp7,
1154 res0 = __msa_aver_u_b(res0, inp10);
1155 ST_UB(res0, dst);
1158 res0 = APPLY_VERT_QPEL_FILTER(inp11, inp10, inp9, inp8,
1161 res0 = __msa_aver_u_b(res0, inp11);
1162 ST_UB(res0, dst);
1166 res0 = APPLY_VERT_QPEL_FILTER(inp12, inp11, inp10, inp9,
1169 res0 = __msa_aver_u_b(res0, inp12);
1170 ST_UB(res0, dst);
1173 res0 = APPLY_VERT_QPEL_FILTER(inp13, inp12, inp11, inp10,
1176 res0 = __msa_aver_u_b(res0, inp13);
1177 ST_UB(res0, dst);
1180 res0 = APPLY_VERT_QPEL_FILTER(inp14, inp13, inp12, inp11,
1183 res0 = __msa_aver_u_b(res0, inp14);
1184 ST_UB(res0, dst);
1187 res0 = APPLY_VERT_QPEL_FILTER(inp15, inp14, inp13, inp12,
1190 res0 = __msa_aver_u_b(res0, inp15);
1191 ST_UB(res0, dst);
1200 v16u8 res0, res1;
1209 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp0, inp0, inp1, inp2,
1221 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
1224 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp4, inp3, inp2, inp1,
1234 ST_D4(res0, res1, 0, 1, 0, 1, dst + 4 * dst_stride, dst_stride);
1244 v16u8 res0;
1253 res0 = APPLY_VERT_QPEL_FILTER(inp0, inp0, inp1, inp2,
1256 ST_UB(res0, dst);
1261 res0 = APPLY_VERT_QPEL_FILTER(inp1, inp0, inp0, inp1,
1264 ST_UB(res0, dst);
1269 res0 = APPLY_VERT_QPEL_FILTER(inp2, inp1, inp0, inp0,
1272 ST_UB(res0, dst);
1277 res0 = APPLY_VERT_QPEL_FILTER(inp3, inp2, inp1, inp0,
1280 ST_UB(res0, dst);
1285 res0 = APPLY_VERT_QPEL_FILTER(inp4, inp3, inp2, inp1,
1288 ST_UB(res0, dst);
1293 res0 = APPLY_VERT_QPEL_FILTER(inp5, inp4, inp3, inp2,
1296 ST_UB(res0, dst);
1301 res0 = APPLY_VERT_QPEL_FILTER(inp6, inp5, inp4, inp3,
1304 ST_UB(res0, dst);
1309 res0 = APPLY_VERT_QPEL_FILTER(inp7, inp6, inp5, inp4,
1312 ST_UB(res0, dst);
1317 res0 = APPLY_VERT_QPEL_FILTER(inp8, inp7, inp6, inp5,
1320 ST_UB(res0, dst);
1325 res0 = APPLY_VERT_QPEL_FILTER(inp9, inp8, inp7, inp6,
1328 ST_UB(res0, dst);
1333 res0 = APPLY_VERT_QPEL_FILTER(inp10, inp9, inp8, inp7,
1336 ST_UB(res0, dst);
1341 res0 = APPLY_VERT_QPEL_FILTER(inp11, inp10, inp9, inp8,
1344 ST_UB(res0, dst);
1348 res0 = APPLY_VERT_QPEL_FILTER(inp12, inp11, inp10, inp9,
1351 ST_UB(res0, dst);
1354 res0 = APPLY_VERT_QPEL_FILTER(inp13, inp12, inp11, inp10,
1357 ST_UB(res0, dst);
1360 res0 = APPLY_VERT_QPEL_FILTER(inp14, inp13, inp12, inp11,
1363 ST_UB(res0, dst);
1366 res0 = APPLY_VERT_QPEL_FILTER(inp15, inp14, inp13, inp12,
1369 ST_UB(res0, dst);
1379 v16u8 tmp0, tmp1, res0, res1;
1388 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp0, inp0, inp1, inp2,
1403 AVER_UB2_UB(res0, tmp0, res1, tmp1, res0, res1);
1404 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
1407 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp4, inp3, inp2, inp1,
1419 AVER_UB2_UB(res0, tmp0, res1, tmp1, res0, res1);
1420 ST_D4(res0, res1, 0, 1, 0, 1, dst + 4 * dst_stride, dst_stride);
1430 v16u8 res0;
1439 res0 = APPLY_VERT_QPEL_FILTER(inp0, inp0, inp1, inp2,
1442 res0 = __msa_aver_u_b(res0, inp1);
1443 ST_UB(res0, dst);
1448 res0 = APPLY_VERT_QPEL_FILTER(inp1, inp0, inp0, inp1,
1451 res0 = __msa_aver_u_b(res0, inp2);
1452 ST_UB(res0, dst);
1457 res0 = APPLY_VERT_QPEL_FILTER(inp2, inp1, inp0, inp0,
1460 res0 = __msa_aver_u_b(res0, inp3);
1461 ST_UB(res0, dst);
1466 res0 = APPLY_VERT_QPEL_FILTER(inp3, inp2, inp1, inp0,
1469 res0 = __msa_aver_u_b(res0, inp4);
1470 ST_UB(res0, dst);
1475 res0 = APPLY_VERT_QPEL_FILTER(inp4, inp3, inp2, inp1,
1478 res0 = __msa_aver_u_b(res0, inp5);
1479 ST_UB(res0, dst);
1484 res0 = APPLY_VERT_QPEL_FILTER(inp5, inp4, inp3, inp2,
1487 res0 = __msa_aver_u_b(res0, inp6);
1488 ST_UB(res0, dst);
1493 res0 = APPLY_VERT_QPEL_FILTER(inp6, inp5, inp4, inp3,
1496 res0 = __msa_aver_u_b(res0, inp7);
1497 ST_UB(res0, dst);
1502 res0 = APPLY_VERT_QPEL_FILTER(inp7, inp6, inp5, inp4,
1505 res0 = __msa_aver_u_b(res0, inp8);
1506 ST_UB(res0, dst);
1511 res0 = APPLY_VERT_QPEL_FILTER(inp8, inp7, inp6, inp5,
1514 res0 = __msa_aver_u_b(res0, inp9);
1515 ST_UB(res0, dst);
1520 res0 = APPLY_VERT_QPEL_FILTER(inp9, inp8, inp7, inp6,
1523 res0 = __msa_aver_u_b(res0, inp10);
1524 ST_UB(res0, dst);
1529 res0 = APPLY_VERT_QPEL_FILTER(inp10, inp9, inp8, inp7,
1532 res0 = __msa_aver_u_b(res0, inp11);
1533 ST_UB(res0, dst);
1538 res0 = APPLY_VERT_QPEL_FILTER(inp11, inp10, inp9, inp8,
1541 res0 = __msa_aver_u_b(res0, inp12);
1542 ST_UB(res0, dst);
1546 res0 = APPLY_VERT_QPEL_FILTER(inp12, inp11, inp10, inp9,
1549 res0 = __msa_aver_u_b(res0, inp13);
1550 ST_UB(res0, dst);
1553 res0 = APPLY_VERT_QPEL_FILTER(inp13, inp12, inp11, inp10,
1556 res0 = __msa_aver_u_b(res0, inp14);
1557 ST_UB(res0, dst);
1560 res0 = APPLY_VERT_QPEL_FILTER(inp14, inp13, inp12, inp11,
1563 res0 = __msa_aver_u_b(res0, inp15);
1564 ST_UB(res0, dst);
1567 res0 = APPLY_VERT_QPEL_FILTER(inp15, inp14, inp13, inp12,
1570 res0 = __msa_aver_u_b(res0, inp16);
1571 ST_UB(res0, dst);
1580 v16u8 tmp0, tmp1, res0, res1;
1589 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp0, inp1, inp2,
1603 res0 = __msa_ave_u_b(res0, tmp0);
1605 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
1608 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp4, inp3, inp2, inp1,
1620 res0 = __msa_ave_u_b(res0, tmp0);
1622 ST_D4(res0, res1, 0, 1, 0, 1, dst + 4 * dst_stride, dst_stride);
1632 v16u8 res0;
1639 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp0, inp0, inp1, inp2,
1642 res0 = __msa_ave_u_b(res0, inp0);
1643 ST_UB(res0, dst);
1648 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp1, inp0, inp0, inp1,
1651 res0 = __msa_ave_u_b(res0, inp1);
1652 ST_UB(res0, dst);
1657 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp2, inp1, inp0, inp0,
1660 res0 = __msa_ave_u_b(res0, inp2);
1661 ST_UB(res0, dst);
1666 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp3, inp2, inp1, inp0,
1669 res0 = __msa_ave_u_b(res0, inp3);
1670 ST_UB(res0, dst);
1675 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp4, inp3, inp2, inp1,
1678 res0 = __msa_ave_u_b(res0, inp4);
1679 ST_UB(res0, dst);
1684 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp5, inp4, inp3, inp2,
1687 res0 = __msa_ave_u_b(res0, inp5);
1688 ST_UB(res0, dst);
1693 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp6, inp5, inp4, inp3,
1696 res0 = __msa_ave_u_b(res0, inp6);
1697 ST_UB(res0, dst);
1702 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp7, inp6, inp5, inp4,
1705 res0 = __msa_ave_u_b(res0, inp7);
1706 ST_UB(res0, dst);
1711 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp8, inp7, inp6, inp5,
1714 res0 = __msa_ave_u_b(res0, inp8);
1715 ST_UB(res0, dst);
1720 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp9, inp8, inp7, inp6,
1723 res0 = __msa_ave_u_b(res0, inp9);
1724 ST_UB(res0, dst);
1729 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp10, inp9, inp8, inp7,
1732 res0 = __msa_ave_u_b(res0, inp10);
1733 ST_UB(res0, dst);
1738 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp11, inp10, inp9, inp8,
1741 res0 = __msa_ave_u_b(res0, inp11);
1742 ST_UB(res0, dst);
1746 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp12, inp11, inp10, inp9,
1749 res0 = __msa_ave_u_b(res0, inp12);
1750 ST_UB(res0, dst);
1753 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp13, inp12, inp11, inp10,
1756 res0 = __msa_ave_u_b(res0, inp13);
1757 ST_UB(res0, dst);
1760 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp14, inp13, inp12, inp11,
1763 res0 = __msa_ave_u_b(res0, inp14);
1764 ST_UB(res0, dst);
1767 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp15, inp14, inp13, inp12,
1770 res0 = __msa_ave_u_b(res0, inp15);
1771 ST_UB(res0, dst);
1781 v16u8 res0, res1;
1790 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp0, inp1, inp2,
1802 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
1805 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp4, inp3, inp2, inp1,
1815 ST_D4(res0, res1, 0, 1, 0, 1, dst + 4 * dst_stride, dst_stride);
1825 v16u8 res0;
1832 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp0, inp0, inp1, inp2,
1835 ST_UB(res0, dst);
1840 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp1, inp0, inp0, inp1,
1843 ST_UB(res0, dst);
1848 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp2, inp1, inp0, inp0,
1851 ST_UB(res0, dst);
1856 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp3, inp2, inp1, inp0,
1859 ST_UB(res0, dst);
1864 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp4, inp3, inp2, inp1,
1867 ST_UB(res0, dst);
1872 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp5, inp4, inp3, inp2,
1875 ST_UB(res0, dst);
1880 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp6, inp5, inp4, inp3,
1883 ST_UB(res0, dst);
1888 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp7, inp6, inp5, inp4,
1891 ST_UB(res0, dst);
1896 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp8, inp7, inp6, inp5,
1899 ST_UB(res0, dst);
1904 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp9, inp8, inp7, inp6,
1907 ST_UB(res0, dst);
1912 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp10, inp9, inp8, inp7,
1915 ST_UB(res0, dst);
1920 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp11, inp10, inp9, inp8,
1923 ST_UB(res0, dst);
1927 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp12, inp11, inp10, inp9,
1930 ST_UB(res0, dst);
1933 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp13, inp12, inp11, inp10,
1936 ST_UB(res0, dst);
1939 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp14, inp13, inp12, inp11,
1942 ST_UB(res0, dst);
1945 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp15, inp14, inp13, inp12,
1948 ST_UB(res0, dst);
1957 v16u8 tmp0, tmp1, res0, res1;
1966 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp0, inp1, inp2,
1980 res0 = __msa_ave_u_b(res0, tmp0);
1982 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
1985 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(inp4, inp3, inp2, inp1,
1997 res0 = __msa_ave_u_b(res0, tmp0);
1999 ST_D4(res0, res1, 0, 1, 0, 1, dst + 4 * dst_stride, dst_stride);
2009 v16u8 res0;
2016 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp0, inp0, inp1, inp2,
2019 res0 = __msa_ave_u_b(res0, inp1);
2020 ST_UB(res0, dst);
2025 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp1, inp0, inp0, inp1,
2028 res0 = __msa_ave_u_b(res0, inp2);
2029 ST_UB(res0, dst);
2034 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp2, inp1, inp0, inp0,
2037 res0 = __msa_ave_u_b(res0, inp3);
2038 ST_UB(res0, dst);
2043 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp3, inp2, inp1, inp0,
2046 res0 = __msa_ave_u_b(res0, inp4);
2047 ST_UB(res0, dst);
2052 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp4, inp3, inp2, inp1,
2055 res0 = __msa_ave_u_b(res0, inp5);
2056 ST_UB(res0, dst);
2061 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp5, inp4, inp3, inp2,
2064 res0 = __msa_ave_u_b(res0, inp6);
2065 ST_UB(res0, dst);
2070 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp6, inp5, inp4, inp3,
2073 res0 = __msa_ave_u_b(res0, inp7);
2074 ST_UB(res0, dst);
2079 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp7, inp6, inp5, inp4,
2082 res0 = __msa_ave_u_b(res0, inp8);
2083 ST_UB(res0, dst);
2088 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp8, inp7, inp6, inp5,
2091 res0 = __msa_ave_u_b(res0, inp9);
2092 ST_UB(res0, dst);
2097 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp9, inp8, inp7, inp6,
2100 res0 = __msa_ave_u_b(res0, inp10);
2101 ST_UB(res0, dst);
2106 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp10, inp9, inp8, inp7,
2109 res0 = __msa_ave_u_b(res0, inp11);
2110 ST_UB(res0, dst);
2115 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp11, inp10, inp9, inp8,
2118 res0 = __msa_ave_u_b(res0, inp12);
2119 ST_UB(res0, dst);
2123 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp12, inp11, inp10, inp9,
2126 res0 = __msa_ave_u_b(res0, inp13);
2127 ST_UB(res0, dst);
2130 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp13, inp12, inp11, inp10,
2133 res0 = __msa_ave_u_b(res0, inp14);
2134 ST_UB(res0, dst);
2137 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp14, inp13, inp12, inp11,
2140 res0 = __msa_ave_u_b(res0, inp15);
2141 ST_UB(res0, dst);
2144 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER(inp15, inp14, inp13, inp12,
2147 res0 = __msa_ave_u_b(res0, inp16);
2148 ST_UB(res0, dst);
2158 v16u8 tmp0, tmp1, res0, res1;
2167 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp0, inp0, inp1, inp2,
2186 AVER_UB2_UB(res0, tmp0, res1, tmp1, res0, res1);
2187 AVER_UB2_UB(dst0, res0, dst2, res1, res0, res1);
2188 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
2192 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp4, inp3, inp2, inp1,
2208 AVER_UB2_UB(res0, tmp0, res1, tmp1, res0, res1);
2209 AVER_UB2_UB(dst0, res0, dst2, res1, res0, res1);
2210 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
2220 v16u8 res0, res1, dst0, dst1;
2227 res0 = APPLY_VERT_QPEL_FILTER(inp0, inp0, inp1, inp2,
2238 AVER_UB2_UB(res0, inp0, res1, inp1, res0, res1);
2239 AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2240 ST_UB2(res0, res1, dst, dst_stride);
2245 res0 = APPLY_VERT_QPEL_FILTER(inp2, inp1, inp0, inp0,
2256 AVER_UB2_UB(res0, inp2, res1, inp3, res0, res1);
2257 AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2258 ST_UB2(res0, res1, dst, dst_stride);
2263 res0 = APPLY_VERT_QPEL_FILTER(inp4, inp3, inp2, inp1,
2271 AVER_UB2_UB(res0, inp4, res1, inp5, res0, res1);
2272 AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2273 ST_UB2(res0, res1, dst, dst_stride);
2278 res0 = APPLY_VERT_QPEL_FILTER(inp6, inp5, inp4, inp3,
2286 AVER_UB2_UB(res0, inp6, res1, inp7, res0, res1);
2287 AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2288 ST_UB2(res0, res1, dst, dst_stride);
2293 res0 = APPLY_VERT_QPEL_FILTER(inp8, inp7, inp6, inp5,
2300 AVER_UB2_UB(res0, inp8, res1, inp9, res0, res1);
2301 AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2302 ST_UB2(res0, res1, dst, dst_stride);
2307 res0 = APPLY_VERT_QPEL_FILTER(inp10, inp9, inp8, inp7,
2315 AVER_UB2_UB(res0, inp10, res1, inp11, res0, res1);
2316 AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2317 ST_UB2(res0, res1, dst, dst_stride);
2321 res0 = APPLY_VERT_QPEL_FILTER(inp12, inp11, inp10, inp9,
2328 AVER_UB2_UB(res0, inp12, res1, inp13, res0, res1);
2329 AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2330 ST_UB2(res0, res1, dst, dst_stride);
2333 res0 = APPLY_VERT_QPEL_FILTER(inp14, inp13, inp12, inp11,
2340 AVER_UB2_UB(res0, inp14, res1, inp15, res0, res1);
2341 AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2342 ST_UB2(res0, res1, dst, dst_stride);
2352 v16u8 res0, res1;
2361 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp0, inp0, inp1, inp2,
2376 AVER_UB2_UB(dst0, res0, dst2, res1, res0, res1);
2377 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
2381 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp4, inp3, inp2, inp1,
2394 AVER_UB2_UB(dst0, res0, dst2, res1, res0, res1);
2395 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
2405 v16u8 res0, res1, dst0, dst1;
2412 res0 = APPLY_VERT_QPEL_FILTER(inp0, inp0, inp1, inp2,
2421 AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2422 ST_UB2(res0, res1, dst, dst_stride);
2427 res0 = APPLY_VERT_QPEL_FILTER(inp2, inp1, inp0, inp0,
2436 AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2437 ST_UB2(res0, res1, dst, dst_stride);
2442 res0 = APPLY_VERT_QPEL_FILTER(inp4, inp3, inp2, inp1,
2451 AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2452 ST_UB2(res0, res1, dst, dst_stride);
2457 res0 = APPLY_VERT_QPEL_FILTER(inp6, inp5, inp4, inp3,
2466 AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2467 ST_UB2(res0, res1, dst, dst_stride);
2472 res0 = APPLY_VERT_QPEL_FILTER(inp8, inp7, inp6, inp5,
2481 AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2482 ST_UB2(res0, res1, dst, dst_stride);
2487 res0 = APPLY_VERT_QPEL_FILTER(inp10, inp9, inp8, inp7,
2496 AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2497 ST_UB2(res0, res1, dst, dst_stride);
2501 res0 = APPLY_VERT_QPEL_FILTER(inp12, inp11, inp10, inp9,
2508 AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2509 ST_UB2(res0, res1, dst, dst_stride);
2512 res0 = APPLY_VERT_QPEL_FILTER(inp14, inp13, inp12, inp11,
2519 AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2520 ST_UB2(res0, res1, dst, dst_stride);
2530 v16u8 tmp0, tmp1, res0, res1;
2539 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp0, inp0, inp1, inp2,
2556 AVER_UB2_UB(res0, tmp0, res1, tmp1, res0, res1);
2557 AVER_UB2_UB(dst0, res0, dst2, res1, res0, res1);
2558 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
2562 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(inp4, inp3, inp2, inp1,
2577 AVER_UB2_UB(res0, tmp0, res1, tmp1, res0, res1);
2578 AVER_UB2_UB(dst0, res0, dst2, res1, res0, res1);
2579 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
2589 v16u8 res0, res1, dst0, dst1;
2596 res0 = APPLY_VERT_QPEL_FILTER(inp0, inp0, inp1, inp2,
2605 AVER_UB2_UB(res0, inp1, res1, inp2, res0, res1);
2606 AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2607 ST_UB2(res0, res1, dst, dst_stride);
2612 res0 = APPLY_VERT_QPEL_FILTER(inp2, inp1, inp0, inp0,
2621 AVER_UB2_UB(res0, inp3, res1, inp4, res0, res1);
2622 AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2623 ST_UB2(res0, res1, dst, dst_stride);
2628 res0 = APPLY_VERT_QPEL_FILTER(inp4, inp3, inp2, inp1,
2637 AVER_UB2_UB(res0, inp5, res1, inp6, res0, res1);
2638 AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2639 ST_UB2(res0, res1, dst, dst_stride);
2644 res0 = APPLY_VERT_QPEL_FILTER(inp6, inp5, inp4, inp3,
2653 AVER_UB2_UB(res0, inp7, res1, inp8, res0, res1);
2654 AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2655 ST_UB2(res0, res1, dst, dst_stride);
2660 res0 = APPLY_VERT_QPEL_FILTER(inp8, inp7, inp6, inp5,
2669 AVER_UB2_UB(res0, inp9, res1, inp10, res0, res1);
2670 AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2671 ST_UB2(res0, res1, dst, dst_stride);
2676 res0 = APPLY_VERT_QPEL_FILTER(inp10, inp9, inp8, inp7,
2685 AVER_UB2_UB(res0, inp11, res1, inp12, res0, res1);
2686 AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2687 ST_UB2(res0, res1, dst, dst_stride);
2691 res0 = APPLY_VERT_QPEL_FILTER(inp12, inp11, inp10, inp9,
2698 AVER_UB2_UB(res0, inp13, res1, inp14, res0, res1);
2699 AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2700 ST_UB2(res0, res1, dst, dst_stride);
2703 res0 = APPLY_VERT_QPEL_FILTER(inp14, inp13, inp12, inp11,
2710 AVER_UB2_UB(res0, inp15, res1, inp16, res0, res1);
2711 AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1);
2712 ST_UB2(res0, res1, dst, dst_stride);
2782 v16u8 res0, res1, avg0, avg1;
2795 res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
2799 horiz0 = __msa_ave_u_b(inp0, res0);
2811 res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
2815 horiz4 = __msa_ave_u_b(inp0, res0);
2817 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
2823 res0 = __msa_ave_u_b(avg0, res0);
2824 ST_D2(res0, 0, 1, dst, dst_stride);
2836 res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE_1ROW(inp0, mask0, mask1,
2839 horiz8 = __msa_ave_u_b(inp0, res0);
2847 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
2856 res0 = __msa_ave_u_b(avg0, res0);
2862 ST_D2(res0, 0, 1, dst, dst_stride);
2932 v16u8 res0, res1, avg0, avg1;
2962 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
2968 res0 = __msa_ave_u_b(avg0, res0);
2969 ST_D2(res0, 0, 1, dst, dst_stride);
2990 res0 = __msa_ave_u_b(avg0, res0);
2994 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
3000 res0 = __msa_ave_u_b(avg0, res0);
3001 ST_D2(res0, 0, 1, dst, dst_stride);
3081 v16u8 res0, res1, avg0, avg1;
3094 res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
3100 horiz0 = __msa_ave_u_b(inp0, res0);
3114 res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
3120 horiz4 = __msa_ave_u_b(inp0, res0);
3122 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
3128 res0 = __msa_ave_u_b(avg0, res0);
3129 ST_D2(res0, 0, 1, dst, dst_stride);
3143 res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE_1ROW(inp0, mask0, mask1,
3147 horiz8 = __msa_ave_u_b(inp0, res0);
3158 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
3164 res0 = __msa_ave_u_b(avg0, res0);
3165 ST_D2(res0, 0, 1, dst, dst_stride);
3195 v16u8 res0, res1;
3208 res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
3212 horiz0 = __msa_ave_u_b(inp0, res0);
3224 res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
3228 horiz4 = __msa_ave_u_b(inp0, res0);
3230 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
3238 ST_D2(res0, 0, 1, dst, dst_stride);
3248 res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE_1ROW(inp0, mask0, mask1,
3251 horiz8 = __msa_ave_u_b(inp0, res0);
3257 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
3262 ST_D4(res1, res0, 0, 1, 0, 1, dst, dst_stride);
3290 v16u8 res0, res1;
3319 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
3326 ST_D2(res0, 0, 1, dst, dst_stride);
3342 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
3356 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
3376 v16u8 res0, res1;
3389 res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
3395 horiz0 = __msa_ave_u_b(inp0, res0);
3409 res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
3415 horiz4 = __msa_ave_u_b(inp0, res0);
3417 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
3424 ST_D2(res0, 0, 1, dst, dst_stride);
3436 res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE_1ROW(inp0, mask0, mask1,
3440 horiz8 = __msa_ave_u_b(inp0, res0);
3446 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
3459 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
3479 v16u8 res0, res1, avg0, avg1;
3492 res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
3496 horiz0 = __msa_ave_u_b(inp0, res0);
3508 res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
3512 horiz4 = __msa_ave_u_b(inp0, res0);
3514 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
3520 res0 = __msa_ave_u_b(avg0, res0);
3521 ST_D2(res0, 0, 1, dst, dst_stride);
3533 res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE_1ROW(inp0, mask0, mask1,
3536 horiz8 = __msa_ave_u_b(inp0, res0);
3544 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
3553 res0 = __msa_ave_u_b(avg0, res0);
3560 ST_D2(res0, 0, 1, dst, dst_stride);
3585 v16u8 res0, res1, avg0, avg1;
3614 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
3620 res0 = __msa_ave_u_b(avg0, res0);
3623 ST_D2(res0, 0, 1, dst, dst_stride);
3644 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
3650 res0 = __msa_ave_u_b(avg0, res0);
3658 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
3678 v16u8 res0, res1, avg0, avg1;
3691 res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
3697 horiz0 = __msa_ave_u_b(inp0, res0);
3711 res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE(inp0, inp1, mask0, mask1,
3717 horiz4 = __msa_ave_u_b(inp0, res0);
3719 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
3725 res0 = __msa_ave_u_b(avg0, res0);
3726 ST_D2(res0, 0, 1, dst, dst_stride);
3750 res0 = APPLY_HORIZ_QPEL_NO_ROUND_FILTER_8BYTE_1ROW(inp0, mask0, mask1,
3754 horiz8 = __msa_ave_u_b(inp0, res0);
3755 res0 = APPLY_VERT_QPEL_NO_ROUND_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
3766 res0 = __msa_ave_u_b(avg0, res0);
3769 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
3838 v16u8 res0, res1, avg0, avg1;
3851 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
3856 horiz0 = __msa_aver_u_b(inp0, res0);
3863 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
3866 horiz4 = __msa_aver_u_b(inp0, res0);
3868 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
3874 res0 = __msa_aver_u_b(avg0, res0);
3875 ST_D2(res0, 0, 1, dst, dst_stride);
3894 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0, mask0, mask1, mask2, mask3,
3896 horiz8 = __msa_aver_u_b(inp0, res0);
3900 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
3906 res0 = __msa_aver_u_b(avg0, res0);
3914 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
3978 v16u8 res0, res1, avg0, avg1;
4007 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
4013 res0 = __msa_aver_u_b(avg0, res0);
4014 ST_D2(res0, 0, 1, dst, dst_stride);
4034 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
4043 res0 = __msa_aver_u_b(avg0, res0);
4051 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
4120 v16u8 res0, res1, avg0, avg1;
4133 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
4140 horiz0 = __msa_aver_u_b(inp0, res0);
4149 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
4156 horiz4 = __msa_aver_u_b(inp0, res0);
4163 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
4169 res0 = __msa_aver_u_b(avg0, res0);
4175 ST_D2(res0, 0, 1, dst, dst_stride);
4179 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0, mask0, mask1, mask2, mask3,
4184 horiz8 = __msa_aver_u_b(inp0, res0);
4185 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
4194 res0 = __msa_aver_u_b(avg0, res0);
4202 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
4222 v16u8 res0, res1;
4235 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
4238 horiz0 = __msa_aver_u_b(inp0, res0);
4250 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
4253 horiz4 = __msa_aver_u_b(inp0, res0);
4255 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
4260 ST_D2(res0, 0, 1, dst, dst_stride);
4276 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0, mask0, mask1, mask2, mask3,
4278 horiz8 = __msa_aver_u_b(inp0, res0);
4279 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
4292 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
4310 v16u8 res0, res1;
4339 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
4344 ST_D2(res0, 0, 1, dst, dst_stride);
4365 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
4375 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
4395 v16u8 res0, res1;
4409 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
4416 horiz0 = __msa_aver_u_b(inp0, res0);
4425 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
4432 horiz4 = __msa_aver_u_b(inp0, res0);
4440 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0, mask0, mask1, mask2, mask3,
4443 horiz8 = __msa_aver_u_b(inp0, res0);
4444 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
4454 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
4457 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
4467 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
4487 v16u8 res0, res1, avg0, avg1;
4501 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
4506 horiz0 = __msa_aver_u_b(inp0, res0);
4514 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
4517 horiz4 = __msa_aver_u_b(inp0, res0);
4519 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
4525 res0 = __msa_aver_u_b(avg0, res0);
4526 ST_D2(res0, 0, 1, dst, dst_stride);
4537 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0, mask0, mask1, mask2, mask3,
4539 horiz8 = __msa_aver_u_b(inp0, res0);
4547 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
4556 res0 = __msa_aver_u_b(avg0, res0);
4564 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
4584 v16u8 res0, res1, avg0, avg1;
4614 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
4620 res0 = __msa_aver_u_b(avg0, res0);
4621 ST_D2(res0, 0, 1, dst, dst_stride);
4641 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
4649 res0 = __msa_aver_u_b(avg0, res0);
4658 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
4677 v16u8 res0, res1, avg0, avg1;
4690 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1,
4696 horiz0 = __msa_aver_u_b(inp0, res0);
4707 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
4712 horiz4 = __msa_aver_u_b(inp0, res0);
4714 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
4720 res0 = __msa_aver_u_b(avg0, res0);
4723 ST_D2(res0, 0, 1, dst, dst_stride);
4741 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0, mask0, mask1, mask2, mask3,
4744 horiz8 = __msa_aver_u_b(inp0, res0);
4745 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
4754 res0 = __msa_aver_u_b(avg0, res0);
4762 ST_D4(res0, res1, 0, 1, 0, 1, dst, dst_stride);
4782 v16u8 res0, res1, avg0, avg1;
4796 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
4801 horiz0 = __msa_aver_u_b(inp0, res0);
4810 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
4813 horiz4 = __msa_aver_u_b(inp0, res0);
4817 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
4822 res0 = __msa_aver_u_b(avg0, res0);
4824 res0 = __msa_aver_u_b(avg0, res0);
4825 ST_D2(res0, 0, 1, dst, dst_stride);
4849 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0, mask0, mask1, mask2, mask3,
4851 horiz8 = __msa_aver_u_b(inp0, res0);
4854 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
4859 res0 = __msa_aver_u_b(avg0, res0);
4861 res0 = __msa_aver_u_b(avg0, res0);
4862 ST_D2(res0, 0, 1, dst, dst_stride);
4895 v16u8 res0, res1, avg0, avg1;
4927 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
4932 res0 = __msa_aver_u_b(avg0, res0);
4934 res0 = __msa_aver_u_b(avg0, res0);
4935 ST_D2(res0, 0, 1, dst, dst_stride);
4963 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
4968 res0 = __msa_aver_u_b(avg0, res0);
4970 res0 = __msa_aver_u_b(avg0, res0);
4971 ST_D2(res0, 0, 1, dst, dst_stride);
5004 v16u8 res0, res1, avg0, avg1;
5018 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
5026 horiz0 = __msa_aver_u_b(inp0, res0);
5037 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
5043 horiz4 = __msa_aver_u_b(inp0, res0);
5047 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
5052 res0 = __msa_aver_u_b(avg0, res0);
5054 res0 = __msa_aver_u_b(avg0, res0);
5055 ST_D2(res0, 0, 1, dst, dst_stride);
5082 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0, mask0, mask1, mask2, mask3,
5085 horiz8 = __msa_aver_u_b(inp0, res0);
5088 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
5093 res0 = __msa_aver_u_b(avg0, res0);
5095 res0 = __msa_aver_u_b(avg0, res0);
5096 ST_D2(res0, 0, 1, dst, dst_stride);
5129 v16u8 res0, res1, avg0, avg1;
5143 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
5148 horiz0 = __msa_aver_u_b(inp0, res0);
5157 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
5160 horiz4 = __msa_aver_u_b(inp0, res0);
5163 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
5169 res0 = __msa_aver_u_b(avg0, res0);
5170 ST_D2(res0, 0, 1, dst, dst_stride);
5192 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0, mask0, mask1, mask2, mask3,
5194 horiz8 = __msa_aver_u_b(inp0, res0);
5196 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
5202 res0 = __msa_aver_u_b(avg0, res0);
5203 ST_D2(res0, 0, 1, dst, dst_stride);
5231 v16u8 res0, res1, avg0, avg1;
5272 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
5278 res0 = __msa_aver_u_b(avg0, res0);
5279 ST_D2(res0, 0, 1, dst, dst_stride);
5294 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
5300 res0 = __msa_aver_u_b(avg0, res0);
5301 ST_D2(res0, 0, 1, dst, dst_stride);
5332 v16u8 res0, res1, avg0, avg1;
5346 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
5353 horiz0 = __msa_aver_u_b(inp0, res0);
5364 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
5370 horiz4 = __msa_aver_u_b(inp0, res0);
5373 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
5379 res0 = __msa_aver_u_b(avg0, res0);
5380 ST_D2(res0, 0, 1, dst, dst_stride);
5405 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0, mask0, mask1, mask2, mask3,
5408 horiz8 = __msa_aver_u_b(inp0, res0);
5410 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
5416 res0 = __msa_aver_u_b(avg0, res0);
5417 ST_D2(res0, 0, 1, dst, dst_stride);
5448 v16u8 res0, res1, avg0, avg1;
5463 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
5466 horiz0 = __msa_aver_u_b(inp0, res0);
5478 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
5481 horiz4 = __msa_aver_u_b(inp0, res0);
5483 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
5489 res0 = __msa_aver_u_b(avg0, res0);
5491 res0 = __msa_aver_u_b(avg0, res0);
5492 ST_D2(res0, 0, 1, dst, dst_stride);
5516 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0, mask0, mask1, mask2, mask3,
5518 horiz8 = __msa_aver_u_b(inp0, res0);
5519 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1,
5530 res0 = __msa_aver_u_b(avg0, res0);
5533 res0 = __msa_aver_u_b(avg0, res0);
5534 ST_D2(res0, 0, 1, dst, dst_stride);
5562 v16u8 res0, res1, avg0, avg1;
5593 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2,
5599 res0 = __msa_aver_u_b(avg0, res0);
5601 res0 = __msa_aver_u_b(avg0, res0);
5602 ST_D2(res0, 0, 1, dst, dst_stride);
5628 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1, horiz5,
5637 res0 = __msa_aver_u_b(avg0, res0);
5640 res0 = __msa_aver_u_b(avg0, res0);
5641 ST_D2(res0, 0, 1, dst, dst_stride);
5669 v16u8 res0, res1, avg0, avg1;
5683 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
5690 horiz0 = __msa_aver_u_b(inp0, res0);
5701 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE(inp0, inp1, mask0, mask1, mask2, mask3,
5706 horiz4 = __msa_aver_u_b(inp0, res0);
5710 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz0, horiz0, horiz1, horiz2, horiz1,
5714 res0 = __msa_aver_u_b(avg0, res0);
5716 res0 = __msa_aver_u_b(avg0, res0);
5717 ST_D2(res0, 0, 1, dst, dst_stride);
5742 res0 = APPLY_HORIZ_QPEL_FILTER_8BYTE_1ROW(inp0, mask0, mask1, mask2, mask3,
5745 horiz8 = __msa_aver_u_b(inp0, res0);
5748 res0 = APPLY_VERT_QPEL_FILTER_8BYTE(horiz4, horiz3, horiz2, horiz1, horiz5,
5752 res0 = __msa_aver_u_b(avg0, res0);
5754 res0 = __msa_aver_u_b(avg0, res0);
5755 ST_D2(res0, 0, 1, dst, dst_stride);