Lines Matching refs:dst
37 #define MMI_PCMPGTUB(dst, src1, src2) \
41 "pxor "#dst", %[db_2], %[db_1] \n\t"
193 #define PUT_VP8_EPEL4_H6_MMI(src, dst) \
227 MMI_SWC1(%[ftmp1], dst, 0x00)
230 #define PUT_VP8_EPEL4_H4_MMI(src, dst) \
255 MMI_SWC1(%[ftmp1], dst, 0x00)
258 #define PUT_VP8_EPEL4_V6_MMI(src, src1, dst, srcstride) \
298 MMI_SWC1(%[ftmp1], dst, 0x00)
301 #define PUT_VP8_EPEL4_V4_MMI(src, src1, dst, srcstride) \
329 MMI_SWC1(%[ftmp1], dst, 0x00)
332 #define PUT_VP8_EPEL8_H6_MMI(src, dst) \
386 MMI_SDC1(%[ftmp1], dst, 0x00)
389 #define PUT_VP8_EPEL8_H4_MMI(src, dst) \
427 MMI_SDC1(%[ftmp1], dst, 0x00)
430 #define PUT_VP8_EPEL8_V6_MMI(src, src1, dst, srcstride) \
489 MMI_SDC1(%[ftmp1], dst, 0x00)
492 #define PUT_VP8_EPEL8_V4_MMI(src, src1, dst, srcstride) \
533 MMI_SDC1(%[ftmp1], dst, 0x00)
536 #define PUT_VP8_BILINEAR8_H_MMI(src, dst) \
557 MMI_SDC1(%[ftmp1], dst, 0x00)
560 #define PUT_VP8_BILINEAR4_H_MMI(src, dst) \
574 MMI_SWC1(%[ftmp1], dst, 0x00)
577 #define PUT_VP8_BILINEAR8_V_MMI(src, src1, dst, sstride) \
599 MMI_SDC1(%[ftmp1], dst, 0x00)
602 #define PUT_VP8_BILINEAR4_V_MMI(src, src1, dst, sstride) \
617 MMI_SWC1(%[ftmp1], dst, 0x00)
786 static av_always_inline void vp8_v_loop_filter8_mmi(uint8_t *dst,
797 /* Get data from dst */
798 MMI_ULDC1(%[q0], %[dst], 0x0)
799 PTR_SUBU "%[tmp0], %[dst], %[stride] \n\t"
807 PTR_ADDU "%[tmp0], %[dst], %[stride] \n\t"
814 /* Move to dst */
815 MMI_USDC1(%[q0], %[dst], 0x0)
816 PTR_SUBU "%[tmp0], %[dst], %[stride] \n\t"
822 PTR_ADDU "%[tmp0], %[dst], %[stride] \n\t"
836 [dst]"+&r"(dst), [tmp0]"=&r"(tmp[0]),
845 static av_always_inline void vp8_v_loop_filter8_inner_mmi(uint8_t *dst,
851 if (vp8_normal_limit(dst + i * 1, stride, flim_E, flim_I)) {
852 int hv = hev(dst + i * 1, stride, hev_thresh);
854 vp8_filter_common_is4tap(dst + i * 1, stride);
856 vp8_filter_common_isnot4tap(dst + i * 1, stride);
860 static av_always_inline void vp8_h_loop_filter8_mmi(uint8_t *dst,
871 /* Get data from dst */
872 MMI_ULDC1(%[p3], %[dst], -0x04)
873 PTR_ADDU "%[tmp0], %[dst], %[stride] \n\t"
896 /* Move to dst */
897 MMI_USDC1(%[p3], %[dst], -0x04)
898 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
899 MMI_USDC1(%[p2], %[dst], -0x04)
900 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
901 MMI_USDC1(%[p1], %[dst], -0x04)
902 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
903 MMI_USDC1(%[p0], %[dst], -0x04)
904 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
905 MMI_USDC1(%[q0], %[dst], -0x04)
906 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
907 MMI_USDC1(%[q1], %[dst], -0x04)
908 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
909 MMI_USDC1(%[q2], %[dst], -0x04)
910 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
911 MMI_USDC1(%[q3], %[dst], -0x04)
922 [dst]"+&r"(dst), [tmp0]"=&r"(tmp[0]),
931 static av_always_inline void vp8_h_loop_filter8_inner_mmi(uint8_t *dst,
937 if (vp8_normal_limit(dst + i * stride, 1, flim_E, flim_I)) {
938 int hv = hev(dst + i * stride, 1, hev_thresh);
940 vp8_filter_common_is4tap(dst + i * stride, 1);
942 vp8_filter_common_isnot4tap(dst + i * stride, 1);
1104 void ff_vp8_idct_add_mmi(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
1232 : [dst0]"r"(dst), [dst1]"r"(dst+stride),
1233 [dst2]"r"(dst+2*stride), [dst3]"r"(dst+3*stride),
1264 dst[0] = av_clip_uint8(dst[0] + ((t0 + t3 + 4) >> 3));
1265 dst[1] = av_clip_uint8(dst[1] + ((t1 + t2 + 4) >> 3));
1266 dst[2] = av_clip_uint8(dst[2] + ((t1 - t2 + 4) >> 3));
1267 dst[3] = av_clip_uint8(dst[3] + ((t0 - t3 + 4) >> 3));
1268 dst += stride;
1273 void ff_vp8_idct_dc_add_mmi(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
1311 : [dst0]"r"(dst), [dst1]"r"(dst+stride),
1312 [dst2]"r"(dst+2*stride), [dst3]"r"(dst+3*stride),
1322 dst[0] = av_clip_uint8(dst[0] + dc);
1323 dst[1] = av_clip_uint8(dst[1] + dc);
1324 dst[2] = av_clip_uint8(dst[2] + dc);
1325 dst[3] = av_clip_uint8(dst[3] + dc);
1326 dst += stride;
1331 void ff_vp8_idct_dc_add4y_mmi(uint8_t *dst, int16_t block[4][16],
1334 ff_vp8_idct_dc_add_mmi(dst + 0, block[0], stride);
1335 ff_vp8_idct_dc_add_mmi(dst + 4, block[1], stride);
1336 ff_vp8_idct_dc_add_mmi(dst + 8, block[2], stride);
1337 ff_vp8_idct_dc_add_mmi(dst + 12, block[3], stride);
1340 void ff_vp8_idct_dc_add4uv_mmi(uint8_t *dst, int16_t block[4][16],
1343 ff_vp8_idct_dc_add_mmi(dst + stride * 0 + 0, block[0], stride);
1344 ff_vp8_idct_dc_add_mmi(dst + stride * 0 + 4, block[1], stride);
1345 ff_vp8_idct_dc_add_mmi(dst + stride * 4 + 0, block[2], stride);
1346 ff_vp8_idct_dc_add_mmi(dst + stride * 4 + 4, block[3], stride);
1350 void ff_vp8_v_loop_filter16_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E,
1353 vp8_v_loop_filter8_mmi(dst, stride, flim_E, flim_I, hev_thresh);
1354 vp8_v_loop_filter8_mmi(dst + 8, stride, flim_E, flim_I, hev_thresh);
1357 void ff_vp8_h_loop_filter16_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E,
1360 vp8_h_loop_filter8_mmi(dst, stride, flim_E, flim_I, hev_thresh);
1361 vp8_h_loop_filter8_mmi(dst + 8 * stride, stride, flim_E, flim_I,
1380 void ff_vp8_v_loop_filter16_inner_mmi(uint8_t *dst, ptrdiff_t stride,
1386 if (vp8_normal_limit(dst + i * 1, stride, flim_E, flim_I)) {
1387 int hv = hev(dst + i * 1, stride, hev_thresh);
1389 vp8_filter_common_is4tap(dst + i * 1, stride);
1391 vp8_filter_common_isnot4tap(dst + i * 1, stride);
1395 void ff_vp8_h_loop_filter16_inner_mmi(uint8_t *dst, ptrdiff_t stride,
1401 if (vp8_normal_limit(dst + i * stride, 1, flim_E, flim_I)) {
1402 int hv = hev(dst + i * stride, 1, hev_thresh);
1404 vp8_filter_common_is4tap(dst + i * stride, 1);
1406 vp8_filter_common_isnot4tap(dst + i * stride, 1);
1424 void ff_vp8_v_loop_filter_simple_mmi(uint8_t *dst, ptrdiff_t stride, int flim)
1429 if (vp8_simple_limit(dst + i, stride, flim))
1430 vp8_filter_common_is4tap(dst + i, stride);
1433 void ff_vp8_h_loop_filter_simple_mmi(uint8_t *dst, ptrdiff_t stride, int flim)
1438 if (vp8_simple_limit(dst + i * stride, 1, flim))
1439 vp8_filter_common_is4tap(dst + i * stride, 1);
1442 void ff_put_vp8_pixels16_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
1460 PTR_ADDU "%[addr1], %[dst], %[dststride] \n\t"
1461 MMI_SDC1(%[ftmp0], %[dst], 0x00)
1462 "sdl %[tmp0], 0x0f(%[dst]) \n\t"
1463 "sdr %[tmp0], 0x08(%[dst]) \n\t"
1469 PTR_ADDU "%[dst], %[addr1], %[dststride] \n\t"
1475 [dst]"+&r"(dst), [src]"+&r"(src),
1484 for (i = 0; i < h; i++, dst += dststride, src += srcstride)
1485 memcpy(dst, src, 16);
1489 void ff_put_vp8_pixels8_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
1504 PTR_ADDU "%[addr1], %[dst], %[dststride] \n\t"
1505 MMI_SDC1(%[ftmp0], %[dst], 0x00)
1510 PTR_ADDU "%[dst], %[addr1], %[dststride] \n\t"
1515 [dst]"+&r"(dst), [src]"+&r"(src),
1524 for (i = 0; i < h; i++, dst += dststride, src += srcstride)
1525 memcpy(dst, src, 8);
1529 void ff_put_vp8_pixels4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
1544 PTR_ADDU "%[addr1], %[dst], %[dststride] \n\t"
1545 MMI_SWC1(%[ftmp0], %[dst], 0x00)
1550 PTR_ADDU "%[dst], %[addr1], %[dststride] \n\t"
1555 [dst]"+&r"(dst), [src]"+&r"(src),
1564 for (i = 0; i < h; i++, dst += dststride, src += srcstride)
1565 memcpy(dst, src, 4);
1569 void ff_put_vp8_epel16_h4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
1588 dst[0] = cm[(filter[2] * src[0] - filter[1] * src[-1] + filter[3] * src[1] - filter[4] * src[2] + 64) >> 7];
1589 dst[1] = cm[(filter[2] * src[1] - filter[1] * src[ 0] + filter[3] * src[2] - filter[4] * src[3] + 64) >> 7];
1590 dst[2] = cm[(filter[2] * src[2] - filter[1] * src[ 1] + filter[3] * src[3] - filter[4] * src[4] + 64) >> 7];
1591 dst[3] = cm[(filter[2] * src[3] - filter[1] * src[ 2] + filter[3] * src[4] - filter[4] * src[5] + 64) >> 7];
1592 dst[4] = cm[(filter[2] * src[4] - filter[1] * src[ 3] + filter[3] * src[5] - filter[4] * src[6] + 64) >> 7];
1593 dst[5] = cm[(filter[2] * src[5] - filter[1] * src[ 4] + filter[3] * src[6] - filter[4] * src[7] + 64) >> 7];
1594 dst[6] = cm[(filter[2] * src[6] - filter[1] * src[ 5] + filter[3] * src[7] - filter[4] * src[8] + 64) >> 7];
1595 dst[7] = cm[(filter[2] * src[7] - filter[1] * src[ 6] + filter[3] * src[8] - filter[4] * src[9] + 64) >> 7];
1597 dst[ 8] = cm[(filter[2] * src[ 8] - filter[1] * src[ 7] + filter[3] * src[ 9] - filter[4] * src[10] + 64) >> 7];
1598 dst[ 9] = cm[(filter[2] * src[ 9] - filter[1] * src[ 8] + filter[3] * src[10] - filter[4] * src[11] + 64) >> 7];
1599 dst[10] = cm[(filter[2] * src[10] - filter[1] * src[ 9] + filter[3] * src[11] - filter[4] * src[12] + 64) >> 7];
1600 dst[11] = cm[(filter[2] * src[11] - filter[1] * src[10] + filter[3] * src[12] - filter[4] * src[13] + 64) >> 7];
1601 dst[12] = cm[(filter[2] * src[12] - filter[1] * src[11] + filter[3] * src[13] - filter[4] * src[14] + 64) >> 7];
1602 dst[13] = cm[(filter[2] * src[13] - filter[1] * src[12] + filter[3] * src[14] - filter[4] * src[15] + 64) >> 7];
1603 dst[14] = cm[(filter[2] * src[14] - filter[1] * src[13] + filter[3] * src[15] - filter[4] * src[16] + 64) >> 7];
1604 dst[15] = cm[(filter[2] * src[15] - filter[1] * src[14] + filter[3] * src[16] - filter[4] * src[17] + 64) >> 7];
1613 PUT_VP8_EPEL8_H4_MMI(%[src], %[dst])
1615 PTR_ADDIU "%[dst1], %[dst], 0x08 \n\t"
1621 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
1632 [dst]"+&r"(dst), [src]"+&r"(src)
1647 dst[x] = FILTER_4TAP(src, filter, 1);
1648 dst += dststride;
1654 void ff_put_vp8_epel8_h4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
1673 dst[0] = cm[(filter[2] * src[0] - filter[1] * src[-1] + filter[3] * src[1] - filter[4] * src[2] + 64) >> 7];
1674 dst[1] = cm[(filter[2] * src[1] - filter[1] * src[ 0] + filter[3] * src[2] - filter[4] * src[3] + 64) >> 7];
1675 dst[2] = cm[(filter[2] * src[2] - filter[1] * src[ 1] + filter[3] * src[3] - filter[4] * src[4] + 64) >> 7];
1676 dst[3] = cm[(filter[2] * src[3] - filter[1] * src[ 2] + filter[3] * src[4] - filter[4] * src[5] + 64) >> 7];
1677 dst[4] = cm[(filter[2] * src[4] - filter[1] * src[ 3] + filter[3] * src[5] - filter[4] * src[6] + 64) >> 7];
1678 dst[5] = cm[(filter[2] * src[5] - filter[1] * src[ 4] + filter[3] * src[6] - filter[4] * src[7] + 64) >> 7];
1679 dst[6] = cm[(filter[2] * src[6] - filter[1] * src[ 5] + filter[3] * src[7] - filter[4] * src[8] + 64) >> 7];
1680 dst[7] = cm[(filter[2] * src[7] - filter[1] * src[ 6] + filter[3] * src[8] - filter[4] * src[9] + 64) >> 7];
1688 PUT_VP8_EPEL8_H4_MMI(%[src], %[dst])
1692 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
1702 [dst]"+&r"(dst), [src]"+&r"(src)
1717 dst[x] = FILTER_4TAP(src, filter, 1);
1718 dst += dststride;
1724 void ff_put_vp8_epel4_h4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
1742 dst[0] = cm[(filter[2] * src[0] - filter[1] * src[-1] + filter[3] * src[1] - filter[4] * src[2] + 64) >> 7];
1743 dst[1] = cm[(filter[2] * src[1] - filter[1] * src[ 0] + filter[3] * src[2] - filter[4] * src[3] + 64) >> 7];
1744 dst[2] = cm[(filter[2] * src[2] - filter[1] * src[ 1] + filter[3] * src[3] - filter[4] * src[4] + 64) >> 7];
1745 dst[3] = cm[(filter[2] * src[3] - filter[1] * src[ 2] + filter[3] * src[4] - filter[4] * src[5] + 64) >> 7];
1753 PUT_VP8_EPEL4_H4_MMI(%[src], %[dst])
1757 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
1765 [dst]"+&r"(dst), [src]"+&r"(src)
1780 dst[x] = FILTER_4TAP(src, filter, 1);
1781 dst += dststride;
1787 void ff_put_vp8_epel16_h6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
1810 dst[ 0] = cm[(filter[2]*src[ 0] - filter[1]*src[-1] + filter[0]*src[-2] + filter[3]*src[ 1] - filter[4]*src[ 2] + filter[5]*src[ 3] + 64) >> 7];
1811 dst[ 1] = cm[(filter[2]*src[ 1] - filter[1]*src[ 0] + filter[0]*src[-1] + filter[3]*src[ 2] - filter[4]*src[ 3] + filter[5]*src[ 4] + 64) >> 7];
1812 dst[ 2] = cm[(filter[2]*src[ 2] - filter[1]*src[ 1] + filter[0]*src[ 0] + filter[3]*src[ 3] - filter[4]*src[ 4] + filter[5]*src[ 5] + 64) >> 7];
1813 dst[ 3] = cm[(filter[2]*src[ 3] - filter[1]*src[ 2] + filter[0]*src[ 1] + filter[3]*src[ 4] - filter[4]*src[ 5] + filter[5]*src[ 6] + 64) >> 7];
1814 dst[ 4] = cm[(filter[2]*src[ 4] - filter[1]*src[ 3] + filter[0]*src[ 2] + filter[3]*src[ 5] - filter[4]*src[ 6] + filter[5]*src[ 7] + 64) >> 7];
1815 dst[ 5] = cm[(filter[2]*src[ 5] - filter[1]*src[ 4] + filter[0]*src[ 3] + filter[3]*src[ 6] - filter[4]*src[ 7] + filter[5]*src[ 8] + 64) >> 7];
1816 dst[ 6] = cm[(filter[2]*src[ 6] - filter[1]*src[ 5] + filter[0]*src[ 4] + filter[3]*src[ 7] - filter[4]*src[ 8] + filter[5]*src[ 9] + 64) >> 7];
1817 dst[ 7] = cm[(filter[2]*src[ 7] - filter[1]*src[ 6] + filter[0]*src[ 5] + filter[3]*src[ 8] - filter[4]*src[ 9] + filter[5]*src[10] + 64) >> 7];
1819 dst[ 8] = cm[(filter[2]*src[ 8] - filter[1]*src[ 7] + filter[0]*src[ 6] + filter[3]*src[ 9] - filter[4]*src[10] + filter[5]*src[11] + 64) >> 7];
1820 dst[ 9] = cm[(filter[2]*src[ 9] - filter[1]*src[ 8] + filter[0]*src[ 7] + filter[3]*src[10] - filter[4]*src[11] + filter[5]*src[12] + 64) >> 7];
1821 dst[10] = cm[(filter[2]*src[10] - filter[1]*src[ 9] + filter[0]*src[ 8] + filter[3]*src[11] - filter[4]*src[12] + filter[5]*src[13] + 64) >> 7];
1822 dst[11] = cm[(filter[2]*src[11] - filter[1]*src[10] + filter[0]*src[ 9] + filter[3]*src[12] - filter[4]*src[13] + filter[5]*src[14] + 64) >> 7];
1823 dst[12] = cm[(filter[2]*src[12] - filter[1]*src[11] + filter[0]*src[10] + filter[3]*src[13] - filter[4]*src[14] + filter[5]*src[15] + 64) >> 7];
1824 dst[13] = cm[(filter[2]*src[13] - filter[1]*src[12] + filter[0]*src[11] + filter[3]*src[14] - filter[4]*src[15] + filter[5]*src[16] + 64) >> 7];
1825 dst[14] = cm[(filter[2]*src[14] - filter[1]*src[13] + filter[0]*src[12] + filter[3]*src[15] - filter[4]*src[16] + filter[5]*src[17] + 64) >> 7];
1826 dst[15] = cm[(filter[2]*src[15] - filter[1]*src[14] + filter[0]*src[13] + filter[3]*src[16] - filter[4]*src[17] + filter[5]*src[18] + 64) >> 7];
1835 PUT_VP8_EPEL8_H6_MMI(%[src], %[dst])
1837 PTR_ADDIU "%[dst1], %[dst], 0x08 \n\t"
1843 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
1854 [dst]"+&r"(dst), [src]"+&r"(src)
1870 dst[x] = FILTER_6TAP(src, filter, 1);
1871 dst += dststride;
1877 void ff_put_vp8_epel8_h6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
1899 dst[0] = cm[(filter[2]*src[0] - filter[1]*src[-1] + filter[0]*src[-2] + filter[3]*src[1] - filter[4]*src[2] + filter[5]*src[ 3] + 64) >> 7];
1900 dst[1] = cm[(filter[2]*src[1] - filter[1]*src[ 0] + filter[0]*src[-1] + filter[3]*src[2] - filter[4]*src[3] + filter[5]*src[ 4] + 64) >> 7];
1901 dst[2] = cm[(filter[2]*src[2] - filter[1]*src[ 1] + filter[0]*src[ 0] + filter[3]*src[3] - filter[4]*src[4] + filter[5]*src[ 5] + 64) >> 7];
1902 dst[3] = cm[(filter[2]*src[3] - filter[1]*src[ 2] + filter[0]*src[ 1] + filter[3]*src[4] - filter[4]*src[5] + filter[5]*src[ 6] + 64) >> 7];
1903 dst[4] = cm[(filter[2]*src[4] - filter[1]*src[ 3] + filter[0]*src[ 2] + filter[3]*src[5] - filter[4]*src[6] + filter[5]*src[ 7] + 64) >> 7];
1904 dst[5] = cm[(filter[2]*src[5] - filter[1]*src[ 4] + filter[0]*src[ 3] + filter[3]*src[6] - filter[4]*src[7] + filter[5]*src[ 8] + 64) >> 7];
1905 dst[6] = cm[(filter[2]*src[6] - filter[1]*src[ 5] + filter[0]*src[ 4] + filter[3]*src[7] - filter[4]*src[8] + filter[5]*src[ 9] + 64) >> 7];
1906 dst[7] = cm[(filter[2]*src[7] - filter[1]*src[ 6] + filter[0]*src[ 5] + filter[3]*src[8] - filter[4]*src[9] + filter[5]*src[10] + 64) >> 7];
1914 PUT_VP8_EPEL8_H6_MMI(%[src], %[dst])
1918 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
1928 [dst]"+&r"(dst), [src]"+&r"(src)
1944 dst[x] = FILTER_6TAP(src, filter, 1);
1945 dst += dststride;
1951 void ff_put_vp8_epel4_h6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
1973 dst[0] = cm[(filter[2]*src[0] - filter[1]*src[-1] + filter[0]*src[-2] + filter[3]*src[1] - filter[4]*src[2] + filter[5]*src[ 3] + 64) >> 7];
1974 dst[1] = cm[(filter[2]*src[1] - filter[1]*src[ 0] + filter[0]*src[-1] + filter[3]*src[2] - filter[4]*src[3] + filter[5]*src[ 4] + 64) >> 7];
1975 dst[2] = cm[(filter[2]*src[2] - filter[1]*src[ 1] + filter[0]*src[ 0] + filter[3]*src[3] - filter[4]*src[4] + filter[5]*src[ 5] + 64) >> 7];
1976 dst[3] = cm[(filter[2]*src[3] - filter[1]*src[ 2] + filter[0]*src[ 1] + filter[3]*src[4] - filter[4]*src[5] + filter[5]*src[ 6] + 64) >> 7];
1984 PUT_VP8_EPEL4_H6_MMI(%[src], %[dst])
1988 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
1996 [dst]"+&r"(dst), [src]"+&r"(src)
2012 dst[x] = FILTER_6TAP(src, filter, 1);
2013 dst += dststride;
2019 void ff_put_vp8_epel16_v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
2038 dst[0] = cm[(filter[2] * src[0] - filter[1] * src[ -srcstride] + filter[3] * src[ srcstride] - filter[4] * src[ 2*srcstride] + 64) >> 7];
2039 dst[1] = cm[(filter[2] * src[1] - filter[1] * src[1-srcstride] + filter[3] * src[1+srcstride] - filter[4] * src[1+2*srcstride] + 64) >> 7];
2040 dst[2] = cm[(filter[2] * src[2] - filter[1] * src[2-srcstride] + filter[3] * src[2+srcstride] - filter[4] * src[2+2*srcstride] + 64) >> 7];
2041 dst[3] = cm[(filter[2] * src[3] - filter[1] * src[3-srcstride] + filter[3] * src[3+srcstride] - filter[4] * src[3+2*srcstride] + 64) >> 7];
2042 dst[4] = cm[(filter[2] * src[4] - filter[1] * src[4-srcstride] + filter[3] * src[4+srcstride] - filter[4] * src[4+2*srcstride] + 64) >> 7];
2043 dst[5] = cm[(filter[2] * src[5] - filter[1] * src[5-srcstride] + filter[3] * src[5+srcstride] - filter[4] * src[5+2*srcstride] + 64) >> 7];
2044 dst[6] = cm[(filter[2] * src[6] - filter[1] * src[6-srcstride] + filter[3] * src[6+srcstride] - filter[4] * src[6+2*srcstride] + 64) >> 7];
2045 dst[7] = cm[(filter[2] * src[7] - filter[1] * src[7-srcstride] + filter[3] * src[7+srcstride] - filter[4] * src[7+2*srcstride] + 64) >> 7];
2047 dst[ 8] = cm[(filter[2] * src[ 8] - filter[1] * src[ 8-srcstride] + filter[3] * src[ 8+srcstride] - filter[4] * src[ 8+2*srcstride] + 64) >> 7];
2048 dst[ 9] = cm[(filter[2] * src[ 9] - filter[1] * src[ 9-srcstride] + filter[3] * src[ 9+srcstride] - filter[4] * src[ 9+2*srcstride] + 64) >> 7];
2049 dst[10] = cm[(filter[2] * src[10] - filter[1] * src[10-srcstride] + filter[3] * src[10+srcstride] - filter[4] * src[10+2*srcstride] + 64) >> 7];
2050 dst[11] = cm[(filter[2] * src[11] - filter[1] * src[11-srcstride] + filter[3] * src[11+srcstride] - filter[4] * src[11+2*srcstride] + 64) >> 7];
2051 dst[12] = cm[(filter[2] * src[12] - filter[1] * src[12-srcstride] + filter[3] * src[12+srcstride] - filter[4] * src[12+2*srcstride] + 64) >> 7];
2052 dst[13] = cm[(filter[2] * src[13] - filter[1] * src[13-srcstride] + filter[3] * src[13+srcstride] - filter[4] * src[13+2*srcstride] + 64) >> 7];
2053 dst[14] = cm[(filter[2] * src[14] - filter[1] * src[14-srcstride] + filter[3] * src[14+srcstride] - filter[4] * src[14+2*srcstride] + 64) >> 7];
2054 dst[15] = cm[(filter[2] * src[15] - filter[1] * src[15-srcstride] + filter[3] * src[15+srcstride] - filter[4] * src[15+2*srcstride] + 64) >> 7];
2063 PUT_VP8_EPEL8_V4_MMI(%[src], %[src1], %[dst], %[srcstride])
2065 PTR_ADDIU "%[dst0], %[dst], 0x08 \n\t"
2067 PUT_VP8_EPEL8_V4_MMI(%[src0], %[src1], %[dst], %[srcstride])
2071 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
2083 [dst]"+&r"(dst), [src]"+&r"(src)
2098 dst[x] = FILTER_4TAP(src, filter, srcstride);
2099 dst += dststride;
2105 void ff_put_vp8_epel8_v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
2124 dst[0] = cm[(filter[2] * src[0] - filter[1] * src[ -srcstride] + filter[3] * src[ srcstride] - filter[4] * src[ 2*srcstride] + 64) >> 7];
2125 dst[1] = cm[(filter[2] * src[1] - filter[1] * src[1-srcstride] + filter[3] * src[1+srcstride] - filter[4] * src[1+2*srcstride] + 64) >> 7];
2126 dst[2] = cm[(filter[2] * src[2] - filter[1] * src[2-srcstride] + filter[3] * src[2+srcstride] - filter[4] * src[2+2*srcstride] + 64) >> 7];
2127 dst[3] = cm[(filter[2] * src[3] - filter[1] * src[3-srcstride] + filter[3] * src[3+srcstride] - filter[4] * src[3+2*srcstride] + 64) >> 7];
2128 dst[4] = cm[(filter[2] * src[4] - filter[1] * src[4-srcstride] + filter[3] * src[4+srcstride] - filter[4] * src[4+2*srcstride] + 64) >> 7];
2129 dst[5] = cm[(filter[2] * src[5] - filter[1] * src[5-srcstride] + filter[3] * src[5+srcstride] - filter[4] * src[5+2*srcstride] + 64) >> 7];
2130 dst[6] = cm[(filter[2] * src[6] - filter[1] * src[6-srcstride] + filter[3] * src[6+srcstride] - filter[4] * src[6+2*srcstride] + 64) >> 7];
2131 dst[7] = cm[(filter[2] * src[7] - filter[1] * src[7-srcstride] + filter[3] * src[7+srcstride] - filter[4] * src[7+2*srcstride] + 64) >> 7];
2139 PUT_VP8_EPEL8_V4_MMI(%[src], %[src1], %[dst], %[srcstride])
2143 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
2154 [dst]"+&r"(dst), [src]"+&r"(src)
2169 dst[x] = FILTER_4TAP(src, filter, srcstride);
2170 dst += dststride;
2176 void ff_put_vp8_epel4_v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
2195 dst[0] = cm[(filter[2] * src[0] - filter[1] * src[ -srcstride] + filter[3] * src[ srcstride] - filter[4] * src[ 2*srcstride] + 64) >> 7];
2196 dst[1] = cm[(filter[2] * src[1] - filter[1] * src[1-srcstride] + filter[3] * src[1+srcstride] - filter[4] * src[1+2*srcstride] + 64) >> 7];
2197 dst[2] = cm[(filter[2] * src[2] - filter[1] * src[2-srcstride] + filter[3] * src[2+srcstride] - filter[4] * src[2+2*srcstride] + 64) >> 7];
2198 dst[3] = cm[(filter[2] * src[3] - filter[1] * src[3-srcstride] + filter[3] * src[3+srcstride] - filter[4] * src[3+2*srcstride] + 64) >> 7];
2206 PUT_VP8_EPEL4_V4_MMI(%[src], %[src1], %[dst], %[srcstride])
2210 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
2219 [dst]"+&r"(dst), [src]"+&r"(src)
2234 dst[x] = FILTER_4TAP(src, filter, srcstride);
2235 dst += dststride;
2241 void ff_put_vp8_epel16_v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
2264 dst[0] = cm[(filter[2]*src[0] - filter[1]*src[0-srcstride] + filter[0]*src[0-2*srcstride] + filter[3]*src[0+srcstride] - filter[4]*src[0+2*srcstride] + filter[5]*src[0+3*srcstride] + 64) >> 7];
2265 dst[1] = cm[(filter[2]*src[1] - filter[1]*src[1-srcstride] + filter[0]*src[1-2*srcstride] + filter[3]*src[1+srcstride] - filter[4]*src[1+2*srcstride] + filter[5]*src[1+3*srcstride] + 64) >> 7];
2266 dst[2] = cm[(filter[2]*src[2] - filter[1]*src[2-srcstride] + filter[0]*src[2-2*srcstride] + filter[3]*src[2+srcstride] - filter[4]*src[2+2*srcstride] + filter[5]*src[2+3*srcstride] + 64) >> 7];
2267 dst[3] = cm[(filter[2]*src[3] - filter[1]*src[3-srcstride] + filter[0]*src[3-2*srcstride] + filter[3]*src[3+srcstride] - filter[4]*src[3+2*srcstride] + filter[5]*src[3+3*srcstride] + 64) >> 7];
2268 dst[4] = cm[(filter[2]*src[4] - filter[1]*src[4-srcstride] + filter[0]*src[4-2*srcstride] + filter[3]*src[4+srcstride] - filter[4]*src[4+2*srcstride] + filter[5]*src[4+3*srcstride] + 64) >> 7];
2269 dst[5] = cm[(filter[2]*src[5] - filter[1]*src[5-srcstride] + filter[0]*src[5-2*srcstride] + filter[3]*src[5+srcstride] - filter[4]*src[5+2*srcstride] + filter[5]*src[5+3*srcstride] + 64) >> 7];
2270 dst[6] = cm[(filter[2]*src[6] - filter[1]*src[6-srcstride] + filter[0]*src[6-2*srcstride] + filter[3]*src[6+srcstride] - filter[4]*src[6+2*srcstride] + filter[5]*src[6+3*srcstride] + 64) >> 7];
2271 dst[7] = cm[(filter[2]*src[7] - filter[1]*src[7-srcstride] + filter[0]*src[7-2*srcstride] + filter[3]*src[7+srcstride] - filter[4]*src[7+2*srcstride] + filter[5]*src[7+3*srcstride] + 64) >> 7];
2273 dst[ 8] = cm[(filter[2]*src[ 8] - filter[1]*src[ 8-srcstride] + filter[0]*src[ 8-2*srcstride] + filter[3]*src[ 8+srcstride] - filter[4]*src[ 8+2*srcstride] + filter[5]*src[ 8+3*srcstride] + 64) >> 7];
2274 dst[ 9] = cm[(filter[2]*src[ 9] - filter[1]*src[ 9-srcstride] + filter[0]*src[ 9-2*srcstride] + filter[3]*src[ 9+srcstride] - filter[4]*src[ 9+2*srcstride] + filter[5]*src[ 9+3*srcstride] + 64) >> 7];
2275 dst[10] = cm[(filter[2]*src[10] - filter[1]*src[10-srcstride] + filter[0]*src[10-2*srcstride] + filter[3]*src[10+srcstride] - filter[4]*src[10+2*srcstride] + filter[5]*src[10+3*srcstride] + 64) >> 7];
2276 dst[11] = cm[(filter[2]*src[11] - filter[1]*src[11-srcstride] + filter[0]*src[11-2*srcstride] + filter[3]*src[11+srcstride] - filter[4]*src[11+2*srcstride] + filter[5]*src[11+3*srcstride] + 64) >> 7];
2277 dst[12] = cm[(filter[2]*src[12] - filter[1]*src[12-srcstride] + filter[0]*src[12-2*srcstride] + filter[3]*src[12+srcstride] - filter[4]*src[12+2*srcstride] + filter[5]*src[12+3*srcstride] + 64) >> 7];
2278 dst[13] = cm[(filter[2]*src[13] - filter[1]*src[13-srcstride] + filter[0]*src[13-2*srcstride] + filter[3]*src[13+srcstride] - filter[4]*src[13+2*srcstride] + filter[5]*src[13+3*srcstride] + 64) >> 7];
2279 dst[14] = cm[(filter[2]*src[14] - filter[1]*src[14-srcstride] + filter[0]*src[14-2*srcstride] + filter[3]*src[14+srcstride] - filter[4]*src[14+2*srcstride] + filter[5]*src[14+3*srcstride] + 64) >> 7];
2280 dst[15] = cm[(filter[2]*src[15] - filter[1]*src[15-srcstride] + filter[0]*src[15-2*srcstride] + filter[3]*src[15+srcstride] - filter[4]*src[15+2*srcstride] + filter[5]*src[15+3*srcstride] + 64) >> 7];
2289 PUT_VP8_EPEL8_V6_MMI(%[src], %[src1], %[dst], %[srcstride])
2291 PTR_ADDIU "%[dst0], %[dst], 0x08 \n\t"
2297 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
2309 [dst]"+&r"(dst), [src]"+&r"(src)
2325 dst[x] = FILTER_6TAP(src, filter, srcstride);
2326 dst += dststride;
2332 void ff_put_vp8_epel8_v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
2355 dst[0] = cm[(filter[2]*src[0] - filter[1]*src[0-srcstride] + filter[0]*src[0-2*srcstride] + filter[3]*src[0+srcstride] - filter[4]*src[0+2*srcstride] + filter[5]*src[0+3*srcstride] + 64) >> 7];
2356 dst[1] = cm[(filter[2]*src[1] - filter[1]*src[1-srcstride] + filter[0]*src[1-2*srcstride] + filter[3]*src[1+srcstride] - filter[4]*src[1+2*srcstride] + filter[5]*src[1+3*srcstride] + 64) >> 7];
2357 dst[2] = cm[(filter[2]*src[2] - filter[1]*src[2-srcstride] + filter[0]*src[2-2*srcstride] + filter[3]*src[2+srcstride] - filter[4]*src[2+2*srcstride] + filter[5]*src[2+3*srcstride] + 64) >> 7];
2358 dst[3] = cm[(filter[2]*src[3] - filter[1]*src[3-srcstride] + filter[0]*src[3-2*srcstride] + filter[3]*src[3+srcstride] - filter[4]*src[3+2*srcstride] + filter[5]*src[3+3*srcstride] + 64) >> 7];
2359 dst[4] = cm[(filter[2]*src[4] - filter[1]*src[4-srcstride] + filter[0]*src[4-2*srcstride] + filter[3]*src[4+srcstride] - filter[4]*src[4+2*srcstride] + filter[5]*src[4+3*srcstride] + 64) >> 7];
2360 dst[5] = cm[(filter[2]*src[5] - filter[1]*src[5-srcstride] + filter[0]*src[5-2*srcstride] + filter[3]*src[5+srcstride] - filter[4]*src[5+2*srcstride] + filter[5]*src[5+3*srcstride] + 64) >> 7];
2361 dst[6] = cm[(filter[2]*src[6] - filter[1]*src[6-srcstride] + filter[0]*src[6-2*srcstride] + filter[3]*src[6+srcstride] - filter[4]*src[6+2*srcstride] + filter[5]*src[6+3*srcstride] + 64) >> 7];
2362 dst[7] = cm[(filter[2]*src[7] - filter[1]*src[7-srcstride] + filter[0]*src[7-2*srcstride] + filter[3]*src[7+srcstride] - filter[4]*src[7+2*srcstride] + filter[5]*src[7+3*srcstride] + 64) >> 7];
2370 PUT_VP8_EPEL8_V6_MMI(%[src], %[src1], %[dst], %[srcstride])
2374 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
2385 [dst]"+&r"(dst), [src]"+&r"(src)
2401 dst[x] = FILTER_6TAP(src, filter, srcstride);
2402 dst += dststride;
2408 void ff_put_vp8_epel4_v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
2431 dst[0] = cm[(filter[2]*src[0] - filter[1]*src[0-srcstride] + filter[0]*src[0-2*srcstride] + filter[3]*src[0+srcstride] - filter[4]*src[0+2*srcstride] + filter[5]*src[0+3*srcstride] + 64) >> 7];
2432 dst[1] = cm[(filter[2]*src[1] - filter[1]*src[1-srcstride] + filter[0]*src[1-2*srcstride] + filter[3]*src[1+srcstride] - filter[4]*src[1+2*srcstride] + filter[5]*src[1+3*srcstride] + 64) >> 7];
2433 dst[2] = cm[(filter[2]*src[2] - filter[1]*src[2-srcstride] + filter[0]*src[2-2*srcstride] + filter[3]*src[2+srcstride] - filter[4]*src[2+2*srcstride] + filter[5]*src[2+3*srcstride] + 64) >> 7];
2434 dst[3] = cm[(filter[2]*src[3] - filter[1]*src[3-srcstride] + filter[0]*src[3-2*srcstride] + filter[3]*src[3+srcstride] - filter[4]*src[3+2*srcstride] + filter[5]*src[3+3*srcstride] + 64) >> 7];
2442 PUT_VP8_EPEL4_V6_MMI(%[src], %[src1], %[dst], %[srcstride])
2446 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
2455 [dst]"+&r"(dst), [src]"+&r"(src)
2471 dst[x] = FILTER_6TAP(src, filter, srcstride);
2472 dst += dststride;
2478 void ff_put_vp8_epel16_h4v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
2488 ff_put_vp8_epel16_v4_mmi(dst, dststride, tmp, 16, h, mx, my);
2510 dst[x] = FILTER_4TAP(tmp, filter, 16);
2511 dst += dststride;
2517 void ff_put_vp8_epel8_h4v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
2527 ff_put_vp8_epel8_v4_mmi(dst, dststride, tmp, 8, h, mx, my);
2549 dst[x] = FILTER_4TAP(tmp, filter, 8);
2550 dst += dststride;
2556 void ff_put_vp8_epel4_h4v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
2566 ff_put_vp8_epel4_v4_mmi(dst, dststride, tmp, 4, h, mx, my);
2587 dst[x] = FILTER_4TAP(tmp, filter, 4);
2588 dst += dststride;
2594 void ff_put_vp8_epel16_h4v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
2604 ff_put_vp8_epel16_v6_mmi(dst, dststride, tmp, 16, h, mx, my);
2626 dst[x] = FILTER_6TAP(tmp, filter, 16);
2627 dst += dststride;
2633 void ff_put_vp8_epel8_h4v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
2643 ff_put_vp8_epel8_v6_mmi(dst, dststride, tmp, 8, h, mx, my);
2665 dst[x] = FILTER_6TAP(tmp, filter, 8);
2666 dst += dststride;
2672 void ff_put_vp8_epel4_h4v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
2682 ff_put_vp8_epel4_v6_mmi(dst, dststride, tmp, 4, h, mx, my);
2704 dst[x] = FILTER_6TAP(tmp, filter, 4);
2705 dst += dststride;
2711 void ff_put_vp8_epel16_h6v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
2721 ff_put_vp8_epel16_v4_mmi(dst, dststride, tmp, 16, h, mx, my);
2743 dst[x] = FILTER_4TAP(tmp, filter, 16);
2744 dst += dststride;
2750 void ff_put_vp8_epel8_h6v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
2760 ff_put_vp8_epel8_v4_mmi(dst, dststride, tmp, 8, h, mx, my);
2782 dst[x] = FILTER_4TAP(tmp, filter, 8);
2783 dst += dststride;
2789 void ff_put_vp8_epel4_h6v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
2799 ff_put_vp8_epel4_v4_mmi(dst, dststride, tmp, 4, h, mx, my);
2821 dst[x] = FILTER_4TAP(tmp, filter, 4);
2822 dst += dststride;
2828 void ff_put_vp8_epel16_h6v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
2838 ff_put_vp8_epel16_v6_mmi(dst, dststride, tmp, 16, h, mx, my);
2860 dst[x] = FILTER_6TAP(tmp, filter, 16);
2861 dst += dststride;
2867 void ff_put_vp8_epel8_h6v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
2877 ff_put_vp8_epel8_v6_mmi(dst, dststride, tmp, 8, h, mx, my);
2899 dst[x] = FILTER_6TAP(tmp, filter, 8);
2900 dst += dststride;
2906 void ff_put_vp8_epel4_h6v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src,
2916 ff_put_vp8_epel4_v6_mmi(dst, dststride, tmp, 4, h, mx, my);
2938 dst[x] = FILTER_6TAP(tmp, filter, 4);
2939 dst += dststride;
2945 void ff_put_vp8_bilinear16_h_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src,
2958 dst[0] = (a * src[0] + b * src[1] + 4) >> 3;
2959 dst[1] = (a * src[1] + b * src[2] + 4) >> 3;
2960 dst[2] = (a * src[2] + b * src[3] + 4) >> 3;
2961 dst[3] = (a * src[3] + b * src[4] + 4) >> 3;
2962 dst[4] = (a * src[4] + b * src[5] + 4) >> 3;
2963 dst[5] = (a * src[5] + b * src[6] + 4) >> 3;
2964 dst[6] = (a * src[6] + b * src[7] + 4) >> 3;
2965 dst[7] = (a * src[7] + b * src[8] + 4) >> 3;
2967 dst[ 8] = (a * src[ 8] + b * src[ 9] + 4) >> 3;
2968 dst[ 9] = (a * src[ 9] + b * src[10] + 4) >> 3;
2969 dst[10] = (a * src[10] + b * src[11] + 4) >> 3;
2970 dst[11] = (a * src[11] + b * src[12] + 4) >> 3;
2971 dst[12] = (a * src[12] + b * src[13] + 4) >> 3;
2972 dst[13] = (a * src[13] + b * src[14] + 4) >> 3;
2973 dst[14] = (a * src[14] + b * src[15] + 4) >> 3;
2974 dst[15] = (a * src[15] + b * src[16] + 4) >> 3;
2985 PUT_VP8_BILINEAR8_H_MMI(%[src], %[dst])
2987 PTR_ADDIU "%[dst0], %[dst], 0x08 \n\t"
2993 PTR_ADDU "%[dst], %[dst], %[dstride] \n\t"
3003 [dst]"+&r"(dst), [src]"+&r"(src),
3016 dst[x] = (a * src[x] + b * src[x + 1] + 4) >> 3;
3017 dst += dstride;
3023 void ff_put_vp8_bilinear16_v_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src,
3036 dst[0] = (c * src[0] + d * src[ sstride] + 4) >> 3;
3037 dst[1] = (c * src[1] + d * src[1 + sstride] + 4) >> 3;
3038 dst[2] = (c * src[2] + d * src[2 + sstride] + 4) >> 3;
3039 dst[3] = (c * src[3] + d * src[3 + sstride] + 4) >> 3;
3040 dst[4] = (c * src[4] + d * src[4 + sstride] + 4) >> 3;
3041 dst[5] = (c * src[5] + d * src[5 + sstride] + 4) >> 3;
3042 dst[6] = (c * src[6] + d * src[6 + sstride] + 4) >> 3;
3043 dst[7] = (c * src[7] + d * src[7 + sstride] + 4) >> 3;
3054 PUT_VP8_BILINEAR8_V_MMI(%[src], %[src1], %[dst], %[sstride])
3056 PTR_ADDIU "%[dst0], %[dst], 0x08 \n\t"
3062 PTR_ADDU "%[dst], %[dst], %[dstride] \n\t"
3073 [dst]"+&r"(dst), [src]"+&r"(src),
3086 dst[x] = (c * src[x] + d * src[x + sstride] + 4) >> 3;
3087 dst += dstride;
3093 void ff_put_vp8_bilinear16_hv_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src,
3101 ff_put_vp8_bilinear16_v_mmi(dst, dstride, tmp, 16, h, mx, my);
3120 dst[x] = (c * tmp[x] + d * tmp[x + 16] + 4) >> 3;
3121 dst += dstride;
3127 void ff_put_vp8_bilinear8_h_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src,
3139 dst[0] = (a * src[0] + b * src[1] + 4) >> 3;
3140 dst[1] = (a * src[1] + b * src[2] + 4) >> 3;
3141 dst[2] = (a * src[2] + b * src[3] + 4) >> 3;
3142 dst[3] = (a * src[3] + b * src[4] + 4) >> 3;
3143 dst[4] = (a * src[4] + b * src[5] + 4) >> 3;
3144 dst[5] = (a * src[5] + b * src[6] + 4) >> 3;
3145 dst[6] = (a * src[6] + b * src[7] + 4) >> 3;
3146 dst[7] = (a * src[7] + b * src[8] + 4) >> 3;
3156 PUT_VP8_BILINEAR8_H_MMI(%[src], %[dst])
3160 PTR_ADDU "%[dst], %[dst], %[dstride] \n\t"
3169 [dst]"+&r"(dst), [src]"+&r"(src),
3182 dst[x] = (a * src[x] + b * src[x + 1] + 4) >> 3;
3183 dst += dstride;
3189 void ff_put_vp8_bilinear8_v_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src,
3202 dst[0] = (c * src[0] + d * src[ sstride] + 4) >> 3;
3203 dst[1] = (c * src[1] + d * src[1 + sstride] + 4) >> 3;
3204 dst[2] = (c * src[2] + d * src[2 + sstride] + 4) >> 3;
3205 dst[3] = (c * src[3] + d * src[3 + sstride] + 4) >> 3;
3206 dst[4] = (c * src[4] + d * src[4 + sstride] + 4) >> 3;
3207 dst[5] = (c * src[5] + d * src[5 + sstride] + 4) >> 3;
3208 dst[6] = (c * src[6] + d * src[6 + sstride] + 4) >> 3;
3209 dst[7] = (c * src[7] + d * src[7 + sstride] + 4) >> 3;
3219 PUT_VP8_BILINEAR8_V_MMI(%[src], %[src1], %[dst], %[sstride])
3223 PTR_ADDU "%[dst], %[dst], %[dstride] \n\t"
3233 [dst]"+&r"(dst), [src]"+&r"(src),
3246 dst[x] = (c * src[x] + d * src[x + sstride] + 4) >> 3;
3247 dst += dstride;
3253 void ff_put_vp8_bilinear8_hv_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src,
3261 ff_put_vp8_bilinear8_v_mmi(dst, dstride, tmp, 8, h, mx, my);
3280 dst[x] = (c * tmp[x] + d * tmp[x + 8] + 4) >> 3;
3281 dst += dstride;
3287 void ff_put_vp8_bilinear4_h_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src,
3300 dst[0] = (a * src[0] + b * src[1] + 4) >> 3;
3301 dst[1] = (a * src[1] + b * src[2] + 4) >> 3;
3302 dst[2] = (a * src[2] + b * src[3] + 4) >> 3;
3303 dst[3] = (a * src[3] + b * src[4] + 4) >> 3;
3313 PUT_VP8_BILINEAR4_H_MMI(%[src], %[dst])
3317 PTR_ADDU "%[dst], %[dst], %[dstride] \n\t"
3326 [dst]"+&r"(dst), [src]"+&r"(src),
3339 dst[x] = (a * src[x] + b * src[x + 1] + 4) >> 3;
3340 dst += dstride;
3346 void ff_put_vp8_bilinear4_v_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src,
3360 dst[0] = (c * src[0] + d * src[ sstride] + 4) >> 3;
3361 dst[1] = (c * src[1] + d * src[1 + sstride] + 4) >> 3;
3362 dst[2] = (c * src[2] + d * src[2 + sstride] + 4) >> 3;
3363 dst[3] = (c * src[3] + d * src[3 + sstride] + 4) >> 3;
3373 PUT_VP8_BILINEAR4_V_MMI(%[src], %[src1], %[dst], %[sstride])
3377 PTR_ADDU "%[dst], %[dst], %[dstride] \n\t"
3387 [dst]"+&r"(dst), [src]"+&r"(src),
3400 dst[x] = (c * src[x] + d * src[x + sstride] + 4) >> 3;
3401 dst += dstride;
3407 void ff_put_vp8_bilinear4_hv_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src,
3415 ff_put_vp8_bilinear4_v_mmi(dst, dstride, tmp, 4, h, mx, my);
3434 dst[x] = (c * tmp[x] + d * tmp[x + 4] + 4) >> 3;
3435 dst += dstride;