/*
 * Loongson SIMD optimized idctdsp
 *
 * Copyright (c) 2015 Loongson Technology Corporation Limited
 * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
24#include "idctdsp_mips.h"
25#include "constants.h"
26#include "libavutil/mips/mmiutils.h"
27
28void ff_put_pixels_clamped_mmi(const int16_t *block,
29        uint8_t *av_restrict pixels, ptrdiff_t line_size)
30{
31    double ftmp[8];
32
33    __asm__ volatile (
34        MMI_LDC1(%[ftmp0], %[block], 0x00)
35        MMI_LDC1(%[ftmp1], %[block], 0x08)
36        MMI_LDC1(%[ftmp2], %[block], 0x10)
37        MMI_LDC1(%[ftmp3], %[block], 0x18)
38        MMI_LDC1(%[ftmp4], %[block], 0x20)
39        MMI_LDC1(%[ftmp5], %[block], 0x28)
40        MMI_LDC1(%[ftmp6], %[block], 0x30)
41        MMI_LDC1(%[ftmp7], %[block], 0x38)
42        "packushb   %[ftmp0],   %[ftmp0],       %[ftmp1]                \n\t"
43        "packushb   %[ftmp2],   %[ftmp2],       %[ftmp3]                \n\t"
44        "packushb   %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
45        "packushb   %[ftmp6],   %[ftmp6],       %[ftmp7]                \n\t"
46        MMI_SDC1(%[ftmp0], %[pixels], 0x00)
47        PTR_ADDU   "%[pixels],  %[pixels],      %[line_size]            \n\t"
48        MMI_SDC1(%[ftmp2], %[pixels], 0x00)
49        PTR_ADDU   "%[pixels],  %[pixels],      %[line_size]            \n\t"
50        MMI_SDC1(%[ftmp4], %[pixels], 0x00)
51        PTR_ADDU   "%[pixels],  %[pixels],      %[line_size]            \n\t"
52        MMI_SDC1(%[ftmp6], %[pixels], 0x00)
53        PTR_ADDU   "%[pixels],  %[pixels],      %[line_size]            \n\t"
54
55        MMI_LDC1(%[ftmp0], %[block], 0x40)
56        MMI_LDC1(%[ftmp1], %[block], 0x48)
57        MMI_LDC1(%[ftmp2], %[block], 0x50)
58        MMI_LDC1(%[ftmp3], %[block], 0x58)
59        MMI_LDC1(%[ftmp4], %[block], 0x60)
60        MMI_LDC1(%[ftmp5], %[block], 0x68)
61        MMI_LDC1(%[ftmp6], %[block], 0x70)
62        MMI_LDC1(%[ftmp7], %[block], 0x78)
63        "packushb   %[ftmp0],   %[ftmp0],       %[ftmp1]                \n\t"
64        "packushb   %[ftmp2],   %[ftmp2],       %[ftmp3]                \n\t"
65        "packushb   %[ftmp4],   %[ftmp4],       %[ftmp5]                \n\t"
66        "packushb   %[ftmp6],   %[ftmp6],       %[ftmp7]                \n\t"
67        MMI_SDC1(%[ftmp0], %[pixels], 0x00)
68        PTR_ADDU   "%[pixels],  %[pixels],      %[line_size]            \n\t"
69        MMI_SDC1(%[ftmp2], %[pixels], 0x00)
70        PTR_ADDU   "%[pixels],  %[pixels],      %[line_size]            \n\t"
71        MMI_SDC1(%[ftmp4], %[pixels], 0x00)
72        PTR_ADDU   "%[pixels],  %[pixels],      %[line_size]            \n\t"
73        MMI_SDC1(%[ftmp6], %[pixels], 0x00)
74        : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
75          [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
76          [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
77          [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
78          [pixels]"+&r"(pixels)
79        : [line_size]"r"((mips_reg)line_size),
80          [block]"r"(block)
81        : "memory"
82    );
83}
84
85void ff_put_signed_pixels_clamped_mmi(const int16_t *block,
86    uint8_t *av_restrict pixels, ptrdiff_t line_size)
87{
88    double ftmp[5];
89
90    __asm__ volatile (
91        MMI_LDC1(%[ftmp1], %[block], 0x00)
92        MMI_LDC1(%[ftmp0], %[block], 0x08)
93        "packsshb   %[ftmp1],       %[ftmp1],       %[ftmp0]            \n\t"
94        MMI_LDC1(%[ftmp2], %[block], 0x10)
95        MMI_LDC1(%[ftmp0], %[block], 0x18)
96        "packsshb   %[ftmp2],       %[ftmp2],       %[ftmp0]            \n\t"
97        MMI_LDC1(%[ftmp3], %[block], 0x20)
98        MMI_LDC1(%[ftmp0], %[block], 0x28)
99        "packsshb   %[ftmp3],       %[ftmp3],       %[ftmp0]            \n\t"
100        MMI_LDC1(%[ftmp4], %[block], 0x30)
101        MMI_LDC1(%[ftmp0], %[block], 0x38)
102        "packsshb   %[ftmp4],       %[ftmp4],       %[ftmp0]            \n\t"
103        "paddb      %[ftmp1],       %[ftmp1],       %[ff_pb_80]         \n\t"
104        "paddb      %[ftmp2],       %[ftmp2],       %[ff_pb_80]         \n\t"
105        "paddb      %[ftmp3],       %[ftmp3],       %[ff_pb_80]         \n\t"
106        "paddb      %[ftmp4],       %[ftmp4],       %[ff_pb_80]         \n\t"
107        MMI_SDC1(%[ftmp1], %[pixels], 0x00)
108        PTR_ADDU   "%[pixels],      %[pixels],      %[line_size]        \n\t"
109        MMI_SDC1(%[ftmp2], %[pixels], 0x00)
110        PTR_ADDU   "%[pixels],      %[pixels],      %[line_size]        \n\t"
111        MMI_SDC1(%[ftmp3], %[pixels], 0x00)
112        PTR_ADDU   "%[pixels],      %[pixels],      %[line_size]        \n\t"
113        MMI_SDC1(%[ftmp4], %[pixels], 0x00)
114        PTR_ADDU   "%[pixels],      %[pixels],      %[line_size]        \n\t"
115
116        MMI_LDC1(%[ftmp1], %[block], 0x40)
117        MMI_LDC1(%[ftmp0], %[block], 0x48)
118        "packsshb   %[ftmp1],       %[ftmp1],       %[ftmp0]            \n\t"
119        MMI_LDC1(%[ftmp2], %[block], 0x50)
120        MMI_LDC1(%[ftmp0], %[block], 0x58)
121        "packsshb   %[ftmp2],       %[ftmp2],       %[ftmp0]            \n\t"
122        MMI_LDC1(%[ftmp3], %[block], 0x60)
123        MMI_LDC1(%[ftmp0], %[block], 0x68)
124        "packsshb   %[ftmp3],       %[ftmp3],       %[ftmp0]            \n\t"
125        MMI_LDC1(%[ftmp4], %[block], 0x70)
126        MMI_LDC1(%[ftmp0], %[block], 0x78)
127        "packsshb   %[ftmp4],       %[ftmp4],       %[ftmp0]            \n\t"
128        "paddb      %[ftmp1],       %[ftmp1],       %[ff_pb_80]         \n\t"
129        "paddb      %[ftmp2],       %[ftmp2],       %[ff_pb_80]         \n\t"
130        "paddb      %[ftmp3],       %[ftmp3],       %[ff_pb_80]         \n\t"
131        "paddb      %[ftmp4],       %[ftmp4],       %[ff_pb_80]         \n\t"
132        MMI_SDC1(%[ftmp1], %[pixels], 0x00)
133        PTR_ADDU   "%[pixels],      %[pixels],      %[line_size]        \n\t"
134        MMI_SDC1(%[ftmp2], %[pixels], 0x00)
135        PTR_ADDU   "%[pixels],      %[pixels],      %[line_size]        \n\t"
136        MMI_SDC1(%[ftmp3], %[pixels], 0x00)
137        PTR_ADDU   "%[pixels],      %[pixels],      %[line_size]        \n\t"
138        MMI_SDC1(%[ftmp4], %[pixels], 0x00)
139        : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
140          [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
141          [ftmp4]"=&f"(ftmp[4]),
142          [pixels]"+&r"(pixels)
143        : [block]"r"(block),
144          [line_size]"r"((mips_reg)line_size),
145          [ff_pb_80]"f"(ff_pb_80.f)
146        : "memory"
147    );
148}
149
150void ff_add_pixels_clamped_mmi(const int16_t *block,
151        uint8_t *av_restrict pixels, ptrdiff_t line_size)
152{
153    double ftmp[9];
154    uint64_t tmp[1];
155    __asm__ volatile (
156        "li         %[tmp0],    0x04                           \n\t"
157        "pxor       %[ftmp0],   %[ftmp0],   %[ftmp0]           \n\t"
158        "1:                                                    \n\t"
159        MMI_LDC1(%[ftmp5], %[pixels], 0x00)
160        PTR_ADDU   "%[pixels],  %[pixels],  %[line_size]       \n\t"
161        MMI_LDC1(%[ftmp6], %[pixels], 0x00)
162        PTR_SUBU   "%[pixels],  %[pixels],  %[line_size]       \n\t"
163        MMI_LDC1(%[ftmp1], %[block], 0x00)
164        MMI_LDC1(%[ftmp2], %[block], 0x08)
165        MMI_LDC1(%[ftmp3], %[block], 0x10)
166        MMI_LDC1(%[ftmp4], %[block], 0x18)
167        PTR_ADDIU  "%[block],   %[block],   0x20               \n\t"
168        "punpckhbh  %[ftmp7],   %[ftmp5],   %[ftmp0]           \n\t"
169        "punpcklbh  %[ftmp5],   %[ftmp5],   %[ftmp0]           \n\t"
170        "punpckhbh  %[ftmp8],   %[ftmp6],   %[ftmp0]           \n\t"
171        "punpcklbh  %[ftmp6],   %[ftmp6],   %[ftmp0]           \n\t"
172        "paddh      %[ftmp1],   %[ftmp1],   %[ftmp5]           \n\t"
173        "paddh      %[ftmp2],   %[ftmp2],   %[ftmp7]           \n\t"
174        "paddh      %[ftmp3],   %[ftmp3],   %[ftmp6]           \n\t"
175        "paddh      %[ftmp4],   %[ftmp4],   %[ftmp8]           \n\t"
176        "packushb   %[ftmp1],   %[ftmp1],   %[ftmp2]           \n\t"
177        "packushb   %[ftmp3],   %[ftmp3],   %[ftmp4]           \n\t"
178        MMI_SDC1(%[ftmp1], %[pixels], 0x00)
179        PTR_ADDU   "%[pixels],  %[pixels],  %[line_size]       \n\t"
180        MMI_SDC1(%[ftmp3], %[pixels], 0x00)
181        "addi       %[tmp0],    %[tmp0],    -0x01              \n\t"
182        PTR_ADDU   "%[pixels],  %[pixels],  %[line_size]       \n\t"
183        "bnez       %[tmp0],    1b                             \n\t"
184        : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
185          [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
186          [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
187          [ftmp6]"=&f"(ftmp[6]),            [ftmp7]"=&f"(ftmp[7]),
188          [ftmp8]"=&f"(ftmp[8]),            [tmp0]"=&r"(tmp[0]),
189          [pixels]"+&r"(pixels),            [block]"+&r"(block)
190        : [line_size]"r"((mips_reg)line_size)
191        : "memory"
192    );
193}
194