1cabdff1aSopenharmony_ci/*
2cabdff1aSopenharmony_ci * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
3cabdff1aSopenharmony_ci *
4cabdff1aSopenharmony_ci * This file is part of FFmpeg.
5cabdff1aSopenharmony_ci *
6cabdff1aSopenharmony_ci * FFmpeg is free software; you can redistribute it and/or
7cabdff1aSopenharmony_ci * modify it under the terms of the GNU Lesser General Public
8cabdff1aSopenharmony_ci * License as published by the Free Software Foundation; either
9cabdff1aSopenharmony_ci * version 2.1 of the License, or (at your option) any later version.
10cabdff1aSopenharmony_ci *
11cabdff1aSopenharmony_ci * FFmpeg is distributed in the hope that it will be useful,
12cabdff1aSopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of
13cabdff1aSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14cabdff1aSopenharmony_ci * Lesser General Public License for more details.
15cabdff1aSopenharmony_ci *
16cabdff1aSopenharmony_ci * You should have received a copy of the GNU Lesser General Public
17cabdff1aSopenharmony_ci * License along with FFmpeg; if not, write to the Free Software
18cabdff1aSopenharmony_ci * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19cabdff1aSopenharmony_ci */
20cabdff1aSopenharmony_ci
21cabdff1aSopenharmony_ci#include "libavutil/attributes.h"
22cabdff1aSopenharmony_ci#include "libavcodec/idctdsp.h"
23cabdff1aSopenharmony_ci#include "idctdsp_alpha.h"
24cabdff1aSopenharmony_ci#include "asm.h"
25cabdff1aSopenharmony_ci
/* MVI variants of the clamped put/add routines.  Only the prototypes are
   visible in this file; the _asm suffix suggests they are implemented in
   a separate assembly source (NOTE(review): confirm).  They are installed
   into the IDCTDSPContext by ff_idctdsp_init_alpha() when amask() reports
   MVI as present.  */
void put_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels,
                                ptrdiff_t line_size);
void add_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels,
                                ptrdiff_t line_size);

/* File-scope (non-static) copies of the context's put/add_pixels_clamped
   callbacks.  ff_idctdsp_init_alpha() overwrites both on every call with
   whatever the context holds at that point, so they reflect the most
   recently initialised context.  Their readers are not visible in this
   file.  */
void (*put_pixels_clamped_axp_p)(const int16_t *block, uint8_t *pixels,
                                 ptrdiff_t line_size);
void (*add_pixels_clamped_axp_p)(const int16_t *block, uint8_t *pixels,
                                 ptrdiff_t line_size);
35cabdff1aSopenharmony_ci
36cabdff1aSopenharmony_ci#if 0
37cabdff1aSopenharmony_ci/* These functions were the base for the optimized assembler routines,
38cabdff1aSopenharmony_ci   and remain here for documentation purposes.  */
/* Reference C rendition of the "put clamped" routine: walks 8 rows of
   8 int16_t values from block, limits every value to the range given by
   0 and the 0x00ff lane mask, and writes the packed result to pixels.
   Each row is handled as two groups of four values; pixels advances by
   line_size per row, block by 8 values per row.
   ldq/stl/maxsw4/minsw4/pkwb/zap come from asm.h and presumably map to
   the Alpha instructions of the same names.  */
static void put_pixels_clamped_mvi(const int16_t *block, uint8_t *pixels,
                                   ptrdiff_t line_size)
{
    /* Upper bound for each of the four 16-bit lanes.  */
    const uint64_t lane_max = zap(-1, 0xaa); /* 0x00ff00ff00ff00ff */
    int rows_left;

    for (rows_left = 8; rows_left > 0; rows_left--) {
        uint64_t left, right;

        /* Values 0..3 of the current row.  */
        left = ldq(block);
        left = minsw4(maxsw4(left, 0), lane_max);
        stl(pkwb(left), pixels);

        /* Values 4..7 of the current row.  */
        right = ldq(block + 4);
        right = minsw4(maxsw4(right, 0), lane_max);
        stl(pkwb(right), pixels + 4);

        block  += 8;
        pixels += line_size;
    }
}
62cabdff1aSopenharmony_ci
/* Reference C rendition of the "add clamped" routine: for 8 rows of
   8 values, adds the int16_t values in block to the bytes already in
   pixels, limits each sum to the range given by 0 and the 0x00ff lane
   mask, and stores the packed result back to pixels.  Each row is
   processed as two groups of four; both groups are computed before
   either is stored.  pixels advances by line_size per row, block by
   8 values per row.
   ldq/ldl/stl/unpkbw/pkwb/maxsw4/minsw4/zap come from asm.h and
   presumably map to the Alpha instructions of the same names.  */
void add_pixels_clamped_mvi(const int16_t *block, uint8_t *pixels,
                            ptrdiff_t line_size)
{
    /* Both constants are derived with zap() rather than written as
       literals so that the function stays a leaf function (the original
       author notes this is mostly for the hack value).  */
    const uint64_t lane_max  = zap(-1, 0xaa); /* 0x00ff00ff00ff00ff */
    const uint64_t odd_words = zap(-1, 0x33);
    const uint64_t lane_sign = odd_words ^ (odd_words >> 1); /* 0x8000800080008000 */
    int row;

    for (row = 0; row < 8; row++) {
        uint64_t coef_lo = ldq(block);
        uint64_t coef_hi = ldq(block + 4);
        uint64_t px_lo, px_hi;
        uint64_t sign_lo, sign_hi;

        /* First four values.  Signed subword add (MMX paddw): strip the
           per-lane sign bit, add, then fold the sign bit back in with
           XOR so the addition cannot carry into the neighbouring lane.  */
        px_lo   = unpkbw(ldl(pixels));
        sign_lo = coef_lo & lane_sign;
        coef_lo = (coef_lo & ~lane_sign) + px_lo;
        coef_lo ^= sign_lo;
        /* Clamp.  */
        coef_lo = minsw4(maxsw4(coef_lo, 0), lane_max);

        /* Next four values, same procedure.  */
        px_hi   = unpkbw(ldl(pixels + 4));
        sign_hi = coef_hi & lane_sign;
        coef_hi = (coef_hi & ~lane_sign) + px_hi;
        coef_hi ^= sign_hi;
        coef_hi = minsw4(maxsw4(coef_hi, 0), lane_max);

        stl(pkwb(coef_lo), pixels);
        stl(pkwb(coef_hi), pixels + 4);

        block  += 8;
        pixels += line_size;
    }
}
106cabdff1aSopenharmony_ci#endif
107cabdff1aSopenharmony_ci
108cabdff1aSopenharmony_ciav_cold void ff_idctdsp_init_alpha(IDCTDSPContext *c, AVCodecContext *avctx,
109cabdff1aSopenharmony_ci                                   unsigned high_bit_depth)
110cabdff1aSopenharmony_ci{
111cabdff1aSopenharmony_ci    /* amask clears all bits that correspond to present features.  */
112cabdff1aSopenharmony_ci    if (amask(AMASK_MVI) == 0) {
113cabdff1aSopenharmony_ci        c->put_pixels_clamped = put_pixels_clamped_mvi_asm;
114cabdff1aSopenharmony_ci        c->add_pixels_clamped = add_pixels_clamped_mvi_asm;
115cabdff1aSopenharmony_ci    }
116cabdff1aSopenharmony_ci
117cabdff1aSopenharmony_ci    put_pixels_clamped_axp_p = c->put_pixels_clamped;
118cabdff1aSopenharmony_ci    add_pixels_clamped_axp_p = c->add_pixels_clamped;
119cabdff1aSopenharmony_ci
120cabdff1aSopenharmony_ci    if (!high_bit_depth && !avctx->lowres &&
121cabdff1aSopenharmony_ci        (avctx->idct_algo == FF_IDCT_AUTO)) {
122cabdff1aSopenharmony_ci        c->idct_put = ff_simple_idct_put_axp;
123cabdff1aSopenharmony_ci        c->idct_add = ff_simple_idct_add_axp;
124cabdff1aSopenharmony_ci        c->idct =     ff_simple_idct_axp;
125cabdff1aSopenharmony_ci    }
126cabdff1aSopenharmony_ci}
127