1/* 2 * Copyright (c) 2012 3 * MIPS Technologies, Inc., California. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its 14 * contributors may be used to endorse or promote products derived from 15 * this software without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * Author: Nedeljko Babic (nbabic@mips.com) 30 * 31 * This file is part of FFmpeg. 32 * 33 * FFmpeg is free software; you can redistribute it and/or 34 * modify it under the terms of the GNU Lesser General Public 35 * License as published by the Free Software Foundation; either 36 * version 2.1 of the License, or (at your option) any later version. 37 * 38 * FFmpeg is distributed in the hope that it will be useful, 39 * but WITHOUT ANY WARRANTY; without even the implied warranty of 40 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 41 * Lesser General Public License for more details. 42 * 43 * You should have received a copy of the GNU Lesser General Public 44 * License along with FFmpeg; if not, write to the Free Software 45 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 46 */ 47 48#ifndef AVUTIL_FIXED_DSP_H 49#define AVUTIL_FIXED_DSP_H 50 51#include <stdint.h> 52#include "config.h" 53#include "attributes.h" 54#include "libavcodec/mathops.h" 55 56typedef struct AVFixedDSPContext { 57 /* Assume len is a multiple of 16, and arrays are 32-byte aligned */ 58 /* Results of multiplications are scaled down by 31 bit (and rounded) if not 59 * stated otherwise */ 60 61 /** 62 * Overlap/add with window function. 63 * Result is scaled down by "bits" bits. 64 * Used primarily by MDCT-based audio codecs. 65 * Source and destination vectors must overlap exactly or not at all. 66 * 67 * @param dst result vector 68 * constraints: 16-byte aligned 69 * @param src0 first source vector 70 * constraints: 16-byte aligned 71 * @param src1 second source vector 72 * constraints: 16-byte aligned 73 * @param win half-window vector 74 * constraints: 16-byte aligned 75 * @param len length of vector 76 * constraints: multiple of 4 77 * @param bits scaling parameter 78 * 79 */ 80 void (*vector_fmul_window_scaled)(int16_t *dst, const int32_t *src0, const int32_t *src1, const int32_t *win, int len, uint8_t bits); 81 82 /** 83 * Overlap/add with window function. 84 * Used primarily by MDCT-based audio codecs. 85 * Source and destination vectors must overlap exactly or not at all. 86 * 87 * @param dst result vector 88 * constraints: 32-byte aligned 89 * @param src0 first source vector 90 * constraints: 16-byte aligned 91 * @param src1 second source vector 92 * constraints: 16-byte aligned 93 * @param win half-window vector 94 * constraints: 16-byte aligned 95 * @param len length of vector 96 * constraints: multiple of 4 97 */ 98 void (*vector_fmul_window)(int32_t *dst, const int32_t *src0, const int32_t *src1, const int32_t *win, int len); 99 100 /** 101 * Fixed-point multiplication that calculates the entry wise product of two 102 * vectors of integers and stores the result in a vector of integers. 103 * 104 * @param dst output vector 105 * constraints: 32-byte aligned 106 * @param src0 first input vector 107 * constraints: 32-byte aligned 108 * @param src1 second input vector 109 * constraints: 32-byte aligned 110 * @param len number of elements in the input 111 * constraints: multiple of 16 112 */ 113 void (*vector_fmul)(int *dst, const int *src0, const int *src1, 114 int len); 115 116 void (*vector_fmul_reverse)(int *dst, const int *src0, const int *src1, int len); 117 /** 118 * Calculate the entry wise product of two vectors of integers, add a third vector of 119 * integers and store the result in a vector of integers. 120 * 121 * @param dst output vector 122 * constraints: 32-byte aligned 123 * @param src0 first input vector 124 * constraints: 32-byte aligned 125 * @param src1 second input vector 126 * constraints: 32-byte aligned 127 * @param src2 third input vector 128 * constraints: 32-byte aligned 129 * @param len number of elements in the input 130 * constraints: multiple of 16 131 */ 132 void (*vector_fmul_add)(int *dst, const int *src0, const int *src1, 133 const int *src2, int len); 134 135 /** 136 * Calculate the scalar product of two vectors of integers. 137 * 138 * @param v1 first vector, 16-byte aligned 139 * @param v2 second vector, 16-byte aligned 140 * @param len length of vectors, multiple of 4 141 * 142 * @return sum of elementwise products 143 */ 144 int (*scalarproduct_fixed)(const int *v1, const int *v2, int len); 145 146 /** 147 * Calculate the sum and difference of two vectors of integers. 148 * 149 * @param v1 first input vector, sum output, 16-byte aligned 150 * @param v2 second input vector, difference output, 16-byte aligned 151 * @param len length of vectors, multiple of 4 152 */ 153 void (*butterflies_fixed)(int *av_restrict v1, int *av_restrict v2, int len); 154} AVFixedDSPContext; 155 156/** 157 * Allocate and initialize a fixed DSP context. 158 * note: should be freed with a av_free call when no longer needed. 159 * 160 * @param strict setting to non-zero avoids using functions which may not be IEEE-754 compliant 161 */ 162AVFixedDSPContext * avpriv_alloc_fixed_dsp(int strict); 163 164void ff_fixed_dsp_init_x86(AVFixedDSPContext *fdsp); 165 166/** 167 * Calculate the square root 168 * 169 * @param x input fixed point number 170 * 171 * @param bits format of fixed point number (32 - bits).bits 172 * 173 * note: input is normalized to (0, 1) fixed point value 174 */ 175 176static av_always_inline int fixed_sqrt(int x, int bits) 177{ 178 int retval, bit_mask, guess, square, i; 179 int64_t accu; 180 int shift1 = 30 - bits; 181 int shift2 = bits - 15; 182 183 if (shift1 > 0) retval = ff_sqrt(x << shift1); 184 else retval = ff_sqrt(x >> -shift1); 185 186 if (shift2 > 0) { 187 retval = retval << shift2; 188 bit_mask = (1 << (shift2 - 1)); 189 190 for (i=0; i<shift2; i++){ 191 guess = retval + bit_mask; 192 accu = (int64_t)guess * guess; 193 square = (int)((accu + bit_mask) >> bits); 194 if (x >= square) 195 retval += bit_mask; 196 bit_mask >>= 1; 197 } 198 199 } 200 else retval >>= (-shift2); 201 202 return retval; 203} 204 205#endif /* AVUTIL_FIXED_DSP_H */ 206