1/* 2 * Copyright (c) 2015 Manojkumar Bhosale (Manojkumar.Bhosale@imgtec.com) 3 * 4 * This file is part of FFmpeg. 5 * 6 * FFmpeg is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * FFmpeg is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with FFmpeg; if not, write to the Free Software 18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 19 */ 20 21#ifndef AVCODEC_MIPS_HEVCDSP_MIPS_H 22#define AVCODEC_MIPS_HEVCDSP_MIPS_H 23 24#include "libavcodec/hevcdsp.h" 25 26#define MC(PEL, DIR, WIDTH) \ 27void ff_hevc_put_hevc_##PEL##_##DIR##WIDTH##_8_msa(int16_t *dst, \ 28 uint8_t *src, \ 29 ptrdiff_t src_stride, \ 30 int height, \ 31 intptr_t mx, \ 32 intptr_t my, \ 33 int width) 34 35MC(pel, pixels, 4); 36MC(pel, pixels, 6); 37MC(pel, pixels, 8); 38MC(pel, pixels, 12); 39MC(pel, pixels, 16); 40MC(pel, pixels, 24); 41MC(pel, pixels, 32); 42MC(pel, pixels, 48); 43MC(pel, pixels, 64); 44 45MC(qpel, h, 4); 46MC(qpel, h, 8); 47MC(qpel, h, 12); 48MC(qpel, h, 16); 49MC(qpel, h, 24); 50MC(qpel, h, 32); 51MC(qpel, h, 48); 52MC(qpel, h, 64); 53 54MC(qpel, v, 4); 55MC(qpel, v, 8); 56MC(qpel, v, 12); 57MC(qpel, v, 16); 58MC(qpel, v, 24); 59MC(qpel, v, 32); 60MC(qpel, v, 48); 61MC(qpel, v, 64); 62 63MC(qpel, hv, 4); 64MC(qpel, hv, 8); 65MC(qpel, hv, 12); 66MC(qpel, hv, 16); 67MC(qpel, hv, 24); 68MC(qpel, hv, 32); 69MC(qpel, hv, 48); 70MC(qpel, hv, 64); 71 72MC(epel, h, 4); 73MC(epel, h, 6); 74MC(epel, h, 8); 75MC(epel, h, 12); 76MC(epel, h, 16); 77MC(epel, h, 24); 78MC(epel, h, 32); 79MC(epel, h, 48); 80MC(epel, h, 64); 81 82MC(epel, v, 4); 83MC(epel, v, 6); 84MC(epel, v, 8); 85MC(epel, v, 12); 86MC(epel, v, 16); 87MC(epel, v, 24); 88MC(epel, v, 32); 89MC(epel, v, 48); 90MC(epel, v, 64); 91 92MC(epel, hv, 4); 93MC(epel, hv, 6); 94MC(epel, hv, 8); 95MC(epel, hv, 12); 96MC(epel, hv, 16); 97MC(epel, hv, 24); 98MC(epel, hv, 32); 99MC(epel, hv, 48); 100MC(epel, hv, 64); 101 102#undef MC 103 104#define UNI_MC(PEL, DIR, WIDTH) \ 105void ff_hevc_put_hevc_uni_##PEL##_##DIR##WIDTH##_8_msa(uint8_t *dst, \ 106 ptrdiff_t dst_stride, \ 107 uint8_t *src, \ 108 ptrdiff_t src_stride, \ 109 int height, \ 110 intptr_t mx, \ 111 intptr_t my, \ 112 int width) 113 114UNI_MC(pel, pixels, 4); 115UNI_MC(pel, pixels, 6); 116UNI_MC(pel, pixels, 8); 117UNI_MC(pel, pixels, 12); 118UNI_MC(pel, pixels, 16); 119UNI_MC(pel, pixels, 24); 120UNI_MC(pel, pixels, 32); 121UNI_MC(pel, pixels, 48); 122UNI_MC(pel, pixels, 64); 123 124UNI_MC(qpel, h, 4); 125UNI_MC(qpel, h, 8); 126UNI_MC(qpel, h, 12); 127UNI_MC(qpel, h, 16); 128UNI_MC(qpel, h, 24); 129UNI_MC(qpel, h, 32); 130UNI_MC(qpel, h, 48); 131UNI_MC(qpel, h, 64); 132 133UNI_MC(qpel, v, 4); 134UNI_MC(qpel, v, 8); 135UNI_MC(qpel, v, 12); 136UNI_MC(qpel, v, 16); 137UNI_MC(qpel, v, 24); 138UNI_MC(qpel, v, 32); 139UNI_MC(qpel, v, 48); 140UNI_MC(qpel, v, 64); 141 142UNI_MC(qpel, hv, 4); 143UNI_MC(qpel, hv, 8); 144UNI_MC(qpel, hv, 12); 145UNI_MC(qpel, hv, 16); 146UNI_MC(qpel, hv, 24); 147UNI_MC(qpel, hv, 32); 148UNI_MC(qpel, hv, 48); 149UNI_MC(qpel, hv, 64); 150 151UNI_MC(epel, h, 4); 152UNI_MC(epel, h, 6); 153UNI_MC(epel, h, 8); 154UNI_MC(epel, h, 12); 155UNI_MC(epel, h, 16); 156UNI_MC(epel, h, 24); 157UNI_MC(epel, h, 32); 158UNI_MC(epel, h, 48); 159UNI_MC(epel, h, 64); 160 161UNI_MC(epel, v, 4); 162UNI_MC(epel, v, 6); 163UNI_MC(epel, v, 8); 164UNI_MC(epel, v, 12); 165UNI_MC(epel, v, 16); 166UNI_MC(epel, v, 24); 167UNI_MC(epel, v, 32); 168UNI_MC(epel, v, 48); 169UNI_MC(epel, v, 64); 170 171UNI_MC(epel, hv, 4); 172UNI_MC(epel, hv, 6); 173UNI_MC(epel, hv, 8); 174UNI_MC(epel, hv, 12); 175UNI_MC(epel, hv, 16); 176UNI_MC(epel, hv, 24); 177UNI_MC(epel, hv, 32); 178UNI_MC(epel, hv, 48); 179UNI_MC(epel, hv, 64); 180 181#undef UNI_MC 182 183#define UNI_W_MC(PEL, DIR, WIDTH) \ 184void ff_hevc_put_hevc_uni_w_##PEL##_##DIR##WIDTH##_8_msa(uint8_t *dst, \ 185 ptrdiff_t \ 186 dst_stride, \ 187 uint8_t *src, \ 188 ptrdiff_t \ 189 src_stride, \ 190 int height, \ 191 int denom, \ 192 int weight, \ 193 int offset, \ 194 intptr_t mx, \ 195 intptr_t my, \ 196 int width) 197 198UNI_W_MC(pel, pixels, 4); 199UNI_W_MC(pel, pixels, 6); 200UNI_W_MC(pel, pixels, 8); 201UNI_W_MC(pel, pixels, 12); 202UNI_W_MC(pel, pixels, 16); 203UNI_W_MC(pel, pixels, 24); 204UNI_W_MC(pel, pixels, 32); 205UNI_W_MC(pel, pixels, 48); 206UNI_W_MC(pel, pixels, 64); 207 208UNI_W_MC(qpel, h, 4); 209UNI_W_MC(qpel, h, 8); 210UNI_W_MC(qpel, h, 12); 211UNI_W_MC(qpel, h, 16); 212UNI_W_MC(qpel, h, 24); 213UNI_W_MC(qpel, h, 32); 214UNI_W_MC(qpel, h, 48); 215UNI_W_MC(qpel, h, 64); 216 217UNI_W_MC(qpel, v, 4); 218UNI_W_MC(qpel, v, 8); 219UNI_W_MC(qpel, v, 12); 220UNI_W_MC(qpel, v, 16); 221UNI_W_MC(qpel, v, 24); 222UNI_W_MC(qpel, v, 32); 223UNI_W_MC(qpel, v, 48); 224UNI_W_MC(qpel, v, 64); 225 226UNI_W_MC(qpel, hv, 4); 227UNI_W_MC(qpel, hv, 8); 228UNI_W_MC(qpel, hv, 12); 229UNI_W_MC(qpel, hv, 16); 230UNI_W_MC(qpel, hv, 24); 231UNI_W_MC(qpel, hv, 32); 232UNI_W_MC(qpel, hv, 48); 233UNI_W_MC(qpel, hv, 64); 234 235UNI_W_MC(epel, h, 4); 236UNI_W_MC(epel, h, 6); 237UNI_W_MC(epel, h, 8); 238UNI_W_MC(epel, h, 12); 239UNI_W_MC(epel, h, 16); 240UNI_W_MC(epel, h, 24); 241UNI_W_MC(epel, h, 32); 242UNI_W_MC(epel, h, 48); 243UNI_W_MC(epel, h, 64); 244 245UNI_W_MC(epel, v, 4); 246UNI_W_MC(epel, v, 6); 247UNI_W_MC(epel, v, 8); 248UNI_W_MC(epel, v, 12); 249UNI_W_MC(epel, v, 16); 250UNI_W_MC(epel, v, 24); 251UNI_W_MC(epel, v, 32); 252UNI_W_MC(epel, v, 48); 253UNI_W_MC(epel, v, 64); 254 255UNI_W_MC(epel, hv, 4); 256UNI_W_MC(epel, hv, 6); 257UNI_W_MC(epel, hv, 8); 258UNI_W_MC(epel, hv, 12); 259UNI_W_MC(epel, hv, 16); 260UNI_W_MC(epel, hv, 24); 261UNI_W_MC(epel, hv, 32); 262UNI_W_MC(epel, hv, 48); 263UNI_W_MC(epel, hv, 64); 264 265#undef UNI_W_MC 266 267#define BI_MC(PEL, DIR, WIDTH) \ 268void ff_hevc_put_hevc_bi_##PEL##_##DIR##WIDTH##_8_msa(uint8_t *dst, \ 269 ptrdiff_t dst_stride, \ 270 uint8_t *src, \ 271 ptrdiff_t src_stride, \ 272 int16_t *src_16bit, \ 273 int height, \ 274 intptr_t mx, \ 275 intptr_t my, \ 276 int width) 277 278BI_MC(pel, pixels, 4); 279BI_MC(pel, pixels, 6); 280BI_MC(pel, pixels, 8); 281BI_MC(pel, pixels, 12); 282BI_MC(pel, pixels, 16); 283BI_MC(pel, pixels, 24); 284BI_MC(pel, pixels, 32); 285BI_MC(pel, pixels, 48); 286BI_MC(pel, pixels, 64); 287 288BI_MC(qpel, h, 4); 289BI_MC(qpel, h, 8); 290BI_MC(qpel, h, 12); 291BI_MC(qpel, h, 16); 292BI_MC(qpel, h, 24); 293BI_MC(qpel, h, 32); 294BI_MC(qpel, h, 48); 295BI_MC(qpel, h, 64); 296 297BI_MC(qpel, v, 4); 298BI_MC(qpel, v, 8); 299BI_MC(qpel, v, 12); 300BI_MC(qpel, v, 16); 301BI_MC(qpel, v, 24); 302BI_MC(qpel, v, 32); 303BI_MC(qpel, v, 48); 304BI_MC(qpel, v, 64); 305 306BI_MC(qpel, hv, 4); 307BI_MC(qpel, hv, 8); 308BI_MC(qpel, hv, 12); 309BI_MC(qpel, hv, 16); 310BI_MC(qpel, hv, 24); 311BI_MC(qpel, hv, 32); 312BI_MC(qpel, hv, 48); 313BI_MC(qpel, hv, 64); 314 315BI_MC(epel, h, 4); 316BI_MC(epel, h, 6); 317BI_MC(epel, h, 8); 318BI_MC(epel, h, 12); 319BI_MC(epel, h, 16); 320BI_MC(epel, h, 24); 321BI_MC(epel, h, 32); 322BI_MC(epel, h, 48); 323BI_MC(epel, h, 64); 324 325BI_MC(epel, v, 4); 326BI_MC(epel, v, 6); 327BI_MC(epel, v, 8); 328BI_MC(epel, v, 12); 329BI_MC(epel, v, 16); 330BI_MC(epel, v, 24); 331BI_MC(epel, v, 32); 332BI_MC(epel, v, 48); 333BI_MC(epel, v, 64); 334 335BI_MC(epel, hv, 4); 336BI_MC(epel, hv, 6); 337BI_MC(epel, hv, 8); 338BI_MC(epel, hv, 12); 339BI_MC(epel, hv, 16); 340BI_MC(epel, hv, 24); 341BI_MC(epel, hv, 32); 342BI_MC(epel, hv, 48); 343BI_MC(epel, hv, 64); 344 345#undef BI_MC 346 347#define BI_W_MC(PEL, DIR, WIDTH) \ 348void ff_hevc_put_hevc_bi_w_##PEL##_##DIR##WIDTH##_8_msa(uint8_t *dst, \ 349 ptrdiff_t \ 350 dst_stride, \ 351 uint8_t *src, \ 352 ptrdiff_t \ 353 src_stride, \ 354 int16_t *src_16bit, \ 355 int height, \ 356 int denom, \ 357 int weight0, \ 358 int weight1, \ 359 int offset0, \ 360 int offset1, \ 361 intptr_t mx, \ 362 intptr_t my, \ 363 int width) 364 365BI_W_MC(pel, pixels, 4); 366BI_W_MC(pel, pixels, 6); 367BI_W_MC(pel, pixels, 8); 368BI_W_MC(pel, pixels, 12); 369BI_W_MC(pel, pixels, 16); 370BI_W_MC(pel, pixels, 24); 371BI_W_MC(pel, pixels, 32); 372BI_W_MC(pel, pixels, 48); 373BI_W_MC(pel, pixels, 64); 374 375BI_W_MC(qpel, h, 4); 376BI_W_MC(qpel, h, 8); 377BI_W_MC(qpel, h, 12); 378BI_W_MC(qpel, h, 16); 379BI_W_MC(qpel, h, 24); 380BI_W_MC(qpel, h, 32); 381BI_W_MC(qpel, h, 48); 382BI_W_MC(qpel, h, 64); 383 384BI_W_MC(qpel, v, 4); 385BI_W_MC(qpel, v, 8); 386BI_W_MC(qpel, v, 12); 387BI_W_MC(qpel, v, 16); 388BI_W_MC(qpel, v, 24); 389BI_W_MC(qpel, v, 32); 390BI_W_MC(qpel, v, 48); 391BI_W_MC(qpel, v, 64); 392 393BI_W_MC(qpel, hv, 4); 394BI_W_MC(qpel, hv, 8); 395BI_W_MC(qpel, hv, 12); 396BI_W_MC(qpel, hv, 16); 397BI_W_MC(qpel, hv, 24); 398BI_W_MC(qpel, hv, 32); 399BI_W_MC(qpel, hv, 48); 400BI_W_MC(qpel, hv, 64); 401 402BI_W_MC(epel, h, 4); 403BI_W_MC(epel, h, 6); 404BI_W_MC(epel, h, 8); 405BI_W_MC(epel, h, 12); 406BI_W_MC(epel, h, 16); 407BI_W_MC(epel, h, 24); 408BI_W_MC(epel, h, 32); 409BI_W_MC(epel, h, 48); 410BI_W_MC(epel, h, 64); 411 412BI_W_MC(epel, v, 4); 413BI_W_MC(epel, v, 6); 414BI_W_MC(epel, v, 8); 415BI_W_MC(epel, v, 12); 416BI_W_MC(epel, v, 16); 417BI_W_MC(epel, v, 24); 418BI_W_MC(epel, v, 32); 419BI_W_MC(epel, v, 48); 420BI_W_MC(epel, v, 64); 421 422BI_W_MC(epel, hv, 4); 423BI_W_MC(epel, hv, 6); 424BI_W_MC(epel, hv, 8); 425BI_W_MC(epel, hv, 12); 426BI_W_MC(epel, hv, 16); 427BI_W_MC(epel, hv, 24); 428BI_W_MC(epel, hv, 32); 429BI_W_MC(epel, hv, 48); 430BI_W_MC(epel, hv, 64); 431 432#undef BI_W_MC 433 434void ff_hevc_loop_filter_luma_h_8_msa(uint8_t *src, 435 ptrdiff_t src_stride, 436 int32_t beta, int32_t *tc, 437 uint8_t *no_p, uint8_t *no_q); 438 439void ff_hevc_loop_filter_luma_v_8_msa(uint8_t *src, 440 ptrdiff_t src_stride, 441 int32_t beta, int32_t *tc, 442 uint8_t *no_p, uint8_t *no_q); 443 444void ff_hevc_loop_filter_chroma_h_8_msa(uint8_t *src, 445 ptrdiff_t src_stride, 446 int32_t *tc, uint8_t *no_p, 447 uint8_t *no_q); 448 449void ff_hevc_loop_filter_chroma_v_8_msa(uint8_t *src, 450 ptrdiff_t src_stride, 451 int32_t *tc, uint8_t *no_p, 452 uint8_t *no_q); 453 454void ff_hevc_sao_band_filter_0_8_msa(uint8_t *dst, uint8_t *src, 455 ptrdiff_t stride_dst, ptrdiff_t stride_src, 456 int16_t *sao_offset_val, int sao_left_class, 457 int width, int height); 458 459void ff_hevc_sao_edge_filter_8_msa(uint8_t *dst, uint8_t *src, 460 ptrdiff_t stride_dst, 461 int16_t *sao_offset_val, 462 int eo, int width, int height); 463 464void ff_hevc_idct_4x4_msa(int16_t *coeffs, int col_limit); 465void ff_hevc_idct_8x8_msa(int16_t *coeffs, int col_limit); 466void ff_hevc_idct_16x16_msa(int16_t *coeffs, int col_limit); 467void ff_hevc_idct_32x32_msa(int16_t *coeffs, int col_limit); 468void ff_hevc_idct_dc_4x4_msa(int16_t *coeffs); 469void ff_hevc_idct_dc_8x8_msa(int16_t *coeffs); 470void ff_hevc_idct_dc_16x16_msa(int16_t *coeffs); 471void ff_hevc_idct_dc_32x32_msa(int16_t *coeffs); 472void ff_hevc_addblk_4x4_msa(uint8_t *dst, int16_t *pi16Coeffs, 473 ptrdiff_t stride); 474void ff_hevc_addblk_8x8_msa(uint8_t *dst, int16_t *pi16Coeffs, 475 ptrdiff_t stride); 476void ff_hevc_addblk_16x16_msa(uint8_t *dst, int16_t *pi16Coeffs, 477 ptrdiff_t stride); 478void ff_hevc_addblk_32x32_msa(uint8_t *dst, int16_t *pi16Coeffs, 479 ptrdiff_t stride); 480void ff_hevc_idct_luma_4x4_msa(int16_t *pi16Coeffs); 481 482/* Loongson optimization */ 483#define L_MC(PEL, DIR, WIDTH, TYPE) \ 484void ff_hevc_put_hevc_##PEL##_##DIR##WIDTH##_8_##TYPE(int16_t *dst, \ 485 uint8_t *src, \ 486 ptrdiff_t src_stride, \ 487 int height, \ 488 intptr_t mx, \ 489 intptr_t my, \ 490 int width) 491L_MC(qpel, h, 4, mmi); 492L_MC(qpel, h, 8, mmi); 493L_MC(qpel, h, 12, mmi); 494L_MC(qpel, h, 16, mmi); 495L_MC(qpel, h, 24, mmi); 496L_MC(qpel, h, 32, mmi); 497L_MC(qpel, h, 48, mmi); 498L_MC(qpel, h, 64, mmi); 499 500L_MC(qpel, hv, 4, mmi); 501L_MC(qpel, hv, 8, mmi); 502L_MC(qpel, hv, 12, mmi); 503L_MC(qpel, hv, 16, mmi); 504L_MC(qpel, hv, 24, mmi); 505L_MC(qpel, hv, 32, mmi); 506L_MC(qpel, hv, 48, mmi); 507L_MC(qpel, hv, 64, mmi); 508 509#define L_BI_MC(PEL, DIR, WIDTH, TYPE) \ 510void ff_hevc_put_hevc_##PEL##_bi_##DIR##WIDTH##_8_##TYPE(uint8_t *dst, \ 511 ptrdiff_t dst_stride, \ 512 uint8_t *src, \ 513 ptrdiff_t src_stride, \ 514 int16_t *src2, \ 515 int height, \ 516 intptr_t mx, \ 517 intptr_t my, \ 518 int width) 519 520L_BI_MC(pel, pixels, 8, mmi); 521L_BI_MC(pel, pixels, 16, mmi); 522L_BI_MC(pel, pixels, 24, mmi); 523L_BI_MC(pel, pixels, 32, mmi); 524L_BI_MC(pel, pixels, 48, mmi); 525L_BI_MC(pel, pixels, 64, mmi); 526 527L_BI_MC(qpel, hv, 4, mmi); 528L_BI_MC(qpel, hv, 8, mmi); 529L_BI_MC(qpel, hv, 12, mmi); 530L_BI_MC(qpel, hv, 16, mmi); 531L_BI_MC(qpel, hv, 24, mmi); 532L_BI_MC(qpel, hv, 32, mmi); 533L_BI_MC(qpel, hv, 48, mmi); 534L_BI_MC(qpel, hv, 64, mmi); 535 536L_BI_MC(qpel, h, 4, mmi); 537L_BI_MC(qpel, h, 8, mmi); 538L_BI_MC(qpel, h, 12, mmi); 539L_BI_MC(qpel, h, 16, mmi); 540L_BI_MC(qpel, h, 24, mmi); 541L_BI_MC(qpel, h, 32, mmi); 542L_BI_MC(qpel, h, 48, mmi); 543L_BI_MC(qpel, h, 64, mmi); 544 545L_BI_MC(epel, hv, 4, mmi); 546L_BI_MC(epel, hv, 8, mmi); 547L_BI_MC(epel, hv, 12, mmi); 548L_BI_MC(epel, hv, 16, mmi); 549L_BI_MC(epel, hv, 24, mmi); 550L_BI_MC(epel, hv, 32, mmi); 551#undef L_BI_MC 552 553#define L_UNI_MC(PEL, DIR, WIDTH, TYPE) \ 554void ff_hevc_put_hevc_##PEL##_uni_##DIR##WIDTH##_8_##TYPE(uint8_t *dst, \ 555 ptrdiff_t dst_stride, \ 556 uint8_t *src, \ 557 ptrdiff_t src_stride, \ 558 int height, \ 559 intptr_t mx, \ 560 intptr_t my, \ 561 int width) 562 563L_UNI_MC(qpel, hv, 4, mmi); 564L_UNI_MC(qpel, hv, 8, mmi); 565L_UNI_MC(qpel, hv, 12, mmi); 566L_UNI_MC(qpel, hv, 16, mmi); 567L_UNI_MC(qpel, hv, 24, mmi); 568L_UNI_MC(qpel, hv, 32, mmi); 569L_UNI_MC(qpel, hv, 48, mmi); 570L_UNI_MC(qpel, hv, 64, mmi); 571#undef L_UNI_MC 572 573#endif // #ifndef AVCODEC_MIPS_HEVCDSP_MIPS_H 574