/third_party/python/Modules/_blake2/impl/ |
H A D | blake2s-round.h | 44 #define G1(row1,row2,row3,row4,buf) \ 48 row3 = _mm_add_epi32( row3, row4 ); \ 49 row2 = _mm_xor_si128( row2, row3 ); \ 52 #define G2(row1,row2,row3,row4,buf) \ 56 row3 = _mm_add_epi32( row3, row4 ); \ 57 row2 = _mm_xor_si128( row2, row3 ); \ 60 #define DIAGONALIZE(row1,row2,row3,row4) \ 62 row3 [all...] |
H A D | blake2s.c | 268 __m128i row1, row2, row3, row4; in blake2s_compress() local 306 row3 = _mm_setr_epi32( 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A ); in blake2s_compress() 318 STOREU( &S->h[0], _mm_xor_si128( ff0, _mm_xor_si128( row1, row3 ) ) ); in blake2s_compress()
|
/third_party/skia/third_party/externals/libjpeg-turbo/simd/arm/ |
H A D | jquanti-neon.c | 63 int16x8_t row3 = in jsimd_convsamp_neon() local 77 vst1q_s16(workspace + 3 * DCTSIZE, row3); in jsimd_convsamp_neon() 108 int16x8_t row3 = vld1q_s16(workspace + (i + 3) * DCTSIZE); in jsimd_quantize_neon() local 127 int16x8_t sign_row3 = vshrq_n_s16(row3, 15); in jsimd_quantize_neon() 132 uint16x8_t abs_row3 = vreinterpretq_u16_s16(vabsq_s16(row3)); in jsimd_quantize_neon() 160 row3 = vcombine_s16(vshrn_n_s32(row3_l, 16), vshrn_n_s32(row3_h, 16)); in jsimd_quantize_neon() 171 row3 = vreinterpretq_s16_u16(vshlq_u16(vreinterpretq_u16_s16(row3), in jsimd_quantize_neon() 181 row3 = veorq_s16(row3, sign_row in jsimd_quantize_neon() [all...] |
H A D | jidctred-neon.c | 84 int16x8_t row3 = vld1q_s16(coef_block + 3 * DCTSIZE); in jsimd_idct_2x2_neon() local 98 row3 = vmulq_s16(row3, quant_row3); in jsimd_idct_2x2_neon() 115 tmp0_l = vmlal_lane_s16(tmp0_l, vget_low_s16(row3), consts, 2); in jsimd_idct_2x2_neon() 119 tmp0_h = vmlal_lane_s16(tmp0_h, vget_high_s16(row3), consts, 2); in jsimd_idct_2x2_neon() 205 int16x8_t row3 = vld1q_s16(coef_block + 3 * DCTSIZE); in jsimd_idct_4x4_neon() local 217 bitmap = vorrq_s16(bitmap, row3); in jsimd_idct_4x4_neon() 244 row3 = dcval; in jsimd_idct_4x4_neon() 276 z3 = vmul_s16(vget_high_s16(row3), quant_row3); in jsimd_idct_4x4_neon() 292 row3 in jsimd_idct_4x4_neon() [all...] |
H A D | jidctint-neon.c | 100 int16x4_t row3, 119 int16x4_t row3, 204 int16x4_t row3 = vld1_s16(coef_block + 3 * DCTSIZE); in jsimd_idct_islow_neon() local 227 bitmap = vorr_s16(bitmap, row3); in jsimd_idct_islow_neon() 239 jsimd_idct_islow_pass1_sparse(row0, row1, row2, row3, quant_row0, in jsimd_idct_islow_neon() 244 jsimd_idct_islow_pass1_regular(row0, row1, row2, row3, row4, row5, in jsimd_idct_islow_neon() 257 row3 = vld1_s16(coef_block + 3 * DCTSIZE + 4); in jsimd_idct_islow_neon() 278 bitmap = vorr_s16(bitmap, row3); in jsimd_idct_islow_neon() 299 jsimd_idct_islow_pass1_sparse(row0, row1, row2, row3, quant_row0, in jsimd_idct_islow_neon() 304 jsimd_idct_islow_pass1_regular(row0, row1, row2, row3, row in jsimd_idct_islow_neon() 338 jsimd_idct_islow_pass1_regular(int16x4_t row0, int16x4_t row1, int16x4_t row2, int16x4_t row3, int16x4_t row4, int16x4_t row5, int16x4_t row6, int16x4_t row7, int16x4_t quant_row0, int16x4_t quant_row1, int16x4_t quant_row2, int16x4_t quant_row3, int16x4_t quant_row4, int16x4_t quant_row5, int16x4_t quant_row6, int16x4_t quant_row7, int16_t *workspace_1, int16_t *workspace_2) jsimd_idct_islow_pass1_regular() argument 473 jsimd_idct_islow_pass1_sparse(int16x4_t row0, int16x4_t row1, int16x4_t row2, int16x4_t row3, int16x4_t quant_row0, int16x4_t quant_row1, int16x4_t quant_row2, int16x4_t quant_row3, int16_t *workspace_1, int16_t *workspace_2) jsimd_idct_islow_pass1_sparse() argument [all...] |
H A D | jfdctfst-neon.c | 155 int16x8_t row3 = vreinterpretq_s16_s32(rows_37.val[0]); in jsimd_fdct_ifast_neon() local 169 tmp3 = vaddq_s16(row3, row4); in jsimd_fdct_ifast_neon() 170 tmp4 = vsubq_s16(row3, row4); in jsimd_fdct_ifast_neon() 202 row3 = vsubq_s16(z13, z2); in jsimd_fdct_ifast_neon() 209 vst1q_s16(data + 3 * DCTSIZE, row3); in jsimd_fdct_ifast_neon()
|
H A D | jidctfst-neon.c | 72 int16x8_t row3 = vld1q_s16(coef_block + 3 * DCTSIZE); in jsimd_idct_ifast_neon() local 85 bitmap = vorrq_s16(bitmap, row3); in jsimd_idct_ifast_neon() 104 row3 = dcval; in jsimd_idct_ifast_neon() 148 int16x4_t tmp5 = vmul_s16(vget_high_s16(row3), quant_row3); in jsimd_idct_ifast_neon() 183 row3 = vcombine_s16(dcval, vsub_s16(tmp3, tmp4)); in jsimd_idct_ifast_neon() 223 int16x4_t tmp5 = vmul_s16(vget_low_s16(row3), quant_row3); in jsimd_idct_ifast_neon() 258 row3 = vcombine_s16(vsub_s16(tmp3, tmp4), dcval); in jsimd_idct_ifast_neon() 293 int16x8_t tmp5 = vmulq_s16(row3, quant_row3); in jsimd_idct_ifast_neon() 328 row3 = vsubq_s16(tmp3, tmp4); in jsimd_idct_ifast_neon() 333 int16x8x2_t rows_23 = vtrnq_s16(row2, row3); in jsimd_idct_ifast_neon() [all...] |
H A D | jfdctint-neon.c | 254 int16x8_t row3 = vreinterpretq_s16_s32(rows_37.val[0]); in jsimd_fdct_islow_neon() local 268 tmp3 = vaddq_s16(row3, row4); in jsimd_fdct_islow_neon() 269 tmp4 = vsubq_s16(row3, row4); in jsimd_fdct_islow_neon() 358 row3 = vcombine_s16(vrshrn_n_s32(tmp6_l, DESCALE_P2), in jsimd_fdct_islow_neon() 371 vst1q_s16(data + 3 * DCTSIZE, row3); in jsimd_fdct_islow_neon()
|
H A D | jcphuff-neon.c | 201 int16x8_t row3 = vld1q_s16(values + 3 * DCTSIZE); in jsimd_encode_mcu_AC_first_prepare_neon() local 210 uint8x8_t row3_eq0 = vmovn_u16(vceqq_s16(row3, vdupq_n_s16(0))); in jsimd_encode_mcu_AC_first_prepare_neon()
|
/third_party/skia/third_party/externals/libjpeg-turbo/simd/arm/aarch32/ |
H A D | jchuff-neon.c | 79 int16x8_t row3 = vld1q_dup_s16(block + 27); in jsimd_huff_encode_one_block_neon() local 80 row3 = vld1q_lane_s16(block + 20, row3, 1); in jsimd_huff_encode_one_block_neon() 81 row3 = vld1q_lane_s16(block + 13, row3, 2); in jsimd_huff_encode_one_block_neon() 82 row3 = vld1q_lane_s16(block + 6, row3, 3); in jsimd_huff_encode_one_block_neon() 83 row3 = vld1q_lane_s16(block + 7, row3, 4); in jsimd_huff_encode_one_block_neon() 84 row3 in jsimd_huff_encode_one_block_neon() [all...] |
/third_party/skia/third_party/externals/swiftshader/src/Pipeline/ |
H A D | ShaderCore.cpp | 472 void transpose4x4(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3) in transpose4x4() argument 475 Int2 tmp1 = UnpackHigh(row2, row3); in transpose4x4() 477 Int2 tmp3 = UnpackLow(row2, row3); in transpose4x4() 482 row3 = UnpackHigh(tmp0, tmp1); in transpose4x4() 485 void transpose4x3(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3) in transpose4x3() argument 488 Int2 tmp1 = UnpackHigh(row2, row3); in transpose4x3() 490 Int2 tmp3 = UnpackLow(row2, row3); in transpose4x3() 497 void transpose4x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3) in transpose4x4() argument 500 Float4 tmp1 = UnpackLow(row2, row3); in transpose4x4() 502 Float4 tmp3 = UnpackHigh(row2, row3); in transpose4x4() 510 transpose4x3(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3) transpose4x3() argument 522 transpose4x2(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3) transpose4x2() argument 531 transpose4x1(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3) transpose4x1() argument 539 transpose2x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3) transpose2x4() argument 550 transpose4xN(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3, int N) transpose4xN() argument [all...] |
H A D | ShaderCore.hpp | 214 void transpose4x4(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3); 215 void transpose4x3(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3); 216 void transpose4x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); 217 void transpose4x3(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); 218 void transpose4x2(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); 219 void transpose4x1(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); 220 void transpose2x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); 221 void transpose4xN(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3, int N);
|
/third_party/ffmpeg/libavcodec/mips/ |
H A D | vp8_lpf_msa.c | 380 v16u8 row0, row1, row2, row3, row4, row5, row6, row7, row8; in ff_vp8_h_loop_filter16_msa() local 388 LD_UB8(temp_src, pitch, row0, row1, row2, row3, row4, row5, row6, row7); in ff_vp8_h_loop_filter16_msa() 392 TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7, in ff_vp8_h_loop_filter16_msa() 445 v16u8 row0, row1, row2, row3, row4, row5, row6, row7, row8; in ff_vp8_h_loop_filter8uv_msa() local 453 LD_UB8(src_u - 4, pitch, row0, row1, row2, row3, row4, row5, row6, row7); in ff_vp8_h_loop_filter8uv_msa() 456 TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7, in ff_vp8_h_loop_filter8uv_msa() 525 v16u8 row0, row1, row2, row3, row4, row5, row6, row7, row8; in ff_vp8_h_loop_filter_simple_msa() local 531 LD_UB8(temp_src, pitch, row0, row1, row2, row3, row4, row5, row6, row7); in ff_vp8_h_loop_filter_simple_msa() 535 TRANSPOSE16x4_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7, in ff_vp8_h_loop_filter_simple_msa() 595 v16u8 row0, row1, row2, row3, row in ff_vp8_h_loop_filter8uv_inner_msa() local 649 v16u8 row0, row1, row2, row3, row4, row5, row6, row7; ff_vp8_h_loop_filter16_inner_msa() local [all...] |
H A D | h264dsp_msa.c | 829 v16u8 row0, row1, row2, row3, row4, row5, row6, row7; in avc_loopfilter_luma_intra_edge_ver_msa() local 832 LD_UB8(src, img_width, row0, row1, row2, row3, row4, row5, row6, row7); in avc_loopfilter_luma_intra_edge_ver_msa() 836 TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, in avc_loopfilter_luma_intra_edge_ver_msa() 1240 v16u8 row0, row1, row2, row3, row4, row5, row6, row7; in avc_loopfilter_cb_or_cr_intra_edge_ver_msa() local 1243 row0, row1, row2, row3, row4, row5, row6, row7); in avc_loopfilter_cb_or_cr_intra_edge_ver_msa() 1245 TRANSPOSE8x4_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7, in avc_loopfilter_cb_or_cr_intra_edge_ver_msa() 1926 v16u8 row0, row1, row2, row3, row4, row5, row6, row7; in avc_loopfilter_cb_or_cr_inter_edge_ver_msa() local 1952 row0, row1, row2, row3, row4, row5, row6, row7); in avc_loopfilter_cb_or_cr_inter_edge_ver_msa() 1954 TRANSPOSE8x4_UB_UB(row0, row1, row2, row3, in avc_loopfilter_cb_or_cr_inter_edge_ver_msa()
|
/third_party/skia/third_party/externals/swiftshader/src/Shader/ |
H A D | ShaderCore.hpp | 87 void transpose4x4(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3); 88 void transpose4x3(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3); 89 void transpose4x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); 90 void transpose4x3(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); 91 void transpose4x2(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); 92 void transpose4x1(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); 93 void transpose2x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); 94 void transpose4xN(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3, int N);
|
H A D | ShaderCore.cpp | 488 void transpose4x4(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3) in transpose4x4() argument 491 Int2 tmp1 = UnpackHigh(row2, row3); in transpose4x4() 493 Int2 tmp3 = UnpackLow(row2, row3); in transpose4x4() 498 row3 = UnpackHigh(tmp0, tmp1); in transpose4x4() 501 void transpose4x3(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3) in transpose4x3() argument 504 Int2 tmp1 = UnpackHigh(row2, row3); in transpose4x3() 506 Int2 tmp3 = UnpackLow(row2, row3); in transpose4x3() 513 void transpose4x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3) in transpose4x4() argument 516 Float4 tmp1 = UnpackLow(row2, row3); in transpose4x4() 518 Float4 tmp3 = UnpackHigh(row2, row3); in transpose4x4() 526 transpose4x3(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3) transpose4x3() argument 538 transpose4x2(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3) transpose4x2() argument 547 transpose4x1(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3) transpose4x1() argument 555 transpose2x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3) transpose2x4() argument 566 transpose4xN(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3, int N) transpose4xN() argument [all...] |
/third_party/skia/gm/ |
H A D | hardstop_gradients.cpp | 117 SkScalar row3[] = {0.00f, 0.25f, 1.00f}; variable 127 row3,
|
/third_party/ffmpeg/libavcodec/loongarch/ |
H A D | h264dsp_lasx.c | 96 __m256i row0, row1, row2, row3, row4, row5, row6, row7; in ff_h264_h_lpf_luma_8_lasx() local 100 src, img_width_3x, row0, row1, row2, row3); in ff_h264_h_lpf_luma_8_lasx() 114 LASX_TRANSPOSE16x8_B(row0, row1, row2, row3, row4, row5, row6, in ff_h264_h_lpf_luma_8_lasx() 201 __m256i row0, row1, row2, row3, row4, row5, row6, row7; in ff_h264_h_lpf_luma_8_lasx() local 211 row1, row3); in ff_h264_h_lpf_luma_8_lasx() 212 DUP2_ARG2(__lasx_xvilvl_b, row2, row0, row3, row1, row4, row6); in ff_h264_h_lpf_luma_8_lasx() 213 DUP2_ARG2(__lasx_xvilvh_b, row2, row0, row3, row1, row5, row7); in ff_h264_h_lpf_luma_8_lasx() 393 __m256i row0, row1, row2, row3, row4, row5, row6, row7; in ff_h264_h_lpf_chroma_8_lasx() local 396 src, img_width_3x, row0, row1, row2, row3); in ff_h264_h_lpf_chroma_8_lasx() 402 DUP4_ARG2(__lasx_xvilvl_b, row2, row0, row3, row in ff_h264_h_lpf_chroma_8_lasx() 597 __m256i row0, row1, row2, row3, row4, row5, row6, row7; ff_h264_h_lpf_luma_intra_8_lasx() local 717 __m256i row0, row1, row2, row3, row4, row5, row6, row7; ff_h264_h_lpf_luma_intra_8_lasx() local 892 __m256i row0, row1, row2, row3, row4, row5, row6, row7; ff_h264_h_lpf_chroma_intra_8_lasx() local [all...] |
H A D | vp8_lpf_lsx.c | 335 __m128i row0, row1, row2, row3, row4, row5, row6, row7, row8; in ff_vp8_h_loop_filter16_lsx() local 349 temp_src + stride3, 0, row0, row1, row2, row3); in ff_vp8_h_loop_filter16_lsx() 360 LSX_TRANSPOSE16x8_B(row0, row1, row2, row3, row4, row5, row6, row7, row8, row9, row10, in ff_vp8_h_loop_filter16_lsx() 422 __m128i row0, row1, row2, row3, row4, row5, row6, row7, row8; in ff_vp8_h_loop_filter8uv_lsx() local 436 temp_src + stride3, 0, row0, row1, row2, row3); in ff_vp8_h_loop_filter8uv_lsx() 448 LSX_TRANSPOSE16x8_B(row0, row1, row2, row3, row4, row5, row6, row7, in ff_vp8_h_loop_filter8uv_lsx()
|
/third_party/skia/third_party/externals/libwebp/src/dsp/ |
H A D | dec_msa.c | 364 v16u8 row0, row1, row2, row3, row4, row5, row6, row7, row8; in HFilter16() local 371 LD_UB8(ptmp, stride, row0, row1, row2, row3, row4, row5, row6, row7); in HFilter16() 374 TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7, in HFilter16() 446 v16u8 row0, row1, row2, row3, row4, row5, row6, row7; in HFilterVertEdge16i() local 453 LD_UB8(src - 4, stride, row0, row1, row2, row3, row4, row5, row6, row7); in HFilterVertEdge16i() 456 TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7, in HFilterVertEdge16i() 530 v16u8 row0, row1, row2, row3, row4, row5, row6, row7, row8; in HFilter8() local 537 LD_UB8(ptmp_src_u, stride, row0, row1, row2, row3, row4, row5, row6, row7); in HFilter8() 540 TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7, in HFilter8() 595 v16u8 row0, row1, row2, row3, row in HFilter8i() local 636 v16u8 p1, p0, q1, q0, mask, row0, row1, row2, row3, row4, row5, row6, row7; SimpleHFilter16() local [all...] |
H A D | enc_sse2.c | 406 __m128i row0, row1, row2, row3; in FTransformWHT_SSE2() local 411 FTransformWHTRow_SSE2(in + 3 * 64, &row3); in FTransformWHT_SSE2() 416 const __m128i a1 = _mm_add_epi32(row1, row3); in FTransformWHT_SSE2() 417 const __m128i a2 = _mm_sub_epi32(row1, row3); in FTransformWHT_SSE2()
|
/third_party/skia/third_party/externals/d3d12allocator/src/ |
H A D | Common.h | 186 const vec4& row3,
in mat4() 190 _31(row3.x), _32(row3.y), _33(row3.z), _34(row3.w),
in mat4()
|
/third_party/skia/third_party/externals/libjpeg-turbo/simd/arm/aarch64/ |
H A D | jchuff-neon.c | 127 int16x8_t row3 = in jsimd_huff_encode_one_block_neon() local 164 int16x8_t abs_row3 = vabsq_s16(row3); in jsimd_huff_encode_one_block_neon() 178 vreinterpretq_u16_s16(veorq_s16(abs_row3, vshrq_n_s16(row3, 15))); in jsimd_huff_encode_one_block_neon()
|
/third_party/skia/third_party/externals/swiftshader/third_party/llvm-10.0/llvm/lib/MC/ |
H A D | MCWin64EH.cpp | 611 uint32_t row3 = EpilogOffset; in ARM64EmitUnwindInfo() local 612 row3 |= (EpilogIndex & 0x3FF) << 22; in ARM64EmitUnwindInfo() 613 streamer.EmitIntValue(row3, 4); in ARM64EmitUnwindInfo()
|
/third_party/astc-encoder/Source/ |
H A D | stb_image.h | 2531 __m128i row0, row1, row2, row3, row4, row5, row6, row7; in stbi__idct_simd() local 2598 dct_rot(y0o,y2o, row7,row3, rot2_0,rot2_1); \ in stbi__idct_simd() 2601 __m128i sum35 = _mm_add_epi16(row3, row5); \ in stbi__idct_simd() 2610 dct_bfly32o(row3,row4, x3,x4,bias,shift); \ in stbi__idct_simd() 2630 row3 = _mm_load_si128((const __m128i *) (data + 3*8)); in stbi__idct_simd() 2644 dct_interleave16(row3, row7); in stbi__idct_simd() 2648 dct_interleave16(row1, row3); in stbi__idct_simd() 2654 dct_interleave16(row2, row3); in stbi__idct_simd() 2665 __m128i p1 = _mm_packus_epi16(row2, row3); in stbi__idct_simd() 2711 int16x8_t row0, row1, row2, row3, row in stbi__idct_simd() local [all...] |