11cb0ef41Sopenharmony_ci/* slide_hash_simd.h 21cb0ef41Sopenharmony_ci * 31cb0ef41Sopenharmony_ci * Copyright 2022 The Chromium Authors 41cb0ef41Sopenharmony_ci * Use of this source code is governed by a BSD-style license that can be 51cb0ef41Sopenharmony_ci * found in the Chromium source repository LICENSE file. 61cb0ef41Sopenharmony_ci */ 71cb0ef41Sopenharmony_ci 81cb0ef41Sopenharmony_ci#ifndef SLIDE_HASH_SIMD_H 91cb0ef41Sopenharmony_ci#define SLIDE_HASH_SIMD_H 101cb0ef41Sopenharmony_ci 111cb0ef41Sopenharmony_ci#include "deflate.h" 121cb0ef41Sopenharmony_ci 131cb0ef41Sopenharmony_ci#ifndef INLINE 141cb0ef41Sopenharmony_ci#if defined(_MSC_VER) && !defined(__clang__) 151cb0ef41Sopenharmony_ci#define INLINE __inline 161cb0ef41Sopenharmony_ci#else 171cb0ef41Sopenharmony_ci#define INLINE inline 181cb0ef41Sopenharmony_ci#endif 191cb0ef41Sopenharmony_ci#endif 201cb0ef41Sopenharmony_ci 211cb0ef41Sopenharmony_ci#if defined(CPU_NO_SIMD) 221cb0ef41Sopenharmony_ci 231cb0ef41Sopenharmony_ci#error SIMD has been disabled for your build target 241cb0ef41Sopenharmony_ci 251cb0ef41Sopenharmony_ci#elif defined(DEFLATE_SLIDE_HASH_SSE2) 261cb0ef41Sopenharmony_ci 271cb0ef41Sopenharmony_ci#include <emmintrin.h> /* SSE2 */ 281cb0ef41Sopenharmony_ci 291cb0ef41Sopenharmony_ci#define Z_SLIDE_INIT_SIMD(wsize) _mm_set1_epi16((ush)(wsize)) 301cb0ef41Sopenharmony_ci 311cb0ef41Sopenharmony_ci#define Z_SLIDE_HASH_SIMD(table, size, vector_wsize) \ 321cb0ef41Sopenharmony_ci for (const Posf* const end = table + size; table != end;) { \ 331cb0ef41Sopenharmony_ci __m128i vO = _mm_loadu_si128((__m128i *)(table + 0)); \ 341cb0ef41Sopenharmony_ci vO = _mm_subs_epu16(vO, vector_wsize); \ 351cb0ef41Sopenharmony_ci _mm_storeu_si128((__m128i *)(table + 0), vO); \ 361cb0ef41Sopenharmony_ci table += 8; \ 371cb0ef41Sopenharmony_ci } 381cb0ef41Sopenharmony_ci 391cb0ef41Sopenharmony_citypedef __m128i z_vec128i_u16x8_t; 401cb0ef41Sopenharmony_ci 411cb0ef41Sopenharmony_ci#elif defined(DEFLATE_SLIDE_HASH_NEON) 421cb0ef41Sopenharmony_ci 431cb0ef41Sopenharmony_ci#include <arm_neon.h> /* NEON */ 441cb0ef41Sopenharmony_ci 451cb0ef41Sopenharmony_ci#define Z_SLIDE_INIT_SIMD(wsize) vdupq_n_u16((ush)(wsize)) 461cb0ef41Sopenharmony_ci 471cb0ef41Sopenharmony_ci#define Z_SLIDE_HASH_SIMD(table, size, vector_wsize) \ 481cb0ef41Sopenharmony_ci for (const Posf* const end = table + size; table != end;) { \ 491cb0ef41Sopenharmony_ci uint16x8_t vO = vld1q_u16(table + 0); \ 501cb0ef41Sopenharmony_ci uint16x8_t v8 = vld1q_u16(table + 8); \ 511cb0ef41Sopenharmony_ci vO = vqsubq_u16(vO, vector_wsize); \ 521cb0ef41Sopenharmony_ci v8 = vqsubq_u16(v8, vector_wsize); \ 531cb0ef41Sopenharmony_ci vst1q_u16(table + 0, vO); \ 541cb0ef41Sopenharmony_ci vst1q_u16(table + 8, v8); \ 551cb0ef41Sopenharmony_ci table += 8 + 8; \ 561cb0ef41Sopenharmony_ci } 571cb0ef41Sopenharmony_ci 581cb0ef41Sopenharmony_citypedef uint16x8_t z_vec128i_u16x8_t; 591cb0ef41Sopenharmony_ci 601cb0ef41Sopenharmony_ci#else 611cb0ef41Sopenharmony_ci 621cb0ef41Sopenharmony_ci#error slide_hash_simd is not defined for your build target 631cb0ef41Sopenharmony_ci 641cb0ef41Sopenharmony_ci#endif 651cb0ef41Sopenharmony_ci 661cb0ef41Sopenharmony_ci/* =========================================================================== 671cb0ef41Sopenharmony_ci * Slide the hash table when sliding the window down (could be avoided with 32 681cb0ef41Sopenharmony_ci * bit values at the expense of memory usage). We slide even when level == 0 to 691cb0ef41Sopenharmony_ci * keep the hash table consistent if we switch back to level > 0 later. 701cb0ef41Sopenharmony_ci */ 711cb0ef41Sopenharmony_cilocal INLINE void slide_hash_simd( 721cb0ef41Sopenharmony_ci Posf *head, Posf *prev, const uInt w_size, const uInt hash_size) { 731cb0ef41Sopenharmony_ci /* 741cb0ef41Sopenharmony_ci * The SIMD implementation of the hash table slider assumes: 751cb0ef41Sopenharmony_ci * 761cb0ef41Sopenharmony_ci * 1. hash chain offset is 2 bytes. Should be true as Pos is "ush" type. 771cb0ef41Sopenharmony_ci */ 781cb0ef41Sopenharmony_ci Assert(sizeof(Pos) == 2, "Pos type size error: should be 2 bytes"); 791cb0ef41Sopenharmony_ci Assert(sizeof(ush) == 2, "ush type size error: should be 2 bytes"); 801cb0ef41Sopenharmony_ci 811cb0ef41Sopenharmony_ci Assert(hash_size <= (1 << 16), "Hash table maximum size error"); 821cb0ef41Sopenharmony_ci Assert(hash_size >= (1 << 8), "Hash table minimum size error"); 831cb0ef41Sopenharmony_ci Assert(w_size == (ush)w_size, "Prev table size error"); 841cb0ef41Sopenharmony_ci 851cb0ef41Sopenharmony_ci /* 861cb0ef41Sopenharmony_ci * 2. The hash & prev table sizes are a multiple of 32 bytes (256 bits), 871cb0ef41Sopenharmony_ci * since the NEON table slider moves two 128-bit items per loop (loop is 881cb0ef41Sopenharmony_ci * unrolled on NEON for performance, see http://crbug.com/863257). 891cb0ef41Sopenharmony_ci */ 901cb0ef41Sopenharmony_ci Assert(!((hash_size * sizeof(head[0])) & (32 - 1)), 911cb0ef41Sopenharmony_ci "Hash table size error: should be a multiple of 32 bytes"); 921cb0ef41Sopenharmony_ci Assert(!((w_size * sizeof(prev[0])) & (32 - 1)), 931cb0ef41Sopenharmony_ci "Prev table size error: should be a multiple of 32 bytes"); 941cb0ef41Sopenharmony_ci 951cb0ef41Sopenharmony_ci /* 961cb0ef41Sopenharmony_ci * Duplicate (ush)w_size in each uint16_t component of a 128-bit vector. 971cb0ef41Sopenharmony_ci */ 981cb0ef41Sopenharmony_ci const z_vec128i_u16x8_t vec_wsize = Z_SLIDE_INIT_SIMD(w_size); 991cb0ef41Sopenharmony_ci 1001cb0ef41Sopenharmony_ci /* 1011cb0ef41Sopenharmony_ci * Slide {head,prev} hash chain values: subtracts (ush)w_size from every 1021cb0ef41Sopenharmony_ci * value with a saturating SIMD subtract, to clamp the result to 0(NIL), 1031cb0ef41Sopenharmony_ci * to implement slide_hash() `(m >= wsize ? m - wsize : NIL);` code. 1041cb0ef41Sopenharmony_ci */ 1051cb0ef41Sopenharmony_ci Z_SLIDE_HASH_SIMD(head, hash_size, vec_wsize); 1061cb0ef41Sopenharmony_ci#ifndef FASTEST 1071cb0ef41Sopenharmony_ci Z_SLIDE_HASH_SIMD(prev, w_size, vec_wsize); 1081cb0ef41Sopenharmony_ci#endif 1091cb0ef41Sopenharmony_ci 1101cb0ef41Sopenharmony_ci} 1111cb0ef41Sopenharmony_ci 1121cb0ef41Sopenharmony_ci#undef z_vec128i_u16x8_t 1131cb0ef41Sopenharmony_ci#undef Z_SLIDE_HASH_SIMD 1141cb0ef41Sopenharmony_ci#undef Z_SLIDE_INIT_SIMD 1151cb0ef41Sopenharmony_ci 1161cb0ef41Sopenharmony_ci#endif /* SLIDE_HASH_SIMD_H */ 117