1be168c0dSopenharmony_ciFrom a80a695b3f6ce3118cc01c31359762cfac35f02d Mon Sep 17 00:00:00 2001 2be168c0dSopenharmony_ciFrom: zhangyanhui <zhangyanhui17@huawei.com> 3be168c0dSopenharmony_ciDate: Thu, 13 Jun 2024 14:23:56 +0800 4be168c0dSopenharmony_ciSubject: [PATCH] 0023-support-x86-emulator-build 5be168c0dSopenharmony_ci 6be168c0dSopenharmony_ci--- 7be168c0dSopenharmony_ci .../plugin/device/cpu/kernel/nnacl/BUILD.gn | 92 +++++++++++++++---- 8be168c0dSopenharmony_ci mindspore/lite/BUILD.gn | 60 ++++++------ 9be168c0dSopenharmony_ci mindspore/lite/src/common/thread_utils.cc | 2 +- 10be168c0dSopenharmony_ci mindspore/lite/src/litert/kernel/cpu/BUILD.gn | 28 +++++- 11be168c0dSopenharmony_ci .../cpu/fp32/convolution_delegate_fp32.cc | 2 + 12be168c0dSopenharmony_ci ...volution_depthwise_slidewindow_x86_fp32.cc | 4 +- 13be168c0dSopenharmony_ci ...nvolution_depthwise_slidewindow_x86_fp32.h | 2 +- 14be168c0dSopenharmony_ci 7 files changed, 138 insertions(+), 52 deletions(-) 15be168c0dSopenharmony_ci 16be168c0dSopenharmony_cidiff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/BUILD.gn b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/BUILD.gn 17be168c0dSopenharmony_ciindex d27817be..387a675a 100644 18be168c0dSopenharmony_ci--- a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/BUILD.gn 19be168c0dSopenharmony_ci+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/BUILD.gn 20be168c0dSopenharmony_ci@@ -46,7 +46,6 @@ config("nnacl_public_config") { 21be168c0dSopenharmony_ci } 22be168c0dSopenharmony_ci } else if (target_cpu == "x86_64") { 23be168c0dSopenharmony_ci cflags_c += [ 24be168c0dSopenharmony_ci- "-mavx512f", 25be168c0dSopenharmony_ci "-mavx", 26be168c0dSopenharmony_ci "-mavx2", 27be168c0dSopenharmony_ci "-mfma", 28be168c0dSopenharmony_ci@@ -56,8 +55,16 @@ config("nnacl_public_config") { 29be168c0dSopenharmony_ci defines += [ 30be168c0dSopenharmony_ci "ENABLE_SSE", 31be168c0dSopenharmony_ci "ENABLE_AVX", 32be168c0dSopenharmony_ci- "ENABLE_AVX512", 33be168c0dSopenharmony_ci ] 34be168c0dSopenharmony_ci+ # emulator not support avx512 35be168c0dSopenharmony_ci+ if (!is_emulator) { 36be168c0dSopenharmony_ci+ cflags_c += [ 37be168c0dSopenharmony_ci+ "-mavx512f", 38be168c0dSopenharmony_ci+ ] 39be168c0dSopenharmony_ci+ defines += [ 40be168c0dSopenharmony_ci+ "ENABLE_AVX512", 41be168c0dSopenharmony_ci+ ] 42be168c0dSopenharmony_ci+ } 43be168c0dSopenharmony_ci } 44be168c0dSopenharmony_ci } 45be168c0dSopenharmony_ci 46be168c0dSopenharmony_ci@@ -102,7 +109,6 @@ c_kernel_sources = [ 47be168c0dSopenharmony_ci "kernel/convolution_depthwise_sw.c", 48be168c0dSopenharmony_ci "kernel/convolution_im2col_arm32.c", 49be168c0dSopenharmony_ci "kernel/convolution_im2col_arm64.c", 50be168c0dSopenharmony_ci- "kernel/convolution_im2col_avx512.c", 51be168c0dSopenharmony_ci "kernel/convolution_im2col_avx.c", 52be168c0dSopenharmony_ci "kernel/convolution_im2col_base.c", 53be168c0dSopenharmony_ci "kernel/convolution_im2col.c", 54be168c0dSopenharmony_ci@@ -136,7 +142,6 @@ c_kernel_sources = [ 55be168c0dSopenharmony_ci "kernel/log_softmax.c", 56be168c0dSopenharmony_ci "kernel/matmul_arm32.c", 57be168c0dSopenharmony_ci "kernel/matmul_arm64.c", 58be168c0dSopenharmony_ci- "kernel/matmul_avx512.c", 59be168c0dSopenharmony_ci "kernel/matmul_avx.c", 60be168c0dSopenharmony_ci "kernel/matmul_base.c", 61be168c0dSopenharmony_ci "kernel/matmul.c", 62be168c0dSopenharmony_ci@@ -169,10 +174,6 @@ c_kernel_sources = [ 63be168c0dSopenharmony_ci "kernel/zeros_like.c", 64be168c0dSopenharmony_ci ] 65be168c0dSopenharmony_ci 66be168c0dSopenharmony_ci-# list of ${NNACL_DIR}/experimental/*.c 67be168c0dSopenharmony_ci-experimental_kernel_sources = [ 68be168c0dSopenharmony_ci-] 69be168c0dSopenharmony_ci- 70be168c0dSopenharmony_ci # list of ${NNACL_DIR}/base/*.c 71be168c0dSopenharmony_ci base_kernel_sources = [ 72be168c0dSopenharmony_ci "base/arithmetic_base.c", 73be168c0dSopenharmony_ci@@ -221,7 +222,6 @@ fp32_kernel_sources = [ 74be168c0dSopenharmony_ci "fp32/conv_common_fp32.c", 75be168c0dSopenharmony_ci "fp32/conv_depthwise_avx_fp32.c", 76be168c0dSopenharmony_ci "fp32/conv_depthwise_fp32.c", 77be168c0dSopenharmony_ci- "fp32/conv_im2col_avx512_fp32.c", 78be168c0dSopenharmony_ci "fp32/conv_im2col_fp32.c", 79be168c0dSopenharmony_ci "fp32/conv_sw_arm64_fp32.c", 80be168c0dSopenharmony_ci "fp32/conv_sw_avx_fp32.c", 81be168c0dSopenharmony_ci@@ -246,8 +246,6 @@ fp32_kernel_sources = [ 82be168c0dSopenharmony_ci "fp32/local_response_norm_fp32.c", 83be168c0dSopenharmony_ci "fp32/log_softmax_fp32.c", 84be168c0dSopenharmony_ci "fp32/lstm_fp32.c", 85be168c0dSopenharmony_ci- "fp32/matmul_avx512_fp32.c", 86be168c0dSopenharmony_ci- "fp32/matmul_avx512_mask_fp32.c", 87be168c0dSopenharmony_ci "fp32/matmul_avx_fp32.c", 88be168c0dSopenharmony_ci "fp32/matmul_fp32.c", 89be168c0dSopenharmony_ci "fp32/mul_fp32.c", 90be168c0dSopenharmony_ci@@ -784,6 +782,13 @@ sse_avx_avx512_sources = [ 91be168c0dSopenharmony_ci "assembly/avx/MatmulAvx.S", 92be168c0dSopenharmony_ci ] 93be168c0dSopenharmony_ci 94be168c0dSopenharmony_ci+# only x86_64 real machine support avx512 95be168c0dSopenharmony_ci+if (target_cpu == "x86_64" && !is_emulator) { 96be168c0dSopenharmony_ci+ sse_avx_avx512_sources += [ 97be168c0dSopenharmony_ci+ "assembly/avx512/ConvDwFp32RowAVX512.S", 98be168c0dSopenharmony_ci+ ] 99be168c0dSopenharmony_ci+} 100be168c0dSopenharmony_ci+ 101be168c0dSopenharmony_ci gemm_avx512_kernel_sources = [ 102be168c0dSopenharmony_ci "experimental/HPC-generator/gemm_avx512/nnacl_gemm_avx512_10x16_kernel_nhwc_fp32.c", 103be168c0dSopenharmony_ci "experimental/HPC-generator/gemm_avx512/nnacl_gemm_avx512_10x32_kernel_nhwc_fp32.c", 104be168c0dSopenharmony_ci@@ -834,16 +839,64 @@ gemm_avx512_kernel_sources = [ 105be168c0dSopenharmony_ci "experimental/HPC-generator/gemm_avx512/nnacl_gemm_avx512_9x32_kernel_nhwc_fp32.c", 106be168c0dSopenharmony_ci ] 107be168c0dSopenharmony_ci 108be168c0dSopenharmony_ci+gemm_mask_avx512_kernel_sources = [ 109be168c0dSopenharmony_ci+ "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_10x16_mask_kernel_nhwc_fp32.c", 110be168c0dSopenharmony_ci+ "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_10x32_mask_kernel_nhwc_fp32.c", 111be168c0dSopenharmony_ci+ "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_11x16_mask_kernel_nhwc_fp32.c", 112be168c0dSopenharmony_ci+ "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_11x32_mask_kernel_nhwc_fp32.c", 113be168c0dSopenharmony_ci+ "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_12x16_mask_kernel_nhwc_fp32.c", 114be168c0dSopenharmony_ci+ "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_12x32_mask_kernel_nhwc_fp32.c", 115be168c0dSopenharmony_ci+ "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_1x16_mask_kernel_nhwc_fp32.c", 116be168c0dSopenharmony_ci+ "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_1x32_mask_kernel_nhwc_fp32.c", 117be168c0dSopenharmony_ci+ "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_1x48_mask_kernel_nhwc_fp32.c", 118be168c0dSopenharmony_ci+ "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_1x64_mask_kernel_nhwc_fp32.c", 119be168c0dSopenharmony_ci+ "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_1x80_mask_kernel_nhwc_fp32.c", 120be168c0dSopenharmony_ci+ "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_1x96_mask_kernel_nhwc_fp32.c", 121be168c0dSopenharmony_ci+ "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_2x16_mask_kernel_nhwc_fp32.c", 122be168c0dSopenharmony_ci+ "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_2x32_mask_kernel_nhwc_fp32.c", 123be168c0dSopenharmony_ci+ "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_2x48_mask_kernel_nhwc_fp32.c", 124be168c0dSopenharmony_ci+ "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_2x64_mask_kernel_nhwc_fp32.c", 125be168c0dSopenharmony_ci+ "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_2x80_mask_kernel_nhwc_fp32.c", 126be168c0dSopenharmony_ci+ "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_2x96_mask_kernel_nhwc_fp32.c", 127be168c0dSopenharmony_ci+ "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_3x16_mask_kernel_nhwc_fp32.c", 128be168c0dSopenharmony_ci+ "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_3x32_mask_kernel_nhwc_fp32.c", 129be168c0dSopenharmony_ci+ "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_3x48_mask_kernel_nhwc_fp32.c", 130be168c0dSopenharmony_ci+ "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_3x64_mask_kernel_nhwc_fp32.c", 131be168c0dSopenharmony_ci+ "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_3x80_mask_kernel_nhwc_fp32.c", 132be168c0dSopenharmony_ci+ "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_3x96_mask_kernel_nhwc_fp32.c", 133be168c0dSopenharmony_ci+ "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_4x16_mask_kernel_nhwc_fp32.c", 134be168c0dSopenharmony_ci+ "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_4x32_mask_kernel_nhwc_fp32.c", 135be168c0dSopenharmony_ci+ "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_4x48_mask_kernel_nhwc_fp32.c", 136be168c0dSopenharmony_ci+ "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_4x64_mask_kernel_nhwc_fp32.c", 137be168c0dSopenharmony_ci+ "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_4x80_mask_kernel_nhwc_fp32.c", 138be168c0dSopenharmony_ci+ "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_4x96_mask_kernel_nhwc_fp32.c", 139be168c0dSopenharmony_ci+ "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_5x16_mask_kernel_nhwc_fp32.c", 140be168c0dSopenharmony_ci+ "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_5x32_mask_kernel_nhwc_fp32.c", 141be168c0dSopenharmony_ci+ "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_5x48_mask_kernel_nhwc_fp32.c", 142be168c0dSopenharmony_ci+ "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_5x64_mask_kernel_nhwc_fp32.c", 143be168c0dSopenharmony_ci+ "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_5x80_mask_kernel_nhwc_fp32.c", 144be168c0dSopenharmony_ci+ "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_6x16_mask_kernel_nhwc_fp32.c", 145be168c0dSopenharmony_ci+ "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_6x32_mask_kernel_nhwc_fp32.c", 146be168c0dSopenharmony_ci+ "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_6x48_mask_kernel_nhwc_fp32.c", 147be168c0dSopenharmony_ci+ "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_6x64_mask_kernel_nhwc_fp32.c", 148be168c0dSopenharmony_ci+ "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_7x16_mask_kernel_nhwc_fp32.c", 149be168c0dSopenharmony_ci+ "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_7x32_mask_kernel_nhwc_fp32.c", 150be168c0dSopenharmony_ci+ "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_7x48_mask_kernel_nhwc_fp32.c", 151be168c0dSopenharmony_ci+ "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_8x16_mask_kernel_nhwc_fp32.c", 152be168c0dSopenharmony_ci+ "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_8x32_mask_kernel_nhwc_fp32.c", 153be168c0dSopenharmony_ci+ "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_8x48_mask_kernel_nhwc_fp32.c", 154be168c0dSopenharmony_ci+ "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_9x16_mask_kernel_nhwc_fp32.c", 155be168c0dSopenharmony_ci+ "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_9x32_mask_kernel_nhwc_fp32.c", 156be168c0dSopenharmony_ci+] 157be168c0dSopenharmony_ci+ 158be168c0dSopenharmony_ci fp32_kernel_sources -= no_fast_math_fp32_kernel_sources 159be168c0dSopenharmony_ci fp32_kernel_sources -= avx_fp32_kernel_sources 160be168c0dSopenharmony_ci-fp32_kernel_sources -= avx512_fp32_kernel_sources 161be168c0dSopenharmony_ci fp32_kernel_sources -= arm64_fp32_kernel_sources 162be168c0dSopenharmony_ci 163be168c0dSopenharmony_ci # source files on all target 164be168c0dSopenharmony_ci nnacl_sources = common_sources 165be168c0dSopenharmony_ci nnacl_sources += base_kernel_sources 166be168c0dSopenharmony_ci nnacl_sources += c_kernel_sources 167be168c0dSopenharmony_ci-nnacl_sources += experimental_kernel_sources 168be168c0dSopenharmony_ci nnacl_sources += fp32_kernel_sources 169be168c0dSopenharmony_ci nnacl_sources += fp32_sparse_kernel_sources 170be168c0dSopenharmony_ci nnacl_sources += fp32_grad_kernel_sources 171be168c0dSopenharmony_ci@@ -854,7 +907,6 @@ nnacl_sources += infer_control_sources 172be168c0dSopenharmony_ci 173be168c0dSopenharmony_ci # source files on arm32 174be168c0dSopenharmony_ci arm_only_sources = arm32_assembly_sources 175be168c0dSopenharmony_ci-#arm_only_sources += arm32_fp16_assembly_sources 176be168c0dSopenharmony_ci not_needed(arm32_fp16_assembly_sources) 177be168c0dSopenharmony_ci 178be168c0dSopenharmony_ci # source files on arm64 179be168c0dSopenharmony_ci@@ -868,8 +920,16 @@ arm64_only_sources += arm64_fp32_kernel_sources 180be168c0dSopenharmony_ci # sources files on x86_64 181be168c0dSopenharmony_ci x86_64_only_sources = sse_avx_avx512_sources 182be168c0dSopenharmony_ci x86_64_only_sources += avx_fp32_kernel_sources 183be168c0dSopenharmony_ci-x86_64_only_sources += avx512_fp32_kernel_sources 184be168c0dSopenharmony_ci-x86_64_only_sources += gemm_avx512_kernel_sources 185be168c0dSopenharmony_ci+# emulator not support avx512 186be168c0dSopenharmony_ci+if (is_emulator) { 187be168c0dSopenharmony_ci+ not_needed(avx512_fp32_kernel_sources) 188be168c0dSopenharmony_ci+ not_needed(gemm_avx512_kernel_sources) 189be168c0dSopenharmony_ci+ not_needed(gemm_mask_avx512_kernel_sources) 190be168c0dSopenharmony_ci+} else { 191be168c0dSopenharmony_ci+ x86_64_only_sources += avx512_fp32_kernel_sources 192be168c0dSopenharmony_ci+ x86_64_only_sources += gemm_avx512_kernel_sources 193be168c0dSopenharmony_ci+ x86_64_only_sources += gemm_mask_avx512_kernel_sources 194be168c0dSopenharmony_ci+} 195be168c0dSopenharmony_ci 196be168c0dSopenharmony_ci if (target_cpu == "arm") { 197be168c0dSopenharmony_ci nnacl_sources += arm_only_sources 198be168c0dSopenharmony_cidiff --git a/mindspore/lite/BUILD.gn b/mindspore/lite/BUILD.gn 199be168c0dSopenharmony_ciindex 467cdb6a..124c84c9 100644 200be168c0dSopenharmony_ci--- a/mindspore/lite/BUILD.gn 201be168c0dSopenharmony_ci+++ b/mindspore/lite/BUILD.gn 202be168c0dSopenharmony_ci@@ -118,12 +118,6 @@ control_flow_kernel_sources = [ 203be168c0dSopenharmony_ci "src/control_flow/kernel/identity_kernel.cc", 204be168c0dSopenharmony_ci ] 205be168c0dSopenharmony_ci 206be168c0dSopenharmony_ci-experimental_sources = [ 207be168c0dSopenharmony_ci-] 208be168c0dSopenharmony_ci- 209be168c0dSopenharmony_ci-string_kernel_source = [ 210be168c0dSopenharmony_ci-] 211be168c0dSopenharmony_ci- 212be168c0dSopenharmony_ci auto_parallel_source = [ 213be168c0dSopenharmony_ci "src/litert/sub_graph_split.cc" 214be168c0dSopenharmony_ci ] 215be168c0dSopenharmony_ci@@ -186,19 +180,11 @@ lite_mindrt_sources = [ 216be168c0dSopenharmony_ci all_lite_sources += cxx_api_sources 217be168c0dSopenharmony_ci all_lite_sources += api_source 218be168c0dSopenharmony_ci all_lite_sources += control_flow_kernel_sources 219be168c0dSopenharmony_ci-all_lite_sources += experimental_sources 220be168c0dSopenharmony_ci-all_lite_sources += string_kernel_source 221be168c0dSopenharmony_ci all_lite_sources += auto_parallel_source 222be168c0dSopenharmony_ci all_lite_sources += custom_registry_sources 223be168c0dSopenharmony_ci all_lite_sources += weight_decode_source 224be168c0dSopenharmony_ci all_lite_sources += lite_mindrt_sources 225be168c0dSopenharmony_ci 226be168c0dSopenharmony_ci-ops_base_sources = [ 227be168c0dSopenharmony_ci-# "src/common/ops/anf_utils.cc", # disable runtiem convert 228be168c0dSopenharmony_ci-# "src/common/ops/ops_def.cc", # disable kernel executor 229be168c0dSopenharmony_ci-# "src/common/ops/ops_utils.cc" # disable kernel executor 230be168c0dSopenharmony_ci-] 231be168c0dSopenharmony_ci- 232be168c0dSopenharmony_ci basic_populate_sources = [ 233be168c0dSopenharmony_ci "src/common/ops/populate/activation_grad_populate.cc", 234be168c0dSopenharmony_ci "src/common/ops/populate/activation_populate.cc", 235be168c0dSopenharmony_ci@@ -346,8 +332,7 @@ control_populate_sources = [ 236be168c0dSopenharmony_ci "src/common/ops/populate/control/tensorliststack_populate.cc", 237be168c0dSopenharmony_ci ] 238be168c0dSopenharmony_ci 239be168c0dSopenharmony_ci-all_ops_sources = ops_base_sources 240be168c0dSopenharmony_ci-all_ops_sources += basic_populate_sources 241be168c0dSopenharmony_ci+all_ops_sources = basic_populate_sources 242be168c0dSopenharmony_ci all_ops_sources += string_populate_sources 243be168c0dSopenharmony_ci all_ops_sources += control_populate_sources 244be168c0dSopenharmony_ci 245be168c0dSopenharmony_ci@@ -360,6 +345,12 @@ missing_sources = [ 246be168c0dSopenharmony_ci 247be168c0dSopenharmony_ci all_sources += missing_sources 248be168c0dSopenharmony_ci 249be168c0dSopenharmony_ci+SUPPORT_NNRT = false 250be168c0dSopenharmony_ci+# currently, only arm/arm64 real machine support nnrt 251be168c0dSopenharmony_ci+if ((target_cpu == "arm" || target_cpu == "arm64") && !is_emulator) { 252be168c0dSopenharmony_ci+ SUPPORT_NNRT = true 253be168c0dSopenharmony_ci+} 254be168c0dSopenharmony_ci+ 255be168c0dSopenharmony_ci ohos_shared_library("mindspore_lib") { 256be168c0dSopenharmony_ci deps = [ 257be168c0dSopenharmony_ci "../ccsrc/plugin/device/cpu/kernel/nnacl/:nnacl_obj", 258be168c0dSopenharmony_ci@@ -387,7 +378,6 @@ ohos_shared_library("mindspore_lib") { 259be168c0dSopenharmony_ci "../ccsrc/", 260be168c0dSopenharmony_ci "src/litert/kernel/cpu/", 261be168c0dSopenharmony_ci "../core/mindrt/src/", 262be168c0dSopenharmony_ci- "//foundation/ai/neural_network_runtime/", 263be168c0dSopenharmony_ci ] 264be168c0dSopenharmony_ci 265be168c0dSopenharmony_ci defines = [ 266be168c0dSopenharmony_ci@@ -418,6 +408,17 @@ ohos_shared_library("mindspore_lib") { 267be168c0dSopenharmony_ci "CL_HPP_TARGET_OPENCL_VERSION=120", 268be168c0dSopenharmony_ci "CL_HPP_MINIMUM_OPENCL_VERSION=120", 269be168c0dSopenharmony_ci ] 270be168c0dSopenharmony_ci+ } else if (target_cpu == "x86_64") { 271be168c0dSopenharmony_ci+ defines += [ 272be168c0dSopenharmony_ci+ "ENABLE_SSE", 273be168c0dSopenharmony_ci+ "ENABLE_AVX", 274be168c0dSopenharmony_ci+ ] 275be168c0dSopenharmony_ci+ # emulator not support avx512 276be168c0dSopenharmony_ci+ if (!is_emulator) { 277be168c0dSopenharmony_ci+ defines += [ 278be168c0dSopenharmony_ci+ "ENABLE_AVX512", 279be168c0dSopenharmony_ci+ ] 280be168c0dSopenharmony_ci+ } 281be168c0dSopenharmony_ci } 282be168c0dSopenharmony_ci 283be168c0dSopenharmony_ci configs = [ 284be168c0dSopenharmony_ci@@ -434,10 +435,10 @@ ohos_shared_library("mindspore_lib") { 285be168c0dSopenharmony_ci output_name = "libmindspore-lite" 286be168c0dSopenharmony_ci output_extension = "so" 287be168c0dSopenharmony_ci innerapi_tags = [ "platformsdk" ] 288be168c0dSopenharmony_ci- SUPPORT_NNRT = true 289be168c0dSopenharmony_ci if (SUPPORT_NNRT) { 290be168c0dSopenharmony_ci if (mindspore_feature_nnrt_metagraph) { 291be168c0dSopenharmony_ci defines += [ "SUPPORT_NNRT_METAGRAPH" ] 292be168c0dSopenharmony_ci+ sources += [ "src/litert/delegate/nnrt/hiai_foundation_wrapper.cc", ] 293be168c0dSopenharmony_ci print("enabled feature: mindspore_feature_nnrt_metagraph") 294be168c0dSopenharmony_ci } 295be168c0dSopenharmony_ci sources += [ 296be168c0dSopenharmony_ci@@ -445,7 +446,6 @@ ohos_shared_library("mindspore_lib") { 297be168c0dSopenharmony_ci "src/litert/delegate/nnrt/nnrt_delegate.cc", 298be168c0dSopenharmony_ci "src/litert/delegate/nnrt/nnrt_model_kernel.cc", 299be168c0dSopenharmony_ci "src/litert/delegate/nnrt/nnrt_allocator.cc", 300be168c0dSopenharmony_ci- "src/litert/delegate/nnrt/hiai_foundation_wrapper.cc", 301be168c0dSopenharmony_ci "src/litert/delegate/nnrt/extension_options_parser.cc", 302be168c0dSopenharmony_ci ] 303be168c0dSopenharmony_ci include_dirs += [ 304be168c0dSopenharmony_ci@@ -453,6 +453,7 @@ ohos_shared_library("mindspore_lib") { 305be168c0dSopenharmony_ci "../../mindspore/core/ir", 306be168c0dSopenharmony_ci "mindir/include", 307be168c0dSopenharmony_ci "mindir/inner_headers", 308be168c0dSopenharmony_ci+ "//foundation/ai/neural_network_runtime/", 309be168c0dSopenharmony_ci ] 310be168c0dSopenharmony_ci 311be168c0dSopenharmony_ci external_deps += [ "neural_network_runtime:nnrt_target" ] 312be168c0dSopenharmony_ci@@ -499,11 +500,9 @@ ohos_shared_library("mindspore_ndk") { 313be168c0dSopenharmony_ci "../../third_party/", 314be168c0dSopenharmony_ci "./schema/", 315be168c0dSopenharmony_ci "../ccsrc/", 316be168c0dSopenharmony_ci- "//foundation/ai/neural_network_runtime/", 317be168c0dSopenharmony_ci ] 318be168c0dSopenharmony_ci 319be168c0dSopenharmony_ci defines = [ 320be168c0dSopenharmony_ci- "SUPPORT_NNRT", 321be168c0dSopenharmony_ci "MS_COMPILE_OHOS", 322be168c0dSopenharmony_ci "PRIMITIVE_WRITEABLE", 323be168c0dSopenharmony_ci "RUNTIME_PASS_CLIP", 324be168c0dSopenharmony_ci@@ -512,9 +511,18 @@ ohos_shared_library("mindspore_ndk") { 325be168c0dSopenharmony_ci "ENABLE_HI_APP_EVENT", 326be168c0dSopenharmony_ci ] 327be168c0dSopenharmony_ci 328be168c0dSopenharmony_ci- if (mindspore_feature_nnrt_metagraph) { 329be168c0dSopenharmony_ci- defines += [ "SUPPORT_NNRT_METAGRAPH" ] 330be168c0dSopenharmony_ci- print("enabled feature: mindspore_feature_nnrt_metagraph") 331be168c0dSopenharmony_ci+ if (SUPPORT_NNRT) { 332be168c0dSopenharmony_ci+ include_dirs += [ 333be168c0dSopenharmony_ci+ "//foundation/ai/neural_network_runtime/", 334be168c0dSopenharmony_ci+ ] 335be168c0dSopenharmony_ci+ defines += [ 336be168c0dSopenharmony_ci+ "SUPPORT_NNRT", 337be168c0dSopenharmony_ci+ ] 338be168c0dSopenharmony_ci+ if (mindspore_feature_nnrt_metagraph) { 339be168c0dSopenharmony_ci+ defines += [ "SUPPORT_NNRT_METAGRAPH" ] 340be168c0dSopenharmony_ci+ print("enabled feature: mindspore_feature_nnrt_metagraph") 341be168c0dSopenharmony_ci+ } 342be168c0dSopenharmony_ci+ external_deps = [ "neural_network_runtime:nnrt_target" ] 343be168c0dSopenharmony_ci } 344be168c0dSopenharmony_ci 345be168c0dSopenharmony_ci configs = [ 346be168c0dSopenharmony_ci@@ -523,8 +531,6 @@ ohos_shared_library("mindspore_ndk") { 347be168c0dSopenharmony_ci ":secure_option", 348be168c0dSopenharmony_ci ] 349be168c0dSopenharmony_ci 350be168c0dSopenharmony_ci- external_deps = [ "neural_network_runtime:nnrt_target" ] 351be168c0dSopenharmony_ci- 352be168c0dSopenharmony_ci remove_configs = [ "//build/config/compiler:no_rtti" ] 353be168c0dSopenharmony_ci 354be168c0dSopenharmony_ci output_name = "libmindspore_lite_ndk" 355be168c0dSopenharmony_ci@@ -749,4 +755,4 @@ config("secure_option") { 356be168c0dSopenharmony_ci 357be168c0dSopenharmony_ci config("train_kernel_option") { 358be168c0dSopenharmony_ci cflags_cc = [ "-fno-finite-math-only" ] 359be168c0dSopenharmony_ci-} 360be168c0dSopenharmony_ci+} 361be168c0dSopenharmony_ci\ No newline at end of file 362be168c0dSopenharmony_cidiff --git a/mindspore/lite/src/common/thread_utils.cc b/mindspore/lite/src/common/thread_utils.cc 363be168c0dSopenharmony_ciindex 28c8e1cd..28c7acab 100644 364be168c0dSopenharmony_ci--- a/mindspore/lite/src/common/thread_utils.cc 365be168c0dSopenharmony_ci+++ b/mindspore/lite/src/common/thread_utils.cc 366be168c0dSopenharmony_ci@@ -17,7 +17,7 @@ 367be168c0dSopenharmony_ci #if defined(__linux__) && !defined(ENABLE_ARM) 368be168c0dSopenharmony_ci #include "src/common/thread_utils.h" 369be168c0dSopenharmony_ci #include <sys/stat.h> 370be168c0dSopenharmony_ci-#include <wait.h> 371be168c0dSopenharmony_ci+#include <sys/wait.h> 372be168c0dSopenharmony_ci #include "src/common/log_adapter.h" 373be168c0dSopenharmony_ci 374be168c0dSopenharmony_ci namespace mindspore { 375be168c0dSopenharmony_cidiff --git a/mindspore/lite/src/litert/kernel/cpu/BUILD.gn b/mindspore/lite/src/litert/kernel/cpu/BUILD.gn 376be168c0dSopenharmony_ciindex 297fc6f6..d51b9f4a 100644 377be168c0dSopenharmony_ci--- a/mindspore/lite/src/litert/kernel/cpu/BUILD.gn 378be168c0dSopenharmony_ci+++ b/mindspore/lite/src/litert/kernel/cpu/BUILD.gn 379be168c0dSopenharmony_ci@@ -52,7 +52,6 @@ cpu_kernel_sources = [ 380be168c0dSopenharmony_ci "fp32/convolution_fp32.cc", 381be168c0dSopenharmony_ci "fp32/convolution_im2col_arm32_fp32.cc", 382be168c0dSopenharmony_ci "fp32/convolution_im2col_arm64_fp32.cc", 383be168c0dSopenharmony_ci- "fp32/convolution_im2col_avx512_fp32.cc", 384be168c0dSopenharmony_ci "fp32/convolution_im2col_avx_fp32.cc", 385be168c0dSopenharmony_ci "fp32/convolution_im2col_base_fp32.cc", 386be168c0dSopenharmony_ci "fp32/convolution_im2col_fp32.cc", 387be168c0dSopenharmony_ci@@ -90,7 +89,6 @@ cpu_kernel_sources = [ 388be168c0dSopenharmony_ci "fp32/lstm_non_mindir_fp32.cc", 389be168c0dSopenharmony_ci "fp32/matmul_fp32_arm32.cc", 390be168c0dSopenharmony_ci "fp32/matmul_fp32_arm64.cc", 391be168c0dSopenharmony_ci- "fp32/matmul_fp32_avx512.cc", 392be168c0dSopenharmony_ci "fp32/matmul_fp32_avx.cc", 393be168c0dSopenharmony_ci "fp32/matmul_fp32_base.cc", 394be168c0dSopenharmony_ci "fp32/matmul_fp32.cc", 395be168c0dSopenharmony_ci@@ -125,7 +123,7 @@ cpu_kernel_sources = [ 396be168c0dSopenharmony_ci ] 397be168c0dSopenharmony_ci 398be168c0dSopenharmony_ci if ((target_cpu != "arm") && (target_cpu != "arm64")) { 399be168c0dSopenharmony_ci- cpu_kernel_sources += [ "src/runtime/kernel/cpu/fp32/cast_for_x86_fp16.cc" ] 400be168c0dSopenharmony_ci+ cpu_kernel_sources += [ "fp32/cast_for_x86_fp16.cc" ] 401be168c0dSopenharmony_ci } 402be168c0dSopenharmony_ci 403be168c0dSopenharmony_ci arm64_cpu_kernel_sources = [ 404be168c0dSopenharmony_ci@@ -148,8 +146,6 @@ sse_avx_avx512_kernel_sources = [ 405be168c0dSopenharmony_ci "fp32/convolution_im2col_avx_fp32.cc", 406be168c0dSopenharmony_ci "fp32/matmul_fp32_avx.cc", 407be168c0dSopenharmony_ci "fp32/convolution_winograd_avx_fp32.cc", 408be168c0dSopenharmony_ci- "fp32/convolution_im2col_avx512_fp32.cc", 409be168c0dSopenharmony_ci- "fp32/matmul_fp32_avx512.cc", 410be168c0dSopenharmony_ci ] 411be168c0dSopenharmony_ci 412be168c0dSopenharmony_ci fp16_kernel_sources = [ 413be168c0dSopenharmony_ci@@ -272,6 +268,18 @@ control_kernel_sources = [ 414be168c0dSopenharmony_ci "control/tensorlist_stack.cc", 415be168c0dSopenharmony_ci ] 416be168c0dSopenharmony_ci 417be168c0dSopenharmony_ci+# emulator not support avx512 418be168c0dSopenharmony_ci+if (!is_emulator) { 419be168c0dSopenharmony_ci+ cpu_kernel_sources += [ 420be168c0dSopenharmony_ci+ "fp32/convolution_im2col_avx512_fp32.cc", 421be168c0dSopenharmony_ci+ "fp32/matmul_fp32_avx512.cc", 422be168c0dSopenharmony_ci+ ] 423be168c0dSopenharmony_ci+ sse_avx_avx512_kernel_sources += [ 424be168c0dSopenharmony_ci+ "fp32/convolution_im2col_avx512_fp32.cc", 425be168c0dSopenharmony_ci+ "fp32/matmul_fp32_avx512.cc", 426be168c0dSopenharmony_ci+ ] 427be168c0dSopenharmony_ci+} 428be168c0dSopenharmony_ci+ 429be168c0dSopenharmony_ci all_cpu_kernel_sources = cpu_kernel_sources 430be168c0dSopenharmony_ci all_cpu_kernel_sources += int8_kernel_sources 431be168c0dSopenharmony_ci all_cpu_kernel_sources += string_kernel_sources 432be168c0dSopenharmony_ci@@ -348,6 +356,16 @@ ohos_source_set("cpu_kernel_obj") { 433be168c0dSopenharmony_ci "CL_HPP_TARGET_OPENCL_VERSION=120", 434be168c0dSopenharmony_ci "CL_HPP_MINIMUM_OPENCL_VERSION=120", 435be168c0dSopenharmony_ci ] 436be168c0dSopenharmony_ci+ } else if (target_cpu == "x86_64") { 437be168c0dSopenharmony_ci+ defines += [ 438be168c0dSopenharmony_ci+ "ENABLE_SSE", 439be168c0dSopenharmony_ci+ "ENABLE_AVX", 440be168c0dSopenharmony_ci+ ] 441be168c0dSopenharmony_ci+ if (!is_emulator) { 442be168c0dSopenharmony_ci+ defines += [ 443be168c0dSopenharmony_ci+ "ENABLE_AVX512", 444be168c0dSopenharmony_ci+ ] 445be168c0dSopenharmony_ci+ } 446be168c0dSopenharmony_ci } 447be168c0dSopenharmony_ci 448be168c0dSopenharmony_ci cflags_cc = [ 449be168c0dSopenharmony_cidiff --git a/mindspore/lite/src/litert/kernel/cpu/fp32/convolution_delegate_fp32.cc b/mindspore/lite/src/litert/kernel/cpu/fp32/convolution_delegate_fp32.cc 450be168c0dSopenharmony_ciindex f907bbbf..ac693c44 100644 451be168c0dSopenharmony_ci--- a/mindspore/lite/src/litert/kernel/cpu/fp32/convolution_delegate_fp32.cc 452be168c0dSopenharmony_ci+++ b/mindspore/lite/src/litert/kernel/cpu/fp32/convolution_delegate_fp32.cc 453be168c0dSopenharmony_ci@@ -49,7 +49,9 @@ using mindspore::schema::PrimitiveType_Conv2DFusion; 454be168c0dSopenharmony_ci 455be168c0dSopenharmony_ci namespace mindspore::kernel { 456be168c0dSopenharmony_ci namespace { 457be168c0dSopenharmony_ci+#ifndef ENABLE_AVX 458be168c0dSopenharmony_ci constexpr int kMaxDwConvSWSize = 32; 459be168c0dSopenharmony_ci+#endif 460be168c0dSopenharmony_ci } // namespace 461be168c0dSopenharmony_ci 462be168c0dSopenharmony_ci float *ConvolutionDelegateCPUKernel::CopyData(const lite::Tensor *tensor) { 463be168c0dSopenharmony_cidiff --git a/mindspore/lite/src/litert/kernel/cpu/fp32/convolution_depthwise_slidewindow_x86_fp32.cc b/mindspore/lite/src/litert/kernel/cpu/fp32/convolution_depthwise_slidewindow_x86_fp32.cc 464be168c0dSopenharmony_ciindex 568b9463..d35669ce 100644 465be168c0dSopenharmony_ci--- a/mindspore/lite/src/litert/kernel/cpu/fp32/convolution_depthwise_slidewindow_x86_fp32.cc 466be168c0dSopenharmony_ci+++ b/mindspore/lite/src/litert/kernel/cpu/fp32/convolution_depthwise_slidewindow_x86_fp32.cc 467be168c0dSopenharmony_ci@@ -106,7 +106,7 @@ int ConvolutionDepthwiseSWCPUKernelX86::ReSize() { 468be168c0dSopenharmony_ci return RET_OK; 469be168c0dSopenharmony_ci } 470be168c0dSopenharmony_ci 471be168c0dSopenharmony_ci-int ConvolutionDepthwiseSWCPUKernelX86::Execute(int task_id) { 472be168c0dSopenharmony_ci+int ConvolutionDepthwiseSWCPUKernelX86::DoExecute(int task_id) { 473be168c0dSopenharmony_ci DepthwiseSWAvxFp32(packed_output_, packed_input_, reinterpret_cast<float *>(packed_weight_), 474be168c0dSopenharmony_ci reinterpret_cast<float *>(bias_data_), conv_param_, sliding_, task_id); 475be168c0dSopenharmony_ci return RET_OK; 476be168c0dSopenharmony_ci@@ -114,7 +114,7 @@ int ConvolutionDepthwiseSWCPUKernelX86::Execute(int task_id) { 477be168c0dSopenharmony_ci 478be168c0dSopenharmony_ci int ConvDwSWAvxRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) { 479be168c0dSopenharmony_ci auto conv_dw = reinterpret_cast<ConvolutionDepthwiseSWCPUKernelX86 *>(cdata); 480be168c0dSopenharmony_ci- auto ret = conv_dw->Execute(task_id); 481be168c0dSopenharmony_ci+ auto ret = conv_dw->DoExecute(task_id); 482be168c0dSopenharmony_ci if (ret != RET_OK) { 483be168c0dSopenharmony_ci MS_LOG(ERROR) << "ConvolutionDepthwiseSWRun in x86 error task_id[" << task_id << "] error_code[" << ret << "]"; 484be168c0dSopenharmony_ci return RET_ERROR; 485be168c0dSopenharmony_cidiff --git a/mindspore/lite/src/litert/kernel/cpu/fp32/convolution_depthwise_slidewindow_x86_fp32.h b/mindspore/lite/src/litert/kernel/cpu/fp32/convolution_depthwise_slidewindow_x86_fp32.h 486be168c0dSopenharmony_ciindex e959fe45..928321e5 100644 487be168c0dSopenharmony_ci--- a/mindspore/lite/src/litert/kernel/cpu/fp32/convolution_depthwise_slidewindow_x86_fp32.h 488be168c0dSopenharmony_ci+++ b/mindspore/lite/src/litert/kernel/cpu/fp32/convolution_depthwise_slidewindow_x86_fp32.h 489be168c0dSopenharmony_ci@@ -35,7 +35,7 @@ class ConvolutionDepthwiseSWCPUKernelX86 : public ConvolutionBaseCPUKernel { 490be168c0dSopenharmony_ci int ReSize() override; 491be168c0dSopenharmony_ci int Run() override; 492be168c0dSopenharmony_ci 493be168c0dSopenharmony_ci- int Execute(int task_id); 494be168c0dSopenharmony_ci+ int DoExecute(int task_id); 495be168c0dSopenharmony_ci 496be168c0dSopenharmony_ci private: 497be168c0dSopenharmony_ci void FreePackedInputOutput(); 498be168c0dSopenharmony_ci-- 499be168c0dSopenharmony_ci2.25.1 500be168c0dSopenharmony_ci 501