1be168c0dSopenharmony_ciFrom a80a695b3f6ce3118cc01c31359762cfac35f02d Mon Sep 17 00:00:00 2001
2be168c0dSopenharmony_ciFrom: zhangyanhui <zhangyanhui17@huawei.com>
3be168c0dSopenharmony_ciDate: Thu, 13 Jun 2024 14:23:56 +0800
4be168c0dSopenharmony_ciSubject: [PATCH] 0023-support-x86-emulator-build
5be168c0dSopenharmony_ci
6be168c0dSopenharmony_ci---
7be168c0dSopenharmony_ci .../plugin/device/cpu/kernel/nnacl/BUILD.gn   | 92 +++++++++++++++----
8be168c0dSopenharmony_ci mindspore/lite/BUILD.gn                       | 60 ++++++------
9be168c0dSopenharmony_ci mindspore/lite/src/common/thread_utils.cc     |  2 +-
10be168c0dSopenharmony_ci mindspore/lite/src/litert/kernel/cpu/BUILD.gn | 28 +++++-
11be168c0dSopenharmony_ci .../cpu/fp32/convolution_delegate_fp32.cc     |  2 +
12be168c0dSopenharmony_ci ...volution_depthwise_slidewindow_x86_fp32.cc |  4 +-
13be168c0dSopenharmony_ci ...nvolution_depthwise_slidewindow_x86_fp32.h |  2 +-
14be168c0dSopenharmony_ci 7 files changed, 138 insertions(+), 52 deletions(-)
15be168c0dSopenharmony_ci
16be168c0dSopenharmony_cidiff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/BUILD.gn b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/BUILD.gn
17be168c0dSopenharmony_ciindex d27817be..387a675a 100644
18be168c0dSopenharmony_ci--- a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/BUILD.gn
19be168c0dSopenharmony_ci+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/BUILD.gn
20be168c0dSopenharmony_ci@@ -46,7 +46,6 @@ config("nnacl_public_config") {
21be168c0dSopenharmony_ci     }
22be168c0dSopenharmony_ci   } else if (target_cpu == "x86_64") {
23be168c0dSopenharmony_ci     cflags_c += [
24be168c0dSopenharmony_ci-      "-mavx512f",
25be168c0dSopenharmony_ci       "-mavx",
26be168c0dSopenharmony_ci       "-mavx2",
27be168c0dSopenharmony_ci       "-mfma",
28be168c0dSopenharmony_ci@@ -56,8 +55,16 @@ config("nnacl_public_config") {
29be168c0dSopenharmony_ci     defines += [
30be168c0dSopenharmony_ci       "ENABLE_SSE",
31be168c0dSopenharmony_ci       "ENABLE_AVX",
32be168c0dSopenharmony_ci-      "ENABLE_AVX512",
33be168c0dSopenharmony_ci     ]
34be168c0dSopenharmony_ci+    # The emulator does not support AVX512.
35be168c0dSopenharmony_ci+    if (!is_emulator) {
36be168c0dSopenharmony_ci+      cflags_c += [
37be168c0dSopenharmony_ci+        "-mavx512f",
38be168c0dSopenharmony_ci+      ]
39be168c0dSopenharmony_ci+      defines += [
40be168c0dSopenharmony_ci+        "ENABLE_AVX512",
41be168c0dSopenharmony_ci+      ]
42be168c0dSopenharmony_ci+    }
43be168c0dSopenharmony_ci   }
44be168c0dSopenharmony_ci }
45be168c0dSopenharmony_ci 
46be168c0dSopenharmony_ci@@ -102,7 +109,6 @@ c_kernel_sources = [
47be168c0dSopenharmony_ci   "kernel/convolution_depthwise_sw.c",
48be168c0dSopenharmony_ci   "kernel/convolution_im2col_arm32.c",
49be168c0dSopenharmony_ci   "kernel/convolution_im2col_arm64.c",
50be168c0dSopenharmony_ci-  "kernel/convolution_im2col_avx512.c",
51be168c0dSopenharmony_ci   "kernel/convolution_im2col_avx.c",
52be168c0dSopenharmony_ci   "kernel/convolution_im2col_base.c",
53be168c0dSopenharmony_ci   "kernel/convolution_im2col.c",
54be168c0dSopenharmony_ci@@ -136,7 +142,6 @@ c_kernel_sources = [
55be168c0dSopenharmony_ci   "kernel/log_softmax.c",
56be168c0dSopenharmony_ci   "kernel/matmul_arm32.c",
57be168c0dSopenharmony_ci   "kernel/matmul_arm64.c",
58be168c0dSopenharmony_ci-  "kernel/matmul_avx512.c",
59be168c0dSopenharmony_ci   "kernel/matmul_avx.c",
60be168c0dSopenharmony_ci   "kernel/matmul_base.c",
61be168c0dSopenharmony_ci   "kernel/matmul.c",
62be168c0dSopenharmony_ci@@ -169,10 +174,6 @@ c_kernel_sources = [
63be168c0dSopenharmony_ci   "kernel/zeros_like.c",
64be168c0dSopenharmony_ci ]
65be168c0dSopenharmony_ci 
66be168c0dSopenharmony_ci-# list of ${NNACL_DIR}/experimental/*.c
67be168c0dSopenharmony_ci-experimental_kernel_sources = [
68be168c0dSopenharmony_ci-]
69be168c0dSopenharmony_ci-
70be168c0dSopenharmony_ci # list of ${NNACL_DIR}/base/*.c
71be168c0dSopenharmony_ci base_kernel_sources = [
72be168c0dSopenharmony_ci   "base/arithmetic_base.c",
73be168c0dSopenharmony_ci@@ -221,7 +222,6 @@ fp32_kernel_sources = [
74be168c0dSopenharmony_ci   "fp32/conv_common_fp32.c",
75be168c0dSopenharmony_ci   "fp32/conv_depthwise_avx_fp32.c",
76be168c0dSopenharmony_ci   "fp32/conv_depthwise_fp32.c",
77be168c0dSopenharmony_ci-  "fp32/conv_im2col_avx512_fp32.c",
78be168c0dSopenharmony_ci   "fp32/conv_im2col_fp32.c",
79be168c0dSopenharmony_ci   "fp32/conv_sw_arm64_fp32.c",
80be168c0dSopenharmony_ci   "fp32/conv_sw_avx_fp32.c",
81be168c0dSopenharmony_ci@@ -246,8 +246,6 @@ fp32_kernel_sources = [
82be168c0dSopenharmony_ci   "fp32/local_response_norm_fp32.c",
83be168c0dSopenharmony_ci   "fp32/log_softmax_fp32.c",
84be168c0dSopenharmony_ci   "fp32/lstm_fp32.c",
85be168c0dSopenharmony_ci-  "fp32/matmul_avx512_fp32.c",
86be168c0dSopenharmony_ci-  "fp32/matmul_avx512_mask_fp32.c",
87be168c0dSopenharmony_ci   "fp32/matmul_avx_fp32.c",
88be168c0dSopenharmony_ci   "fp32/matmul_fp32.c",
89be168c0dSopenharmony_ci   "fp32/mul_fp32.c",
90be168c0dSopenharmony_ci@@ -784,6 +782,13 @@ sse_avx_avx512_sources = [
91be168c0dSopenharmony_ci   "assembly/avx/MatmulAvx.S",
92be168c0dSopenharmony_ci ]
93be168c0dSopenharmony_ci 
94be168c0dSopenharmony_ci+# AVX512 is only supported on real x86_64 machines (not on the emulator).
95be168c0dSopenharmony_ci+if (target_cpu == "x86_64" && !is_emulator) {
96be168c0dSopenharmony_ci+  sse_avx_avx512_sources += [
97be168c0dSopenharmony_ci+    "assembly/avx512/ConvDwFp32RowAVX512.S",
98be168c0dSopenharmony_ci+  ]
99be168c0dSopenharmony_ci+}
100be168c0dSopenharmony_ci+
101be168c0dSopenharmony_ci gemm_avx512_kernel_sources = [
102be168c0dSopenharmony_ci   "experimental/HPC-generator/gemm_avx512/nnacl_gemm_avx512_10x16_kernel_nhwc_fp32.c",
103be168c0dSopenharmony_ci   "experimental/HPC-generator/gemm_avx512/nnacl_gemm_avx512_10x32_kernel_nhwc_fp32.c",
104be168c0dSopenharmony_ci@@ -834,16 +839,64 @@ gemm_avx512_kernel_sources = [
105be168c0dSopenharmony_ci   "experimental/HPC-generator/gemm_avx512/nnacl_gemm_avx512_9x32_kernel_nhwc_fp32.c",
106be168c0dSopenharmony_ci ]
107be168c0dSopenharmony_ci 
108be168c0dSopenharmony_ci+gemm_mask_avx512_kernel_sources = [
109be168c0dSopenharmony_ci+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_10x16_mask_kernel_nhwc_fp32.c",
110be168c0dSopenharmony_ci+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_10x32_mask_kernel_nhwc_fp32.c",
111be168c0dSopenharmony_ci+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_11x16_mask_kernel_nhwc_fp32.c",
112be168c0dSopenharmony_ci+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_11x32_mask_kernel_nhwc_fp32.c",
113be168c0dSopenharmony_ci+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_12x16_mask_kernel_nhwc_fp32.c",
114be168c0dSopenharmony_ci+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_12x32_mask_kernel_nhwc_fp32.c",
115be168c0dSopenharmony_ci+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_1x16_mask_kernel_nhwc_fp32.c",
116be168c0dSopenharmony_ci+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_1x32_mask_kernel_nhwc_fp32.c",
117be168c0dSopenharmony_ci+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_1x48_mask_kernel_nhwc_fp32.c",
118be168c0dSopenharmony_ci+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_1x64_mask_kernel_nhwc_fp32.c",
119be168c0dSopenharmony_ci+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_1x80_mask_kernel_nhwc_fp32.c",
120be168c0dSopenharmony_ci+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_1x96_mask_kernel_nhwc_fp32.c",
121be168c0dSopenharmony_ci+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_2x16_mask_kernel_nhwc_fp32.c",
122be168c0dSopenharmony_ci+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_2x32_mask_kernel_nhwc_fp32.c",
123be168c0dSopenharmony_ci+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_2x48_mask_kernel_nhwc_fp32.c",
124be168c0dSopenharmony_ci+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_2x64_mask_kernel_nhwc_fp32.c",
125be168c0dSopenharmony_ci+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_2x80_mask_kernel_nhwc_fp32.c",
126be168c0dSopenharmony_ci+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_2x96_mask_kernel_nhwc_fp32.c",
127be168c0dSopenharmony_ci+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_3x16_mask_kernel_nhwc_fp32.c",
128be168c0dSopenharmony_ci+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_3x32_mask_kernel_nhwc_fp32.c",
129be168c0dSopenharmony_ci+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_3x48_mask_kernel_nhwc_fp32.c",
130be168c0dSopenharmony_ci+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_3x64_mask_kernel_nhwc_fp32.c",
131be168c0dSopenharmony_ci+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_3x80_mask_kernel_nhwc_fp32.c",
132be168c0dSopenharmony_ci+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_3x96_mask_kernel_nhwc_fp32.c",
133be168c0dSopenharmony_ci+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_4x16_mask_kernel_nhwc_fp32.c",
134be168c0dSopenharmony_ci+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_4x32_mask_kernel_nhwc_fp32.c",
135be168c0dSopenharmony_ci+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_4x48_mask_kernel_nhwc_fp32.c",
136be168c0dSopenharmony_ci+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_4x64_mask_kernel_nhwc_fp32.c",
137be168c0dSopenharmony_ci+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_4x80_mask_kernel_nhwc_fp32.c",
138be168c0dSopenharmony_ci+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_4x96_mask_kernel_nhwc_fp32.c",
139be168c0dSopenharmony_ci+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_5x16_mask_kernel_nhwc_fp32.c",
140be168c0dSopenharmony_ci+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_5x32_mask_kernel_nhwc_fp32.c",
141be168c0dSopenharmony_ci+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_5x48_mask_kernel_nhwc_fp32.c",
142be168c0dSopenharmony_ci+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_5x64_mask_kernel_nhwc_fp32.c",
143be168c0dSopenharmony_ci+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_5x80_mask_kernel_nhwc_fp32.c",
144be168c0dSopenharmony_ci+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_6x16_mask_kernel_nhwc_fp32.c",
145be168c0dSopenharmony_ci+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_6x32_mask_kernel_nhwc_fp32.c",
146be168c0dSopenharmony_ci+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_6x48_mask_kernel_nhwc_fp32.c",
147be168c0dSopenharmony_ci+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_6x64_mask_kernel_nhwc_fp32.c",
148be168c0dSopenharmony_ci+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_7x16_mask_kernel_nhwc_fp32.c",
149be168c0dSopenharmony_ci+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_7x32_mask_kernel_nhwc_fp32.c",
150be168c0dSopenharmony_ci+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_7x48_mask_kernel_nhwc_fp32.c",
151be168c0dSopenharmony_ci+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_8x16_mask_kernel_nhwc_fp32.c",
152be168c0dSopenharmony_ci+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_8x32_mask_kernel_nhwc_fp32.c",
153be168c0dSopenharmony_ci+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_8x48_mask_kernel_nhwc_fp32.c",
154be168c0dSopenharmony_ci+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_9x16_mask_kernel_nhwc_fp32.c",
155be168c0dSopenharmony_ci+  "experimental/HPC-generator/gemm_mask_avx512/nnacl_gemm_mask_avx512_9x32_mask_kernel_nhwc_fp32.c",
156be168c0dSopenharmony_ci+]
157be168c0dSopenharmony_ci+
158be168c0dSopenharmony_ci fp32_kernel_sources -= no_fast_math_fp32_kernel_sources
159be168c0dSopenharmony_ci fp32_kernel_sources -= avx_fp32_kernel_sources
160be168c0dSopenharmony_ci-fp32_kernel_sources -= avx512_fp32_kernel_sources
161be168c0dSopenharmony_ci fp32_kernel_sources -= arm64_fp32_kernel_sources
162be168c0dSopenharmony_ci 
163be168c0dSopenharmony_ci # source files on all target
164be168c0dSopenharmony_ci nnacl_sources = common_sources
165be168c0dSopenharmony_ci nnacl_sources += base_kernel_sources
166be168c0dSopenharmony_ci nnacl_sources += c_kernel_sources
167be168c0dSopenharmony_ci-nnacl_sources += experimental_kernel_sources
168be168c0dSopenharmony_ci nnacl_sources += fp32_kernel_sources
169be168c0dSopenharmony_ci nnacl_sources += fp32_sparse_kernel_sources
170be168c0dSopenharmony_ci nnacl_sources += fp32_grad_kernel_sources
171be168c0dSopenharmony_ci@@ -854,7 +907,6 @@ nnacl_sources += infer_control_sources
172be168c0dSopenharmony_ci 
173be168c0dSopenharmony_ci # source files on arm32
174be168c0dSopenharmony_ci arm_only_sources = arm32_assembly_sources
175be168c0dSopenharmony_ci-#arm_only_sources += arm32_fp16_assembly_sources
176be168c0dSopenharmony_ci not_needed(arm32_fp16_assembly_sources)
177be168c0dSopenharmony_ci 
178be168c0dSopenharmony_ci # source files on arm64
179be168c0dSopenharmony_ci@@ -868,8 +920,16 @@ arm64_only_sources += arm64_fp32_kernel_sources
180be168c0dSopenharmony_ci # sources files on x86_64
181be168c0dSopenharmony_ci x86_64_only_sources = sse_avx_avx512_sources
182be168c0dSopenharmony_ci x86_64_only_sources += avx_fp32_kernel_sources
183be168c0dSopenharmony_ci-x86_64_only_sources += avx512_fp32_kernel_sources
184be168c0dSopenharmony_ci-x86_64_only_sources += gemm_avx512_kernel_sources
185be168c0dSopenharmony_ci+# The emulator does not support AVX512.
186be168c0dSopenharmony_ci+if (is_emulator) {
187be168c0dSopenharmony_ci+  not_needed(avx512_fp32_kernel_sources)
188be168c0dSopenharmony_ci+  not_needed(gemm_avx512_kernel_sources)
189be168c0dSopenharmony_ci+  not_needed(gemm_mask_avx512_kernel_sources)
190be168c0dSopenharmony_ci+} else {
191be168c0dSopenharmony_ci+  x86_64_only_sources += avx512_fp32_kernel_sources
192be168c0dSopenharmony_ci+  x86_64_only_sources += gemm_avx512_kernel_sources
193be168c0dSopenharmony_ci+  x86_64_only_sources += gemm_mask_avx512_kernel_sources
194be168c0dSopenharmony_ci+}
195be168c0dSopenharmony_ci 
196be168c0dSopenharmony_ci if (target_cpu == "arm") {
197be168c0dSopenharmony_ci   nnacl_sources += arm_only_sources
198be168c0dSopenharmony_cidiff --git a/mindspore/lite/BUILD.gn b/mindspore/lite/BUILD.gn
199be168c0dSopenharmony_ciindex 467cdb6a..124c84c9 100644
200be168c0dSopenharmony_ci--- a/mindspore/lite/BUILD.gn
201be168c0dSopenharmony_ci+++ b/mindspore/lite/BUILD.gn
202be168c0dSopenharmony_ci@@ -118,12 +118,6 @@ control_flow_kernel_sources = [
203be168c0dSopenharmony_ci   "src/control_flow/kernel/identity_kernel.cc",
204be168c0dSopenharmony_ci ]
205be168c0dSopenharmony_ci 
206be168c0dSopenharmony_ci-experimental_sources = [
207be168c0dSopenharmony_ci-]
208be168c0dSopenharmony_ci-
209be168c0dSopenharmony_ci-string_kernel_source = [
210be168c0dSopenharmony_ci-]
211be168c0dSopenharmony_ci-
212be168c0dSopenharmony_ci auto_parallel_source = [
213be168c0dSopenharmony_ci   "src/litert/sub_graph_split.cc"
214be168c0dSopenharmony_ci ]
215be168c0dSopenharmony_ci@@ -186,19 +180,11 @@ lite_mindrt_sources = [
216be168c0dSopenharmony_ci all_lite_sources += cxx_api_sources
217be168c0dSopenharmony_ci all_lite_sources += api_source
218be168c0dSopenharmony_ci all_lite_sources += control_flow_kernel_sources
219be168c0dSopenharmony_ci-all_lite_sources += experimental_sources
220be168c0dSopenharmony_ci-all_lite_sources += string_kernel_source
221be168c0dSopenharmony_ci all_lite_sources += auto_parallel_source
222be168c0dSopenharmony_ci all_lite_sources += custom_registry_sources
223be168c0dSopenharmony_ci all_lite_sources += weight_decode_source
224be168c0dSopenharmony_ci all_lite_sources += lite_mindrt_sources
225be168c0dSopenharmony_ci 
226be168c0dSopenharmony_ci-ops_base_sources = [
227be168c0dSopenharmony_ci-#  "src/common/ops/anf_utils.cc", # disable runtiem convert
228be168c0dSopenharmony_ci-#  "src/common/ops/ops_def.cc", # disable kernel executor
229be168c0dSopenharmony_ci-#  "src/common/ops/ops_utils.cc" # disable kernel executor
230be168c0dSopenharmony_ci-]
231be168c0dSopenharmony_ci-
232be168c0dSopenharmony_ci basic_populate_sources = [
233be168c0dSopenharmony_ci   "src/common/ops/populate/activation_grad_populate.cc",
234be168c0dSopenharmony_ci   "src/common/ops/populate/activation_populate.cc",
235be168c0dSopenharmony_ci@@ -346,8 +332,7 @@ control_populate_sources = [
236be168c0dSopenharmony_ci   "src/common/ops/populate/control/tensorliststack_populate.cc",
237be168c0dSopenharmony_ci ]
238be168c0dSopenharmony_ci 
239be168c0dSopenharmony_ci-all_ops_sources = ops_base_sources
240be168c0dSopenharmony_ci-all_ops_sources += basic_populate_sources
241be168c0dSopenharmony_ci+all_ops_sources = basic_populate_sources
242be168c0dSopenharmony_ci all_ops_sources += string_populate_sources
243be168c0dSopenharmony_ci all_ops_sources += control_populate_sources
244be168c0dSopenharmony_ci 
245be168c0dSopenharmony_ci@@ -360,6 +345,12 @@ missing_sources = [
246be168c0dSopenharmony_ci 
247be168c0dSopenharmony_ci all_sources += missing_sources
248be168c0dSopenharmony_ci 
249be168c0dSopenharmony_ci+SUPPORT_NNRT = false
250be168c0dSopenharmony_ci+# Currently, NNRT is only supported on real arm/arm64 devices (not on the emulator).
251be168c0dSopenharmony_ci+if ((target_cpu == "arm" || target_cpu == "arm64") && !is_emulator) {
252be168c0dSopenharmony_ci+  SUPPORT_NNRT = true
253be168c0dSopenharmony_ci+}
254be168c0dSopenharmony_ci+
255be168c0dSopenharmony_ci ohos_shared_library("mindspore_lib") {
256be168c0dSopenharmony_ci   deps = [
257be168c0dSopenharmony_ci     "../ccsrc/plugin/device/cpu/kernel/nnacl/:nnacl_obj",
258be168c0dSopenharmony_ci@@ -387,7 +378,6 @@ ohos_shared_library("mindspore_lib") {
259be168c0dSopenharmony_ci     "../ccsrc/",
260be168c0dSopenharmony_ci     "src/litert/kernel/cpu/",
261be168c0dSopenharmony_ci     "../core/mindrt/src/",
262be168c0dSopenharmony_ci-    "//foundation/ai/neural_network_runtime/",
263be168c0dSopenharmony_ci   ]
264be168c0dSopenharmony_ci 
265be168c0dSopenharmony_ci   defines = [
266be168c0dSopenharmony_ci@@ -418,6 +408,17 @@ ohos_shared_library("mindspore_lib") {
267be168c0dSopenharmony_ci       "CL_HPP_TARGET_OPENCL_VERSION=120",
268be168c0dSopenharmony_ci       "CL_HPP_MINIMUM_OPENCL_VERSION=120",
269be168c0dSopenharmony_ci     ]
270be168c0dSopenharmony_ci+  } else if (target_cpu == "x86_64") {
271be168c0dSopenharmony_ci+    defines += [
272be168c0dSopenharmony_ci+      "ENABLE_SSE",
273be168c0dSopenharmony_ci+      "ENABLE_AVX",
274be168c0dSopenharmony_ci+    ]
275be168c0dSopenharmony_ci+    # The emulator does not support AVX512.
276be168c0dSopenharmony_ci+    if (!is_emulator) {
277be168c0dSopenharmony_ci+      defines += [
278be168c0dSopenharmony_ci+        "ENABLE_AVX512",
279be168c0dSopenharmony_ci+      ]
280be168c0dSopenharmony_ci+    }
281be168c0dSopenharmony_ci   }
282be168c0dSopenharmony_ci 
283be168c0dSopenharmony_ci   configs = [
284be168c0dSopenharmony_ci@@ -434,10 +435,10 @@ ohos_shared_library("mindspore_lib") {
285be168c0dSopenharmony_ci   output_name = "libmindspore-lite"
286be168c0dSopenharmony_ci   output_extension = "so"
287be168c0dSopenharmony_ci   innerapi_tags = [ "platformsdk" ]
288be168c0dSopenharmony_ci-  SUPPORT_NNRT = true
289be168c0dSopenharmony_ci   if (SUPPORT_NNRT) {
290be168c0dSopenharmony_ci     if (mindspore_feature_nnrt_metagraph) {
291be168c0dSopenharmony_ci       defines += [ "SUPPORT_NNRT_METAGRAPH" ]
292be168c0dSopenharmony_ci+      sources += [ "src/litert/delegate/nnrt/hiai_foundation_wrapper.cc", ]
293be168c0dSopenharmony_ci       print("enabled feature: mindspore_feature_nnrt_metagraph")
294be168c0dSopenharmony_ci     }
295be168c0dSopenharmony_ci     sources += [
296be168c0dSopenharmony_ci@@ -445,7 +446,6 @@ ohos_shared_library("mindspore_lib") {
297be168c0dSopenharmony_ci       "src/litert/delegate/nnrt/nnrt_delegate.cc",
298be168c0dSopenharmony_ci       "src/litert/delegate/nnrt/nnrt_model_kernel.cc",
299be168c0dSopenharmony_ci       "src/litert/delegate/nnrt/nnrt_allocator.cc",
300be168c0dSopenharmony_ci-      "src/litert/delegate/nnrt/hiai_foundation_wrapper.cc",
301be168c0dSopenharmony_ci       "src/litert/delegate/nnrt/extension_options_parser.cc",
302be168c0dSopenharmony_ci     ]
303be168c0dSopenharmony_ci     include_dirs += [
304be168c0dSopenharmony_ci@@ -453,6 +453,7 @@ ohos_shared_library("mindspore_lib") {
305be168c0dSopenharmony_ci       "../../mindspore/core/ir",
306be168c0dSopenharmony_ci       "mindir/include",
307be168c0dSopenharmony_ci       "mindir/inner_headers",
308be168c0dSopenharmony_ci+      "//foundation/ai/neural_network_runtime/",
309be168c0dSopenharmony_ci     ]
310be168c0dSopenharmony_ci 
311be168c0dSopenharmony_ci     external_deps += [ "neural_network_runtime:nnrt_target" ]
312be168c0dSopenharmony_ci@@ -499,11 +500,9 @@ ohos_shared_library("mindspore_ndk") {
313be168c0dSopenharmony_ci     "../../third_party/",
314be168c0dSopenharmony_ci     "./schema/",
315be168c0dSopenharmony_ci     "../ccsrc/",
316be168c0dSopenharmony_ci-    "//foundation/ai/neural_network_runtime/",
317be168c0dSopenharmony_ci   ]
318be168c0dSopenharmony_ci 
319be168c0dSopenharmony_ci   defines = [
320be168c0dSopenharmony_ci-    "SUPPORT_NNRT",
321be168c0dSopenharmony_ci     "MS_COMPILE_OHOS",
322be168c0dSopenharmony_ci     "PRIMITIVE_WRITEABLE",
323be168c0dSopenharmony_ci     "RUNTIME_PASS_CLIP",
324be168c0dSopenharmony_ci@@ -512,9 +511,18 @@ ohos_shared_library("mindspore_ndk") {
325be168c0dSopenharmony_ci     "ENABLE_HI_APP_EVENT",
326be168c0dSopenharmony_ci   ]
327be168c0dSopenharmony_ci 
328be168c0dSopenharmony_ci-  if (mindspore_feature_nnrt_metagraph) {
329be168c0dSopenharmony_ci-    defines += [ "SUPPORT_NNRT_METAGRAPH" ]
330be168c0dSopenharmony_ci-    print("enabled feature: mindspore_feature_nnrt_metagraph")
331be168c0dSopenharmony_ci+  if (SUPPORT_NNRT) {
332be168c0dSopenharmony_ci+    include_dirs += [
333be168c0dSopenharmony_ci+      "//foundation/ai/neural_network_runtime/",
334be168c0dSopenharmony_ci+    ]
335be168c0dSopenharmony_ci+    defines += [
336be168c0dSopenharmony_ci+      "SUPPORT_NNRT",
337be168c0dSopenharmony_ci+    ]
338be168c0dSopenharmony_ci+    if (mindspore_feature_nnrt_metagraph) {
339be168c0dSopenharmony_ci+      defines += [ "SUPPORT_NNRT_METAGRAPH" ]
340be168c0dSopenharmony_ci+      print("enabled feature: mindspore_feature_nnrt_metagraph")
341be168c0dSopenharmony_ci+    }
342be168c0dSopenharmony_ci+    external_deps = [ "neural_network_runtime:nnrt_target" ]
343be168c0dSopenharmony_ci   }
344be168c0dSopenharmony_ci 
345be168c0dSopenharmony_ci   configs = [
346be168c0dSopenharmony_ci@@ -523,8 +531,6 @@ ohos_shared_library("mindspore_ndk") {
347be168c0dSopenharmony_ci     ":secure_option",
348be168c0dSopenharmony_ci   ]
349be168c0dSopenharmony_ci 
350be168c0dSopenharmony_ci-  external_deps = [ "neural_network_runtime:nnrt_target" ]
351be168c0dSopenharmony_ci-
352be168c0dSopenharmony_ci   remove_configs = [ "//build/config/compiler:no_rtti" ]
353be168c0dSopenharmony_ci 
354be168c0dSopenharmony_ci   output_name = "libmindspore_lite_ndk"
355be168c0dSopenharmony_ci@@ -749,4 +755,4 @@ config("secure_option") {
356be168c0dSopenharmony_ci 
357be168c0dSopenharmony_ci config("train_kernel_option") {
358be168c0dSopenharmony_ci   cflags_cc = [ "-fno-finite-math-only" ]
359be168c0dSopenharmony_ci-}
360be168c0dSopenharmony_ci+}
361be168c0dSopenharmony_ci\ No newline at end of file
362be168c0dSopenharmony_cidiff --git a/mindspore/lite/src/common/thread_utils.cc b/mindspore/lite/src/common/thread_utils.cc
363be168c0dSopenharmony_ciindex 28c8e1cd..28c7acab 100644
364be168c0dSopenharmony_ci--- a/mindspore/lite/src/common/thread_utils.cc
365be168c0dSopenharmony_ci+++ b/mindspore/lite/src/common/thread_utils.cc
366be168c0dSopenharmony_ci@@ -17,7 +17,7 @@
367be168c0dSopenharmony_ci #if defined(__linux__) && !defined(ENABLE_ARM)
368be168c0dSopenharmony_ci #include "src/common/thread_utils.h"
369be168c0dSopenharmony_ci #include <sys/stat.h>
370be168c0dSopenharmony_ci-#include <wait.h>
371be168c0dSopenharmony_ci+#include <sys/wait.h>
372be168c0dSopenharmony_ci #include "src/common/log_adapter.h"
373be168c0dSopenharmony_ci 
374be168c0dSopenharmony_ci namespace mindspore {
375be168c0dSopenharmony_cidiff --git a/mindspore/lite/src/litert/kernel/cpu/BUILD.gn b/mindspore/lite/src/litert/kernel/cpu/BUILD.gn
376be168c0dSopenharmony_ciindex 297fc6f6..d51b9f4a 100644
377be168c0dSopenharmony_ci--- a/mindspore/lite/src/litert/kernel/cpu/BUILD.gn
378be168c0dSopenharmony_ci+++ b/mindspore/lite/src/litert/kernel/cpu/BUILD.gn
379be168c0dSopenharmony_ci@@ -52,7 +52,6 @@ cpu_kernel_sources = [
380be168c0dSopenharmony_ci     "fp32/convolution_fp32.cc",
381be168c0dSopenharmony_ci     "fp32/convolution_im2col_arm32_fp32.cc",
382be168c0dSopenharmony_ci     "fp32/convolution_im2col_arm64_fp32.cc",
383be168c0dSopenharmony_ci-    "fp32/convolution_im2col_avx512_fp32.cc",
384be168c0dSopenharmony_ci     "fp32/convolution_im2col_avx_fp32.cc",
385be168c0dSopenharmony_ci     "fp32/convolution_im2col_base_fp32.cc",
386be168c0dSopenharmony_ci     "fp32/convolution_im2col_fp32.cc",
387be168c0dSopenharmony_ci@@ -90,7 +89,6 @@ cpu_kernel_sources = [
388be168c0dSopenharmony_ci     "fp32/lstm_non_mindir_fp32.cc",
389be168c0dSopenharmony_ci     "fp32/matmul_fp32_arm32.cc",
390be168c0dSopenharmony_ci     "fp32/matmul_fp32_arm64.cc",
391be168c0dSopenharmony_ci-    "fp32/matmul_fp32_avx512.cc",
392be168c0dSopenharmony_ci     "fp32/matmul_fp32_avx.cc",
393be168c0dSopenharmony_ci     "fp32/matmul_fp32_base.cc",
394be168c0dSopenharmony_ci     "fp32/matmul_fp32.cc",
395be168c0dSopenharmony_ci@@ -125,7 +123,7 @@ cpu_kernel_sources = [
396be168c0dSopenharmony_ci ]
397be168c0dSopenharmony_ci 
398be168c0dSopenharmony_ci if ((target_cpu != "arm") && (target_cpu != "arm64")) {
399be168c0dSopenharmony_ci-    cpu_kernel_sources += [ "src/runtime/kernel/cpu/fp32/cast_for_x86_fp16.cc" ]
400be168c0dSopenharmony_ci+    cpu_kernel_sources += [ "fp32/cast_for_x86_fp16.cc" ]
401be168c0dSopenharmony_ci }
402be168c0dSopenharmony_ci 
403be168c0dSopenharmony_ci arm64_cpu_kernel_sources = [
404be168c0dSopenharmony_ci@@ -148,8 +146,6 @@ sse_avx_avx512_kernel_sources = [
405be168c0dSopenharmony_ci   "fp32/convolution_im2col_avx_fp32.cc",
406be168c0dSopenharmony_ci   "fp32/matmul_fp32_avx.cc",
407be168c0dSopenharmony_ci   "fp32/convolution_winograd_avx_fp32.cc",
408be168c0dSopenharmony_ci-  "fp32/convolution_im2col_avx512_fp32.cc",
409be168c0dSopenharmony_ci-  "fp32/matmul_fp32_avx512.cc",
410be168c0dSopenharmony_ci ]
411be168c0dSopenharmony_ci 
412be168c0dSopenharmony_ci fp16_kernel_sources = [
413be168c0dSopenharmony_ci@@ -272,6 +268,18 @@ control_kernel_sources = [
414be168c0dSopenharmony_ci     "control/tensorlist_stack.cc",
415be168c0dSopenharmony_ci ]
416be168c0dSopenharmony_ci 
417be168c0dSopenharmony_ci+# The emulator does not support AVX512.
418be168c0dSopenharmony_ci+if (!is_emulator) {
419be168c0dSopenharmony_ci+  cpu_kernel_sources += [
420be168c0dSopenharmony_ci+    "fp32/convolution_im2col_avx512_fp32.cc",
421be168c0dSopenharmony_ci+    "fp32/matmul_fp32_avx512.cc",
422be168c0dSopenharmony_ci+  ]
423be168c0dSopenharmony_ci+  sse_avx_avx512_kernel_sources += [
424be168c0dSopenharmony_ci+    "fp32/convolution_im2col_avx512_fp32.cc",
425be168c0dSopenharmony_ci+    "fp32/matmul_fp32_avx512.cc",
426be168c0dSopenharmony_ci+  ]
427be168c0dSopenharmony_ci+}
428be168c0dSopenharmony_ci+
429be168c0dSopenharmony_ci all_cpu_kernel_sources = cpu_kernel_sources
430be168c0dSopenharmony_ci all_cpu_kernel_sources += int8_kernel_sources
431be168c0dSopenharmony_ci all_cpu_kernel_sources += string_kernel_sources
432be168c0dSopenharmony_ci@@ -348,6 +356,16 @@ ohos_source_set("cpu_kernel_obj") {
433be168c0dSopenharmony_ci       "CL_HPP_TARGET_OPENCL_VERSION=120",
434be168c0dSopenharmony_ci       "CL_HPP_MINIMUM_OPENCL_VERSION=120",
435be168c0dSopenharmony_ci     ]
436be168c0dSopenharmony_ci+  } else if (target_cpu == "x86_64") {
437be168c0dSopenharmony_ci+    defines += [
438be168c0dSopenharmony_ci+      "ENABLE_SSE",
439be168c0dSopenharmony_ci+      "ENABLE_AVX",
440be168c0dSopenharmony_ci+    ]
441be168c0dSopenharmony_ci+    if (!is_emulator) {
442be168c0dSopenharmony_ci+      defines += [
443be168c0dSopenharmony_ci+        "ENABLE_AVX512",
444be168c0dSopenharmony_ci+      ]
445be168c0dSopenharmony_ci+    }
446be168c0dSopenharmony_ci   }
447be168c0dSopenharmony_ci 
448be168c0dSopenharmony_ci   cflags_cc = [
449be168c0dSopenharmony_cidiff --git a/mindspore/lite/src/litert/kernel/cpu/fp32/convolution_delegate_fp32.cc b/mindspore/lite/src/litert/kernel/cpu/fp32/convolution_delegate_fp32.cc
450be168c0dSopenharmony_ciindex f907bbbf..ac693c44 100644
451be168c0dSopenharmony_ci--- a/mindspore/lite/src/litert/kernel/cpu/fp32/convolution_delegate_fp32.cc
452be168c0dSopenharmony_ci+++ b/mindspore/lite/src/litert/kernel/cpu/fp32/convolution_delegate_fp32.cc
453be168c0dSopenharmony_ci@@ -49,7 +49,9 @@ using mindspore::schema::PrimitiveType_Conv2DFusion;
454be168c0dSopenharmony_ci 
455be168c0dSopenharmony_ci namespace mindspore::kernel {
456be168c0dSopenharmony_ci namespace {
457be168c0dSopenharmony_ci+#ifndef ENABLE_AVX
458be168c0dSopenharmony_ci constexpr int kMaxDwConvSWSize = 32;
459be168c0dSopenharmony_ci+#endif
460be168c0dSopenharmony_ci }  // namespace
461be168c0dSopenharmony_ci 
462be168c0dSopenharmony_ci float *ConvolutionDelegateCPUKernel::CopyData(const lite::Tensor *tensor) {
463be168c0dSopenharmony_cidiff --git a/mindspore/lite/src/litert/kernel/cpu/fp32/convolution_depthwise_slidewindow_x86_fp32.cc b/mindspore/lite/src/litert/kernel/cpu/fp32/convolution_depthwise_slidewindow_x86_fp32.cc
464be168c0dSopenharmony_ciindex 568b9463..d35669ce 100644
465be168c0dSopenharmony_ci--- a/mindspore/lite/src/litert/kernel/cpu/fp32/convolution_depthwise_slidewindow_x86_fp32.cc
466be168c0dSopenharmony_ci+++ b/mindspore/lite/src/litert/kernel/cpu/fp32/convolution_depthwise_slidewindow_x86_fp32.cc
467be168c0dSopenharmony_ci@@ -106,7 +106,7 @@ int ConvolutionDepthwiseSWCPUKernelX86::ReSize() {
468be168c0dSopenharmony_ci   return RET_OK;
469be168c0dSopenharmony_ci }
470be168c0dSopenharmony_ci 
471be168c0dSopenharmony_ci-int ConvolutionDepthwiseSWCPUKernelX86::Execute(int task_id) {
472be168c0dSopenharmony_ci+int ConvolutionDepthwiseSWCPUKernelX86::DoExecute(int task_id) {
473be168c0dSopenharmony_ci   DepthwiseSWAvxFp32(packed_output_, packed_input_, reinterpret_cast<float *>(packed_weight_),
474be168c0dSopenharmony_ci                      reinterpret_cast<float *>(bias_data_), conv_param_, sliding_, task_id);
475be168c0dSopenharmony_ci   return RET_OK;
476be168c0dSopenharmony_ci@@ -114,7 +114,7 @@ int ConvolutionDepthwiseSWCPUKernelX86::Execute(int task_id) {
477be168c0dSopenharmony_ci 
478be168c0dSopenharmony_ci int ConvDwSWAvxRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
479be168c0dSopenharmony_ci   auto conv_dw = reinterpret_cast<ConvolutionDepthwiseSWCPUKernelX86 *>(cdata);
480be168c0dSopenharmony_ci-  auto ret = conv_dw->Execute(task_id);
481be168c0dSopenharmony_ci+  auto ret = conv_dw->DoExecute(task_id);
482be168c0dSopenharmony_ci   if (ret != RET_OK) {
483be168c0dSopenharmony_ci     MS_LOG(ERROR) << "ConvolutionDepthwiseSWRun in x86 error task_id[" << task_id << "] error_code[" << ret << "]";
484be168c0dSopenharmony_ci     return RET_ERROR;
485be168c0dSopenharmony_cidiff --git a/mindspore/lite/src/litert/kernel/cpu/fp32/convolution_depthwise_slidewindow_x86_fp32.h b/mindspore/lite/src/litert/kernel/cpu/fp32/convolution_depthwise_slidewindow_x86_fp32.h
486be168c0dSopenharmony_ciindex e959fe45..928321e5 100644
487be168c0dSopenharmony_ci--- a/mindspore/lite/src/litert/kernel/cpu/fp32/convolution_depthwise_slidewindow_x86_fp32.h
488be168c0dSopenharmony_ci+++ b/mindspore/lite/src/litert/kernel/cpu/fp32/convolution_depthwise_slidewindow_x86_fp32.h
489be168c0dSopenharmony_ci@@ -35,7 +35,7 @@ class ConvolutionDepthwiseSWCPUKernelX86 : public ConvolutionBaseCPUKernel {
490be168c0dSopenharmony_ci   int ReSize() override;
491be168c0dSopenharmony_ci   int Run() override;
492be168c0dSopenharmony_ci 
493be168c0dSopenharmony_ci-  int Execute(int task_id);
494be168c0dSopenharmony_ci+  int DoExecute(int task_id);
495be168c0dSopenharmony_ci 
496be168c0dSopenharmony_ci  private:
497be168c0dSopenharmony_ci   void FreePackedInputOutput();
498be168c0dSopenharmony_ci-- 
499be168c0dSopenharmony_ci2.25.1
500be168c0dSopenharmony_ci
501