1From baf2daaebd70448cddd35f5011642fe585d071b5 Mon Sep 17 00:00:00 2001 2From: chengfeng27 <chengfeng27@huawei.com> 3Date: Tue, 5 Mar 2024 20:00:24 +0800 4Subject: [PATCH] hilog use macro definition api 5 6--- 7 cmake/external_libs/flatbuffers.cmake | 4 +- 8 include/api/context.h | 65 ++ 9 include/c_api/context_c.h | 111 +++ 10 include/c_api/model_c.h | 178 ++++ 11 include/c_api/tensor_c.h | 14 + 12 include/c_api/types_c.h | 57 +- 13 include/sdk_api/context.h | 103 +++ 14 include/sdk_api/tensor.h | 13 + 15 include/sdk_api/types.h | 38 +- 16 .../plugin/device/cpu/kernel/nnacl/BUILD.gn | 3 + 17 .../device/cpu/kernel/nnacl/CMakeLists.txt | 2 +- 18 .../kernel/nnacl/avx/scatter_nd_binary_avx.h | 66 ++ 19 .../nnacl/avx512/scatter_nd_binary_avx512.h | 66 ++ 20 .../cpu/kernel/nnacl/base/scatter_nd_binary.c | 28 + 21 .../cpu/kernel/nnacl/base/scatter_nd_binary.h | 3 + 22 .../nnacl/base/scatter_nd_binary_simd.h.in | 14 + 23 .../kernel/nnacl/custom_is_inf_parameter.h | 26 + 24 .../nnacl/custom_masked_fill_parameter.h | 26 + 25 .../custom_tensor_scatter_max_parameter.h | 26 + 26 .../kernel/nnacl/infer/custom_is_inf_infer.c | 38 + 27 .../kernel/nnacl/infer/custom_is_inf_infer.h | 31 + 28 .../nnacl/infer/custom_masked_fill_infer.c | 37 + 29 .../nnacl/infer/custom_masked_fill_infer.h | 31 + 30 .../infer/custom_tensor_scatter_max_infer.c | 37 + 31 .../infer/custom_tensor_scatter_max_infer.h | 31 + 32 .../nnacl/neon/scatter_nd_binary_neon.h | 65 ++ 33 .../plugin/device/cpu/kernel/nnacl/op_base.h | 4 + 34 .../cpu/kernel/nnacl/scatter_nd_binary_simd.h | 36 + 35 .../kernel/nnacl/sse/scatter_nd_binary_sse.h | 66 ++ 36 mindspore/core/mindrt/BUILD.gn | 9 +- 37 .../mindrt/src/thread/actor_threadpool.cc | 2 +- 38 .../core/mindrt/src/thread/core_affinity.cc | 6 +- 39 .../core/mindrt/src/thread/core_affinity.h | 2 +- 40 .../mindrt/src/thread/parallel_threadpool.cc | 2 +- 41 mindspore/core/mindrt/src/thread/threadlog.h | 28 +- 42 .../core/mindrt/src/thread/threadpool.cc | 7 +- 43 mindspore/lite/BUILD.gn | 82 +- 44 mindspore/lite/CMakeLists.txt | 5 +- 45 mindspore/lite/include/lite_types.h | 1 + 46 mindspore/lite/include/model.h | 4 + 47 .../lite/include/registry/converter_context.h | 4 +- 48 mindspore/lite/mindir/include/mindir.h | 2 + 49 mindspore/lite/mindir/src/mindir.cc | 40 + 50 mindspore/lite/mindir/src/mindir_tensor.cc | 2 +- 51 mindspore/lite/mindir/src/utils.cc | 2 +- 52 mindspore/lite/src/CMakeLists.txt | 6 +- 53 mindspore/lite/src/common/context_util.cc | 14 +- 54 mindspore/lite/src/common/log.cc | 33 +- 55 mindspore/lite/src/common/log.h | 50 +- 56 .../common/ops/populate/custom_populate.cc | 53 ++ 57 mindspore/lite/src/litert/c_api/context_c.cc | 372 +++++++- 58 mindspore/lite/src/litert/c_api/context_c.h | 23 - 59 mindspore/lite/src/litert/c_api/model_c.cc | 724 ++++++++------- 60 mindspore/lite/src/litert/c_api/tensor_c.cc | 78 +- 61 .../lite/src/litert/c_api/type_c_private.h | 40 + 62 mindspore/lite/src/litert/cxx_api/context.cc | 85 ++ 63 .../lite/src/litert/cxx_api/converters.cc | 60 +- 64 .../lite/src/litert/cxx_api/converters.h | 4 +- 65 .../src/litert/delegate/nnrt/CMakeLists.txt | 27 +- 66 .../delegate/nnrt/checker/primitive_check.cc | 2 + 67 .../src/litert/delegate/nnrt/nnrt_delegate.cc | 836 ++++++++++++++---- 68 .../src/litert/delegate/nnrt/nnrt_delegate.h | 74 +- 69 .../litert/delegate/nnrt/nnrt_model_kernel.cc | 3 +- 70 .../litert/delegate/nnrt/nnrt_model_kernel.h | 2 +- 71 .../src/litert/delegate/nnrt/nnrt_stub.cc | 99 +++ 72 mindspore/lite/src/litert/infer_manager.cc | 3 +- 73 
mindspore/lite/src/litert/inner_context.cc | 4 + 74 mindspore/lite/src/litert/inner_context.h | 14 + 75 mindspore/lite/src/litert/kernel/cpu/BUILD.gn | 51 +- 76 .../src/litert/kernel/cpu/base/custom_base.cc | 46 + 77 .../src/litert/kernel/cpu/base/custom_base.h | 43 + 78 .../litert/kernel/cpu/base/custom_is_inf.cc | 61 ++ 79 .../litert/kernel/cpu/base/custom_is_inf.h | 38 + 80 .../kernel/cpu/base/custom_masked_fill.cc | 84 ++ 81 .../kernel/cpu/base/custom_masked_fill.h | 35 + 82 .../kernel/cpu/base/custom_tensor_scatter.cc | 75 ++ 83 .../kernel/cpu/base/custom_tensor_scatter.h | 36 + 84 mindspore/lite/src/litert/lite_model.cc | 29 + 85 mindspore/lite/src/litert/lite_session.cc | 39 +- 86 mindspore/lite/src/litert/lite_session.h | 1 + 87 mindspore/lite/src/litert/scheduler.cc | 17 + 88 mindspore/lite/src/litert/tensor_category.cc | 4 + 89 mindspore/lite/src/litert/tensor_category.h | 1 + 90 mindspore/lite/test/CMakeLists.txt | 15 +- 91 mindspore/lite/test/runtest.sh | 1 + 92 .../test/ut/test_data/third_party_model.cfg | 8 + 93 .../tools/converter/api/converter_api_test.cc | 10 + 94 .../third_party_param_parser_test.cc | 176 ++++ 95 .../lite/tools/benchmark/benchmark_base.cc | 2 +- 96 .../lite/tools/benchmark/benchmark_base.h | 2 +- 97 .../lite/tools/benchmark/benchmark_c_api.cc | 4 + 98 .../tools/benchmark/benchmark_unified_api.cc | 5 + 99 .../lite/tools/benchmark_train/CMakeLists.txt | 3 + 100 mindspore/lite/tools/benchmark_train/main.cc | 3 +- 101 .../lite/tools/benchmark_train/net_runner.cc | 10 +- 102 .../lite/tools/benchmark_train/net_train.cc | 418 +-------- 103 .../lite/tools/benchmark_train/net_train.h | 229 +---- 104 .../tools/benchmark_train/net_train_base.cc | 410 +++++++++ 105 .../tools/benchmark_train/net_train_base.h | 288 ++++++ 106 .../tools/benchmark_train/net_train_c_api.cc | 659 ++++++++++++++ 107 .../tools/benchmark_train/net_train_c_api.h | 121 +++ 108 .../tools/benchmark_train/run_net_train.cc | 86 ++ 109 .../tools/benchmark_train/run_net_train.h | 22 + 110 mindspore/lite/tools/converter/CMakeLists.txt | 4 + 111 .../config_parser/config_file_parser.cc | 27 + 112 .../config_parser/config_file_parser.h | 15 + 113 .../config_parser/third_party_param_parser.cc | 299 +++++++ 114 .../config_parser/third_party_param_parser.h | 44 + 115 mindspore/lite/tools/converter/converter.cc | 34 +- 116 .../tools/converter/converter_funcgraph.cc | 13 +- 117 .../converter_lite/converter_flags.cc | 4 +- 118 .../tools/converter/cxx_api/converter_para.h | 14 + 119 .../tools/converter/graphdef_transform.cc | 44 + 120 .../parser/third_party/CMakeLists.txt | 4 + 121 .../third_party/third_party_model_parser.cc | 277 ++++++ 122 .../third_party/third_party_model_parser.h | 50 ++ 123 .../registry/model_parser_registry.cc | 4 +- 124 117 files changed, 6456 insertions(+), 1432 deletions(-) 125 create mode 100644 mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/avx/scatter_nd_binary_avx.h 126 create mode 100644 mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/avx512/scatter_nd_binary_avx512.h 127 create mode 100644 mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/custom_is_inf_parameter.h 128 create mode 100644 mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/custom_masked_fill_parameter.h 129 create mode 100644 mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/custom_tensor_scatter_max_parameter.h 130 create mode 100644 mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/custom_is_inf_infer.c 131 create mode 100644 mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/custom_is_inf_infer.h 132 create mode 100644 
mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/custom_masked_fill_infer.c 133 create mode 100644 mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/custom_masked_fill_infer.h 134 create mode 100644 mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/custom_tensor_scatter_max_infer.c 135 create mode 100644 mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/custom_tensor_scatter_max_infer.h 136 create mode 100644 mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/neon/scatter_nd_binary_neon.h 137 create mode 100644 mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/scatter_nd_binary_simd.h 138 create mode 100644 mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/sse/scatter_nd_binary_sse.h 139 create mode 100644 mindspore/lite/src/litert/c_api/type_c_private.h 140 create mode 100644 mindspore/lite/src/litert/delegate/nnrt/nnrt_stub.cc 141 create mode 100644 mindspore/lite/src/litert/kernel/cpu/base/custom_base.cc 142 create mode 100644 mindspore/lite/src/litert/kernel/cpu/base/custom_base.h 143 create mode 100644 mindspore/lite/src/litert/kernel/cpu/base/custom_is_inf.cc 144 create mode 100644 mindspore/lite/src/litert/kernel/cpu/base/custom_is_inf.h 145 create mode 100644 mindspore/lite/src/litert/kernel/cpu/base/custom_masked_fill.cc 146 create mode 100644 mindspore/lite/src/litert/kernel/cpu/base/custom_masked_fill.h 147 create mode 100644 mindspore/lite/src/litert/kernel/cpu/base/custom_tensor_scatter.cc 148 create mode 100644 mindspore/lite/src/litert/kernel/cpu/base/custom_tensor_scatter.h 149 create mode 100644 mindspore/lite/test/ut/test_data/third_party_model.cfg 150 create mode 100644 mindspore/lite/test/ut/tools/converter/config_parser/third_party_param_parser_test.cc 151 create mode 100644 mindspore/lite/tools/benchmark_train/net_train_base.cc 152 create mode 100644 mindspore/lite/tools/benchmark_train/net_train_base.h 153 create mode 100644 mindspore/lite/tools/benchmark_train/net_train_c_api.cc 154 create mode 100644 mindspore/lite/tools/benchmark_train/net_train_c_api.h 155 create mode 100644 mindspore/lite/tools/benchmark_train/run_net_train.cc 156 create mode 100644 mindspore/lite/tools/benchmark_train/run_net_train.h 157 create mode 100644 mindspore/lite/tools/converter/config_parser/third_party_param_parser.cc 158 create mode 100644 mindspore/lite/tools/converter/config_parser/third_party_param_parser.h 159 create mode 100644 mindspore/lite/tools/converter/parser/third_party/CMakeLists.txt 160 create mode 100644 mindspore/lite/tools/converter/parser/third_party/third_party_model_parser.cc 161 create mode 100644 mindspore/lite/tools/converter/parser/third_party/third_party_model_parser.h 162 163diff --git a/cmake/external_libs/flatbuffers.cmake b/cmake/external_libs/flatbuffers.cmake 164index 2fde4311..87f0425b 100644 165--- a/cmake/external_libs/flatbuffers.cmake 166+++ b/cmake/external_libs/flatbuffers.cmake 167@@ -21,8 +21,8 @@ else() 168 # flatbuffers.lib cimplied by msvc 169 set(CMAKE_STATIC_LIBRARY_PREFIX "") 170 else() 171- set(flatbuffers_CXXFLAGS "-fPIC -fPIE -D_FORTIFY_SOURCE=2 -O2 -fstack-protector-strong") 172- set(flatbuffers_CFLAGS "-fPIC -fPIE -D_FORTIFY_SOURCE=2 -O2 -fstack-protector-strong") 173+ set(flatbuffers_CXXFLAGS "-fPIC -fPIE -D_FORTIFY_SOURCE=2 -O2 -fstack-protector-strong -Wno-error=unused-but-set-variable") 174+ set(flatbuffers_CFLAGS "-fPIC -fPIE -D_FORTIFY_SOURCE=2 -O2 -fstack-protector-strong -Wno-error=unused-but-set-variable") 175 endif() 176 177 if(WIN32) 178diff --git a/include/api/context.h b/include/api/context.h 179index 
c9fb11f0..eb704d44 100644
--- a/include/api/context.h
+++ b/include/api/context.h
@@ -39,6 +39,8 @@ enum DeviceType {
   kAscend310,
   kCustomDevice,
   kAllDevice,
+  // ohos-only device range: [60, 80)
+  kNNRt = 60,
   // add new type here
   kInvalidDeviceType = 100,
 };
@@ -598,5 +600,68 @@ void AscendDeviceInfo::SetBufferOptimizeMode(const std::string &buffer_optimize_
   SetBufferOptimizeMode(StringToChar(buffer_optimize_mode));
 }
 std::string AscendDeviceInfo::GetBufferOptimizeMode() const { return CharToString(GetBufferOptimizeModeChar()); }
+
+struct Extension {
+  std::string name;
+  std::vector<uint8_t> value;
+};
+
+class MS_API NNRTDeviceInfo : public DeviceInfoContext {
+ public:
+  /// \brief Get the type of this DeviceInfoContext.
+  ///
+  /// \return Type of this DeviceInfoContext.
+  enum DeviceType GetDeviceType() const override { return DeviceType::kNNRt; };
+
+  /// \brief Set device id.
+  ///
+  /// \param[in] device_id The device id.
+  void SetDeviceID(size_t device_id);
+
+  /// \brief Get the device id.
+  ///
+  /// \return The device id.
+  size_t GetDeviceID() const;
+
+  /// \brief Set performance mode.
+  ///
+  /// \param[in] performance_mode The performance mode.
+  void SetPerformanceMode(int performance_mode);
+
+  /// \brief Get performance mode.
+  ///
+  /// \return The performance mode.
+  int GetPerformanceMode() const;
+
+  /// \brief Set priority.
+  ///
+  /// \param[in] priority The priority.
+  void SetPriority(int priority);
+
+  /// \brief Get priority.
+  ///
+  /// \return The priority.
+  int GetPriority() const;
+
+  /// \brief Set whether to enable float16 inference.
+  ///
+  /// \param[in] is_fp16 Enable float16 inference or not.
+  void SetEnableFP16(bool is_fp16);
+
+  /// \brief Get whether float16 inference is enabled.
+  ///
+  /// \return Whether float16 inference is enabled.
+  bool GetEnableFP16() const;
+
+  /// \brief Set extensions.
+  ///
+  /// \param[in] extensions Extension array.
+  void SetExtensions(const std::vector<Extension> &extensions);
+
+  /// \brief Get extensions.
+  ///
+  /// \return Extension array.
+  std::vector<Extension> GetExtensions() const;
+};
 }  // namespace mindspore
 #endif  // MINDSPORE_INCLUDE_API_CONTEXT_H
diff --git a/include/c_api/context_c.h b/include/c_api/context_c.h
index 53839e80..8951da25 100644
--- a/include/c_api/context_c.h
+++ b/include/c_api/context_c.h
@@ -19,6 +19,7 @@
 #include <stddef.h>
 #include <stdint.h>
 #include <stdbool.h>
+#include "include/c_api/status_c.h"
 #include "include/c_api/types_c.h"

 #ifdef __cplusplus
@@ -173,6 +174,109 @@ OH_AI_API void OH_AI_DeviceInfoSetFrequency(OH_AI_DeviceInfoHandle device_info,
 /// \return NPU frequency
 OH_AI_API int OH_AI_DeviceInfoGetFrequency(const OH_AI_DeviceInfoHandle device_info);

+/// \brief Obtain all device descriptions in NNRT.
+///
+/// \param[out] num Number of NNRT device descriptions.
+///
+/// \return NNRT device description array.
+OH_AI_API NNRTDeviceDesc *OH_AI_GetAllNNRTDeviceDescs(size_t *num);
+
+/// \brief Obtain the specified element in the NNRT device description array.
+///
+/// \param[in] descs NNRT device description array.
+/// \param[in] index Element index.
+///
+/// \return NNRT device description.
+OH_AI_API NNRTDeviceDesc *OH_AI_GetElementOfNNRTDeviceDescs(NNRTDeviceDesc *descs, size_t index);
+
+/// \brief Destroy the NNRT device descriptions returned by OH_AI_GetAllNNRTDeviceDescs().
+///
+/// \param[in] desc NNRT device description array.
+OH_AI_API void OH_AI_DestroyAllNNRTDeviceDescs(NNRTDeviceDesc **desc);
+
+/// \brief Obtain the device id in NNRT device description.
+///
+/// \param[in] desc pointer to the NNRT device description instance.
+///
+/// \return NNRT device id.
+OH_AI_API size_t OH_AI_GetDeviceIdFromNNRTDeviceDesc(const NNRTDeviceDesc *desc);
+
+/// \brief Obtain the device name in NNRT device description.
+///
+/// \param[in] desc pointer to the NNRT device description instance.
+///
+/// \return NNRT device name.
+OH_AI_API const char *OH_AI_GetNameFromNNRTDeviceDesc(const NNRTDeviceDesc *desc);
+
+/// \brief Obtain the device type in NNRT device description.
+///
+/// \param[in] desc pointer to the NNRT device description instance.
+///
+/// \return NNRT device type.
+OH_AI_API OH_AI_NNRTDeviceType OH_AI_GetTypeFromNNRTDeviceDesc(const NNRTDeviceDesc *desc);
+
+/// \brief Create the NNRT device info by exactly matching the specific device name.
+///
+/// \param[in] name NNRT device name.
+///
+/// \return Device info object handle.
+OH_AI_API OH_AI_DeviceInfoHandle OH_AI_CreateNNRTDeviceInfoByName(const char *name);
+
+/// \brief Create the NNRT device info by finding the first device with the specific device type.
+///
+/// \param[in] type NNRT device type.
+///
+/// \return Device info object handle.
+OH_AI_API OH_AI_DeviceInfoHandle OH_AI_CreateNNRTDeviceInfoByType(OH_AI_NNRTDeviceType type);
+
+/// \brief Set the NNRT device id, Only valid for NNRT.
+///
+/// \param[in] device_info Device info object handle.
+/// \param[in] device_id NNRT device id.
+OH_AI_API void OH_AI_DeviceInfoSetDeviceId(OH_AI_DeviceInfoHandle device_info, size_t device_id);
+
+/// \brief Obtain the NNRT device id, Only valid for NNRT.
+///
+/// \param[in] device_info Device info object handle.
+///
+/// \return NNRT device id.
+OH_AI_API size_t OH_AI_DeviceInfoGetDeviceId(const OH_AI_DeviceInfoHandle device_info);
+
+/// \brief Set the NNRT performance mode, Only valid for NNRT.
+///
+/// \param[in] device_info Device info object handle.
+/// \param[in] mode NNRT performance mode.
+OH_AI_API void OH_AI_DeviceInfoSetPerformanceMode(OH_AI_DeviceInfoHandle device_info, OH_AI_PerformanceMode mode);
+
+/// \brief Obtain the NNRT performance mode, Only valid for NNRT.
+///
+/// \param[in] device_info Device info object handle.
+///
+/// \return NNRT performance mode.
+OH_AI_API OH_AI_PerformanceMode OH_AI_DeviceInfoGetPerformanceMode(const OH_AI_DeviceInfoHandle device_info);
+
+/// \brief Set the NNRT priority, Only valid for NNRT.
+///
+/// \param[in] device_info Device info object handle.
+/// \param[in] priority NNRT priority.
+OH_AI_API void OH_AI_DeviceInfoSetPriority(OH_AI_DeviceInfoHandle device_info, OH_AI_Priority priority);
+
+/// \brief Obtain the NNRT priority, Only valid for NNRT.
371+/// 372+/// \param[in] device_info Device info object handle. 373+/// 374+/// \return NNRT priority. 375+OH_AI_API OH_AI_Priority OH_AI_DeviceInfoGetPriority(const OH_AI_DeviceInfoHandle device_info); 376+ 377+/// \brief Add extension of key/value format to device info, Only valid for NNRT. 378+/// 379+/// \param[in] device_info Device info object handle. 380+/// \param[in] name The content of key as a C string. 381+/// \param[in] value The pointer to the value, which is a byte array. 382+/// \param[in] value_size The size of the value, which is a byte array. 383+/// 384+/// \return OH_AI_STATUS_SUCCESS if success, or detail error code if failed. 385+OH_AI_API OH_AI_Status OH_AI_DeviceInfoAddExtension(OH_AI_DeviceInfoHandle device_info, const char *name, const char *value, size_t value_size); 386 #ifdef __cplusplus 387 } 388 #endif 389diff --git a/include/c_api/model_c.h b/include/c_api/model_c.h 390index 12a46bcd..2286e673 100644 391--- a/include/c_api/model_c.h 392+++ b/include/c_api/model_c.h 393@@ -26,6 +26,8 @@ extern "C" { 394 395 typedef void *OH_AI_ModelHandle; 396 397+typedef void *OH_AI_TrainCfgHandle; 398+ 399 typedef struct OH_AI_TensorHandleArray { 400 size_t handle_num; 401 OH_AI_TensorHandle *handle_list; 402@@ -168,6 +170,182 @@ OH_AI_API OH_AI_TensorHandle OH_AI_ModelGetInputByTensorName(const OH_AI_ModelHa 403 /// \return The output tensor handle with the given name, if the name is not found, an NULL is returned. 404 OH_AI_API OH_AI_TensorHandle OH_AI_ModelGetOutputByTensorName(const OH_AI_ModelHandle model, const char *tensor_name); 405 406+/// \brief Create a TrainCfg object. Only valid for Lite Train. 407+/// 408+/// \return TrainCfg object handle. 409+OH_AI_API OH_AI_TrainCfgHandle OH_AI_TrainCfgCreate(); 410+ 411+/// \brief Destroy the train_cfg object. Only valid for Lite Train. 412+/// 413+/// \param[in] train_cfg TrainCfg object handle. 414+OH_AI_API void OH_AI_TrainCfgDestroy(OH_AI_TrainCfgHandle *train_cfg); 415+ 416+/// \brief Obtains part of the name that identify a loss kernel. Only valid for Lite Train. 417+/// 418+/// \param[in] train_cfg TrainCfg object handle. 419+/// \param[in] num The num of loss_name. 420+/// 421+/// \return loss_name. 422+OH_AI_API char **OH_AI_TrainCfgGetLossName(OH_AI_TrainCfgHandle train_cfg, size_t *num); 423+ 424+/// \brief Set part of the name that identify a loss kernel. Only valid for Lite Train. 425+/// 426+/// \param[in] train_cfg TrainCfg object handle. 427+/// \param[in] loss_name define part of the name that identify a loss kernel. 428+/// \param[in] num The num of loss_name. 429+OH_AI_API void OH_AI_TrainCfgSetLossName(OH_AI_TrainCfgHandle train_cfg, const char **loss_name, size_t num); 430+ 431+/// \brief Obtains optimization level of the train_cfg. Only valid for Lite Train. 432+/// 433+/// \param[in] train_cfg TrainCfg object handle. 434+/// 435+/// \return OH_AI_OptimizationLevel. 436+OH_AI_API OH_AI_OptimizationLevel OH_AI_TrainCfgGetOptimizationLevel(OH_AI_TrainCfgHandle train_cfg); 437+ 438+/// \brief Set optimization level of the train_cfg. Only valid for Lite Train. 439+/// 440+/// \param[in] train_cfg TrainCfg object handle. 441+/// \param[in] level The optimization level of train_cfg. 442+OH_AI_API void OH_AI_TrainCfgSetOptimizationLevel(OH_AI_TrainCfgHandle train_cfg, OH_AI_OptimizationLevel level); 443+ 444+/// \brief Build the train model from model buffer so that it can run on a device. Only valid for Lite Train. 445+/// 446+/// \param[in] model Model object handle. 
447+/// \param[in] model_data Define the buffer read from a model file. 448+/// \param[in] data_size Define bytes number of model file buffer. 449+/// \param[in] model_type Define The type of model file. 450+/// \param[in] model_context Define the context used to store options during execution. 451+/// \param[in] train_cfg Define the config used by training. 452+/// 453+/// \return OH_AI_Status. 454+OH_AI_API OH_AI_Status OH_AI_TrainModelBuild(OH_AI_ModelHandle model, const void *model_data, size_t data_size, 455+ OH_AI_ModelType model_type, const OH_AI_ContextHandle model_context, 456+ const OH_AI_TrainCfgHandle train_cfg); 457+ 458+/// \brief Build the train model from model file buffer so that it can run on a device. Only valid for Lite Train. 459+/// 460+/// \param[in] model Model object handle. 461+/// \param[in] model_path Define the model path. 462+/// \param[in] model_type Define The type of model file. 463+/// \param[in] model_context Define the context used to store options during execution. 464+/// \param[in] train_cfg Define the config used by training. 465+/// 466+/// \return OH_AI_Status. 467+OH_AI_API OH_AI_Status OH_AI_TrainModelBuildFromFile(OH_AI_ModelHandle model, const char *model_path, 468+ OH_AI_ModelType model_type, const OH_AI_ContextHandle model_context, 469+ const OH_AI_TrainCfgHandle train_cfg); 470+ 471+/// \brief Train model by step. Only valid for Lite Train. 472+/// 473+/// \param[in] model Model object handle. 474+/// \param[in] before CallBack before predict. 475+/// \param[in] after CallBack after predict. 476+/// 477+/// \return OH_AI_Status. 478+OH_AI_API OH_AI_Status OH_AI_RunStep(OH_AI_ModelHandle model, const OH_AI_KernelCallBack before, 479+ const OH_AI_KernelCallBack after); 480+ 481+/// \brief Sets the Learning Rate of the training. Only valid for Lite Train. 482+/// 483+/// \param[in] learning_rate to set. 484+/// 485+/// \return OH_AI_Status of operation. 486+OH_AI_API OH_AI_Status OH_AI_ModelSetLearningRate(OH_AI_ModelHandle model, float learning_rate); 487+ 488+/// \brief Obtains the Learning Rate of the optimizer. Only valid for Lite Train. 489+/// 490+/// \return Learning rate. 0.0 if no optimizer was found. 491+OH_AI_API float OH_AI_ModelGetLearningRate(OH_AI_ModelHandle model); 492+ 493+/// \brief Obtains all weights tensors of the model. Only valid for Lite Train. 494+/// 495+/// \param[in] model Model object handle. 496+/// 497+/// \return The vector that includes all gradient tensors. 498+OH_AI_API OH_AI_TensorHandleArray OH_AI_ModelGetWeights(OH_AI_ModelHandle model); 499+ 500+/// \brief update weights tensors of the model. Only valid for Lite Train. 501+/// 502+/// \param[in] new_weights A vector new weights. 503+/// 504+/// \return OH_AI_Status 505+OH_AI_API OH_AI_Status OH_AI_ModelUpdateWeights(OH_AI_ModelHandle model, const OH_AI_TensorHandleArray new_weights); 506+ 507+/// \brief Get the model running mode. 508+/// 509+/// \param[in] model Model object handle. 510+/// 511+/// \return Is Train Mode or not. 512+OH_AI_API bool OH_AI_ModelGetTrainMode(OH_AI_ModelHandle model); 513+ 514+/// \brief Set the model running mode. Only valid for Lite Train. 515+/// 516+/// \param[in] model Model object handle. 517+/// \param[in] train True means model runs in Train Mode, otherwise Eval Mode. 518+/// 519+/// \return OH_AI_Status. 520+OH_AI_API OH_AI_Status OH_AI_ModelSetTrainMode(OH_AI_ModelHandle model, bool train); 521+ 522+/// \brief Setup training with virtual batches. Only valid for Lite Train. 
523+/// 524+/// \param[in] model Model object handle. 525+/// \param[in] virtual_batch_multiplier - virtual batch multiplier, use any number < 1 to disable. 526+/// \param[in] lr - learning rate to use for virtual batch, -1 for internal configuration. 527+/// \param[in] momentum - batch norm momentum to use for virtual batch, -1 for internal configuration. 528+/// 529+/// \return OH_AI_Status. 530+OH_AI_API OH_AI_Status OH_AI_ModelSetupVirtualBatch(OH_AI_ModelHandle model, int virtual_batch_multiplier, float lr, 531+ float momentum); 532+ 533+/// \brief Export training model from file. Only valid for Lite Train. 534+/// 535+/// \param[in] model The model data. 536+/// \param[in] model_type The model file type. 537+/// \param[in] model_file The exported model file. 538+/// \param[in] quantization_type The quantification type. 539+/// \param[in] export_inference_only Whether to export a reasoning only model. 540+/// \param[in] output_tensor_name The set the name of the output tensor of the exported reasoning model, default as 541+/// empty, and export the complete reasoning model. 542+/// \param[in] num The number of output_tensor_name. 543+/// 544+/// \return OH_AI_Status. 545+OH_AI_API OH_AI_Status OH_AI_ExportModel(OH_AI_ModelHandle model, OH_AI_ModelType model_type, const char *model_file, 546+ OH_AI_QuantizationType quantization_type, bool export_inference_only, 547+ char **output_tensor_name, size_t num); 548+ 549+/// \brief Export training model from buffer. Only valid for Lite Train. 550+/// 551+/// \param[in] model The model data. 552+/// \param[in] model_type The model file type. 553+/// \param[in] model_data The exported model buffer. 554+/// \param[in] data_size The exported model buffer size. 555+/// \param[in] quantization_type The quantification type. 556+/// \param[in] export_inference_only Whether to export a reasoning only model. 557+/// \param[in] output_tensor_name The set the name of the output tensor of the exported reasoning model, default as 558+/// empty, and export the complete reasoning model. 559+/// \param[in] num The number of output_tensor_name. 560+/// 561+/// \return OH_AI_Status. 562+OH_AI_API OH_AI_Status OH_AI_ExportModelBuffer(OH_AI_ModelHandle model, OH_AI_ModelType model_type, char **model_data, 563+ size_t *data_size, OH_AI_QuantizationType quantization_type, 564+ bool export_inference_only, char **output_tensor_name, size_t num); 565+ 566+/// \brief Export model's weights, which can be used in micro only. Only valid for Lite Train. 567+/// 568+/// \param[in] model The model data. 569+/// \param[in] model_type The model file type. 570+/// \param[in] weight_file The path of exported weight file. 571+/// \param[in] is_inference Whether to export weights from a reasoning model. Currently, only support this is `true`. 572+/// \param[in] enable_fp16 Float-weight is whether to be saved in float16 format. 573+/// \param[in] changeable_weights_name The set the name of these weight tensors, whose shape is changeable. 574+/// \param[in] num The number of changeable_weights_name. 575+/// 576+/// \return OH_AI_Status. 
+OH_AI_API OH_AI_Status OH_AI_ExportWeightsCollaborateWithMicro(OH_AI_ModelHandle model, OH_AI_ModelType model_type,
+                                                               const char *weight_file, bool is_inference,
+                                                               bool enable_fp16, char **changeable_weights_name,
+                                                               size_t num);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/include/c_api/tensor_c.h b/include/c_api/tensor_c.h
index f18ba163..6d2aaab6 100644
--- a/include/c_api/tensor_c.h
+++ b/include/c_api/tensor_c.h
@@ -17,6 +17,7 @@
 #define MINDSPORE_INCLUDE_C_API_TENSOE_C_H

 #include <stddef.h>
+#include "include/c_api/status_c.h"
 #include "include/c_api/types_c.h"
 #include "include/c_api/data_type_c.h"
 #include "include/c_api/format_c.h"
@@ -112,6 +113,19 @@ OH_AI_API OH_AI_Format OH_AI_TensorGetFormat(const OH_AI_TensorHandle tensor);
 /// \param[in] data A pointer to the data of the tensor.
 OH_AI_API void OH_AI_TensorSetData(OH_AI_TensorHandle tensor, void *data);

+/// \brief Set the data for the tensor with a user-allocated data buffer.
+/// The main purpose of this interface is to provide a way of using memory already allocated by the user as the
+/// Model's input, instead of memory allocated inside the Model object, which saves one copy.
+/// Note: The tensor does not free the data provided by the invoker. The invoker is responsible for freeing it, and
+/// this free action must not be performed before destruction of the tensor.
+///
+/// \param[in] tensor Tensor object handle.
+/// \param[in] data A pointer to the user data buffer.
+/// \param[in] data_size The byte size of the user data buffer.
+///
+/// \return OH_AI_STATUS_SUCCESS if success, or detail error code if failed.
+OH_AI_API OH_AI_Status OH_AI_TensorSetUserData(OH_AI_TensorHandle tensor, void *data, size_t data_size);
+
 /// \brief Obtain the data pointer of the tensor.
 ///
 /// \param[in] tensor Tensor object handle.
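Before the next hunk, a minimal usage sketch of the OH_AI_TensorSetUserData() zero-copy path documented above. This is illustrative only and not part of the patch: FeedUserBuffer is a hypothetical helper, while OH_AI_ModelGetInputs(), OH_AI_TensorGetDataSize() and OH_AI_STATUS_LITE_ERROR come from the existing C API.

#include "include/c_api/model_c.h"
#include "include/c_api/status_c.h"

/* Sketch: hand a caller-owned buffer to the first model input without a copy.
 * The tensor does not take ownership, so `buffer` must stay alive until the
 * tensor is destroyed (see the note in tensor_c.h above). */
OH_AI_Status FeedUserBuffer(OH_AI_ModelHandle model, void *buffer, size_t buffer_size) {
  OH_AI_TensorHandleArray inputs = OH_AI_ModelGetInputs(model);
  if (inputs.handle_num == 0 || inputs.handle_list == NULL) {
    return OH_AI_STATUS_LITE_ERROR;
  }
  OH_AI_TensorHandle input = inputs.handle_list[0];
  if (OH_AI_TensorGetDataSize(input) != buffer_size) {
    return OH_AI_STATUS_LITE_ERROR; /* size mismatch: refuse rather than overrun */
  }
  return OH_AI_TensorSetUserData(input, buffer, buffer_size);
}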
diff --git a/include/c_api/types_c.h b/include/c_api/types_c.h
index dba54ffa..e520e336 100644
--- a/include/c_api/types_c.h
+++ b/include/c_api/types_c.h
@@ -40,10 +40,65 @@ typedef enum OH_AI_DeviceType {
   OH_AI_DEVICETYPE_KIRIN_NPU,
   // add new type here
   // ohos-only device range: [60, 80)
-  OH_AI_DEVICETYPE__NNRT = 60,
+  OH_AI_DEVICETYPE_NNRT = 60,
   OH_AI_DEVICETYPE_INVALID = 100,
 } OH_AI_DeviceType;

+typedef enum OH_AI_NNRTDeviceType {
+  /** Devices that are not CPU, GPU, or dedicated accelerator */
+  OH_AI_NNRTDEVICE_OTHERS = 0,
+  /** CPU device */
+  OH_AI_NNRTDEVICE_CPU = 1,
+  /** GPU device */
+  OH_AI_NNRTDEVICE_GPU = 2,
+  /** Dedicated hardware accelerator */
+  OH_AI_NNRTDEVICE_ACCELERATOR = 3,
+} OH_AI_NNRTDeviceType;
+
+typedef enum OH_AI_PerformanceMode {
+  /** No performance mode preference */
+  OH_AI_PERFORMANCE_NONE = 0,
+  /** Low power consumption mode */
+  OH_AI_PERFORMANCE_LOW = 1,
+  /** Medium performance mode */
+  OH_AI_PERFORMANCE_MEDIUM = 2,
+  /** High performance mode */
+  OH_AI_PERFORMANCE_HIGH = 3,
+  /** Ultimate performance mode */
+  OH_AI_PERFORMANCE_EXTREME = 4
+} OH_AI_PerformanceMode;
+
+typedef enum OH_AI_Priority {
+  /** No priority preference */
+  OH_AI_PRIORITY_NONE = 0,
+  /** Low priority */
+  OH_AI_PRIORITY_LOW = 1,
+  /** Medium priority */
+  OH_AI_PRIORITY_MEDIUM = 2,
+  /** High priority */
+  OH_AI_PRIORITY_HIGH = 3
+} OH_AI_Priority;
+
+typedef enum OH_AI_OptimizationLevel {
+  /** Do not change */
+  OH_AI_KO0 = 0,
+  /** Cast network to float16, keep batchnorm and loss in float32 */
+  OH_AI_KO2 = 2,
+  /** Cast network to float16, including batchnorm */
+  OH_AI_KO3 = 3,
+  /** Choose optimization based on device */
+  OH_AI_KAUTO = 4,
+  OH_AI_KOPTIMIZATIONTYPE = 0xFFFFFFFF
+} OH_AI_OptimizationLevel;
+
+typedef enum OH_AI_QuantizationType {
+  OH_AI_NO_QUANT = 0,
+  OH_AI_WEIGHT_QUANT = 1,
+  OH_AI_FULL_QUANT = 2,
+  OH_AI_UNKNOWN_QUANT_TYPE = 0xFFFFFFFF
+} OH_AI_QuantizationType;
+
+typedef struct NNRTDeviceDesc NNRTDeviceDesc;
 #ifdef __cplusplus
 }
 #endif
diff --git a/include/sdk_api/context.h b/include/sdk_api/context.h
index 5bfc9279..e12b8d6f 100644
--- a/include/sdk_api/context.h
+++ b/include/sdk_api/context.h
@@ -174,6 +174,109 @@ OH_AI_API void OH_AI_DeviceInfoSetFrequency(OH_AI_DeviceInfoHandle device_info,
 /// \return NPU frequency
 OH_AI_API int OH_AI_DeviceInfoGetFrequency(const OH_AI_DeviceInfoHandle device_info);

+/// \brief Obtain all device descriptions in NNRT.
+///
+/// \param[out] num Number of NNRT device descriptions.
+///
+/// \return NNRT device description array.
+OH_AI_API NNRTDeviceDesc *OH_AI_GetAllNNRTDeviceDescs(size_t *num);
+
+/// \brief Obtain the specified element in the NNRT device description array.
+///
+/// \param[in] descs NNRT device description array.
+/// \param[in] index Element index.
+///
+/// \return NNRT device description.
+OH_AI_API NNRTDeviceDesc *OH_AI_GetElementOfNNRTDeviceDescs(NNRTDeviceDesc *descs, size_t index);
+
+/// \brief Destroy the NNRT device descriptions returned by OH_AI_GetAllNNRTDeviceDescs().
+///
+/// \param[in] desc NNRT device description array.
+OH_AI_API void OH_AI_DestroyAllNNRTDeviceDescs(NNRTDeviceDesc **desc);
+
+/// \brief Obtain the device id in NNRT device description.
+///
+/// \param[in] desc pointer to the NNRT device description instance.
+///
+/// \return NNRT device id.
+OH_AI_API size_t OH_AI_GetDeviceIdFromNNRTDeviceDesc(const NNRTDeviceDesc *desc);
+
+/// \brief Obtain the device name in NNRT device description.
+///
+/// \param[in] desc pointer to the NNRT device description instance.
+///
+/// \return NNRT device name.
+OH_AI_API const char *OH_AI_GetNameFromNNRTDeviceDesc(const NNRTDeviceDesc *desc);
+
+/// \brief Obtain the device type in NNRT device description.
+///
+/// \param[in] desc pointer to the NNRT device description instance.
+///
+/// \return NNRT device type.
+OH_AI_API OH_AI_NNRTDeviceType OH_AI_GetTypeFromNNRTDeviceDesc(const NNRTDeviceDesc *desc);
+
+/// \brief Create the NNRT device info by exactly matching the specific device name.
+///
+/// \param[in] name NNRT device name.
+///
+/// \return Device info object handle.
+OH_AI_API OH_AI_DeviceInfoHandle OH_AI_CreateNNRTDeviceInfoByName(const char *name);
+
+/// \brief Create the NNRT device info by finding the first device with the specific device type.
+///
+/// \param[in] type NNRT device type.
+///
+/// \return Device info object handle.
+OH_AI_API OH_AI_DeviceInfoHandle OH_AI_CreateNNRTDeviceInfoByType(OH_AI_NNRTDeviceType type);
+
+/// \brief Set the NNRT device id, Only valid for NNRT.
+///
+/// \param[in] device_info Device info object handle.
+/// \param[in] device_id NNRT device id.
+OH_AI_API void OH_AI_DeviceInfoSetDeviceId(OH_AI_DeviceInfoHandle device_info, size_t device_id);
+
+/// \brief Obtain the NNRT device id, Only valid for NNRT.
+///
+/// \param[in] device_info Device info object handle.
+///
+/// \return NNRT device id.
+OH_AI_API size_t OH_AI_DeviceInfoGetDeviceId(const OH_AI_DeviceInfoHandle device_info);
+
+/// \brief Set the NNRT performance mode, Only valid for NNRT.
+///
+/// \param[in] device_info Device info object handle.
+/// \param[in] mode NNRT performance mode.
+OH_AI_API void OH_AI_DeviceInfoSetPerformanceMode(OH_AI_DeviceInfoHandle device_info, OH_AI_PerformanceMode mode);
+
+/// \brief Obtain the NNRT performance mode, Only valid for NNRT.
+///
+/// \param[in] device_info Device info object handle.
+///
+/// \return NNRT performance mode.
+OH_AI_API OH_AI_PerformanceMode OH_AI_DeviceInfoGetPerformanceMode(const OH_AI_DeviceInfoHandle device_info);
+
+/// \brief Set the NNRT priority, Only valid for NNRT.
+///
+/// \param[in] device_info Device info object handle.
+/// \param[in] priority NNRT priority.
+OH_AI_API void OH_AI_DeviceInfoSetPriority(OH_AI_DeviceInfoHandle device_info, OH_AI_Priority priority);
+
+/// \brief Obtain the NNRT priority, Only valid for NNRT.
+///
+/// \param[in] device_info Device info object handle.
+///
+/// \return NNRT priority.
+OH_AI_API OH_AI_Priority OH_AI_DeviceInfoGetPriority(const OH_AI_DeviceInfoHandle device_info);
+
+/// \brief Add extension of key/value format to device info, Only valid for NNRT.
+///
+/// \param[in] device_info Device info object handle.
+/// \param[in] name The content of key as a C string.
+/// \param[in] value The pointer to the value, which is a byte array.
+/// \param[in] value_size The size of the value, which is a byte array.
+///
+/// \return OH_AI_STATUS_SUCCESS if success, or detail error code if failed.
+OH_AI_API OH_AI_Status OH_AI_DeviceInfoAddExtension(OH_AI_DeviceInfoHandle device_info, const char *name, const char *value, size_t value_size);
 #ifdef __cplusplus
 }
 #endif
diff --git a/include/sdk_api/tensor.h b/include/sdk_api/tensor.h
index f6ba02cd..3dad04ac 100644
--- a/include/sdk_api/tensor.h
+++ b/include/sdk_api/tensor.h
@@ -17,6 +17,7 @@
 #define MINDSPORE_INCLUDE_C_API_TENSOE_C_H

 #include <stddef.h>
+#include "mindspore/status.h"
 #include "mindspore/types.h"
 #include "mindspore/data_type.h"
 #include "mindspore/format.h"
@@ -140,6 +141,18 @@ OH_AI_API int64_t OH_AI_TensorGetElementNum(const OH_AI_TensorHandle tensor);
 /// \return The data size of the tensor.
 OH_AI_API size_t OH_AI_TensorGetDataSize(const OH_AI_TensorHandle tensor);

+/// \brief Set the data for the tensor with a user-allocated data buffer.
+/// The main purpose of this interface is to provide a way of using memory already allocated by the user as the
+/// Model's input, instead of memory allocated inside the Model object, which saves one copy.
+/// Note: The tensor does not free the data provided by the invoker. The invoker is responsible for freeing it, and
+/// this free action must not be performed before destruction of the tensor.
+///
+/// \param[in] tensor Tensor object handle.
+/// \param[in] data A pointer to the user data buffer.
+/// \param[in] data_size The byte size of the user data buffer.
+///
+/// \return OH_AI_STATUS_SUCCESS if success, or detail error code if failed.
+OH_AI_API OH_AI_Status OH_AI_TensorSetUserData(OH_AI_TensorHandle tensor, void *data, size_t data_size);
 #ifdef __cplusplus
 }
 #endif
diff --git a/include/sdk_api/types.h b/include/sdk_api/types.h
index a39c6daa..d38660b0 100644
--- a/include/sdk_api/types.h
+++ b/include/sdk_api/types.h
@@ -40,10 +40,46 @@ typedef enum OH_AI_DeviceType {
   OH_AI_DEVICETYPE_KIRIN_NPU,
   // add new type here
   // ohos-only device range: [60, 80)
-  OH_AI_DeviceType_NNRT = 60,
+  OH_AI_DEVICETYPE_NNRT = 60,
   OH_AI_DEVICETYPE_INVALID = 100,
 } OH_AI_DeviceType;

+typedef enum OH_AI_NNRTDeviceType {
+  /** Devices that are not CPU, GPU, or dedicated accelerator */
+  OH_AI_NNRTDEVICE_OTHERS = 0,
+  /** CPU device */
+  OH_AI_NNRTDEVICE_CPU = 1,
+  /** GPU device */
+  OH_AI_NNRTDEVICE_GPU = 2,
+  /** Dedicated hardware accelerator */
+  OH_AI_NNRTDEVICE_ACCELERATOR = 3,
+} OH_AI_NNRTDeviceType;
+
+typedef enum OH_AI_PerformanceMode {
+  /** No performance mode preference */
+  OH_AI_PERFORMANCE_NONE = 0,
+  /** Low power consumption mode */
+  OH_AI_PERFORMANCE_LOW = 1,
+  /** Medium performance mode */
+  OH_AI_PERFORMANCE_MEDIUM = 2,
+  /** High performance mode */
+  OH_AI_PERFORMANCE_HIGH = 3,
+  /** Ultimate performance mode */
+  OH_AI_PERFORMANCE_EXTREME = 4
+} OH_AI_PerformanceMode;
+
+typedef enum OH_AI_Priority {
+  /** No priority preference */
+  OH_AI_PRIORITY_NONE = 0,
+  /** Low priority */
+  OH_AI_PRIORITY_LOW = 1,
+  /** Medium priority */
+  OH_AI_PRIORITY_MEDIUM = 2,
+  /** High priority */
+  OH_AI_PRIORITY_HIGH = 3
+} OH_AI_Priority;
+
+typedef struct NNRTDeviceDesc NNRTDeviceDesc;
 #ifdef __cplusplus
 }
 #endif
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/BUILD.gn b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/BUILD.gn
index 7bbc3782..103e53b7 100644
---
a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/BUILD.gn 888+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/BUILD.gn 889@@ -498,6 +498,9 @@ infer_shape_sources = [ 890 "infer/crop_infer.c", 891 "infer/cumsum_infer.c", 892 "infer/custom_gru_infer.c", 893+ "infer/custom_masked_fill_infer.c", 894+ "infer/custom_is_inf_infer.c", 895+ "infer/custom_tensor_scatter_max_infer.c", 896 "infer/decoder_layer_infer.c", 897 "infer/deconv2d_infer.c", 898 "infer/depth_to_space_infer.c", 899diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/CMakeLists.txt b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/CMakeLists.txt 900index c1685a65..6fef44fd 100644 901--- a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/CMakeLists.txt 902+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/CMakeLists.txt 903@@ -238,7 +238,7 @@ endif() 904 if(PLATFORM_ARM) 905 set(NO_FAST_MATH_OPTI ${NNACL_DIR}/fp32/resize_fp32.c) 906 set_source_files_properties(${NO_FAST_MATH_OPTI} PROPERTIES LANGUAGE C 907- COMPILE_FLAGS "${CMAKE_C_FLAGS} -fno-fast-math") 908+ COMPILE_FLAGS "${CMAKE_C_FLAGS} -w -fno-fast-math") 909 endif() 910 911 add_library(nnacl_mid OBJECT ${KERNEL_SRC} ${TRAIN_SRC} ${ASSEMBLY_SRC} ${MS_X86_SIMD_SRC}) 912diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/avx/scatter_nd_binary_avx.h b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/avx/scatter_nd_binary_avx.h 913new file mode 100644 914index 00000000..14bd1d76 915--- /dev/null 916+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/avx/scatter_nd_binary_avx.h 917@@ -0,0 +1,66 @@ 918+/** 919+* Copyright 2023 Huawei Technologies Co., Ltd 920+* 921+* Licensed under the Apache License, Version 2.0 (the "License"); 922+* you may not use this file except in compliance with the License. 923+* You may obtain a copy of the License at 924+* 925+* http://www.apache.org/licenses/LICENSE-2.0 926+* 927+* Unless required by applicable law or agreed to in writing, software 928+* distributed under the License is distributed on an "AS IS" BASIS, 929+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 930+* See the License for the specific language governing permissions and 931+* limitations under the License. 
932+*/ 933+#ifndef NNACL_BASE_SCATTER_ND_BINARY_AVX_H_ 934+#define NNACL_BASE_SCATTER_ND_BINARY_AVX_H_ 935+ 936+#include "nnacl/intrinsics/ms_simd_instructions.h" 937+#include "nnacl/intrinsics/ms_simd_avx_instructions.h" 938+ 939+#ifdef __cplusplus 940+extern "C" { 941+#endif 942+#pragma GCC push_options 943+#pragma GCC target("avx", "avx2") 944+#define MS_SIMD_INSTRUCTION MS_SIMD_AVX_INSTRUCTION 945+#define BLOCK_NUM 8 946+#define MS_SIMD_AVX 947+ 948+static inline int ScatterNDAddFp32AVX(int index, const float *update, int size, float *output) { 949+ for (int block_max_size = size - BLOCK_NUM + 1; index < block_max_size; index += BLOCK_NUM) { 950+ SIMD_ST_F32(output + index, SIMD_ADD_F32(SIMD_LD_F32(output + index), SIMD_LD_F32(update + index))); 951+ } 952+ return index; 953+} 954+ 955+static inline int ScatterNDAddInt32AVX(int index, const int *update, int size, int *output) { 956+ for (int block_max_size = size - BLOCK_NUM + 1; index < block_max_size; index += BLOCK_NUM) { 957+ SIMD_ST_EPI32(output + index, SIMD_ADD_EPI32(SIMD_LD_EPI32(output + index), SIMD_LD_EPI32(update + index))); 958+ } 959+ return index; 960+} 961+ 962+static inline int ScatterNDMaxFp32AVX(int index, const float *update, int size, float *output) { 963+ for (int block_max_size = size - BLOCK_NUM + 1; index < block_max_size; index += BLOCK_NUM) { 964+ SIMD_ST_F32(output + index, SIMD_MAX_F32(SIMD_LD_F32(output + index), SIMD_LD_F32(update + index))); 965+ } 966+ return index; 967+} 968+ 969+static inline int ScatterNDMaxInt32AVX(int index, const int *update, int size, int *output) { 970+ for (int block_max_size = size - BLOCK_NUM + 1; index < block_max_size; index += BLOCK_NUM) { 971+ SIMD_ST_EPI32(output + index, SIMD_MAX_EPI32(SIMD_LD_EPI32(output + index), SIMD_LD_EPI32(update + index))); 972+ } 973+ return index; 974+} 975+ 976+#undef MS_SIMD_INSTRUCTION 977+#undef BLOCK_NUM 978+#pragma GCC pop_options 979+#undef MS_SIMD_AVX 980+#ifdef __cplusplus 981+} 982+#endif 983+#endif // NNACL_BASE_SCATTER_ND_BINARY_AVX_H_ 984diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/avx512/scatter_nd_binary_avx512.h b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/avx512/scatter_nd_binary_avx512.h 985new file mode 100644 986index 00000000..abf024c5 987--- /dev/null 988+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/avx512/scatter_nd_binary_avx512.h 989@@ -0,0 +1,66 @@ 990+/** 991+* Copyright 2023 Huawei Technologies Co., Ltd 992+* 993+* Licensed under the Apache License, Version 2.0 (the "License"); 994+* you may not use this file except in compliance with the License. 995+* You may obtain a copy of the License at 996+* 997+* http://www.apache.org/licenses/LICENSE-2.0 998+* 999+* Unless required by applicable law or agreed to in writing, software 1000+* distributed under the License is distributed on an "AS IS" BASIS, 1001+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1002+* See the License for the specific language governing permissions and 1003+* limitations under the License. 
1004+*/ 1005+#ifndef NNACL_BASE_SCATTER_ND_BINARY_AVX512_H_ 1006+#define NNACL_BASE_SCATTER_ND_BINARY_AVX512_H_ 1007+ 1008+#include "nnacl/intrinsics/ms_simd_instructions.h" 1009+#include "nnacl/intrinsics/ms_simd_avx512_instructions.h" 1010+ 1011+#ifdef __cplusplus 1012+extern "C" { 1013+#endif 1014+#pragma GCC push_options 1015+#pragma GCC target("avx512f") 1016+#define MS_SIMD_INSTRUCTION MS_SIMD_AVX512_INSTRUCTION 1017+#define BLOCK_NUM 16 1018+#define MS_SIMD_AVX512 1019+ 1020+static inline int ScatterNDAddFp32AVX512(int index, const float *update, int size, float *output) { 1021+ for (int block_max_size = size - BLOCK_NUM + 1; index < block_max_size; index += BLOCK_NUM) { 1022+ SIMD_ST_F32(output + index, SIMD_ADD_F32(SIMD_LD_F32(output + index), SIMD_LD_F32(update + index))); 1023+ } 1024+ return index; 1025+} 1026+ 1027+static inline int ScatterNDAddInt32AVX512(int index, const int *update, int size, int *output) { 1028+ for (int block_max_size = size - BLOCK_NUM + 1; index < block_max_size; index += BLOCK_NUM) { 1029+ SIMD_ST_EPI32(output + index, SIMD_ADD_EPI32(SIMD_LD_EPI32(output + index), SIMD_LD_EPI32(update + index))); 1030+ } 1031+ return index; 1032+} 1033+ 1034+static inline int ScatterNDMaxFp32AVX512(int index, const float *update, int size, float *output) { 1035+ for (int block_max_size = size - BLOCK_NUM + 1; index < block_max_size; index += BLOCK_NUM) { 1036+ SIMD_ST_F32(output + index, SIMD_MAX_F32(SIMD_LD_F32(output + index), SIMD_LD_F32(update + index))); 1037+ } 1038+ return index; 1039+} 1040+ 1041+static inline int ScatterNDMaxInt32AVX512(int index, const int *update, int size, int *output) { 1042+ for (int block_max_size = size - BLOCK_NUM + 1; index < block_max_size; index += BLOCK_NUM) { 1043+ SIMD_ST_EPI32(output + index, SIMD_MAX_EPI32(SIMD_LD_EPI32(output + index), SIMD_LD_EPI32(update + index))); 1044+ } 1045+ return index; 1046+} 1047+ 1048+#undef MS_SIMD_INSTRUCTION 1049+#undef BLOCK_NUM 1050+#pragma GCC pop_options 1051+#undef MS_SIMD_AVX512 1052+#ifdef __cplusplus 1053+} 1054+#endif 1055+#endif // NNACL_BASE_SCATTER_ND_BINARY_AVX512_H_ 1056diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/base/scatter_nd_binary.c b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/base/scatter_nd_binary.c 1057index bca71f55..e496bb4b 100644 1058--- a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/base/scatter_nd_binary.c 1059+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/base/scatter_nd_binary.c 1060@@ -77,3 +77,31 @@ int ScatterNDUpdate(void *output, const void *update, int *output_unit_offsets, 1061 } 1062 return NNACL_OK; 1063 } 1064+ 1065+int ScatterNDMax(const void *update, void *output, int *output_unit_offsets, const ScatterNDParameter *param, int type, 1066+ int task_id) { 1067+ if (update == NULL || output == NULL || output_unit_offsets == NULL || param == NULL) { 1068+ return NNACL_NULL_PTR; 1069+ } 1070+ if (param->op_parameter.thread_num_ == 0) { 1071+ return NNACL_ERR; 1072+ } 1073+ int unit_per_thread = UP_DIV(param->num_unit, param->op_parameter.thread_num_); 1074+ int begin = unit_per_thread * task_id; 1075+ int end = MSMIN(begin + unit_per_thread, param->num_unit); 1076+ if (type == 0) { 1077+ float *update_fp32 = (float *)update; 1078+ float *output_fp32 = (float *)output; 1079+ for (int i = begin; i < end; i++) { 1080+ const float *update_data = update_fp32 + i * param->unit_size; 1081+ float *output_data = output_fp32 + output_unit_offsets[i]; 1082+ int j = 0; 1083+ 1084+ SIMD_RUN_NO_SCALAR(ScatterNDMaxFp32, j, update_data, 
param->unit_size, output_data); 1085+ for (; j < param->unit_size; j++) { 1086+ output_data[j] = fmaxf(update_data[j], output_data[j]); 1087+ } 1088+ } 1089+ } 1090+ return NNACL_OK; 1091+} 1092diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/base/scatter_nd_binary.h b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/base/scatter_nd_binary.h 1093index 3af55335..36657cd9 100644 1094--- a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/base/scatter_nd_binary.h 1095+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/base/scatter_nd_binary.h 1096@@ -27,6 +27,9 @@ int ScatterNDUpdate(void *output, const void *update, int *output_unit_offsets, 1097 1098 int ScatterNDAdd(const void *update, void *output, int *output_unit_offsets, const ScatterNDParameter *param, int type, 1099 int task_id); 1100+ 1101+int ScatterNDMax(const void *update, void *output, int *output_unit_offsets, const ScatterNDParameter *param, int type, 1102+ int task_id); 1103 #ifdef __cplusplus 1104 } 1105 #endif 1106diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/base/scatter_nd_binary_simd.h.in b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/base/scatter_nd_binary_simd.h.in 1107index c72d9cc2..46bb20ce 100644 1108--- a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/base/scatter_nd_binary_simd.h.in 1109+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/base/scatter_nd_binary_simd.h.in 1110@@ -38,6 +38,20 @@ static inline int ScatterNDAddInt32@SIMD_INSTRUCTION@(int index, const int *upda 1111 return index; 1112 } 1113 1114+static inline int ScatterNDMaxFp32@SIMD_INSTRUCTION@(int index, const float *update, int size, float *output) { 1115+for (int block_max_size = size - BLOCK_NUM + 1; index < block_max_size; index += BLOCK_NUM) { 1116+SIMD_ST_F32(output + index, SIMD_MAX_F32(SIMD_LD_F32(output + index), SIMD_LD_F32(update + index))); 1117+} 1118+return index; 1119+} 1120+ 1121+static inline int ScatterNDMaxInt32@SIMD_INSTRUCTION@(int index, const int *update, int size, int *output) { 1122+for (int block_max_size = size - BLOCK_NUM + 1; index < block_max_size; index += BLOCK_NUM) { 1123+SIMD_ST_EPI32(output + index, SIMD_MAX_EPI32(SIMD_LD_EPI32(output + index), SIMD_LD_EPI32(update + index))); 1124+} 1125+return index; 1126+} 1127+ 1128 @SIMD_INSTRUCTION_END@ 1129 #ifdef __cplusplus 1130 } 1131diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/custom_is_inf_parameter.h b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/custom_is_inf_parameter.h 1132new file mode 100644 1133index 00000000..e1eae394 1134--- /dev/null 1135+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/custom_is_inf_parameter.h 1136@@ -0,0 +1,26 @@ 1137+/** 1138+ * Copyright 2023 Huawei Technologies Co., Ltd 1139+ * 1140+ * Licensed under the Apache License, Version 2.0 (the "License"); 1141+ * you may not use this file except in compliance with the License. 1142+ * You may obtain a copy of the License at 1143+ * 1144+ * http://www.apache.org/licenses/LICENSE-2.0 1145+ * 1146+ * Unless required by applicable law or agreed to in writing, software 1147+ * distributed under the License is distributed on an "AS IS" BASIS, 1148+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1149+ * See the License for the specific language governing permissions and 1150+ * limitations under the License. 
1151+ */ 1152+#ifndef MINDSPORE_NNACL_CUSTOM_IS_INF_PARAMETER_H_ 1153+#define MINDSPORE_NNACL_CUSTOM_IS_INF_PARAMETER_H_ 1154+ 1155+#include "nnacl/op_base.h" 1156+ 1157+typedef struct CustomIsInfParameter { 1158+ // Primitive parameter 1159+ OpParameter op_parameter_; 1160+} CustomIsInfParameter; 1161+ 1162+#endif // MINDSPORE_NNACL_CUSTOM_IS_INF_PARAMETER_H_ 1163diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/custom_masked_fill_parameter.h b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/custom_masked_fill_parameter.h 1164new file mode 100644 1165index 00000000..047d3d3f 1166--- /dev/null 1167+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/custom_masked_fill_parameter.h 1168@@ -0,0 +1,26 @@ 1169+/** 1170+ * Copyright 2023 Huawei Technologies Co., Ltd 1171+ * 1172+ * Licensed under the Apache License, Version 2.0 (the "License"); 1173+ * you may not use this file except in compliance with the License. 1174+ * You may obtain a copy of the License at 1175+ * 1176+ * http://www.apache.org/licenses/LICENSE-2.0 1177+ * 1178+ * Unless required by applicable law or agreed to in writing, software 1179+ * distributed under the License is distributed on an "AS IS" BASIS, 1180+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1181+ * See the License for the specific language governing permissions and 1182+ * limitations under the License. 1183+ */ 1184+#ifndef MINDSPORE_NNACL_CUSTOM_MASKED_FILL_PARAMETER_H_ 1185+#define MINDSPORE_NNACL_CUSTOM_MASKED_FILL_PARAMETER_H_ 1186+ 1187+#include "nnacl/op_base.h" 1188+ 1189+typedef struct CustomMaskedFillParameter { 1190+ // Primitive parameter 1191+ OpParameter op_parameter_; 1192+} CustomMaskedFillParameter; 1193+ 1194+#endif // MINDSPORE_NNACL_CUSTOM_MASKED_FILL_PARAMETER_H_ 1195diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/custom_tensor_scatter_max_parameter.h b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/custom_tensor_scatter_max_parameter.h 1196new file mode 100644 1197index 00000000..ba6940db 1198--- /dev/null 1199+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/custom_tensor_scatter_max_parameter.h 1200@@ -0,0 +1,26 @@ 1201+/** 1202+ * Copyright 2023 Huawei Technologies Co., Ltd 1203+ * 1204+ * Licensed under the Apache License, Version 2.0 (the "License"); 1205+ * you may not use this file except in compliance with the License. 1206+ * You may obtain a copy of the License at 1207+ * 1208+ * http://www.apache.org/licenses/LICENSE-2.0 1209+ * 1210+ * Unless required by applicable law or agreed to in writing, software 1211+ * distributed under the License is distributed on an "AS IS" BASIS, 1212+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1213+ * See the License for the specific language governing permissions and 1214+ * limitations under the License. 
1215+ */ 1216+#ifndef MINDSPORE_NNACL_CUSTOM_TENSOR_SCATTER_MAX_PARAMETER_H_ 1217+#define MINDSPORE_NNACL_CUSTOM_TENSOR_SCATTER_MAX_PARAMETER_H_ 1218+ 1219+#include "nnacl/op_base.h" 1220+ 1221+typedef struct CustomTensorScatterMaxParameter { 1222+ // Primitive parameter 1223+ OpParameter op_parameter_; 1224+} CustomTensorScatterMaxParameter; 1225+ 1226+#endif // MINDSPORE_NNACL_CUSTOM_TENSOR_SCATTER_MAX_PARAMETER_H_ 1227diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/custom_is_inf_infer.c b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/custom_is_inf_infer.c 1228new file mode 100644 1229index 00000000..fc87d157 1230--- /dev/null 1231+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/custom_is_inf_infer.c 1232@@ -0,0 +1,38 @@ 1233+/** 1234+ * Copyright 2023 Huawei Technologies Co., Ltd 1235+ * 1236+ * Licensed under the Apache License, Version 2.0 (the "License"); 1237+ * you may not use this file except in compliance with the License. 1238+ * You may obtain a copy of the License at 1239+ * 1240+ * http://www.apache.org/licenses/LICENSE-2.0 1241+ * 1242+ * Unless required by applicable law or agreed to in writing, software 1243+ * distributed under the License is distributed on an "AS IS" BASIS, 1244+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1245+ * See the License for the specific language governing permissions and 1246+ * limitations under the License. 1247+ */ 1248+ 1249+#include "nnacl/infer/custom_is_inf_infer.h" 1250+#include "nnacl/infer/infer_register.h" 1251+ 1252+int CustomIsInfInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, size_t outputs_size, 1253+ OpParameter *parameter) { 1254+ int check_ret = CheckAugmentNullSize(inputs, inputs_size, outputs, outputs_size, parameter, C1NUM, C1NUM); 1255+ if (check_ret != NNACL_OK) { 1256+ return check_ret; 1257+ } 1258+ 1259+ const TensorC *input = inputs[0]; 1260+ TensorC *output = outputs[0]; 1261+ output->data_type_ = kNumberTypeBool; 1262+ output->format_ = input->format_; 1263+ if (!InferFlag(inputs, inputs_size)) { 1264+ return NNACL_INFER_INVALID; 1265+ } 1266+ SetShapeTensor(output, input); 1267+ return NNACL_OK; 1268+} 1269+ 1270+REG_INFER(CustomIsInf, PrimType_Inner_CustomIsInf, CustomIsInfInferShape) 1271diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/custom_is_inf_infer.h b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/custom_is_inf_infer.h 1272new file mode 100644 1273index 00000000..d1b4b33d 1274--- /dev/null 1275+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/custom_is_inf_infer.h 1276@@ -0,0 +1,31 @@ 1277+/** 1278+ * Copyright 2023 Huawei Technologies Co., Ltd 1279+ * 1280+ * Licensed under the Apache License, Version 2.0 (the "License"); 1281+ * you may not use this file except in compliance with the License. 1282+ * You may obtain a copy of the License at 1283+ * 1284+ * http://www.apache.org/licenses/LICENSE-2.0 1285+ * 1286+ * Unless required by applicable law or agreed to in writing, software 1287+ * distributed under the License is distributed on an "AS IS" BASIS, 1288+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1289+ * See the License for the specific language governing permissions and 1290+ * limitations under the License. 
1291+ */ 1292+#ifndef MINDSPORE_NNACL_CUSTOM_IS_INF_INFER_H 1293+#define MINDSPORE_NNACL_CUSTOM_IS_INF_INFER_H 1294+ 1295+#include "nnacl/infer/common_infer.h" 1296+ 1297+#ifdef __cplusplus 1298+extern "C" { 1299+#endif 1300+ 1301+int CustomIsInfInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, size_t outputs_size, 1302+ OpParameter *parameter); 1303+ 1304+#ifdef __cplusplus 1305+} 1306+#endif 1307+#endif // MINDSPORE_NNACL_CUSTOM_IS_INF_INFER_H 1308diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/custom_masked_fill_infer.c b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/custom_masked_fill_infer.c 1309new file mode 100644 1310index 00000000..957a4d4f 1311--- /dev/null 1312+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/custom_masked_fill_infer.c 1313@@ -0,0 +1,37 @@ 1314+/** 1315+ * Copyright 2023 Huawei Technologies Co., Ltd 1316+ * 1317+ * Licensed under the Apache License, Version 2.0 (the "License"); 1318+ * you may not use this file except in compliance with the License. 1319+ * You may obtain a copy of the License at 1320+ * 1321+ * http://www.apache.org/licenses/LICENSE-2.0 1322+ * 1323+ * Unless required by applicable law or agreed to in writing, software 1324+ * distributed under the License is distributed on an "AS IS" BASIS, 1325+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1326+ * See the License for the specific language governing permissions and 1327+ * limitations under the License. 1328+ */ 1329+ 1330+#include "nnacl/infer/custom_masked_fill_infer.h" 1331+#include "nnacl/infer/infer_register.h" 1332+ 1333+int CustomMaskedFillInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, size_t outputs_size, 1334+ OpParameter *parameter) { 1335+ int check_ret = CheckAugmentNullSize(inputs, inputs_size, outputs, outputs_size, parameter, C3NUM, C1NUM); 1336+ if (check_ret != NNACL_OK) { 1337+ return check_ret; 1338+ } 1339+ 1340+ const TensorC *input = inputs[0]; 1341+ TensorC *output = outputs[0]; 1342+ SetDataTypeFormat(output, input); 1343+ if (!InferFlag(inputs, inputs_size)) { 1344+ return NNACL_INFER_INVALID; 1345+ } 1346+ SetShapeTensor(output, input); 1347+ return NNACL_OK; 1348+} 1349+ 1350+REG_INFER(CustomMaskedFill, PrimType_Inner_CustomMaskedFill, CustomMaskedFillInferShape) 1351diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/custom_masked_fill_infer.h b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/custom_masked_fill_infer.h 1352new file mode 100644 1353index 00000000..a8adbae2 1354--- /dev/null 1355+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/custom_masked_fill_infer.h 1356@@ -0,0 +1,31 @@ 1357+/** 1358+ * Copyright 2023 Huawei Technologies Co., Ltd 1359+ * 1360+ * Licensed under the Apache License, Version 2.0 (the "License"); 1361+ * you may not use this file except in compliance with the License. 1362+ * You may obtain a copy of the License at 1363+ * 1364+ * http://www.apache.org/licenses/LICENSE-2.0 1365+ * 1366+ * Unless required by applicable law or agreed to in writing, software 1367+ * distributed under the License is distributed on an "AS IS" BASIS, 1368+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1369+ * See the License for the specific language governing permissions and 1370+ * limitations under the License. 
1371+ */ 1372+#ifndef MINDSPORE_NNACL_CUSTOM_MASKED_FILL_INFER_H 1373+#define MINDSPORE_NNACL_CUSTOM_MASKED_FILL_INFER_H 1374+ 1375+#include "nnacl/infer/common_infer.h" 1376+ 1377+#ifdef __cplusplus 1378+extern "C" { 1379+#endif 1380+ 1381+int CustomMaskedFillInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, size_t outputs_size, 1382+ OpParameter *parameter); 1383+ 1384+#ifdef __cplusplus 1385+} 1386+#endif 1387+#endif // MINDSPORE_NNACL_CUSTOM_MASKED_FILL_INFER_H 1388diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/custom_tensor_scatter_max_infer.c b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/custom_tensor_scatter_max_infer.c 1389new file mode 100644 1390index 00000000..be6716ba 1391--- /dev/null 1392+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/custom_tensor_scatter_max_infer.c 1393@@ -0,0 +1,37 @@ 1394+/** 1395+ * Copyright 2023 Huawei Technologies Co., Ltd 1396+ * 1397+ * Licensed under the Apache License, Version 2.0 (the "License"); 1398+ * you may not use this file except in compliance with the License. 1399+ * You may obtain a copy of the License at 1400+ * 1401+ * http://www.apache.org/licenses/LICENSE-2.0 1402+ * 1403+ * Unless required by applicable law or agreed to in writing, software 1404+ * distributed under the License is distributed on an "AS IS" BASIS, 1405+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1406+ * See the License for the specific language governing permissions and 1407+ * limitations under the License. 1408+ */ 1409+ 1410+#include "nnacl/infer/custom_tensor_scatter_max_infer.h" 1411+#include "nnacl/infer/infer_register.h" 1412+ 1413+int CustomTensorScatterMaxInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, 1414+ size_t outputs_size, OpParameter *parameter) { 1415+ int check_ret = CheckAugmentNullSize(inputs, inputs_size, outputs, outputs_size, parameter, C3NUM, C1NUM); 1416+ if (check_ret != NNACL_OK) { 1417+ return check_ret; 1418+ } 1419+ 1420+ const TensorC *input = inputs[0]; 1421+ TensorC *output = outputs[0]; 1422+ SetDataTypeFormat(output, input); 1423+ if (!InferFlag(inputs, inputs_size)) { 1424+ return NNACL_INFER_INVALID; 1425+ } 1426+ SetShapeTensor(output, input); 1427+ return NNACL_OK; 1428+} 1429+ 1430+REG_INFER(CustomTensorScatterMax, PrimType_Inner_CustomTensorScatterMax, CustomTensorScatterMaxInferShape) 1431diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/custom_tensor_scatter_max_infer.h b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/custom_tensor_scatter_max_infer.h 1432new file mode 100644 1433index 00000000..641aa483 1434--- /dev/null 1435+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/infer/custom_tensor_scatter_max_infer.h 1436@@ -0,0 +1,31 @@ 1437+/** 1438+ * Copyright 2023 Huawei Technologies Co., Ltd 1439+ * 1440+ * Licensed under the Apache License, Version 2.0 (the "License"); 1441+ * you may not use this file except in compliance with the License. 1442+ * You may obtain a copy of the License at 1443+ * 1444+ * http://www.apache.org/licenses/LICENSE-2.0 1445+ * 1446+ * Unless required by applicable law or agreed to in writing, software 1447+ * distributed under the License is distributed on an "AS IS" BASIS, 1448+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1449+ * See the License for the specific language governing permissions and 1450+ * limitations under the License. 
1451+ */ 1452+#ifndef MINDSPORE_NNACL_CUSTOM_TENSOR_SCATTER_MAX_INFER_H 1453+#define MINDSPORE_NNACL_CUSTOM_TENSOR_SCATTER_MAX_INFER_H 1454+ 1455+#include "nnacl/infer/common_infer.h" 1456+ 1457+#ifdef __cplusplus 1458+extern "C" { 1459+#endif 1460+ 1461+int CustomTensorScatterMaxInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, 1462+ size_t outputs_size, OpParameter *parameter); 1463+ 1464+#ifdef __cplusplus 1465+} 1466+#endif 1467+#endif // MINDSPORE_NNACL_CUSTOM_TENSOR_SCATTER_MAX_INFER_H 1468diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/neon/scatter_nd_binary_neon.h b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/neon/scatter_nd_binary_neon.h 1469new file mode 100644 1470index 00000000..d7c34768 1471--- /dev/null 1472+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/neon/scatter_nd_binary_neon.h 1473@@ -0,0 +1,65 @@ 1474+/** 1475+* Copyright 2023 Huawei Technologies Co., Ltd 1476+* 1477+* Licensed under the Apache License, Version 2.0 (the "License"); 1478+* you may not use this file except in compliance with the License. 1479+* You may obtain a copy of the License at 1480+* 1481+* http://www.apache.org/licenses/LICENSE-2.0 1482+* 1483+* Unless required by applicable law or agreed to in writing, software 1484+* distributed under the License is distributed on an "AS IS" BASIS, 1485+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1486+* See the License for the specific language governing permissions and 1487+* limitations under the License. 1488+*/ 1489+#ifndef NNACL_BASE_SCATTER_ND_BINARY_NEON_H_ 1490+#define NNACL_BASE_SCATTER_ND_BINARY_NEON_H_ 1491+ 1492+#include "nnacl/intrinsics/ms_simd_instructions.h" 1493+#include "nnacl/intrinsics/ms_simd_neon_instructions.h" 1494+ 1495+#ifdef __cplusplus 1496+extern "C" { 1497+#endif 1498+ 1499+#define MS_SIMD_INSTRUCTION MS_SIMD_NEON_INSTRUCTION 1500+#define BLOCK_NUM 4 1501+#define MS_SIMD_NEON 1502+ 1503+static inline int ScatterNDAddFp32NEON(int index, const float *update, int size, float *output) { 1504+ for (int block_max_size = size - BLOCK_NUM + 1; index < block_max_size; index += BLOCK_NUM) { 1505+ SIMD_ST_F32(output + index, SIMD_ADD_F32(SIMD_LD_F32(output + index), SIMD_LD_F32(update + index))); 1506+ } 1507+ return index; 1508+} 1509+ 1510+static inline int ScatterNDAddInt32NEON(int index, const int *update, int size, int *output) { 1511+ for (int block_max_size = size - BLOCK_NUM + 1; index < block_max_size; index += BLOCK_NUM) { 1512+ SIMD_ST_EPI32(output + index, SIMD_ADD_EPI32(SIMD_LD_EPI32(output + index), SIMD_LD_EPI32(update + index))); 1513+ } 1514+ return index; 1515+} 1516+ 1517+static inline int ScatterNDMaxFp32NEON(int index, const float *update, int size, float *output) { 1518+ for (int block_max_size = size - BLOCK_NUM + 1; index < block_max_size; index += BLOCK_NUM) { 1519+ SIMD_ST_F32(output + index, SIMD_MAX_F32(SIMD_LD_F32(output + index), SIMD_LD_F32(update + index))); 1520+ } 1521+ return index; 1522+} 1523+ 1524+static inline int ScatterNDMaxInt32NEON(int index, const int *update, int size, int *output) { 1525+ for (int block_max_size = size - BLOCK_NUM + 1; index < block_max_size; index += BLOCK_NUM) { 1526+ SIMD_ST_EPI32(output + index, SIMD_MAX_EPI32(SIMD_LD_EPI32(output + index), SIMD_LD_EPI32(update + index))); 1527+ } 1528+ return index; 1529+} 1530+ 1531+#undef MS_SIMD_INSTRUCTION 1532+#undef BLOCK_NUM 1533+ 1534+#undef MS_SIMD_NEON 1535+#ifdef __cplusplus 1536+} 1537+#endif 1538+#endif // NNACL_BASE_SCATTER_ND_BINARY_NEON_H_ 
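Note: the NEON helpers above, like their SSE/AVX/AVX512 counterparts added elsewhere in this patch, only process the block-aligned prefix of the buffer. Each function advances index in steps of BLOCK_NUM and returns the first element it did not handle, so the caller is expected to finish the remainder with a scalar loop. Below is a minimal sketch of that calling convention in plain C, with the intrinsics replaced by a scalar stand-in so it compiles on its own; the real kernels in scatter_nd_binary.c presumably dispatch to the intrinsics variants through the nnacl SIMD machinery instead.

/* Stand-in for ScatterNDAddFp32NEON/SSE/AVX: handles whole blocks of four
 * elements and returns the index of the first element left untouched. */
static int ScatterNDAddFp32Block(int index, const float *update, int size, float *output) {
  const int block_num = 4;
  for (int block_max_size = size - block_num + 1; index < block_max_size; index += block_num) {
    for (int i = 0; i < block_num; ++i) {
      output[index + i] += update[index + i];
    }
  }
  return index;
}

/* Caller pattern: vectorizable prefix first, then a scalar tail loop. */
void ScatterNDAddFp32(const float *update, int size, float *output) {
  int index = ScatterNDAddFp32Block(0, update, size, output);
  for (; index < size; ++index) {
    output[index] += update[index];
  }
}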
1539diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/op_base.h b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/op_base.h 1540index 955a70a5..895f7e3d 100644 1541--- a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/op_base.h 1542+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/op_base.h 1543@@ -558,6 +558,10 @@ enum PrimType { 1544 PrimType_Inner_CustomGru = 10010, 1545 PrimType_Inner_CastGatherReduceFusion = 10011, 1546 PrimType_Inner_ReduceConcatFusion = 10012, 1547+ PrimType_Inner_ThirdPartyModel = 10013, 1548+ PrimType_Inner_CustomMaskedFill = 10014, 1549+ PrimType_Inner_CustomTensorScatterMax = 10015, 1550+ PrimType_Inner_CustomIsInf = 10016, 1551 PrimType_InnerOpMax, 1552 PrimType_InnerOpMin = PrimType_Inner_ToFormat 1553 }; 1554diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/scatter_nd_binary_simd.h b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/scatter_nd_binary_simd.h 1555new file mode 100644 1556index 00000000..dd9878f7 1557--- /dev/null 1558+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/scatter_nd_binary_simd.h 1559@@ -0,0 +1,36 @@ 1560+/** 1561+* Copyright 2023 Huawei Technologies Co., Ltd 1562+* 1563+* Licensed under the Apache License, Version 2.0 (the "License"); 1564+* you may not use this file except in compliance with the License. 1565+* You may obtain a copy of the License at 1566+* 1567+* http://www.apache.org/licenses/LICENSE-2.0 1568+* 1569+* Unless required by applicable law or agreed to in writing, software 1570+* distributed under the License is distributed on an "AS IS" BASIS, 1571+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1572+* See the License for the specific language governing permissions and 1573+* limitations under the License. 1574+*/ 1575+#ifndef NNACL_SCATTER_ND_BINARY_SIMD_H_ 1576+#define NNACL_SCATTER_ND_BINARY_SIMD_H_ 1577+ 1578+#include "nnacl/intrinsics/ms_simd_instructions.h" 1579+#ifdef ENABLE_AVX512 1580+#include "nnacl/avx512/scatter_nd_binary_avx512.h" 1581+#endif 1582+ 1583+#ifdef ENABLE_AVX 1584+#include "nnacl/avx/scatter_nd_binary_avx.h" 1585+#endif 1586+ 1587+#ifdef ENABLE_SSE 1588+#include "nnacl/sse/scatter_nd_binary_sse.h" 1589+#endif 1590+ 1591+#ifdef ENABLE_ARM 1592+#include "nnacl/neon/scatter_nd_binary_neon.h" 1593+#endif 1594+ 1595+#endif 1596diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/sse/scatter_nd_binary_sse.h b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/sse/scatter_nd_binary_sse.h 1597new file mode 100644 1598index 00000000..983d2923 1599--- /dev/null 1600+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/sse/scatter_nd_binary_sse.h 1601@@ -0,0 +1,66 @@ 1602+/** 1603+* Copyright 2023 Huawei Technologies Co., Ltd 1604+* 1605+* Licensed under the Apache License, Version 2.0 (the "License"); 1606+* you may not use this file except in compliance with the License. 1607+* You may obtain a copy of the License at 1608+* 1609+* http://www.apache.org/licenses/LICENSE-2.0 1610+* 1611+* Unless required by applicable law or agreed to in writing, software 1612+* distributed under the License is distributed on an "AS IS" BASIS, 1613+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1614+* See the License for the specific language governing permissions and 1615+* limitations under the License. 
1616+*/ 1617+#ifndef NNACL_BASE_SCATTER_ND_BINARY_SSE_H_ 1618+#define NNACL_BASE_SCATTER_ND_BINARY_SSE_H_ 1619+ 1620+#include "nnacl/intrinsics/ms_simd_instructions.h" 1621+#include "nnacl/intrinsics/ms_simd_sse_instructions.h" 1622+ 1623+#ifdef __cplusplus 1624+extern "C" { 1625+#endif 1626+#pragma GCC push_options 1627+#pragma GCC target("sse4.1") 1628+#define MS_SIMD_INSTRUCTION MS_SIMD_SSE_INSTRUCTION 1629+#define BLOCK_NUM 4 1630+#define MS_SIMD_SSE 1631+ 1632+static inline int ScatterNDAddFp32SSE(int index, const float *update, int size, float *output) { 1633+ for (int block_max_size = size - BLOCK_NUM + 1; index < block_max_size; index += BLOCK_NUM) { 1634+ SIMD_ST_F32(output + index, SIMD_ADD_F32(SIMD_LD_F32(output + index), SIMD_LD_F32(update + index))); 1635+ } 1636+ return index; 1637+} 1638+ 1639+static inline int ScatterNDAddInt32SSE(int index, const int *update, int size, int *output) { 1640+ for (int block_max_size = size - BLOCK_NUM + 1; index < block_max_size; index += BLOCK_NUM) { 1641+ SIMD_ST_EPI32(output + index, SIMD_ADD_EPI32(SIMD_LD_EPI32(output + index), SIMD_LD_EPI32(update + index))); 1642+ } 1643+ return index; 1644+} 1645+ 1646+static inline int ScatterNDMaxFp32SSE(int index, const float *update, int size, float *output) { 1647+ for (int block_max_size = size - BLOCK_NUM + 1; index < block_max_size; index += BLOCK_NUM) { 1648+ SIMD_ST_F32(output + index, SIMD_MAX_F32(SIMD_LD_F32(output + index), SIMD_LD_F32(update + index))); 1649+ } 1650+ return index; 1651+} 1652+ 1653+static inline int ScatterNDMaxInt32SSE(int index, const int *update, int size, int *output) { 1654+ for (int block_max_size = size - BLOCK_NUM + 1; index < block_max_size; index += BLOCK_NUM) { 1655+ SIMD_ST_EPI32(output + index, SIMD_MAX_EPI32(SIMD_LD_EPI32(output + index), SIMD_LD_EPI32(update + index))); 1656+ } 1657+ return index; 1658+} 1659+ 1660+#undef MS_SIMD_INSTRUCTION 1661+#undef BLOCK_NUM 1662+#pragma GCC pop_options 1663+#undef MS_SIMD_SSE 1664+#ifdef __cplusplus 1665+} 1666+#endif 1667+#endif // NNACL_BASE_SCATTER_ND_BINARY_SSE_H_ 1668diff --git a/mindspore/core/mindrt/BUILD.gn b/mindspore/core/mindrt/BUILD.gn 1669index b56d5f5c..b0e7c70d 100644 1670--- a/mindspore/core/mindrt/BUILD.gn 1671+++ b/mindspore/core/mindrt/BUILD.gn 1672@@ -41,8 +41,15 @@ ohos_source_set("mindrt_obj") { 1673 "../../core/", 1674 ] 1675 1676+ defines = [ 1677+ "ENABLE_MINDRT", 1678+ "MS_COMPILE_OHOS", 1679+ "BUILD_LITE", 1680+ ] 1681+ 1682+ external_deps = [ "hilog:libhilog" ] 1683+ 1684 remove_configs = [ "//build/config/compiler:no_rtti" ] 1685- defines = [ "BUILD_LITE" ] 1686 1687 part_name = "mindspore" 1688 subsystem_name = "thirdparty" 1689diff --git a/mindspore/core/mindrt/src/thread/actor_threadpool.cc b/mindspore/core/mindrt/src/thread/actor_threadpool.cc 1690index 70414757..c50c46e0 100644 1691--- a/mindspore/core/mindrt/src/thread/actor_threadpool.cc 1692+++ b/mindspore/core/mindrt/src/thread/actor_threadpool.cc 1693@@ -32,7 +32,7 @@ void ActorWorker::RunWithSpin() { 1694 } 1695 #if !defined(__APPLE__) && !defined(_MSC_VER) 1696 static std::atomic_int index{0}; 1697- (void)pthread_setname_np(pthread_self(), ("ActorThread_" + std::to_string(index++)).c_str()); 1698+ (void)pthread_setname_np(pthread_self(), ("OS_Actor_" + std::to_string(index++)).c_str()); 1699 #endif 1700 #ifdef PLATFORM_86 1701 // Some CPU kernels need set the flush zero mode to improve performance. 
1702diff --git a/mindspore/core/mindrt/src/thread/core_affinity.cc b/mindspore/core/mindrt/src/thread/core_affinity.cc 1703index 33bf3529..a3478dff 100644 1704--- a/mindspore/core/mindrt/src/thread/core_affinity.cc 1705+++ b/mindspore/core/mindrt/src/thread/core_affinity.cc 1706@@ -344,12 +344,12 @@ int CoreAffinity::InitBindCoreId(size_t thread_num, BindMode bind_mode) { 1707 int CoreAffinity::SetAffinity() { return THREAD_OK; } 1708 #elif defined(BIND_CORE) 1709 int CoreAffinity::SetAffinity(const pthread_t &thread_id, cpu_set_t *cpu_set) { 1710-#ifdef __ANDROID__ 1711-#if __ANDROID_API__ >= 21 1712+#if defined(__ANDROID__) || defined(MS_COMPILE_OHOS) 1713+#if (__ANDROID_API__ >= 21) || defined(MS_COMPILE_OHOS) 1714 THREAD_INFO("thread: %d, mask: %lu", pthread_gettid_np(thread_id), cpu_set->__bits[0]); 1715 int ret = sched_setaffinity(pthread_gettid_np(thread_id), sizeof(cpu_set_t), cpu_set); 1716 if (ret != THREAD_OK) { 1717- THREAD_ERROR("bind thread %d to cpu failed. ERROR %d", pthread_gettid_np(thread_id), ret); 1718+ THREAD_ERROR("bind thread %d to cpu failed. ERROR %{public}d", pthread_gettid_np(thread_id), ret); 1719 return THREAD_ERROR; 1720 } 1721 #endif 1722diff --git a/mindspore/core/mindrt/src/thread/core_affinity.h b/mindspore/core/mindrt/src/thread/core_affinity.h 1723index 2dd2abd1..28b0967a 100644 1724--- a/mindspore/core/mindrt/src/thread/core_affinity.h 1725+++ b/mindspore/core/mindrt/src/thread/core_affinity.h 1726@@ -23,7 +23,7 @@ 1727 #ifdef PARALLEL_INFERENCE 1728 #define BIND_CORE 1729 #endif 1730-#ifdef __ANDROID__ 1731+#if defined(__ANDROID__) || defined(MS_COMPILE_OHOS) 1732 #define BIND_CORE 1733 #include <sched.h> 1734 #endif 1735diff --git a/mindspore/core/mindrt/src/thread/parallel_threadpool.cc b/mindspore/core/mindrt/src/thread/parallel_threadpool.cc 1736index 9e0dd25c..09c39f32 100644 1737--- a/mindspore/core/mindrt/src/thread/parallel_threadpool.cc 1738+++ b/mindspore/core/mindrt/src/thread/parallel_threadpool.cc 1739@@ -48,7 +48,7 @@ void ParallelWorker::ParallelRun() { 1740 SetAffinity(); 1741 } 1742 #if !defined(__APPLE__) && !defined(_MSC_VER) 1743- (void)pthread_setname_np(pthread_self(), ("ParallelThread_" + std::to_string(worker_id_)).c_str()); 1744+ (void)pthread_setname_np(pthread_self(), ("OS_Parallel_" + std::to_string(worker_id_)).c_str()); 1745 #endif 1746 #ifdef PLATFORM_86 1747 // Some CPU kernels need set the flush zero mode to improve performance. 1748diff --git a/mindspore/core/mindrt/src/thread/threadlog.h b/mindspore/core/mindrt/src/thread/threadlog.h 1749index 7ed917f1..b212a401 100644 1750--- a/mindspore/core/mindrt/src/thread/threadlog.h 1751+++ b/mindspore/core/mindrt/src/thread/threadlog.h 1752@@ -16,7 +16,9 @@ 1753 1754 #ifndef MINDSPORE_CORE_MINDRT_RUNTIME_THREADPOOL_LOG_H_ 1755 #define MINDSPORE_CORE_MINDRT_RUNTIME_THREADPOOL_LOG_H_ 1756- 1757+#ifdef MS_COMPILE_OHOS 1758+#include "hilog/log.h" 1759+#endif 1760 namespace mindspore { 1761 #ifdef THREAD_POOL_DEBUG 1762 #include <stdio.h> 1763@@ -32,13 +34,35 @@ namespace mindspore { 1764 } 1765 #else 1766 #define THREAD_DEBUG(content, ...) 1767-#define THREAD_INFO(content, ...) 1768 #define THREAD_TEST_TRUE(flag) 1769+ 1770 #if defined(__ANDROID__) 1771+#define THREAD_INFO(content, ...) 1772 #include <android/log.h> 1773 #define THREAD_ERROR(content, args...) \ 1774 { __android_log_print(ANDROID_LOG_ERROR, "MS_LITE", "%s|%d: " #content "\r\n", __func__, __LINE__, ##args); } 1775+ 1776+#elif defined(MS_COMPILE_OHOS) // For OHOS, use hilog. 
1777+ 1778+#define MINDRT_OHOS_LOG_DOMAIN 0x2102 1779+#define MINDRT_OHOS_LOG_TAG "MS_LITE" 1780+ 1781+#ifdef MS_COMPILE_WITH_OHOS_NDK 1782+// When build with OHOS NDK, use public api of hilog module. 1783+#define THREAD_INFO(content, args...) \ 1784+ { OH_LOG_Print(LOG_APP, LOG_INFO, MINDRT_OHOS_LOG_DOMAIN, MINDRT_OHOS_LOG_TAG, "%s:%d " #content, __func__, __LINE__, ##args); } 1785+#define THREAD_ERROR(content, args...) \ 1786+ { OH_LOG_Print(LOG_APP, LOG_ERROR, MINDRT_OHOS_LOG_DOMAIN, MINDRT_OHOS_LOG_TAG, "%s:%d " #content, __func__, __LINE__, ##args); } 1787+#else 1788+// When build in OHOS repo, use inner api of hilog module. 1789+#define THREAD_INFO(content, args...) \ 1790+ { HiLogPrint(LOG_APP, LOG_INFO, MINDRT_OHOS_LOG_DOMAIN, MINDRT_OHOS_LOG_TAG, "%s:%d " #content, __func__, __LINE__, ##args); } 1791+#define THREAD_ERROR(content, args...) \ 1792+ { HiLogPrint(LOG_APP, LOG_ERROR, MINDRT_OHOS_LOG_DOMAIN, MINDRT_OHOS_LOG_TAG, "%s:%d " #content, __func__, __LINE__, ##args); } 1793+#endif 1794+ 1795 #else 1796+#define THREAD_INFO(content, ...) 1797 #define THREAD_ERROR(content, ...) 1798 #endif 1799 #endif 1800diff --git a/mindspore/core/mindrt/src/thread/threadpool.cc b/mindspore/core/mindrt/src/thread/threadpool.cc 1801index c56e0425..2301be8c 100644 1802--- a/mindspore/core/mindrt/src/thread/threadpool.cc 1803+++ b/mindspore/core/mindrt/src/thread/threadpool.cc 1804@@ -68,10 +68,11 @@ void Worker::SetAffinity() { 1805 #ifdef _WIN32 1806 SetWindowsSelfAffinity(core_id_); 1807 #elif defined(BIND_CORE) 1808-#ifdef __ANDROID__ 1809+#if defined(__ANDROID__) || defined(MS_COMPILE_OHOS) 1810+ THREAD_INFO("thread: %d, mask: %lu", gettid(), mask_.__bits[0]); 1811 int ret = sched_setaffinity(gettid(), sizeof(cpu_set_t), &mask_); 1812 if (ret != THREAD_OK) { 1813- THREAD_ERROR("bind thread %d to cpu failed. ERROR %d", gettid(), errno); 1814+ THREAD_ERROR("bind thread %d to cpu failed. ERROR %{public}d", gettid(), errno); 1815 } 1816 return; 1817 #else 1818@@ -111,7 +112,7 @@ void Worker::Run() { 1819 } 1820 #if !defined(__APPLE__) && !defined(_MSC_VER) 1821 static std::atomic_int index = {0}; 1822- (void)pthread_setname_np(pthread_self(), ("KernelThread_" + std::to_string(index++)).c_str()); 1823+ (void)pthread_setname_np(pthread_self(), ("OS_Kernel_" + std::to_string(index++)).c_str()); 1824 #endif 1825 #ifdef PLATFORM_86 1826 // Some CPU kernels need set the flush zero mode to improve performance. 
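Note: with this threadlog.h change, THREAD_INFO and THREAD_ERROR on OHOS forward their printf-style arguments to hilog (OH_LOG_Print via the public NDK API, HiLogPrint when built inside the OHOS repo) instead of compiling to nothing, so existing call sites keep working unchanged. hilog redacts format arguments that are not marked %{public}, which is why the call sites above switch %d to %{public}d. Below is a stand-alone sketch of the macro pattern with printf standing in for the hilog call so it builds anywhere; the DEMO_* names are illustrative only, and the real macros build the format via #content rather than plain string concatenation.

#include <stdio.h>

#define DEMO_LOG_DOMAIN 0x2102
#define DEMO_LOG_TAG "MS_LITE"

/* Same shape as THREAD_ERROR above: prepend function and line, then forward
 * the variadic arguments; the real macro hands these to OH_LOG_Print or
 * HiLogPrint together with the domain and tag. */
#define DEMO_THREAD_ERROR(content, args...) \
  { printf("[0x%x/%s] %s:%d " content "\n", DEMO_LOG_DOMAIN, DEMO_LOG_TAG, __func__, __LINE__, ##args); }

int main(void) {
  int tid = 1234;
  int err = 22;
  DEMO_THREAD_ERROR("bind thread %d to cpu failed. ERROR %d", tid, err);
  return 0;
}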
1827diff --git a/mindspore/lite/BUILD.gn b/mindspore/lite/BUILD.gn 1828index a774b58c..f7e465e2 100644 1829--- a/mindspore/lite/BUILD.gn 1830+++ b/mindspore/lite/BUILD.gn 1831@@ -71,9 +71,14 @@ 1832 1833 import("//build/ohos.gni") 1834 1835+declare_args() { 1836+ mindspore_feature_nnrt_metagraph = false 1837+} 1838+ 1839 ohos_group("mindspore") { 1840 deps = [ 1841 ":mindspore_lib", 1842+ ":mindspore_ndk", 1843 ":mindspore_train_lib", 1844 "mindir:mindir_lib", 1845 "src/litert/js_api:mindsporelite_napi" 1846@@ -180,7 +185,6 @@ lite_mindrt_sources = [ 1847 ] 1848 1849 all_lite_sources += cxx_api_sources 1850-all_lite_sources += c_api_sources 1851 all_lite_sources += api_source 1852 all_lite_sources += control_flow_kernel_sources 1853 all_lite_sources += experimental_sources 1854@@ -368,7 +372,6 @@ ohos_shared_library("mindspore_lib") { 1855 sources = all_sources 1856 1857 include_dirs = [ 1858- "//base/hiviewdfx/hilog/interfaces/native/innerkits/include", 1859 "//third_party/flatbuffers/include", 1860 "./", 1861 "../", 1862@@ -384,6 +387,7 @@ ohos_shared_library("mindspore_lib") { 1863 "../ccsrc/", 1864 "src/litert/kernel/cpu/", 1865 "../core/mindrt/src/", 1866+ "//foundation/ai/neural_network_runtime/", 1867 ] 1868 1869 defines = [ 1870@@ -426,24 +430,29 @@ ohos_shared_library("mindspore_lib") { 1871 1872 external_deps = [ "hilog:libhilog" ] 1873 1874- output_name = "libmindspore-lite.huawei" 1875+ output_name = "libmindspore-lite" 1876 output_extension = "so" 1877 innerapi_tags = [ "platformsdk" ] 1878 SUPPORT_NNRT = true 1879 if (SUPPORT_NNRT) { 1880+ if (mindspore_feature_nnrt_metagraph) { 1881+ defines += [ "SUPPORT_NNRT_METAGRAPH" ] 1882+ print("enabled feature: mindspore_feature_nnrt_metagraph") 1883+ } 1884 sources += [ 1885 "src/litert/delegate/nnrt/checker/primitive_check.cc", 1886 "src/litert/delegate/nnrt/nnrt_delegate.cc", 1887 "src/litert/delegate/nnrt/nnrt_model_kernel.cc", 1888 ] 1889 include_dirs += [ 1890- "//foundation/ai/neural_network_runtime", 1891 "src/delegate/nnrt/include", 1892 "../../mindspore/core/ir", 1893 "mindir/include", 1894 "mindir/inner_headers", 1895 ] 1896+ 1897 external_deps += [ "neural_network_runtime:nnrt_target" ] 1898+ 1899 deps += [ "mindir:mindir_lib" ] 1900 defines += [ "SUPPORT_NNRT" ] 1901 } 1902@@ -461,6 +470,67 @@ ohos_shared_library("mindspore_lib") { 1903 subsystem_name = "thirdparty" 1904 } 1905 1906+# NDK lib 1907+ohos_shared_library("mindspore_ndk") { 1908+ deps = [ 1909+ ":mindspore_lib", 1910+ ":mindspore_train_lib" 1911+ ] 1912+ 1913+ sources = c_api_sources 1914+ 1915+ include_dirs = [ 1916+ "//base/hiviewdfx/hilog/interfaces/native/innerkits/include", 1917+ "//third_party/flatbuffers/include", 1918+ "./", 1919+ "../", 1920+ "../../", 1921+ "../core", 1922+ "src", 1923+ "src/c_api/", 1924+ "../ccsrc/plugin/device/cpu/kernel/", 1925+ "../core/mindrt/src/", 1926+ "../core/mindrt/include/", 1927+ "../../third_party/", 1928+ "./schema/", 1929+ "../ccsrc/", 1930+ "//foundation/ai/neural_network_runtime/", 1931+ ] 1932+ 1933+ defines = [ 1934+ "SUPPORT_NNRT", 1935+ "MS_COMPILE_OHOS", 1936+ "PRIMITIVE_WRITEABLE", 1937+ "RUNTIME_PASS_CLIP", 1938+ "ENABLE_MULTI_LAYOUT", 1939+ "VERSION_STR=\"2.1.0\"", 1940+ ] 1941+ 1942+ configs = [ 1943+ ":mindspore_api", 1944+ ":disable_android", 1945+ ":secure_option", 1946+ ] 1947+ 1948+ external_deps = [ "neural_network_runtime:nnrt_target" ] 1949+ 1950+ remove_configs = [ "//build/config/compiler:no_rtti" ] 1951+ 1952+ output_name = "libmindspore_lite_ndk" 1953+ output_extension = "so" 1954+ 
innerapi_tags = [ "ndk"] 1955+ cflags_cc = [ 1956+ "-Wno-ignored-qualifiers", 1957+ "-Wunused-private-field", 1958+ "-Wno-unused-private-field", 1959+ "-Wno-inconsistent-missing-override", 1960+ "-Wno-macro-redefined", 1961+ "-Wno-constant-conversion", 1962+ ] 1963+ part_name = "mindspore" 1964+ subsystem_name = "thirdparty" 1965+} 1966+ 1967 # Train library 1968 expression_cxx_api_sources = [ 1969 "src/litert/cxx_api/expression/net.cc", 1970@@ -614,7 +684,6 @@ ohos_shared_library("mindspore_train_lib") { 1971 sources = all_train_sources 1972 1973 include_dirs = [ 1974- "//base/hiviewdfx/hilog/interfaces/native/innerkits/include", 1975 "//third_party/flatbuffers/include", 1976 "./", 1977 "../", 1978@@ -698,6 +767,9 @@ config("disable_android") { 1979 "-U__ANDROID__", 1980 "-U__ANDROID_API__", 1981 ] 1982+ ldflags = [ 1983+ "-Wl,--no-as-needed", 1984+ ] 1985 } 1986 1987 config("secure_option") { 1988diff --git a/mindspore/lite/CMakeLists.txt b/mindspore/lite/CMakeLists.txt 1989index 72337f70..1faf2f38 100644 1990--- a/mindspore/lite/CMakeLists.txt 1991+++ b/mindspore/lite/CMakeLists.txt 1992@@ -298,8 +298,9 @@ elseif(TOOLCHAIN_NAME STREQUAL "ohos-lite") 1993 elseif(TOOLCHAIN_NAME STREQUAL "ohos") 1994 set(TARGET_OHOS on) 1995 add_compile_definitions(MS_COMPILE_OHOS) 1996- set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-unused-command-line-argument -Wno-c++17-extensions") 1997- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-command-line-argument -Wno-c++17-extensions") 1998+ add_compile_definitions(MS_COMPILE_WITH_OHOS_NDK) 1999+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-unused-command-line-argument -Wno-c++17-extensions -Wno-deprecated-builtins") 2000+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-command-line-argument -Wno-c++17-extensions -Wno-deprecated-builtins") 2001 endif() 2002 2003 if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7.3.0 2004diff --git a/mindspore/lite/include/lite_types.h b/mindspore/lite/include/lite_types.h 2005index 017e98a8..860390d5 100644 2006--- a/mindspore/lite/include/lite_types.h 2007+++ b/mindspore/lite/include/lite_types.h 2008@@ -42,6 +42,7 @@ typedef enum { 2009 DT_NPU, /**< NPU device type */ 2010 DT_ASCEND, /**< ASCEND device type */ 2011 DT_CUSTOM, /**< EXTEND device type */ 2012+ DT_NNRT, /**< NNRT device type */ 2013 DT_END /**< NO device type */ 2014 } DeviceType; 2015 2016diff --git a/mindspore/lite/include/model.h b/mindspore/lite/include/model.h 2017index 93e27ea9..b96c7e35 100644 2018--- a/mindspore/lite/include/model.h 2019+++ b/mindspore/lite/include/model.h 2020@@ -25,6 +25,7 @@ namespace mindspore { 2021 namespace schema { 2022 struct Tensor; 2023 } // namespace schema 2024+ 2025 namespace lite { 2026 typedef enum { ModelType_MSLite, ModelType_MindIR } LiteModelType; 2027 2028@@ -62,7 +63,10 @@ struct MS_API LiteGraph { 2029 bool model_obfuscated_ = false; 2030 std::vector<unsigned char *> deobf_prims_; 2031 #endif 2032+ 2033+ std::string ToString() const; 2034 }; 2035+ 2036 struct MS_API Model { 2037 LiteGraph graph_; 2038 char *buf = nullptr; 2039diff --git a/mindspore/lite/include/registry/converter_context.h b/mindspore/lite/include/registry/converter_context.h 2040index 2d72b200..4bc92599 100644 2041--- a/mindspore/lite/include/registry/converter_context.h 2042+++ b/mindspore/lite/include/registry/converter_context.h 2043@@ -39,7 +39,9 @@ enum MS_API FmkType : int { 2044 kFmkTypeMs = 3, 2045 kFmkTypeTflite = 4, 2046 kFmkTypePytorch = 5, 2047- kFmkTypeMsLite = 6, 2048+ kFmkTypeThirdParty = 6, 2049+ 
kFmkTypeMsLite = 7, 2050+ kFmkTypeEnd = 8, // For range check purpose, valid range: [0, kFmkTypeEnd) 2051 }; 2052 2053 /// \brief ConverterParameters defined read-only converter parameters used by users in ModelParser. 2054diff --git a/mindspore/lite/mindir/include/mindir.h b/mindspore/lite/mindir/include/mindir.h 2055index ca811dce..f47cad8c 100644 2056--- a/mindspore/lite/mindir/include/mindir.h 2057+++ b/mindspore/lite/mindir/include/mindir.h 2058@@ -151,6 +151,8 @@ int64_t MindIR_Conv2DFusion_GetOutChannel(ConstPrimitivePtr primitive); 2059 void MindIR_Conv2DFusion_SetOutChannel(PrimitivePtr *primitive, int64_t out_channel); 2060 ActivationType MindIR_Conv2DFusion_GetActivationType(ConstPrimitivePtr primitive); 2061 void MindIR_Conv2DFusion_SetActivationType(PrimitivePtr *primitive, ActivationType activation_type); 2062+Format MindIR_Conv2DFusion_GetFormat(ConstPrimitivePtr primitive); 2063+void MindIR_Conv2DFusion_SetFormat(PrimitivePtr *primitive, Format format); 2064 2065 // ********** Conv2dTransposeFusion ********** 2066 PrimitivePtr MindIR_Conv2dTransposeFusion_CreatePrimitive( 2067diff --git a/mindspore/lite/mindir/src/mindir.cc b/mindspore/lite/mindir/src/mindir.cc 2068index 7fc9c00e..374bbef5 100644 2069--- a/mindspore/lite/mindir/src/mindir.cc 2070+++ b/mindspore/lite/mindir/src/mindir.cc 2071@@ -1215,6 +1215,46 @@ void MindIR_Conv2DFusion_SetActivationType(PrimitivePtr *primitive, ActivationTy 2072 } 2073 } 2074 2075+Format MindIR_Conv2DFusion_GetFormat(ConstPrimitivePtr primitive) { 2076+ if (primitive != nullptr) { 2077+ auto prim = static_cast<const schema::Primitive *>(primitive); 2078+ auto value = prim->value_as_Conv2DFusion(); 2079+ if (prim != nullptr && value != nullptr) { 2080+ return static_cast<Format>(value->format()); 2081+ } else { 2082+ Format en = static_cast<Format>(0); 2083+ return en; 2084+ } 2085+ } else { 2086+ Format en = static_cast<Format>(0); 2087+ return en; 2088+ } 2089+} 2090+ 2091+void MindIR_Conv2DFusion_SetFormat(PrimitivePtr *primitive, Format format) { 2092+ if (primitive != nullptr && *primitive != nullptr) { 2093+ auto prim = static_cast<schema::Primitive *>(*primitive); 2094+ auto value = prim->value_as_Conv2DFusion(); 2095+ if (prim != nullptr && value != nullptr) { 2096+ flatbuffers::FlatBufferBuilder fbb; 2097+ auto ops_offset = schema::CreateConv2DFusion( 2098+ fbb, static_cast<schema::Format>(format), 2099+ fbb.CreateVector(value->kernel_size()->data(), value->kernel_size()->size()), 2100+ fbb.CreateVector(value->stride()->data(), value->stride()->size()), 2101+ fbb.CreateVector(value->dilation()->data(), value->dilation()->size()), 2102+ static_cast<schema::PadMode>(value->pad_mode()), 2103+ fbb.CreateVector(value->pad_list()->data(), value->pad_list()->size()), 0, value->group(), value->in_channel(), 2104+ value->out_channel(), static_cast<schema::ActivationType>(value->activation_type())); 2105+ auto prim_offset = 2106+ schema::CreatePrimitive(fbb, static_cast<schema::PrimitiveType>(NODE_TYPE_CONV2D_FUSION), ops_offset.o); 2107+ fbb.Finish(prim_offset); 2108+ auto new_addr = MindIRMemoryManager::GetInstance()->CreatePrimitiveFromBuilder(fbb, prim); 2109+ auto ret_value = flatbuffers::GetMutableRoot<schema::Primitive>(new_addr); 2110+ *primitive = ret_value; 2111+ } 2112+ } 2113+} 2114+ 2115 // ********** Conv2dTransposeFusion ********** 2116 PrimitivePtr MindIR_Conv2dTransposeFusion_CreatePrimitive( 2117 const std::vector<int64_t> &kernel_size, const std::vector<int64_t> &stride, const std::vector<int64_t> &dilation, 2118diff 
--git a/mindspore/lite/mindir/src/mindir_tensor.cc b/mindspore/lite/mindir/src/mindir_tensor.cc 2119index 9ec2d0e4..2db4ce8b 100644 2120--- a/mindspore/lite/mindir/src/mindir_tensor.cc 2121+++ b/mindspore/lite/mindir/src/mindir_tensor.cc 2122@@ -134,7 +134,7 @@ void MindIR_Tensor_SetDataType(TensorPtr *tensor, DataType data_type) { 2123 name = fbb.CreateString(value->name()->c_str(), value->name()->size()); 2124 } 2125 auto ops_offset = 2126- schema::CreateTensor(fbb, 0, value->dataType(), dims, static_cast<schema::Format>(value->format()), 0, 0, data, 2127+ schema::CreateTensor(fbb, 0, data_type, dims, static_cast<schema::Format>(value->format()), 0, 0, data, 2128 ConvertQuantParams(fbb, value->quantParams()), 0, name); 2129 fbb.Finish(ops_offset); 2130 auto new_addr = MindIRMemoryManager::GetInstance()->CreateTensorFromBuilder(fbb, value); 2131diff --git a/mindspore/lite/mindir/src/utils.cc b/mindspore/lite/mindir/src/utils.cc 2132index 28d66ceb..b044f414 100644 2133--- a/mindspore/lite/mindir/src/utils.cc 2134+++ b/mindspore/lite/mindir/src/utils.cc 2135@@ -22,7 +22,7 @@ namespace lite { 2136 2137 // ********** PrimitiveBase ********** 2138 NodeType MindIR_Primitive_GetType(PrimitivePtr primitive) { 2139- auto prim = flatbuffers::GetMutableRoot<schema::Primitive>(primitive); 2140+ auto prim = static_cast<schema::Primitive *>(primitive); 2141 auto type = prim->value_type(); 2142 return static_cast<NodeType>(type); 2143 } 2144diff --git a/mindspore/lite/src/CMakeLists.txt b/mindspore/lite/src/CMakeLists.txt 2145index 5afccc87..de1781cd 100644 2146--- a/mindspore/lite/src/CMakeLists.txt 2147+++ b/mindspore/lite/src/CMakeLists.txt 2148@@ -410,6 +410,11 @@ add_subdirectory(common) 2149 add_library(lite_src_mid OBJECT ${LITE_SRC}) 2150 add_dependencies(lite_src_mid lite_src_common_mid fbs_src fbs_inner_src) 2151 2152+if(SUPPORT_NNRT) 2153+ add_subdirectory(litert/delegate/nnrt) 2154+ target_link_libraries(lite_src_mid nnrt_mid) 2155+endif() 2156+ 2157 if(MSLITE_ENABLE_ACL) 2158 include_directories(${TOP_DIR}/graphengine/910/inc/external) 2159 if(NOT (MSLITE_ENABLE_CLOUD_FUSION_INFERENCE OR MSLITE_ENABLE_CLOUD_INFERENCE)) 2160@@ -497,7 +502,6 @@ if(MSLITE_ENABLE_MINDRT) 2161 endif() 2162 2163 if (SUPPORT_NNRT) 2164- add_subdirectory(litert/delegate/nnrt) 2165 target_link_libraries(mindspore-lite nnrt_mid) 2166 target_link_libraries(mindspore-lite_static nnrt_mid) 2167 endif() 2168diff --git a/mindspore/lite/src/common/context_util.cc b/mindspore/lite/src/common/context_util.cc 2169index f011e0d7..0fa4ebd0 100644 2170--- a/mindspore/lite/src/common/context_util.cc 2171+++ b/mindspore/lite/src/common/context_util.cc 2172@@ -118,6 +118,17 @@ std::shared_ptr<mindspore::DeviceInfoContext> CustomDeviceInfoFromCustomDeviceCo 2173 MS_CHECK_TRUE_RET(device_info != nullptr, nullptr); 2174 return device_info; 2175 } 2176+ 2177+std::shared_ptr<mindspore::NNRTDeviceInfo> NNRtDeviceInfoFromNNRtDeviceContext( 2178+ const lite::DeviceContext &nnrt_context) { 2179+ if (nnrt_context.device_type_ != DT_NNRT) { 2180+ MS_LOG(ERROR) << "Function input parameter is not NNRt context."; 2181+ return nullptr; 2182+ } 2183+ auto nnrt_info = std::make_shared<mindspore::NNRTDeviceInfo>(); 2184+ MS_CHECK_TRUE_RET(nnrt_info != nullptr, nullptr); 2185+ return nnrt_info; 2186+} 2187 } // namespace 2188 2189 mindspore::Context *MSContextFromContext(const std::shared_ptr<InnerContext> &context) { 2190@@ -144,7 +155,8 @@ mindspore::Context *MSContextFromContext(const std::shared_ptr<InnerContext> &co 2191 {DT_GPU, 
GPUDeviceInfoFromGPUDeviceContext}, 2192 {DT_NPU, NPUDeviceInfoFromNPUDeviceContext}, 2193 {DT_ASCEND, AscendDeviceInfoFromAscendDeviceContext}, 2194- {DT_CUSTOM, CustomDeviceInfoFromCustomDeviceContext}}; 2195+ {DT_CUSTOM, CustomDeviceInfoFromCustomDeviceContext}, 2196+ {DT_NNRT, NNRtDeviceInfoFromNNRtDeviceContext}}; 2197 for (auto &device_context : context->device_list_) { 2198 auto device_type = device_context.device_type_; 2199 if (transfer_funcs.find(device_type) == transfer_funcs.end()) { 2200diff --git a/mindspore/lite/src/common/log.cc b/mindspore/lite/src/common/log.cc 2201index 66c0d76b..f1040662 100644 2202--- a/mindspore/lite/src/common/log.cc 2203+++ b/mindspore/lite/src/common/log.cc 2204@@ -21,6 +21,13 @@ 2205 #include <android/log.h> 2206 #endif 2207 2208+#ifdef MS_COMPILE_OHOS 2209+#define LOG_DOMAIN 0xD002102 2210+#define LOG_TAG "MS_LITE" 2211+#define FORMAT "[%{public}s:%{public}d] %{public}s# %{public}s" 2212+#include "hilog/log.h" 2213+#endif 2214+ 2215 // namespace to support utils module definition namespace mindspore constexpr const char *ANDROID_LOG_TAG = "MS_LITE"; 2216 namespace mindspore { 2217 #if defined(__ANDROID__) 2218@@ -73,17 +80,33 @@ static int GetAndroidLogLevel(LiteLogLevel level) { 2219 2220 #ifdef MS_COMPILE_OHOS 2221 void PrintHiLog(LiteLogLevel level, const char *file, int line, const char *func, const char *msg) { 2222+#ifdef MS_COMPILE_WITH_OHOS_NDK 2223+ // When build with OHOS NDK, use public api of hilog module. 2224 if (level == LiteLogLevel::DEBUG) { 2225- OHOS::HiviewDFX::HiLog::Debug(MSLite_LABEL, FORMAT, file, line, func, msg); 2226+ OH_LOG_Print(LOG_APP, LOG_DEBUG, LOG_DOMAIN, LOG_TAG, FORMAT, file, line, func, msg); 2227 } else if (level == LiteLogLevel::INFO) { 2228- OHOS::HiviewDFX::HiLog::Info(MSLite_LABEL, FORMAT, file, line, func, msg); 2229+ OH_LOG_Print(LOG_APP, LOG_INFO, LOG_DOMAIN, LOG_TAG, FORMAT, file, line, func, msg); 2230 } else if (level == LiteLogLevel::WARNING) { 2231- OHOS::HiviewDFX::HiLog::Warn(MSLite_LABEL, FORMAT, file, line, func, msg); 2232+ OH_LOG_Print(LOG_APP, LOG_WARN, LOG_DOMAIN, LOG_TAG, FORMAT, file, line, func, msg); 2233 } else if (level == LiteLogLevel::ERROR) { 2234- OHOS::HiviewDFX::HiLog::Error(MSLite_LABEL, FORMAT, file, line, func, msg); 2235+ OH_LOG_Print(LOG_APP, LOG_ERROR, LOG_DOMAIN, LOG_TAG, FORMAT, file, line, func, msg); 2236 } else { 2237- OHOS::HiviewDFX::HiLog::Error(MSLite_LABEL, FORMAT, file, line, func, msg); 2238+ OH_LOG_Print(LOG_APP, LOG_ERROR, LOG_DOMAIN, LOG_TAG, FORMAT, file, line, func, msg); 2239 } 2240+#else 2241+ // When build in OHOS repo, use inner api of hilog module. 
2242+ if (level == LiteLogLevel::DEBUG) { 2243+ HILOG_DEBUG(LOG_APP, FORMAT, file, line, func, msg); 2244+ } else if (level == LiteLogLevel::INFO) { 2245+ HILOG_INFO(LOG_APP, FORMAT, file, line, func, msg); 2246+ } else if (level == LiteLogLevel::WARNING) { 2247+ HILOG_WARN(LOG_APP, FORMAT, file, line, func, msg); 2248+ } else if (level == LiteLogLevel::ERROR) { 2249+ HILOG_ERROR(LOG_APP, FORMAT, file, line, func, msg); 2250+ } else { 2251+ HILOG_ERROR(LOG_APP, FORMAT, file, line, func, msg); 2252+ } 2253+#endif 2254 } 2255 #endif 2256 2257diff --git a/mindspore/lite/src/common/log.h b/mindspore/lite/src/common/log.h 2258index 3002a454..bea21f01 100644 2259--- a/mindspore/lite/src/common/log.h 2260+++ b/mindspore/lite/src/common/log.h 2261@@ -23,12 +23,6 @@ 2262 #include <unordered_map> 2263 #include "utils/overload.h" 2264 2265-#ifdef MS_COMPILE_OHOS 2266-#define LOG_DOMAIN 0x2102 2267-#define LOG_TAG "MS_Lite" 2268-#define FORMAT "[%{public}s:%{public}d] %{public}s# %{public}s" 2269-#include "hilog/log.h" 2270-#endif 2271 // NOTICE: when relative path of 'log.h' changed, macro 'LITE_LOG_HEAR_FILE_REL_PATH' must be changed 2272 #ifndef LITE_LOG_HEAR_FILE_REL_PATH 2273 #define LITE_LOG_HEAR_FILE_REL_PATH "mindspore/lite/src/common/log.h" 2274@@ -140,6 +134,9 @@ class LiteLogWriter { 2275 LiteLogLevel log_level_; 2276 }; 2277 2278+#define MSLOG_IF(level) \ 2279+ mindspore::LiteLogWriter(mindspore::LiteLocationInfo(LITE_FILE_NAME, __LINE__, __FUNCTION__), level) < \ 2280+ mindspore::LiteLogStream() 2281 2282 #define MS_LOG(level) MS_LOG_##level 2283 2284@@ -148,47 +145,6 @@ class LiteLogWriter { 2285 #define MS_LOG_WARNING MSLOG_IF(mindspore::LiteLogLevel::WARNING) 2286 #define MS_LOG_ERROR MSLOG_IF(mindspore::LiteLogLevel::ERROR) 2287 2288- 2289-#ifdef MS_COMPILE_OHOS 2290-namespace { 2291-constexpr unsigned int MSLITE_DOMAIN_ID_START = 0xD0029A0; 2292-constexpr unsigned int MSLITE_DOMAIN_ID_END = MSLITE_DOMAIN_ID_START + 32; 2293-constexpr unsigned int TEST_DOMAIN_ID = 0xD000F00; 2294-} // namespace 2295- 2296-#define FILE_NAME (__builtin_strrchr(__FILE__, '/') ? 
__builtin_strrchr(__FILE__, '/') + 1 : __FILE__) 2297-#define FORMAT "[%{public}s:%{public}d] %{public}s# %{public}s" 2298- 2299-#define MSLOG_IF(level) \ 2300- mindspore::LiteLogWriter(mindspore::LiteLocationInfo(LITE_FILE_NAME, __LINE__, __FUNCTION__), level) < \ 2301- mindspore::LiteLogStream() 2302- 2303-enum MSLiteManagerLogLabel { 2304- // Component labels, you can add if needed 2305- COMP_FWK = 0, 2306- // Test label 2307- LABEL_TEST, 2308- // The end of labels, max to the domain id range length 32 2309- LABEL_END, 2310-}; 2311- 2312-enum MSLiteManagerLogDomain { 2313- DOMAIN_FRAMEWORK = MSLITE_DOMAIN_ID_START + COMP_FWK, // 0xD0029A0 2314- DOMAIN_TEST = TEST_DOMAIN_ID, // 0xD000F00 2315- DOMAIN_END = MSLITE_DOMAIN_ID_END, // Max to 0xD002940, keep the sequence and length same as MSLiteManagerLogLabel 2316-}; 2317- 2318-// Keep the sequence and length same as MSLiteManagerLogDomain 2319-static constexpr OHOS::HiviewDFX::HiLogLabel MSLite_LABEL = {LOG_CORE, DOMAIN_FRAMEWORK, "MSLiteFwk"}; 2320- 2321-#else 2322- 2323-#define MSLOG_IF(level) \ 2324- mindspore::LiteLogWriter(mindspore::LiteLocationInfo(LITE_FILE_NAME, __LINE__, __FUNCTION__), level) < \ 2325- mindspore::LiteLogStream() 2326- 2327-#endif 2328- 2329 } // namespace mindspore 2330 2331 #ifdef Debug 2332diff --git a/mindspore/lite/src/common/ops/populate/custom_populate.cc b/mindspore/lite/src/common/ops/populate/custom_populate.cc 2333index 5e1878b9..13957ed7 100644 2334--- a/mindspore/lite/src/common/ops/populate/custom_populate.cc 2335+++ b/mindspore/lite/src/common/ops/populate/custom_populate.cc 2336@@ -19,6 +19,9 @@ 2337 #include "nnacl/custom_parameter.h" 2338 #include "nnacl/split_parameter.h" 2339 #include "nnacl/custom_gru_parameter.h" 2340+#include "nnacl/custom_masked_fill_parameter.h" 2341+#include "nnacl/custom_is_inf_parameter.h" 2342+#include "nnacl/custom_tensor_scatter_max_parameter.h" 2343 using mindspore::schema::PrimitiveType_Custom; 2344 2345 namespace mindspore { 2346@@ -92,6 +95,39 @@ OpParameter *CreateCustomGruParameter() { 2347 return reinterpret_cast<OpParameter *>(param); 2348 } 2349 2350+OpParameter *CreateCustomIsInfParameter() { 2351+ auto *param = static_cast<CustomIsInfParameter *>(malloc(sizeof(CustomIsInfParameter))); 2352+ if (param == nullptr) { 2353+ MS_LOG(ERROR) << "malloc CustomIsInfParameter failed."; 2354+ return nullptr; 2355+ } 2356+ memset(param, 0, sizeof(CustomIsInfParameter)); 2357+ param->op_parameter_.type_ = PrimType_Inner_CustomIsInf; 2358+ return reinterpret_cast<OpParameter *>(param); 2359+} 2360+ 2361+OpParameter *CreateCustomTensorScatterMaxParameter() { 2362+ auto *param = static_cast<CustomTensorScatterMaxParameter *>(malloc(sizeof(CustomTensorScatterMaxParameter))); 2363+ if (param == nullptr) { 2364+ MS_LOG(ERROR) << "malloc CustomTensorScatterMaxParameter failed."; 2365+ return nullptr; 2366+ } 2367+ memset(param, 0, sizeof(CustomTensorScatterMaxParameter)); 2368+ param->op_parameter_.type_ = PrimType_Inner_CustomTensorScatterMax; 2369+ return reinterpret_cast<OpParameter *>(param); 2370+} 2371+ 2372+OpParameter *CreateCustomMaskedFillParameter() { 2373+ auto *param = static_cast<CustomMaskedFillParameter *>(malloc(sizeof(CustomMaskedFillParameter))); 2374+ if (param == nullptr) { 2375+ MS_LOG(ERROR) << "malloc CustomMaskedFillParameter failed."; 2376+ return nullptr; 2377+ } 2378+ memset(param, 0, sizeof(CustomMaskedFillParameter)); 2379+ param->op_parameter_.type_ = PrimType_Inner_CustomMaskedFill; 2380+ return reinterpret_cast<OpParameter *>(param); 2381+} 
2382+ 2383 OpParameter *PopulateCustomParameter(const void *prim) { 2384 MS_CHECK_TRUE_RET(prim != nullptr, nullptr); 2385 auto primitive = static_cast<const schema::Primitive *>(prim); 2386@@ -131,6 +167,23 @@ OpParameter *PopulateCustomParameter(const void *prim) { 2387 return CreateCustomGruParameter(); 2388 } else if (type == "CastGatherReduceFusion") { 2389 return CreateParam(PrimType_Inner_CastGatherReduceFusion); 2390+ } else if (type == "ThirdPartyModel") { 2391+ auto *param = static_cast<CustomParameter *>(malloc(sizeof(CustomParameter))); 2392+ if (param == nullptr) { 2393+ MS_LOG(ERROR) << "malloc CustomParameter failed."; 2394+ return nullptr; 2395+ } 2396+ memset(param, 0, sizeof(CustomParameter)); 2397+ param->op_parameter_.type_ = PrimType_Inner_ThirdPartyModel; 2398+ // Just use the attr_data pointer to save the prim directly, the inner value is parsed as necessary. 2399+ param->attr_data[0] = static_cast<char *>(const_cast<void *>(prim)); 2400+ return reinterpret_cast<OpParameter *>(param); 2401+ } else if (type == "MaskedFill") { 2402+ return CreateCustomMaskedFillParameter(); 2403+ } else if (type == "TensorScatterMax") { 2404+ return CreateCustomTensorScatterMaxParameter(); 2405+ } else if (type == "IsInf") { 2406+ return CreateCustomIsInfParameter(); 2407 } else { 2408 MS_LOG(ERROR) << "Unsupported custom type: " << type; 2409 } 2410diff --git a/mindspore/lite/src/litert/c_api/context_c.cc b/mindspore/lite/src/litert/c_api/context_c.cc 2411index f614ef09..c5f825aa 100644 2412--- a/mindspore/lite/src/litert/c_api/context_c.cc 2413+++ b/mindspore/lite/src/litert/c_api/context_c.cc 2414@@ -14,12 +14,17 @@ 2415 * limitations under the License. 2416 */ 2417 #include "include/c_api/context_c.h" 2418-#include "src/litert/c_api/context_c.h" 2419+#include "include/api/context.h" 2420+#include <string.h> 2421+#include "src/litert/c_api/type_c_private.h" 2422 #include "src/common/log_adapter.h" 2423+#ifdef SUPPORT_NNRT 2424+#include "interfaces/kits/c/neural_network_runtime/neural_network_runtime.h" 2425+#endif 2426 2427 // ================ Context ================ 2428 OH_AI_ContextHandle OH_AI_ContextCreate() { 2429- auto impl = new (std::nothrow) mindspore::ContextC; 2430+ auto impl = new (std::nothrow) mindspore::Context(); 2431 if (impl == nullptr) { 2432 MS_LOG(ERROR) << "memory allocation failed."; 2433 return nullptr; 2434@@ -29,7 +34,7 @@ OH_AI_ContextHandle OH_AI_ContextCreate() { 2435 2436 void OH_AI_ContextDestroy(OH_AI_ContextHandle *context) { 2437 if (context != nullptr && *context != nullptr) { 2438- auto impl = static_cast<mindspore::ContextC *>(*context); 2439+ auto impl = static_cast<mindspore::Context *>(*context); 2440 delete impl; 2441 *context = nullptr; 2442 } 2443@@ -40,8 +45,8 @@ void OH_AI_ContextSetThreadNum(OH_AI_ContextHandle context, int32_t thread_num) 2444 MS_LOG(ERROR) << "param is nullptr."; 2445 return; 2446 } 2447- auto impl = static_cast<mindspore::ContextC *>(context); 2448- impl->thread_num = thread_num; 2449+ auto impl = static_cast<mindspore::Context *>(context); 2450+ impl->SetThreadNum(thread_num); 2451 } 2452 2453 int32_t OH_AI_ContextGetThreadNum(const OH_AI_ContextHandle context) { 2454@@ -49,8 +54,8 @@ int32_t OH_AI_ContextGetThreadNum(const OH_AI_ContextHandle context) { 2455 MS_LOG(ERROR) << "param is nullptr."; 2456 return 0; 2457 } 2458- auto impl = static_cast<mindspore::ContextC *>(context); 2459- return impl->thread_num; 2460+ auto impl = static_cast<mindspore::Context *>(context); 2461+ return impl->GetThreadNum(); 2462 } 
2463 2464 void OH_AI_ContextSetThreadAffinityMode(OH_AI_ContextHandle context, int mode) { 2465@@ -58,8 +63,8 @@ void OH_AI_ContextSetThreadAffinityMode(OH_AI_ContextHandle context, int mode) { 2466 MS_LOG(ERROR) << "param is nullptr."; 2467 return; 2468 } 2469- auto impl = static_cast<mindspore::ContextC *>(context); 2470- impl->affinity_mode = mode; 2471+ auto impl = static_cast<mindspore::Context *>(context); 2472+ impl->SetThreadAffinity(mode); 2473 return; 2474 } 2475 2476@@ -68,8 +73,8 @@ int OH_AI_ContextGetThreadAffinityMode(const OH_AI_ContextHandle context) { 2477 MS_LOG(ERROR) << "param is nullptr."; 2478 return 0; 2479 } 2480- auto impl = static_cast<mindspore::ContextC *>(context); 2481- return impl->affinity_mode; 2482+ auto impl = static_cast<mindspore::Context *>(context); 2483+ return impl->GetThreadAffinityMode(); 2484 } 2485 2486 void OH_AI_ContextSetThreadAffinityCoreList(OH_AI_ContextHandle context, const int32_t *core_list, size_t core_num) { 2487@@ -78,8 +83,8 @@ void OH_AI_ContextSetThreadAffinityCoreList(OH_AI_ContextHandle context, const i 2488 return; 2489 } 2490 const std::vector<int32_t> vec_core_list(core_list, core_list + core_num); 2491- auto impl = static_cast<mindspore::ContextC *>(context); 2492- impl->affinity_core_list = vec_core_list; 2493+ auto impl = static_cast<mindspore::Context *>(context); 2494+ impl->SetThreadAffinity(vec_core_list); 2495 return; 2496 } 2497 2498@@ -88,9 +93,18 @@ const int32_t *OH_AI_ContextGetThreadAffinityCoreList(const OH_AI_ContextHandle 2499 MS_LOG(ERROR) << "param is nullptr."; 2500 return nullptr; 2501 } 2502- auto impl = static_cast<mindspore::ContextC *>(context); 2503- *core_num = impl->affinity_core_list.size(); 2504- return impl->affinity_core_list.data(); 2505+ auto impl = static_cast<mindspore::Context *>(context); 2506+ auto affinity_core_list = impl->GetThreadAffinityCoreList(); 2507+ *core_num = affinity_core_list.size(); 2508+ int32_t *core_list = static_cast<int32_t *>(malloc((*core_num) * sizeof(int32_t))); 2509+ if (core_list == nullptr) { 2510+ MS_LOG(ERROR) << "malloc core_list is null."; 2511+ return nullptr; 2512+ } 2513+ for (size_t i = 0; i < affinity_core_list.size(); i++) { 2514+ core_list[i] = affinity_core_list[i]; 2515+ } 2516+ return core_list; 2517 } 2518 2519 void OH_AI_ContextSetEnableParallel(OH_AI_ContextHandle context, bool is_parallel) { 2520@@ -98,8 +112,8 @@ void OH_AI_ContextSetEnableParallel(OH_AI_ContextHandle context, bool is_paralle 2521 MS_LOG(ERROR) << "param is nullptr."; 2522 return; 2523 } 2524- auto impl = static_cast<mindspore::ContextC *>(context); 2525- impl->enable_parallel = is_parallel; 2526+ auto impl = static_cast<mindspore::Context *>(context); 2527+ impl->SetEnableParallel(is_parallel); 2528 } 2529 2530 bool OH_AI_ContextGetEnableParallel(const OH_AI_ContextHandle context) { 2531@@ -107,8 +121,8 @@ bool OH_AI_ContextGetEnableParallel(const OH_AI_ContextHandle context) { 2532 MS_LOG(ERROR) << "param is nullptr."; 2533 return false; 2534 } 2535- auto impl = static_cast<mindspore::ContextC *>(context); 2536- return impl->enable_parallel; 2537+ auto impl = static_cast<mindspore::Context *>(context); 2538+ return impl->GetEnableParallel(); 2539 } 2540 2541 void OH_AI_ContextAddDeviceInfo(OH_AI_ContextHandle context, OH_AI_DeviceInfoHandle device_info) { 2542@@ -116,25 +130,36 @@ void OH_AI_ContextAddDeviceInfo(OH_AI_ContextHandle context, OH_AI_DeviceInfoHan 2543 MS_LOG(ERROR) << "param is nullptr."; 2544 return; 2545 } 2546- auto impl = static_cast<mindspore::ContextC 
*>(context); 2547- std::shared_ptr<mindspore::DeviceInfoC> device(static_cast<mindspore::DeviceInfoC *>(device_info)); 2548- impl->device_info_list.push_back(device); 2549+ auto impl = static_cast<mindspore::Context *>(context); 2550+ std::shared_ptr<mindspore::DeviceInfoContext> device(static_cast<mindspore::DeviceInfoContext *>(device_info)); 2551+ impl->MutableDeviceInfo().push_back(device); 2552 } 2553 2554 // ================ DeviceInfo ================ 2555 OH_AI_DeviceInfoHandle OH_AI_DeviceInfoCreate(OH_AI_DeviceType device_type) { 2556- mindspore::DeviceInfoC *impl = new (std::nothrow) mindspore::DeviceInfoC; 2557+ mindspore::DeviceInfoContext *impl; 2558+ if (OH_AI_DEVICETYPE_CPU == device_type) { 2559+ impl = new (std::nothrow) mindspore::CPUDeviceInfo(); 2560+ } else if (OH_AI_DEVICETYPE_GPU == device_type) { 2561+ impl = new (std::nothrow) mindspore::GPUDeviceInfo(); 2562+ } else if (OH_AI_DEVICETYPE_KIRIN_NPU == device_type) { 2563+ impl = new (std::nothrow) mindspore::KirinNPUDeviceInfo(); 2564+ } else if (OH_AI_DEVICETYPE_NNRT == device_type) { 2565+ impl = new (std::nothrow) mindspore::NNRTDeviceInfo(); 2566+ } else { 2567+ MS_LOG(ERROR) << "device_type is invalid."; 2568+ impl = nullptr; 2569+ } 2570 if (impl == nullptr) { 2571 MS_LOG(ERROR) << "memory allocation failed."; 2572 return nullptr; 2573 } 2574- impl->device_type = device_type; 2575 return static_cast<OH_AI_DeviceInfoHandle>(impl); 2576 } 2577 2578 void OH_AI_DeviceInfoDestroy(OH_AI_DeviceInfoHandle *device_info) { 2579 if (device_info != nullptr && *device_info != nullptr) { 2580- auto impl = static_cast<mindspore::DeviceInfoC *>(*device_info); 2581+ auto impl = static_cast<mindspore::DeviceInfoContext *>(*device_info); 2582 delete impl; 2583 *device_info = nullptr; 2584 } 2585@@ -145,8 +170,8 @@ void OH_AI_DeviceInfoSetProvider(OH_AI_DeviceInfoHandle device_info, const char 2586 MS_LOG(ERROR) << "param is nullptr."; 2587 return; 2588 } 2589- auto impl = static_cast<mindspore::DeviceInfoC *>(device_info); 2590- impl->provider = provider; 2591+ auto impl = static_cast<mindspore::DeviceInfoContext *>(device_info); 2592+ impl->SetProvider(provider); 2593 } 2594 2595 const char *OH_AI_DeviceInfoGetProvider(const OH_AI_DeviceInfoHandle device_info) { 2596@@ -154,8 +179,14 @@ const char *OH_AI_DeviceInfoGetProvider(const OH_AI_DeviceInfoHandle device_info 2597 MS_LOG(ERROR) << "param is nullptr."; 2598 return nullptr; 2599 } 2600- auto impl = static_cast<mindspore::DeviceInfoC *>(device_info); 2601- return impl->provider.c_str(); 2602+ auto impl = static_cast<mindspore::DeviceInfoContext *>(device_info); 2603+ char *provider = static_cast<char *>(malloc(impl->GetProvider().size() + 1)); 2604+ if (provider == nullptr) { 2605+ MS_LOG(ERROR) << "malloc provider is null."; 2606+ return nullptr; 2607+ } 2608+ strcpy(provider, impl->GetProvider().c_str()); 2609+ return provider; 2610 } 2611 2612 void OH_AI_DeviceInfoSetProviderDevice(OH_AI_DeviceInfoHandle device_info, const char *device) { 2613@@ -163,8 +194,8 @@ void OH_AI_DeviceInfoSetProviderDevice(OH_AI_DeviceInfoHandle device_info, const 2614 MS_LOG(ERROR) << "param is nullptr."; 2615 return; 2616 } 2617- auto impl = static_cast<mindspore::DeviceInfoC *>(device_info); 2618- impl->provider_device = device; 2619+ auto impl = static_cast<mindspore::DeviceInfoContext *>(device_info); 2620+ impl->SetProviderDevice(device); 2621 } 2622 2623 const char *OH_AI_DeviceInfoGetProviderDevice(const OH_AI_DeviceInfoHandle device_info) { 2624@@ -172,8 +203,14 @@ const char 
*OH_AI_DeviceInfoGetProviderDevice(const OH_AI_DeviceInfoHandle devic 2625 MS_LOG(ERROR) << "param is nullptr."; 2626 return nullptr; 2627 } 2628- auto impl = static_cast<mindspore::DeviceInfoC *>(device_info); 2629- return impl->provider_device.c_str(); 2630+ auto impl = static_cast<mindspore::DeviceInfoContext *>(device_info); 2631+ char *provider_device = static_cast<char *>(malloc(impl->GetProviderDevice().size() + 1)); 2632+ if (provider_device == nullptr) { 2633+ MS_LOG(ERROR) << "malloc provider_device is null."; 2634+ return nullptr; 2635+ } 2636+ strcpy(provider_device, impl->GetProviderDevice().c_str()); 2637+ return provider_device; 2638 } 2639 2640 OH_AI_DeviceType OH_AI_DeviceInfoGetDeviceType(const OH_AI_DeviceInfoHandle device_info) { 2641@@ -181,8 +218,8 @@ OH_AI_DeviceType OH_AI_DeviceInfoGetDeviceType(const OH_AI_DeviceInfoHandle devi 2642 MS_LOG(ERROR) << "param is nullptr."; 2643 return OH_AI_DEVICETYPE_INVALID; 2644 } 2645- auto impl = static_cast<mindspore::DeviceInfoC *>(device_info); 2646- return impl->device_type; 2647+ auto impl = static_cast<mindspore::DeviceInfoContext *>(device_info); 2648+ return static_cast<OH_AI_DeviceType>(impl->GetDeviceType()); 2649 } 2650 2651 void OH_AI_DeviceInfoSetEnableFP16(OH_AI_DeviceInfoHandle device_info, bool is_fp16) { 2652@@ -190,9 +227,17 @@ void OH_AI_DeviceInfoSetEnableFP16(OH_AI_DeviceInfoHandle device_info, bool is_f 2653 MS_LOG(ERROR) << "param is nullptr."; 2654 return; 2655 } 2656- auto impl = static_cast<mindspore::DeviceInfoC *>(device_info); 2657- if (impl->device_type == OH_AI_DEVICETYPE_CPU || impl->device_type == OH_AI_DEVICETYPE_GPU) { 2658- impl->enable_fp16 = is_fp16; 2659+ 2660+ auto impl_device = static_cast<mindspore::DeviceInfoContext *>(device_info); 2661+ if (OH_AI_DEVICETYPE_CPU == static_cast<OH_AI_DeviceType>(impl_device->GetDeviceType())) { 2662+ auto impl = static_cast<mindspore::CPUDeviceInfo *>(device_info); 2663+ impl->SetEnableFP16(is_fp16); 2664+ } else if (OH_AI_DEVICETYPE_GPU == static_cast<OH_AI_DeviceType>(impl_device->GetDeviceType())) { 2665+ auto impl = static_cast<mindspore::GPUDeviceInfo *>(device_info); 2666+ impl->SetEnableFP16(is_fp16); 2667+ } else if (OH_AI_DEVICETYPE_NNRT == static_cast<OH_AI_DeviceType>(impl_device->GetDeviceType())) { 2668+ auto impl = static_cast<mindspore::NNRTDeviceInfo *>(device_info); 2669+ impl->SetEnableFP16(is_fp16); 2670 } else { 2671 MS_LOG(ERROR) << "Unsupported Feature."; 2672 } 2673@@ -203,11 +248,19 @@ bool OH_AI_DeviceInfoGetEnableFP16(const OH_AI_DeviceInfoHandle device_info) { 2674 MS_LOG(ERROR) << "param is nullptr."; 2675 return false; 2676 } 2677- auto impl = static_cast<mindspore::DeviceInfoC *>(device_info); 2678- if (impl->device_type == OH_AI_DEVICETYPE_CPU || impl->device_type == OH_AI_DEVICETYPE_GPU) { 2679- return impl->enable_fp16; 2680+ 2681+ auto impl_device = static_cast<mindspore::DeviceInfoContext *>(device_info); 2682+ if (OH_AI_DEVICETYPE_CPU == static_cast<OH_AI_DeviceType>(impl_device->GetDeviceType())) { 2683+ auto impl = static_cast<mindspore::CPUDeviceInfo *>(device_info); 2684+ return impl->GetEnableFP16(); 2685+ } else if (OH_AI_DEVICETYPE_GPU == static_cast<OH_AI_DeviceType>(impl_device->GetDeviceType())) { 2686+ auto impl = static_cast<mindspore::GPUDeviceInfo *>(device_info); 2687+ return impl->GetEnableFP16(); 2688+ } else if (OH_AI_DEVICETYPE_NNRT == static_cast<OH_AI_DeviceType>(impl_device->GetDeviceType())) { 2689+ auto impl = static_cast<mindspore::NNRTDeviceInfo *>(device_info); 2690+ return 
impl->GetEnableFP16(); 2691 } else { 2692- MS_LOG(ERROR) << "Unsupported Feature. device_type: " << impl->device_type; 2693+ MS_LOG(ERROR) << "Unsupported Feature. device_type: " << impl_device->GetDeviceType(); 2694 return false; 2695 } 2696 } 2697@@ -217,9 +270,10 @@ void OH_AI_DeviceInfoSetFrequency(OH_AI_DeviceInfoHandle device_info, int freque 2698 MS_LOG(ERROR) << "param is nullptr."; 2699 return; 2700 } 2701- auto impl = static_cast<mindspore::DeviceInfoC *>(device_info); 2702- if (impl->device_type == OH_AI_DEVICETYPE_KIRIN_NPU) { 2703- impl->frequency = frequency; 2704+ auto impl_device = static_cast<mindspore::DeviceInfoContext *>(device_info); 2705+ if (static_cast<OH_AI_DeviceType>(impl_device->GetDeviceType()) == OH_AI_DEVICETYPE_KIRIN_NPU) { 2706+ auto impl = static_cast<mindspore::KirinNPUDeviceInfo *>(device_info); 2707+ impl->SetFrequency(frequency); 2708 } else { 2709 MS_LOG(ERROR) << "Unsupported Feature."; 2710 } 2711@@ -230,11 +284,231 @@ int OH_AI_DeviceInfoGetFrequency(const OH_AI_DeviceInfoHandle device_info) { // 2712 MS_LOG(ERROR) << "param is nullptr."; 2713 return -1; 2714 } 2715- auto impl = static_cast<mindspore::DeviceInfoC *>(device_info); 2716- if (impl->device_type == OH_AI_DEVICETYPE_KIRIN_NPU) { 2717- return impl->frequency; 2718+ auto impl_device = static_cast<mindspore::DeviceInfoContext *>(device_info); 2719+ if (static_cast<OH_AI_DeviceType>(impl_device->GetDeviceType()) == OH_AI_DEVICETYPE_KIRIN_NPU) { 2720+ auto impl = static_cast<mindspore::KirinNPUDeviceInfo *>(device_info); 2721+ return impl->GetFrequency(); 2722 } else { 2723 MS_LOG(ERROR) << "Unsupported Feature."; 2724 return -1; 2725 } 2726 } 2727+ 2728+NNRTDeviceDesc *OH_AI_GetAllNNRTDeviceDescs(size_t *num) { 2729+ if (num == nullptr) { 2730+ MS_LOG(ERROR) << "Input num is null"; 2731+ return nullptr; 2732+ } 2733+#ifdef SUPPORT_NNRT 2734+ *num = 0; 2735+ 2736+ const size_t *all_device_ids; 2737+ uint32_t device_count; 2738+ auto ret = OH_NNDevice_GetAllDevicesID(&all_device_ids, &device_count); 2739+ if ((ret != OH_NN_SUCCESS) || (device_count == 0)) { 2740+ MS_LOG(ERROR) << "NNRT get all device id failed, ret: " << ret; 2741+ return nullptr; 2742+ } 2743+ 2744+ NNRTDeviceDesc *desc = (NNRTDeviceDesc *)malloc(sizeof(NNRTDeviceDesc) * device_count); 2745+ if (desc == nullptr) { 2746+ MS_LOG(ERROR) << "NNRT allocate desc failed"; 2747+ return nullptr; 2748+ } 2749+ 2750+ for (uint32_t i = 0; i < device_count; i++) { 2751+ desc[i].device_id = all_device_ids[i]; 2752+ OH_NN_DeviceType type; 2753+ (void)OH_NNDevice_GetType(all_device_ids[i], &type); 2754+ desc[i].device_type = static_cast<OH_AI_NNRTDeviceType>(type); 2755+ 2756+ const char *name = nullptr; 2757+ (void)OH_NNDevice_GetName(all_device_ids[i], &name); 2758+ desc[i].device_name[127] = '\0'; 2759+ strncpy(desc[i].device_name, name, 127); 2760+ } 2761+ *num = device_count; 2762+ return desc; 2763+#else 2764+ return nullptr; 2765+#endif 2766+} 2767+ 2768+NNRTDeviceDesc *OH_AI_GetElementOfNNRTDeviceDescs(NNRTDeviceDesc *descs, size_t index) { 2769+ if (descs == nullptr) { 2770+ MS_LOG(ERROR) << "descs is null"; 2771+ return nullptr; 2772+ } 2773+ return descs + index; 2774+} 2775+ 2776+void OH_AI_DestroyAllNNRTDeviceDescs(NNRTDeviceDesc **desc) { 2777+ if (desc == nullptr) { 2778+ MS_LOG(WARNING) << "desc is null"; 2779+ return; 2780+ } 2781+ free(*desc); 2782+ *desc = nullptr; 2783+} 2784+ 2785+size_t OH_AI_GetDeviceIdFromNNRTDeviceDesc(const NNRTDeviceDesc *desc) { 2786+ if (desc == nullptr) { 2787+ MS_LOG(ERROR) << "NNRT desc is 
null"; 2788+ return 0; 2789+ } 2790+ return desc->device_id; 2791+} 2792+ 2793+const char *OH_AI_GetNameFromNNRTDeviceDesc(const NNRTDeviceDesc *desc) { 2794+ if (desc == nullptr) { 2795+ MS_LOG(ERROR) << "NNRT desc is null"; 2796+ return nullptr; 2797+ } 2798+ return desc->device_name; 2799+} 2800+ 2801+OH_AI_NNRTDeviceType OH_AI_GetTypeFromNNRTDeviceDesc(const NNRTDeviceDesc *desc) { 2802+ if (desc == nullptr) { 2803+ MS_LOG(ERROR) << "NNRT desc is null"; 2804+ return OH_AI_NNRTDeviceType::OH_AI_NNRTDEVICE_OTHERS; 2805+ } 2806+ return desc->device_type; 2807+} 2808+ 2809+OH_AI_DeviceInfoHandle OH_AI_CreateNNRTDeviceInfoByName(const char *name) { 2810+ size_t num = 0; 2811+ NNRTDeviceDesc *desc = OH_AI_GetAllNNRTDeviceDescs(&num); 2812+ if (desc == nullptr) { 2813+ MS_LOG(ERROR) << "Get all device desc failed"; 2814+ return nullptr; 2815+ } 2816+ 2817+ OH_AI_DeviceInfoHandle handle = nullptr; 2818+ for (size_t i = 0; i < num; i++) { 2819+ if (strncmp(desc[i].device_name, name, NNRT_DEVICE_NAME_MAX - 1) == 0) { 2820+ handle = OH_AI_DeviceInfoCreate(OH_AI_DEVICETYPE_NNRT); 2821+ OH_AI_DeviceInfoSetDeviceId(handle, desc[i].device_id); 2822+ break; 2823+ } 2824+ } 2825+ OH_AI_DestroyAllNNRTDeviceDescs(&desc); 2826+ return handle; 2827+} 2828+ 2829+OH_AI_DeviceInfoHandle OH_AI_CreateNNRTDeviceInfoByType(OH_AI_NNRTDeviceType type) { 2830+ size_t num = 0; 2831+ NNRTDeviceDesc *desc = OH_AI_GetAllNNRTDeviceDescs(&num); 2832+ if (desc == nullptr) { 2833+ MS_LOG(ERROR) << "Get all device desc failed"; 2834+ return nullptr; 2835+ } 2836+ 2837+ OH_AI_DeviceInfoHandle handle = nullptr; 2838+ for (size_t i = 0; i < num; i++) { 2839+ if (desc[i].device_type == type) { 2840+ handle = OH_AI_DeviceInfoCreate(OH_AI_DEVICETYPE_NNRT); 2841+ OH_AI_DeviceInfoSetDeviceId(handle, desc[i].device_id); 2842+ break; 2843+ } 2844+ } 2845+ OH_AI_DestroyAllNNRTDeviceDescs(&desc); 2846+ return handle; 2847+} 2848+ 2849+void OH_AI_DeviceInfoSetDeviceId(OH_AI_DeviceInfoHandle device_info, size_t device_id) { 2850+ if (device_info == nullptr) { 2851+ MS_LOG(ERROR) << "device info is null"; 2852+ return; 2853+ } 2854+ if (OH_AI_DeviceInfoGetDeviceType(device_info) != OH_AI_DEVICETYPE_NNRT) { 2855+ MS_LOG(ERROR) << "Set device_id of non-NNRT device is not allowable, ignored"; 2856+ return; 2857+ } 2858+ auto impl = reinterpret_cast<mindspore::NNRTDeviceInfo *>(device_info); 2859+ impl->SetDeviceID(device_id); 2860+} 2861+ 2862+size_t OH_AI_DeviceInfoGetDeviceId(const OH_AI_DeviceInfoHandle device_info) { 2863+ if (device_info == nullptr) { 2864+ MS_LOG(ERROR) << "device info is null"; 2865+ return 0; 2866+ } 2867+ if (OH_AI_DeviceInfoGetDeviceType(device_info) != OH_AI_DEVICETYPE_NNRT) { 2868+ MS_LOG(ERROR) << "Get device_id of non-NNRT device is not allowable, ignored"; 2869+ return 0; 2870+ } 2871+ auto impl = reinterpret_cast<mindspore::NNRTDeviceInfo *>(device_info); 2872+ return impl->GetDeviceID(); 2873+} 2874+ 2875+void OH_AI_DeviceInfoSetPerformanceMode(OH_AI_DeviceInfoHandle device_info, OH_AI_PerformanceMode mode) { 2876+ if (device_info == nullptr) { 2877+ MS_LOG(ERROR) << "device info is null"; 2878+ return; 2879+ } 2880+ if (OH_AI_DeviceInfoGetDeviceType(device_info) != OH_AI_DEVICETYPE_NNRT) { 2881+ MS_LOG(ERROR) << "Set performance_mode of non-NNRT device is not allowable, ignored"; 2882+ return; 2883+ } 2884+ auto impl = reinterpret_cast<mindspore::NNRTDeviceInfo *>(device_info); 2885+ impl->SetPerformanceMode(mode); 2886+} 2887+ 2888+OH_AI_PerformanceMode OH_AI_DeviceInfoGetPerformanceMode(const 
OH_AI_DeviceInfoHandle device_info) { 2889+ if (device_info == nullptr) { 2890+ MS_LOG(ERROR) << "device info is null"; 2891+ return OH_AI_PERFORMANCE_NONE; 2892+ } 2893+ if (OH_AI_DeviceInfoGetDeviceType(device_info) != OH_AI_DEVICETYPE_NNRT) { 2894+ MS_LOG(ERROR) << "Get performance_mode of non-NNRT device is not allowable, ignored"; 2895+ return OH_AI_PERFORMANCE_NONE; 2896+ } 2897+ auto impl = reinterpret_cast<mindspore::NNRTDeviceInfo *>(device_info); 2898+ return static_cast<OH_AI_PerformanceMode>(impl->GetPerformanceMode()); 2899+} 2900+ 2901+void OH_AI_DeviceInfoSetPriority(OH_AI_DeviceInfoHandle device_info, OH_AI_Priority priority) { 2902+ if (device_info == nullptr) { 2903+ MS_LOG(ERROR) << "device info is null"; 2904+ return; 2905+ } 2906+ if (OH_AI_DeviceInfoGetDeviceType(device_info) != OH_AI_DEVICETYPE_NNRT) { 2907+ MS_LOG(ERROR) << "Set priority of non-NNRT device is not allowable, ignored"; 2908+ return; 2909+ } 2910+ auto impl = reinterpret_cast<mindspore::NNRTDeviceInfo *>(device_info); 2911+ impl->SetPriority(priority); 2912+} 2913+ 2914+OH_AI_Priority OH_AI_DeviceInfoGetPriority(const OH_AI_DeviceInfoHandle device_info) { 2915+ if (device_info == nullptr) { 2916+ MS_LOG(ERROR) << "device info is null"; 2917+ return OH_AI_PRIORITY_NONE; 2918+ } 2919+ if (OH_AI_DeviceInfoGetDeviceType(device_info) != OH_AI_DEVICETYPE_NNRT) { 2920+ MS_LOG(ERROR) << "Get priority of non-NNRT device is not allowable, ignored"; 2921+ return OH_AI_PRIORITY_NONE; 2922+ } 2923+ auto impl = reinterpret_cast<mindspore::NNRTDeviceInfo *>(device_info); 2924+ return static_cast<OH_AI_Priority>(impl->GetPriority()); 2925+} 2926+ 2927+OH_AI_API OH_AI_Status OH_AI_DeviceInfoAddExtension(OH_AI_DeviceInfoHandle device_info, 2928+ const char *name, const char*value, size_t value_size) { 2929+ if (device_info == nullptr) { 2930+ MS_LOG(ERROR) << "device info is null"; 2931+ return OH_AI_STATUS_LITE_NULLPTR; 2932+ } 2933+ if (OH_AI_DeviceInfoGetDeviceType(device_info) != OH_AI_DEVICETYPE_NNRT) { 2934+ MS_LOG(ERROR) << "Add extension to non-NNRT device is not allowable, ignored"; 2935+ return OH_AI_STATUS_LITE_ERROR; 2936+ } 2937+ auto impl = reinterpret_cast<mindspore::NNRTDeviceInfo *>(device_info); 2938+ mindspore::Extension extension; 2939+ extension.name = std::string(name); 2940+ extension.value = std::vector<uint8_t>(value, value + value_size); 2941+ std::vector<mindspore::Extension> extension_list = impl->GetExtensions(); 2942+ extension_list.push_back(extension); 2943+ impl->SetExtensions(extension_list); 2944+ return OH_AI_STATUS_SUCCESS; 2945+} 2946\ No newline at end of file 2947diff --git a/mindspore/lite/src/litert/c_api/context_c.h b/mindspore/lite/src/litert/c_api/context_c.h 2948index 076f4d1f..dc88b8a4 100644 2949--- a/mindspore/lite/src/litert/c_api/context_c.h 2950+++ b/mindspore/lite/src/litert/c_api/context_c.h 2951@@ -21,27 +21,4 @@ 2952 #include <memory> 2953 #include "include/c_api/types_c.h" 2954 2955-namespace mindspore { 2956-class Allocator; 2957-class Delegate; 2958- 2959-typedef struct DeviceInfoC { 2960- OH_AI_DeviceType device_type; 2961- bool enable_fp16 = false; 2962- int frequency = 3; 2963- std::string provider; 2964- std::string provider_device; 2965- std::shared_ptr<Allocator> allocator = nullptr; 2966-} DeviceInfoC; 2967- 2968-typedef struct ContextC { 2969- std::vector<std::shared_ptr<DeviceInfoC>> device_info_list; 2970- int32_t thread_num = 2; 2971- bool enable_parallel = false; 2972- std::vector<int32_t> affinity_core_list; 2973- int affinity_mode = 0; 2974- int 
delegate_mode = 0; 2975- std::shared_ptr<Delegate> delegate = nullptr; 2976-} ContextC; 2977-} // namespace mindspore 2978 #endif // MINDSPORE_LITE_SRC_RUNTIME_C_API_CONTEXT_C_H_ 2979diff --git a/mindspore/lite/src/litert/c_api/model_c.cc b/mindspore/lite/src/litert/c_api/model_c.cc 2980index 802df6b1..9da52d76 100644 2981--- a/mindspore/lite/src/litert/c_api/model_c.cc 2982+++ b/mindspore/lite/src/litert/c_api/model_c.cc 2983@@ -17,321 +17,135 @@ 2984 #include <vector> 2985 #include <cstdint> 2986 #include "include/api/context.h" 2987+#include <include/api/serialization.h> 2988 #include "include/api/types.h" 2989 #include "src/litert/cxx_api/tensor/tensor_impl.h" 2990 #include "src/litert/cxx_api/converters.h" 2991-#include "src/litert/lite_session.h" 2992-#include "src/litert/cpu_info.h" 2993+#include "src/litert//cxx_api/model/model_impl.h" 2994 2995 namespace mindspore { 2996 class ModelC { 2997- public: 2998- ModelC() : session_(nullptr), context_(nullptr) {} 2999+public: 3000+ ModelC() : model_(nullptr) {} 3001 ~ModelC() { 3002- for (auto &impl : tensor_map_) { 3003- delete impl.second; 3004+ for (auto in : inputs_) { 3005+ delete in; 3006+ } 3007+ for (auto out : outputs_) { 3008+ delete out; 3009+ } 3010+ for (auto out : outputs_train_) { 3011+ delete out; 3012 } 3013 } 3014 3015- Status Build(const void *model_data, size_t data_size, ModelType model_type, const ContextC *model_context); 3016- Status Build(const std::string &model_path, ModelType model_type, const ContextC *model_context); 3017- Status Resize(const std::vector<LiteTensorImpl *> &inputs, const std::vector<std::vector<int64_t>> &shapes); 3018- 3019- Status Predict(const OH_AI_TensorHandle *inputs, size_t input_num, OH_AI_TensorHandle **outputs, size_t *output_num, 3020- const OH_AI_KernelCallBack &before, const OH_AI_KernelCallBack &after); 3021- 3022- LiteTensorImpl **GetInputs(size_t *input_num); 3023- LiteTensorImpl **GetOutputs(size_t *output_num); 3024+ MSTensor **GetInputs(size_t *input_num); 3025+ MSTensor **GetOutputs(size_t *output_num); 3026+ mindspore::MSKernelCallBack TransCallBack(const OH_AI_KernelCallBack &oh_callback); 3027+ std::shared_ptr<Model> model_; 3028+ std::shared_ptr<Context> context_; 3029 3030- private: 3031- Status RunGraph(const OH_AI_KernelCallBack &before, const OH_AI_KernelCallBack &after); 3032- void ResetTensorData(std::vector<void *> old_data, std::vector<lite::Tensor *> tensors); 3033- LiteTensorImpl *TensorToTensorImpl(mindspore::lite::Tensor *tensor); 3034- 3035- private: 3036- std::shared_ptr<lite::LiteSession> session_ = nullptr; 3037- std::shared_ptr<const ContextC> context_ = nullptr; 3038- std::map<mindspore::lite::Tensor *, LiteTensorImpl *> tensor_map_; 3039- std::vector<LiteTensorImpl *> inputs_; 3040- std::vector<LiteTensorImpl *> outputs_; 3041- bool is_already_built = false; 3042+private: 3043+ MSTensor **GetOutputsTensor(size_t *output_num, std::vector<MSTensor *> *vec_tensors); 3044+ std::vector<MSTensor *> inputs_; 3045+ std::vector<MSTensor *> outputs_; 3046+ std::vector<MSTensor *> outputs_train_; 3047 }; 3048 3049-Status ModelC::Build(const void *model_data, size_t data_size, ModelType model_type, const ContextC *model_context) { 3050- if (is_already_built) { 3051- MS_LOG(ERROR) << "The model is already built."; 3052- return kLiteModelRebuild; 3053- } 3054- if (!PlatformInstructionSetSupportCheck()) { 3055- MS_LOG(ERROR) << "The platform exist don't support's instruction."; 3056- return kLiteNotSupport; 3057- } 3058- if(context_.get() != model_context){ 3059- 
context_.reset(model_context); 3060- } 3061- session_ = std::make_shared<lite::LiteSession>(); 3062- if (session_ == nullptr) { 3063- MS_LOG(ERROR) << "create session failed"; 3064- return kLiteNullptr; 3065- } 3066- auto ret = session_->Init(ContextUtils::Convert(model_context)); 3067- if (ret != mindspore::lite::RET_OK) { 3068- MS_LOG(ERROR) << "init session failed"; 3069- return static_cast<StatusCode>(ret); 3070- } 3071- ret = session_->LoadModelAndCompileByBuf(static_cast<const char *>(model_data), model_type, data_size); 3072- if (ret != RET_OK) { 3073- MS_LOG(ERROR) << "Load and compile failed"; 3074- return static_cast<StatusCode>(ret); 3075- } 3076- is_already_built = true; 3077- return static_cast<StatusCode>(kSuccess); 3078-} 3079- 3080-Status ModelC::Build(const std::string &model_path, ModelType model_type, const ContextC *model_context) { 3081- if (is_already_built) { 3082- MS_LOG(ERROR) << "The model is already built."; 3083- return kLiteModelRebuild; 3084- } 3085- if (!PlatformInstructionSetSupportCheck()) { 3086- MS_LOG(ERROR) << "The platform exist don't support's instruction."; 3087- return kLiteNotSupport; 3088- } 3089- if(context_.get() != model_context){ 3090- context_.reset(model_context); 3091- } 3092- session_ = std::make_shared<lite::LiteSession>(); 3093- if (session_ == nullptr) { 3094- MS_LOG(ERROR) << "create session failed"; 3095- return kLiteNullptr; 3096- } 3097- auto ret = session_->Init(ContextUtils::Convert(model_context)); 3098- if (ret != mindspore::lite::RET_OK) { 3099- MS_LOG(ERROR) << "init session failed"; 3100- return static_cast<StatusCode>(ret); 3101+MSTensor **ModelC::GetInputs(size_t *input_num) { 3102+ if (model_ == nullptr) { 3103+ MS_LOG(ERROR) << "model_ is nullptr."; 3104+ return nullptr; 3105 } 3106- ret = session_->LoadModelAndCompileByPath(model_path, model_type); 3107- if (ret != RET_OK) { 3108- MS_LOG(ERROR) << "Load and compile failed"; 3109- return static_cast<StatusCode>(ret); 3110+ if (!inputs_.empty()) { 3111+ *input_num = inputs_.size(); 3112+ return inputs_.data(); 3113 } 3114- is_already_built = true; 3115- return static_cast<StatusCode>(kSuccess); 3116-} 3117 3118-Status ModelC::Resize(const std::vector<LiteTensorImpl *> &inputs, const std::vector<std::vector<int64_t>> &shapes) { 3119- std::vector<lite::Tensor *> inner_input; 3120- size_t input_num = inputs.size(); 3121- for (size_t i = 0; i < input_num; i++) { 3122- auto input = inputs[i]; 3123- if (input == nullptr || input->lite_tensor() == nullptr) { 3124- MS_LOG(ERROR) << "Input tensor is null."; 3125- return kLiteInputTensorError; 3126+ auto inputs = model_->GetInputs(); 3127+ *input_num = inputs.size(); 3128+ inputs_.resize(inputs.size(), nullptr); 3129+ for (size_t i = 0; i < inputs.size(); i++) { 3130+ inputs_[i] = new (std::nothrow) MSTensor(inputs[i].impl()); 3131+ if (inputs_[i] == nullptr) { 3132+ inputs_.clear(); 3133+ return nullptr; 3134 } 3135- inner_input.push_back(input->lite_tensor()); 3136 } 3137- size_t shape_num = shapes.size(); 3138- std::vector<std::vector<int32_t>> inner_shapes(shape_num); 3139- for (size_t i = 0; i < shape_num; i++) { 3140- std::transform(shapes[i].begin(), shapes[i].end(), std::back_inserter(inner_shapes[i]), 3141- [](int64_t value) { return static_cast<int32_t>(value); }); 3142- } 3143- if (session_ == nullptr) { 3144- MS_LOG(ERROR) << "Session implement is null."; 3145- return kLiteNullptr; 3146- } 3147- auto ret = session_->Resize(inner_input, inner_shapes); 3148- return static_cast<StatusCode>(ret); 3149+ return inputs_.data(); 
3150 } 3151 3152-void ModelC::ResetTensorData(std::vector<void *> old_data, std::vector<lite::Tensor *> tensors) { 3153- for (size_t j = 0; j < old_data.size(); j++) { 3154- tensors.at(j)->set_data(old_data.at(j)); 3155+MSTensor **ModelC::GetOutputs(size_t *output_num) { 3156+ if (model_->GetTrainMode() == true) { 3157+ return GetOutputsTensor(output_num, &outputs_train_); 3158+ } else { 3159+ return GetOutputsTensor(output_num, &outputs_); 3160 } 3161 } 3162 3163-Status ModelC::Predict(const OH_AI_TensorHandle *inputs, size_t input_num, OH_AI_TensorHandle **outputs, 3164- size_t *output_num, const OH_AI_KernelCallBack &before, const OH_AI_KernelCallBack &after) { 3165- if (outputs == nullptr || session_ == nullptr) { 3166- MS_LOG(ERROR) << "param is nullptr."; 3167- return kLiteError; 3168+MSTensor **ModelC::GetOutputsTensor(size_t *output_num, std::vector<MSTensor *> *vec_tensors) { 3169+ if (model_ == nullptr) { 3170+ MS_LOG(ERROR) << "model_ is nullptr."; 3171+ return nullptr; 3172 } 3173- auto model_inputs = session_->GetInputs(); 3174- if (model_inputs.size() != input_num) { 3175- MS_LOG(ERROR) << "Wrong input size."; 3176- return kLiteError; 3177+ if (!vec_tensors->empty()) { 3178+ *output_num = vec_tensors->size(); 3179+ return vec_tensors->data(); 3180 } 3181- std::vector<void *> old_data; 3182- for (size_t i = 0; i < input_num; i++) { 3183- auto real_input = model_inputs[i]; 3184- auto user_input = static_cast<LiteTensorImpl *>(inputs[i]); 3185- if (user_input->DataType() != static_cast<DataType>(real_input->data_type())) { 3186- ResetTensorData(old_data, model_inputs); 3187- MS_LOG(ERROR) << "DataType does not match, input:" << user_input->Name() 3188- << ", real:" << real_input->tensor_name(); 3189- return kLiteInputTensorError; 3190- } 3191- if (user_input->Data() == nullptr) { 3192- ResetTensorData(old_data, model_inputs); 3193- MS_LOG(ERROR) << "Tensor " << user_input->Name() << " has no data."; 3194- return kLiteInputTensorError; 3195- } 3196 3197- // GPU tensor can't manipulate CPU memory which the user provides. 3198- // When model input is GPU tensor and user input is NOT GPU data, 3199- // just free model input's data for late GPU Tensor filling. 3200- if (IS_OPENCL_ALLOCATOR(real_input->allocator()) && (!IS_OPENCL_ALLOCATOR(user_input->GetAllocator()))) { 3201- real_input->FreeData(); 3202- } 3203- old_data.push_back(real_input->data()); // Save original data in model tensors. 3204- 3205- if (real_input->data_type() == kObjectTypeString) { 3206- std::vector<int32_t> shape; 3207- std::transform(user_input->Shape().begin(), user_input->Shape().end(), std::back_inserter(shape), 3208- [](int64_t value) { return static_cast<int32_t>(value); }); 3209- real_input->set_shape(shape); 3210- real_input->set_data(user_input->MutableData()); 3211- } else { 3212- if (user_input->MutableData() != real_input->data()) { 3213- if (real_input->Size() != user_input->DataSize()) { 3214- ResetTensorData(old_data, model_inputs); 3215- MS_LOG(ERROR) << "Tensor " << user_input->Name() << " has wrong data size."; 3216- return kLiteInputTensorError; 3217- } 3218- if (!IS_OPENCL_ALLOCATOR(real_input->allocator())) { 3219- real_input->set_data(user_input->MutableData()); 3220- } else { 3221- // Use outside CPU data to fill GPU Tensor. 
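// The per-tensor data-swapping logic deleted in this hunk is replaced by the rewritten
// OH_AI_ModelPredict further down in this file, which converts the C handles to mindspore::MSTensor
// and delegates to Model::Predict. A minimal caller-side sketch of that new path, assuming the
// OH_AI_ContextCreate / OH_AI_ContextAddDeviceInfo declarations from context_c.h and using
// "model.ms" as a placeholder file name:
//
//   OH_AI_ContextHandle ctx = OH_AI_ContextCreate();
//   OH_AI_ContextAddDeviceInfo(ctx, OH_AI_DeviceInfoCreate(OH_AI_DEVICETYPE_CPU));
//   OH_AI_ModelHandle model = OH_AI_ModelCreate();
//   if (OH_AI_ModelBuildFromFile(model, "model.ms", OH_AI_MODELTYPE_MINDIR, ctx) != OH_AI_STATUS_SUCCESS) {
//     return;  // build failed
//   }
//   OH_AI_TensorHandleArray inputs = OH_AI_ModelGetInputs(model);
//   for (size_t i = 0; i < inputs.handle_num; i++) {
//     void *buf = OH_AI_TensorGetMutableData(inputs.handle_list[i]);  // fill with user data
//     (void)buf;
//   }
//   OH_AI_TensorHandleArray outputs = {0, NULL};
//   if (OH_AI_ModelPredict(model, inputs, &outputs, NULL, NULL) == OH_AI_STATUS_SUCCESS) {
//     const float *out0 = (const float *)OH_AI_TensorGetData(outputs.handle_list[0]);
//     (void)out0;
//   }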
3222- auto dst_data = real_input->MutableData(); 3223- auto src_data = user_input->MutableData(); 3224- (void)memcpy(dst_data, src_data, real_input->Size()); 3225- } 3226- } 3227- } 3228- } 3229- auto ret = RunGraph(before, after); 3230- ResetTensorData(old_data, model_inputs); 3231- if (ret != kSuccess) { 3232- MS_LOG(ERROR) << "Run graph failed."; 3233- return ret; 3234- } 3235- 3236- *outputs = reinterpret_cast<OH_AI_TensorHandle *>(GetOutputs(output_num)); 3237- return kSuccess; 3238-} 3239- 3240-Status ModelC::RunGraph(const OH_AI_KernelCallBack &before, const OH_AI_KernelCallBack &after) { 3241- KernelCallBack before_call_back = nullptr; 3242- KernelCallBack after_call_back = nullptr; 3243- if (before != nullptr) { 3244- before_call_back = [&](const std::vector<mindspore::lite::Tensor *> &before_inputs, 3245- const std::vector<mindspore::lite::Tensor *> &before_outputs, 3246- const MSCallBackParam &call_param) { 3247- std::vector<LiteTensorImpl> inputs_impl; 3248- std::vector<LiteTensorImpl> outputs_impl; 3249- std::vector<OH_AI_TensorHandle> op_inputs; 3250- std::vector<OH_AI_TensorHandle> op_outputs; 3251- size_t op_input_num = before_inputs.size(); 3252- for (size_t i = 0; i < op_input_num; i++) { 3253- inputs_impl.emplace_back(before_inputs[i]); 3254- op_inputs.push_back(&(inputs_impl.back())); 3255- } 3256- size_t op_output_num = before_outputs.size(); 3257- for (size_t i = 0; i < op_output_num; i++) { 3258- outputs_impl.emplace_back(before_outputs[i]); 3259- op_outputs.push_back(&(outputs_impl.back())); 3260- } 3261- const OH_AI_CallBackParam op_info = {const_cast<char *>(call_param.node_name.c_str()), 3262- const_cast<char *>(call_param.node_type.c_str())}; 3263- OH_AI_TensorHandleArray inputs = {op_input_num, op_inputs.data()}; 3264- OH_AI_TensorHandleArray outputs = {op_output_num, op_outputs.data()}; 3265- return before(inputs, outputs, op_info); 3266- }; 3267- } 3268- if (after != nullptr) { 3269- after_call_back = [&](const std::vector<mindspore::lite::Tensor *> &after_inputs, 3270- const std::vector<mindspore::lite::Tensor *> &after_outputs, 3271- const MSCallBackParam &call_param) { 3272- std::vector<LiteTensorImpl> inputs_impl; 3273- std::vector<LiteTensorImpl> outputs_impl; 3274- std::vector<OH_AI_TensorHandle> op_inputs; 3275- std::vector<OH_AI_TensorHandle> op_outputs; 3276- size_t op_input_num = after_inputs.size(); 3277- for (size_t i = 0; i < op_input_num; i++) { 3278- inputs_impl.emplace_back(after_inputs[i]); 3279- op_inputs.push_back(&(inputs_impl.back())); 3280- } 3281- size_t op_output_num = after_outputs.size(); 3282- for (size_t i = 0; i < op_output_num; i++) { 3283- outputs_impl.emplace_back(after_outputs[i]); 3284- op_outputs.push_back(&(outputs_impl.back())); 3285- } 3286- const OH_AI_CallBackParam op_info = {const_cast<char *>(call_param.node_name.c_str()), 3287- const_cast<char *>(call_param.node_type.c_str())}; 3288- OH_AI_TensorHandleArray inputs = {op_input_num, op_inputs.data()}; 3289- OH_AI_TensorHandleArray outputs = {op_output_num, op_outputs.data()}; 3290- return after(inputs, outputs, op_info); 3291- }; 3292- } 3293- auto ret = session_->RunGraph(before_call_back, after_call_back); 3294- return static_cast<StatusCode>(ret); 3295-} 3296- 3297-LiteTensorImpl *ModelC::TensorToTensorImpl(mindspore::lite::Tensor *tensor) { 3298- LiteTensorImpl *impl = nullptr; 3299- auto iter = tensor_map_.find(tensor); 3300- if (iter != tensor_map_.end()) { 3301- impl = iter->second; 3302- } else { 3303- impl = new (std::nothrow) LiteTensorImpl(tensor); 3304- if 
(impl == nullptr || impl->lite_tensor() == nullptr) {
-      MS_LOG(ERROR) << "Create tensor failed.";
+  auto outputs = model_->GetOutputs();
+  *output_num = outputs.size();
+  vec_tensors->resize(outputs.size(), nullptr);
+  for (size_t i = 0; i < outputs.size(); i++) {
+    (*vec_tensors)[i] = new (std::nothrow) MSTensor(outputs[i].impl());
+    if ((*vec_tensors)[i] == nullptr) {
+      vec_tensors->clear();
       return nullptr;
     }
-    tensor_map_[tensor] = impl;
   }
-  return impl;
+  return vec_tensors->data();
 }
 
-LiteTensorImpl **ModelC::GetInputs(size_t *input_num) {
-  if (session_ == nullptr || input_num == nullptr) {
-    MS_LOG(ERROR) << "Session is null.";
-    return nullptr;
-  }
-  auto inputs = session_->GetInputs();
-  *input_num = inputs.size();
-  if (inputs_.capacity() < *input_num) {
-    inputs_.reserve(*input_num);
-  }
-  inputs_.clear();
-  std::transform(inputs.begin(), inputs.end(), std::back_inserter(inputs_),
-                 [&](lite::Tensor *input) { return TensorToTensorImpl(input); });
-  return inputs_.data();
-}
+mindspore::MSKernelCallBack ModelC::TransCallBack(const OH_AI_KernelCallBack &oh_callback) {
+  mindspore::MSKernelCallBack call_back = nullptr;
+  if (oh_callback != nullptr) {
+    call_back = [&](const std::vector<mindspore::MSTensor> &inputs,
+                    const std::vector<mindspore::MSTensor> &outputs,
+                    const mindspore::MSCallBackParam &opInfo) {
+      std::vector<OH_AI_TensorHandle> vec_inputs;
+      std::vector<OH_AI_TensorHandle> vec_outputs;
+      OH_AI_CallBackParam call_back = {const_cast<char *>(opInfo.node_name.c_str()),
+                                       const_cast<char *>(opInfo.node_type.c_str())};
+      size_t inputs_handle_num = inputs.size();
+      for (size_t i = 0; i < inputs_handle_num; i++) {
+        vec_inputs.push_back(
+          static_cast<OH_AI_TensorHandle>(&(const_cast<std::vector<mindspore::MSTensor> &>(inputs)[i])));
+      }
+      size_t outputs_handle_num = outputs.size();
+      for (size_t i = 0; i < outputs_handle_num; i++) {
+        vec_outputs.push_back(
+          static_cast<OH_AI_TensorHandle>(&(const_cast<std::vector<mindspore::MSTensor> &>(outputs)[i])));
+      }
 
-LiteTensorImpl **ModelC::GetOutputs(size_t *output_num) {
-  if (session_ == nullptr || output_num == nullptr) {
-    MS_LOG(ERROR) << "Session is null.";
-    return nullptr;
-  }
-  auto outputs = session_->GetOutputs();
-  *output_num = outputs.size();
-  if (outputs_.capacity() < *output_num) {
-    outputs_.reserve(*output_num);
+      OH_AI_TensorHandleArray handle_inputs = {inputs_handle_num, vec_inputs.data()};
+      OH_AI_TensorHandleArray handle_outputs = {outputs_handle_num, vec_outputs.data()};
+      return oh_callback(handle_inputs, handle_outputs, call_back);
+    };
   }
-  outputs_.clear();
-  std::transform(outputs.begin(), outputs.end(), std::back_inserter(outputs_),
-                 [&](std::unordered_map<std::string, mindspore::lite::Tensor *>::value_type iter) {
-                   return TensorToTensorImpl(iter.second);
-                 });
-  return outputs_.data();
+  return call_back;
 }
 } // namespace mindspore
 
 OH_AI_ModelHandle OH_AI_ModelCreate() {
   auto impl = new (std::nothrow) mindspore::ModelC();
   if (impl == nullptr) {
-    MS_LOG(ERROR) << "Model implement is null.";
+    MS_LOG(ERROR) << "Model implement is nullptr.";
+    return nullptr;
+  }
+  impl->model_ = std::make_shared<mindspore::Model>();
+  if (impl->model_ == nullptr) {
+    
MS_LOG(ERROR) << "model_ is nullptr."; 3391+ delete impl; 3392 return nullptr; 3393 } 3394 return static_cast<OH_AI_ModelHandle>(impl); 3395@@ -358,55 +172,59 @@ size_t OH_AI_ModelCalcWorkspaceSize(OH_AI_ModelHandle model) { 3396 OH_AI_Status OH_AI_ModelBuild(OH_AI_ModelHandle model, const void *model_data, size_t data_size, 3397 OH_AI_ModelType model_type, const OH_AI_ContextHandle model_context) { 3398 if (model == nullptr || model_data == nullptr || model_context == nullptr) { 3399- MS_LOG(ERROR) << "param is nullptr."; 3400+ MS_LOG(ERROR) << "model/model_data/model_context is nullptr."; 3401 return OH_AI_STATUS_LITE_NULLPTR; 3402 } 3403 if (model_type == OH_AI_MODELTYPE_INVALID) { 3404- MS_LOG(ERROR) << "param is invalid."; 3405+ MS_LOG(ERROR) << "model_type is invalid."; 3406 return OH_AI_STATUS_LITE_PARAM_INVALID; 3407 } 3408- mindspore::ContextC *context = static_cast<mindspore::ContextC *>(model_context); 3409+ mindspore::Context *context = static_cast<mindspore::Context *>(model_context); 3410 auto impl = static_cast<mindspore::ModelC *>(model); 3411- auto ret = impl->Build(model_data, data_size, static_cast<mindspore::ModelType>(model_type), context); 3412+ if (impl->context_.get() != context) { 3413+ impl->context_.reset(context); 3414+ } 3415+ auto ret = impl->model_->Build(model_data, data_size, static_cast<mindspore::ModelType>(model_type), impl->context_); 3416 return static_cast<OH_AI_Status>(ret.StatusCode()); 3417 } 3418 3419 OH_AI_Status OH_AI_ModelBuildFromFile(OH_AI_ModelHandle model, const char *model_path, OH_AI_ModelType model_type, 3420 const OH_AI_ContextHandle model_context) { 3421 if (model == nullptr || model_path == nullptr || model_context == nullptr) { 3422- MS_LOG(ERROR) << "param is nullptr."; 3423+ MS_LOG(ERROR) << "model/model_path/model_context is nullptr."; 3424 return OH_AI_STATUS_LITE_NULLPTR; 3425 } 3426 if (model_type == OH_AI_MODELTYPE_INVALID) { 3427- MS_LOG(ERROR) << "param is invalid."; 3428+ MS_LOG(ERROR) << "model_type is invalid."; 3429 return OH_AI_STATUS_LITE_PARAM_INVALID; 3430 } 3431- mindspore::ContextC *context = static_cast<mindspore::ContextC *>(model_context); 3432+ mindspore::Context *context = static_cast<mindspore::Context *>(model_context); 3433 auto impl = static_cast<mindspore::ModelC *>(model); 3434- auto ret = impl->Build(model_path, static_cast<mindspore::ModelType>(model_type), context); 3435+ if (impl->context_.get() != context) { 3436+ impl->context_.reset(context); 3437+ } 3438+ auto ret = impl->model_->Build(model_path, static_cast<mindspore::ModelType>(model_type), impl->context_); 3439 return static_cast<OH_AI_Status>(ret.StatusCode()); 3440 } 3441 3442 OH_AI_Status OH_AI_ModelResize(OH_AI_ModelHandle model, const OH_AI_TensorHandleArray inputs, 3443 OH_AI_ShapeInfo *shape_infos, size_t shape_info_num) { 3444 if (model == nullptr || shape_infos == nullptr) { 3445- MS_LOG(ERROR) << "param is nullptr."; 3446+ MS_LOG(ERROR) << "model/shape_infos is nullptr."; 3447 return OH_AI_STATUS_LITE_NULLPTR; 3448 } 3449- std::vector<mindspore::LiteTensorImpl *> vec_inputs; 3450- std::transform(inputs.handle_list, inputs.handle_list + inputs.handle_num, std::back_inserter(vec_inputs), 3451- [](OH_AI_TensorHandle value) { return static_cast<mindspore::LiteTensorImpl *>(value); }); 3452+ std::vector<mindspore::MSTensor> vec_inputs; 3453+ for (size_t i = 0; i < inputs.handle_num; ++i) { 3454+ vec_inputs.push_back(*static_cast<mindspore::MSTensor *>(inputs.handle_list[i])); 3455+ } 3456+ 3457 std::vector<std::vector<int64_t>> vec_dims; 
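// For reference, a caller-side sketch of the resize path handled here, assuming the OH_AI_ShapeInfo
// struct from types_c.h carries a fixed-size shape array plus shape_num; the 2x224x224x3 NHWC shape
// is only an illustrative value:
//
//   OH_AI_TensorHandleArray inputs = OH_AI_ModelGetInputs(model);
//   OH_AI_ShapeInfo shape_info;
//   shape_info.shape_num = 4;
//   shape_info.shape[0] = 2;    // new batch size
//   shape_info.shape[1] = 224;
//   shape_info.shape[2] = 224;
//   shape_info.shape[3] = 3;
//   if (OH_AI_ModelResize(model, inputs, &shape_info, 1) != OH_AI_STATUS_SUCCESS) {
//     // each dimension is forwarded to Model::Resize as int64_t; handle the error here
//   }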
3458 for (size_t i = 0; i < shape_info_num; i++) { 3459 std::vector<int64_t> shape(shape_infos[i].shape, shape_infos[i].shape + shape_infos[i].shape_num); 3460- if (std::any_of(shape.begin(), shape.end(), [](int64_t val) { return val < 0 || val > INT32_MAX; })) { 3461- MS_LOG(ERROR) << "Invalid shape: " << shape << ", each dimension must be in [0, INT32_MAX]"; 3462- return OH_AI_STATUS_LITE_PARAM_INVALID; 3463- } 3464 vec_dims.push_back(shape); 3465 } 3466 auto impl = static_cast<mindspore::ModelC *>(model); 3467- auto ret = impl->Resize(vec_inputs, vec_dims); 3468+ auto ret = impl->model_->Resize(vec_inputs, vec_dims); 3469 return static_cast<OH_AI_Status>(ret.StatusCode()); 3470 } 3471 3472@@ -414,15 +232,25 @@ OH_AI_Status OH_AI_ModelPredict(OH_AI_ModelHandle model, const OH_AI_TensorHandl 3473 OH_AI_TensorHandleArray *outputs, const OH_AI_KernelCallBack before, 3474 const OH_AI_KernelCallBack after) { 3475 if (model == nullptr) { 3476- MS_LOG(ERROR) << "param is nullptr."; 3477+ MS_LOG(ERROR) << "model is nullptr."; 3478 return OH_AI_STATUS_LITE_NULLPTR; 3479 } 3480+ std::vector<mindspore::MSTensor> ms_tensor_inputs; 3481+ for (size_t i = 0; i < inputs.handle_num; i++) { 3482+ auto user_input = static_cast<mindspore::MSTensor *>(inputs.handle_list[i]); 3483+ ms_tensor_inputs.push_back(*user_input); 3484+ } 3485+ 3486 auto impl = static_cast<mindspore::ModelC *>(model); 3487- auto ret = impl->Predict(inputs.handle_list, inputs.handle_num, &(outputs->handle_list), &(outputs->handle_num), 3488- before, after); 3489+ mindspore::MSKernelCallBack before_call_back = impl->TransCallBack(before); 3490+ mindspore::MSKernelCallBack after_call_back = impl->TransCallBack(after); 3491+ 3492+ std::vector<mindspore::MSTensor> ms_tensor_outputs; 3493+ auto ret = impl->model_->Predict(ms_tensor_inputs, &ms_tensor_outputs, before_call_back, after_call_back); 3494 if (!ret.IsOk()) { 3495 MS_LOG(ERROR) << "Predict fail, ret :" << ret; 3496 } 3497+ outputs->handle_list = reinterpret_cast<OH_AI_TensorHandle *>(impl->GetOutputs(&outputs->handle_num)); 3498 return static_cast<OH_AI_Status>(ret.StatusCode()); 3499 } 3500 3501@@ -431,11 +259,6 @@ OH_AI_Status OH_AI_ModelRunStep(OH_AI_ModelHandle model, const OH_AI_KernelCallB 3502 return OH_AI_STATUS_LITE_NOT_SUPPORT; 3503 } 3504 3505-OH_AI_Status OH_AI_ModelSetTrainMode(const OH_AI_ModelHandle model, bool train) { 3506- MS_LOG(ERROR) << "Unsupported Feature."; 3507- return OH_AI_STATUS_LITE_NOT_SUPPORT; 3508-} 3509- 3510 OH_AI_Status OH_AI_ModelExportWeight(const OH_AI_ModelHandle model, const char *export_path) { 3511 MS_LOG(ERROR) << "Unsupported Feature."; 3512 return OH_AI_STATUS_LITE_NOT_SUPPORT; 3513@@ -443,7 +266,7 @@ OH_AI_Status OH_AI_ModelExportWeight(const OH_AI_ModelHandle model, const char * 3514 3515 OH_AI_TensorHandleArray OH_AI_ModelGetInputs(const OH_AI_ModelHandle model) { 3516 if (model == nullptr) { 3517- MS_LOG(ERROR) << "param is nullptr."; 3518+ MS_LOG(ERROR) << "model is nullptr."; 3519 return {0, nullptr}; 3520 } 3521 auto impl = static_cast<mindspore::ModelC *>(model); 3522@@ -454,7 +277,7 @@ OH_AI_TensorHandleArray OH_AI_ModelGetInputs(const OH_AI_ModelHandle model) { 3523 3524 OH_AI_TensorHandleArray OH_AI_ModelGetOutputs(const OH_AI_ModelHandle model) { 3525 if (model == nullptr) { 3526- MS_LOG(ERROR) << "param is nullptr."; 3527+ MS_LOG(ERROR) << "model is nullptr."; 3528 return {0, nullptr}; 3529 } 3530 auto impl = static_cast<mindspore::ModelC *>(model); 3531@@ -465,7 +288,7 @@ OH_AI_TensorHandleArray OH_AI_ModelGetOutputs(const 
OH_AI_ModelHandle model) {
 
 OH_AI_TensorHandle OH_AI_ModelGetInputByTensorName(const OH_AI_ModelHandle model, const char *tensor_name) {
   if (model == nullptr || tensor_name == nullptr) {
-    MS_LOG(ERROR) << "param is nullptr.";
+    MS_LOG(ERROR) << "model/tensor_name is nullptr.";
     return nullptr;
   }
   auto impl = static_cast<mindspore::ModelC *>(model);
@@ -482,7 +305,7 @@ OH_AI_TensorHandle OH_AI_ModelGetInputByTensorName(const OH_AI_ModelHandle model
 
 OH_AI_TensorHandle OH_AI_ModelGetOutputByTensorName(const OH_AI_ModelHandle model, const char *tensor_name) {
   if (model == nullptr || tensor_name == nullptr) {
-    MS_LOG(ERROR) << "param is nullptr.";
+    MS_LOG(ERROR) << "model/tensor_name is nullptr.";
     return nullptr;
   }
   auto impl = static_cast<mindspore::ModelC *>(model);
@@ -496,3 +319,294 @@ OH_AI_TensorHandle OH_AI_ModelGetOutputByTensorName(const OH_AI_ModelHandle mode
   MS_LOG(ERROR) << "tensor is not exist.";
   return nullptr;
 }
+
+OH_AI_TrainCfgHandle OH_AI_TrainCfgCreate() {
+  auto impl = new (std::nothrow) mindspore::TrainCfg();
+  if (impl == nullptr) {
+    MS_LOG(ERROR) << "TrainCfg implement is nullptr.";
+    return nullptr;
+  }
+  return static_cast<OH_AI_TrainCfgHandle>(impl);
+}
+
+void OH_AI_TrainCfgDestroy(OH_AI_TrainCfgHandle *train_cfg) {
+  if (train_cfg != nullptr && *train_cfg != nullptr) {
+    auto impl = static_cast<mindspore::TrainCfg *>(*train_cfg);
+    delete impl;
+    *train_cfg = nullptr;
+  }
+}
+
+char **OH_AI_TrainCfgGetLossName(OH_AI_TrainCfgHandle train_cfg, size_t *num) {
+  if (train_cfg == nullptr || num == nullptr) {
+    MS_LOG(ERROR) << "train_cfg/num is nullptr.";
+    return nullptr;
+  }
+  auto impl = static_cast<mindspore::TrainCfg *>(train_cfg);
+  auto loss_name = impl->GetLossName();
+  *num = loss_name.size();
+  char **name = static_cast<char **>(malloc(loss_name.size() * sizeof(char *)));
+  if (name == nullptr) {
+    MS_LOG(ERROR) << "Failed to malloc loss_name.";
+    return nullptr;
+  }
+  for (size_t i = 0; i < loss_name.size(); i++) {
+    name[i] = static_cast<char *>(malloc(loss_name[i].size() + 1));
+    strcpy(name[i], loss_name[i].c_str());
+  }
+  return name;
+}
+
+void OH_AI_TrainCfgSetLossName(OH_AI_TrainCfgHandle train_cfg, const char **loss_name, size_t num) {
+  if (train_cfg == nullptr) {
+    MS_LOG(ERROR) << "train_cfg is nullptr.";
+    return;
+  }
+  auto impl = static_cast<mindspore::TrainCfg *>(train_cfg);
+  std::vector<std::string> vec_name;
+  for (size_t i = 0; i < num; i++) {
+    vec_name.push_back(loss_name[i]);
+  }
+  impl->SetLossName(vec_name);
+}
+
+OH_AI_OptimizationLevel OH_AI_TrainCfgGetOptimizationLevel(OH_AI_TrainCfgHandle train_cfg) {
+  if (train_cfg == nullptr) {
+    MS_LOG(ERROR) << "train_cfg is nullptr.";
+    return OH_AI_KO0;
+  }
+  auto impl = static_cast<mindspore::TrainCfg *>(train_cfg);
+  return static_cast<OH_AI_OptimizationLevel>(impl->optimization_level_);
+}
+
+void OH_AI_TrainCfgSetOptimizationLevel(OH_AI_TrainCfgHandle train_cfg, OH_AI_OptimizationLevel level) {
+  if (train_cfg == nullptr) {
+    MS_LOG(ERROR) << "train_cfg is nullptr.";
+    return;
+  }
+  auto impl = static_cast<mindspore::TrainCfg *>(train_cfg);
+  impl->optimization_level_ = static_cast<mindspore::OptimizationLevel>(level);
+}
+ 
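// A short end-to-end sketch of the TrainCfg helpers above combined with the train-build and
// train-step entry points defined below; "lenet_train.ms", the loss-node name, and the ctx handle
// are placeholders assumed to be set up by the caller:
//
//   OH_AI_TrainCfgHandle cfg = OH_AI_TrainCfgCreate();
//   const char *loss_names[] = {"loss_softmax_cross_entropy"};
//   OH_AI_TrainCfgSetLossName(cfg, loss_names, 1);
//   OH_AI_TrainCfgSetOptimizationLevel(cfg, OH_AI_KO0);
//
//   OH_AI_ModelHandle model = OH_AI_ModelCreate();
//   if (OH_AI_TrainModelBuildFromFile(model, "lenet_train.ms", OH_AI_MODELTYPE_MINDIR, ctx, cfg) ==
//       OH_AI_STATUS_SUCCESS) {
//     OH_AI_ModelSetTrainMode(model, true);
//     // fill the input tensors for one batch, then step the optimizer once:
//     OH_AI_RunStep(model, NULL, NULL);
//   }
//   OH_AI_TrainCfgDestroy(&cfg);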
3622+OH_AI_Status OH_AI_TrainModelBuild(OH_AI_ModelHandle model, const void *model_data, size_t data_size, 3623+ OH_AI_ModelType model_type, const OH_AI_ContextHandle model_context, 3624+ const OH_AI_TrainCfgHandle train_cfg) { 3625+ if (model == nullptr || model_data == nullptr || model_context == nullptr) { 3626+ MS_LOG(ERROR) << "model/model_data/model_context is nullptr."; 3627+ return OH_AI_STATUS_LITE_NULLPTR; 3628+ } 3629+ if (model_type == OH_AI_MODELTYPE_INVALID) { 3630+ MS_LOG(ERROR) << "model_type is invalid."; 3631+ return OH_AI_STATUS_LITE_PARAM_INVALID; 3632+ } 3633+ auto impl = static_cast<mindspore::ModelC *>(model); 3634+ 3635+ mindspore::Graph graph; 3636+ auto status = mindspore::Serialization::Load(model_data, data_size, static_cast<mindspore::ModelType>(model_type), &graph); 3637+ if (status != mindspore::kSuccess) { 3638+ MS_LOG(ERROR) << "load ms file failed."; 3639+ return OH_AI_STATUS_LITE_ERROR; 3640+ } 3641+ auto context = static_cast<mindspore::Context *>(model_context); 3642+ auto build_train_cfg = static_cast<mindspore::TrainCfg *>(train_cfg); 3643+ if (impl->context_.get() != context) { 3644+ impl->context_.reset(context); 3645+ } 3646+ auto ret = impl->model_->Build(static_cast<mindspore::GraphCell>(graph), impl->context_, 3647+ std::shared_ptr<mindspore::TrainCfg>(build_train_cfg)); 3648+ if (ret != mindspore::kSuccess) { 3649+ MS_LOG(ERROR) << "Load and compile failed"; 3650+ } 3651+ return static_cast<OH_AI_Status>(ret.StatusCode()); 3652+} 3653+ 3654+OH_AI_Status OH_AI_TrainModelBuildFromFile(OH_AI_ModelHandle model, const char *model_path, 3655+ OH_AI_ModelType model_type, const OH_AI_ContextHandle model_context, 3656+ const OH_AI_TrainCfgHandle train_cfg) { 3657+ if (model == nullptr || model_path == nullptr || model_context == nullptr) { 3658+ MS_LOG(ERROR) << "model/model_path/model_context is nullptr."; 3659+ return OH_AI_STATUS_LITE_NULLPTR; 3660+ } 3661+ if (model_type == OH_AI_MODELTYPE_INVALID) { 3662+ MS_LOG(ERROR) << "model_type is invalid."; 3663+ return OH_AI_STATUS_LITE_PARAM_INVALID; 3664+ } 3665+ auto impl = static_cast<mindspore::ModelC *>(model); 3666+ 3667+ mindspore::Graph graph; 3668+ auto status = mindspore::Serialization::Load(model_path, static_cast<mindspore::ModelType>(model_type), &graph); 3669+ if (status != mindspore::kSuccess) { 3670+ MS_LOG(ERROR) << "load ms file failed. 
" << model_path; 3671+ return OH_AI_STATUS_LITE_ERROR; 3672+ } 3673+ auto context = static_cast<mindspore::Context *>(model_context); 3674+ auto build_train_cfg = static_cast<mindspore::TrainCfg *>(train_cfg); 3675+ if (impl->context_.get() != context) { 3676+ impl->context_.reset(context); 3677+ } 3678+ auto ret = impl->model_->Build(static_cast<mindspore::GraphCell>(graph), impl->context_, 3679+ std::shared_ptr<mindspore::TrainCfg>(build_train_cfg)); 3680+ if (ret != mindspore::kSuccess) { 3681+ MS_LOG(ERROR) << "Load and compile failed"; 3682+ } 3683+ return static_cast<OH_AI_Status>(ret.StatusCode()); 3684+} 3685+ 3686+OH_AI_Status OH_AI_ModelSetLearningRate(OH_AI_ModelHandle model, float learning_rate) { 3687+ if (model == nullptr) { 3688+ MS_LOG(ERROR) << "model is nullptr."; 3689+ return OH_AI_STATUS_LITE_PARAM_INVALID; 3690+ } 3691+ auto impl = static_cast<mindspore::ModelC *>(model); 3692+ auto ret = impl->model_->SetLearningRate(learning_rate); 3693+ return static_cast<OH_AI_Status>(ret.StatusCode()); 3694+} 3695+ 3696+float OH_AI_ModelGetLearningRate(OH_AI_ModelHandle model) { 3697+ if (model == nullptr) { 3698+ MS_LOG(ERROR) << "model is nullptr."; 3699+ return OH_AI_STATUS_LITE_PARAM_INVALID; 3700+ } 3701+ auto impl = static_cast<mindspore::ModelC *>(model); 3702+ return impl->model_->GetLearningRate(); 3703+} 3704+ 3705+OH_AI_Status OH_AI_RunStep(OH_AI_ModelHandle model, const OH_AI_KernelCallBack before, const OH_AI_KernelCallBack after) { 3706+ if (model == nullptr) { 3707+ MS_LOG(ERROR) << "model is nullptr."; 3708+ return OH_AI_STATUS_LITE_PARAM_INVALID; 3709+ } 3710+ auto impl = static_cast<mindspore::ModelC *>(model); 3711+ auto ret = impl->model_->RunStep(impl->TransCallBack(before), impl->TransCallBack(after)); 3712+ return static_cast<OH_AI_Status>(ret.StatusCode()); 3713+} 3714+ 3715+OH_AI_TensorHandleArray OH_AI_ModelGetWeights(OH_AI_ModelHandle model) { 3716+ if (model == nullptr) { 3717+ MS_LOG(ERROR) << "model is nullptr."; 3718+ return {0, nullptr}; 3719+ } 3720+ auto impl = static_cast<mindspore::ModelC *>(model); 3721+ auto features = impl->model_->GetFeatureMaps(); 3722+ size_t handle_num = features.size(); 3723+ 3724+ mindspore::MSTensor **handle_list = static_cast<mindspore::MSTensor **>(malloc( 3725+ handle_num * sizeof(mindspore::MSTensor *))); 3726+ if (handle_list == nullptr) { 3727+ MS_LOG(ERROR) << "Failed to malloc handle_list."; 3728+ return {0, nullptr}; 3729+ } 3730+ for (size_t i = 0; i < handle_num; i++) { 3731+ handle_list[i] = new mindspore::MSTensor(features[i].impl()); 3732+ } 3733+ return {handle_num, reinterpret_cast<OH_AI_TensorHandle *>(handle_list)}; 3734+} 3735+ 3736+OH_AI_Status OH_AI_ModelUpdateWeights(OH_AI_ModelHandle model, const OH_AI_TensorHandleArray new_weights) { 3737+ if (model == nullptr) { 3738+ MS_LOG(ERROR) << "model is nullptr."; 3739+ return OH_AI_STATUS_LITE_PARAM_INVALID; 3740+ } 3741+ auto impl = static_cast<mindspore::ModelC *>(model); 3742+ std::vector<mindspore::MSTensor> weights; 3743+ for (size_t i = 0; i < new_weights.handle_num; i++) { 3744+ weights.push_back(*static_cast<mindspore::MSTensor *>(new_weights.handle_list[i])); 3745+ } 3746+ auto ret = impl->model_->UpdateWeights(weights); 3747+ return static_cast<OH_AI_Status>(ret.StatusCode()); 3748+} 3749+ 3750+bool OH_AI_ModelGetTrainMode(OH_AI_ModelHandle model) { 3751+ if (model == nullptr) { 3752+ MS_LOG(ERROR) << "model is nullptr."; 3753+ return false; 3754+ } 3755+ auto impl = static_cast<mindspore::ModelC *>(model); 3756+ return 
impl->model_->GetTrainMode(); 3757+} 3758+ 3759+OH_AI_Status OH_AI_ModelSetTrainMode(OH_AI_ModelHandle model, bool train) { 3760+ if (model == nullptr) { 3761+ MS_LOG(ERROR) << "model is nullptr."; 3762+ return OH_AI_STATUS_LITE_PARAM_INVALID; 3763+ } 3764+ auto impl = static_cast<mindspore::ModelC *>(model); 3765+ auto ret = impl->model_->SetTrainMode(train); 3766+ return static_cast<OH_AI_Status>(ret.StatusCode()); 3767+} 3768+ 3769+OH_AI_Status OH_AI_ModelSetupVirtualBatch(OH_AI_ModelHandle model, int virtual_batch_multiplier, float lr, float momentum) { 3770+ if (model == nullptr) { 3771+ MS_LOG(ERROR) << "model is nullptr."; 3772+ return OH_AI_STATUS_LITE_PARAM_INVALID; 3773+ } 3774+ auto impl = static_cast<mindspore::ModelC *>(model); 3775+ auto ret = impl->model_->SetupVirtualBatch(virtual_batch_multiplier, lr, momentum); 3776+ return static_cast<OH_AI_Status>(ret.StatusCode()); 3777+} 3778+ 3779+OH_AI_Status OH_AI_ExportModel(OH_AI_ModelHandle model, OH_AI_ModelType model_type, const char *model_file, 3780+ OH_AI_QuantizationType quantization_type, bool export_inference_only, 3781+ char **output_tensor_name, size_t num) { 3782+ if (model == nullptr) { 3783+ MS_LOG(ERROR) << "model is nullptr."; 3784+ return OH_AI_STATUS_LITE_PARAM_INVALID; 3785+ } 3786+ auto impl = static_cast<mindspore::ModelC *>(model); 3787+ std::vector<std::string> tensor_name; 3788+ for (size_t i = 0; i < num; i++) { 3789+ tensor_name.push_back(output_tensor_name[i]); 3790+ } 3791+ auto ret = mindspore::Serialization::ExportModel(*(impl->model_.get()), static_cast<mindspore::ModelType>(model_type), 3792+ model_file, 3793+ static_cast<mindspore::QuantizationType>(quantization_type), 3794+ export_inference_only, tensor_name); 3795+ if (!ret.IsOk()) { 3796+ MS_LOG(ERROR) << "export model fail, ret :" << ret; 3797+ } 3798+ return static_cast<OH_AI_Status>(ret.StatusCode()); 3799+} 3800+ 3801+OH_AI_Status OH_AI_ExportModelBuffer(OH_AI_ModelHandle model, OH_AI_ModelType model_type, char **model_data, 3802+ size_t *data_size, OH_AI_QuantizationType quantization_type, 3803+ bool export_inference_only, char **output_tensor_name, size_t num) { 3804+ if (model == nullptr) { 3805+ MS_LOG(ERROR) << "model is nullptr."; 3806+ return OH_AI_STATUS_LITE_PARAM_INVALID; 3807+ } 3808+ auto impl = static_cast<mindspore::ModelC *>(model); 3809+ std::vector<std::string> tensor_name; 3810+ for (size_t i = 0; i < num; i++) { 3811+ tensor_name.push_back(output_tensor_name[i]); 3812+ } 3813+ mindspore::Buffer buffer; 3814+ auto ret = mindspore::Serialization::ExportModel(*(impl->model_.get()), static_cast<mindspore::ModelType>(model_type), 3815+ &buffer, static_cast<mindspore::QuantizationType>(quantization_type), 3816+ export_inference_only, tensor_name); 3817+ auto data = static_cast<char *>(buffer.MutableData()); 3818+ *model_data = (char *) malloc(buffer.DataSize()); 3819+ *data_size = buffer.DataSize(); 3820+ memcpy(*model_data, data, buffer.DataSize()); 3821+ if (!ret.IsOk()) { 3822+ MS_LOG(ERROR) << "export model fail, ret :" << ret; 3823+ } 3824+ return static_cast<OH_AI_Status>(ret.StatusCode()); 3825+} 3826+ 3827+OH_AI_Status OH_AI_ExportWeightsCollaborateWithMicro(OH_AI_ModelHandle model, OH_AI_ModelType model_type, const char *weight_file, 3828+ bool is_inference, bool enable_fp16, char **changeable_weights_name, size_t num) { 3829+ if (model == nullptr) { 3830+ MS_LOG(ERROR) << "model is nullptr."; 3831+ return OH_AI_STATUS_LITE_PARAM_INVALID; 3832+ } 3833+ auto impl = static_cast<mindspore::ModelC *>(model); 3834+ 
std::vector<std::string> weights_name; 3835+ for (size_t i = 0; i < num; i++) { 3836+ weights_name.push_back(changeable_weights_name[i]); 3837+ } 3838+ auto ret = mindspore::Serialization::ExportWeightsCollaborateWithMicro(*(impl->model_.get()), static_cast<mindspore::ModelType>(model_type), weight_file, is_inference, enable_fp16, weights_name); 3839+ if (!ret.IsOk()) { 3840+ MS_LOG(ERROR) << "export model fail, ret :" << ret; 3841+ } 3842+ return static_cast<OH_AI_Status>(ret.StatusCode()); 3843+} 3844diff --git a/mindspore/lite/src/litert/c_api/tensor_c.cc b/mindspore/lite/src/litert/c_api/tensor_c.cc 3845index 7b5c4c2f..4b1e6aff 100644 3846--- a/mindspore/lite/src/litert/c_api/tensor_c.cc 3847+++ b/mindspore/lite/src/litert/c_api/tensor_c.cc 3848@@ -17,7 +17,6 @@ 3849 #include "include/api/status.h" 3850 #include "src/tensor.h" 3851 #include "src/litert/cxx_api/tensor/tensor_impl.h" 3852-#include "src/litert/inner_allocator.h" 3853 3854 OH_AI_TensorHandle OH_AI_TensorCreate(const char *name, OH_AI_DataType type, const int64_t *shape, size_t shape_num, 3855 const void *data, size_t data_len) { 3856@@ -31,18 +30,23 @@ OH_AI_TensorHandle OH_AI_TensorCreate(const char *name, OH_AI_DataType type, con 3857 } 3858 auto lite_tensor = 3859 mindspore::lite::Tensor::CreateTensor(name, static_cast<mindspore::TypeId>(type), vec_shape, data, data_len); 3860- auto impl = new (std::nothrow) mindspore::LiteTensorImpl(lite_tensor); 3861- if (impl == nullptr || impl->lite_tensor() == nullptr) { 3862+ auto lite_tensor_impl = std::make_shared<mindspore::LiteTensorImpl>(lite_tensor); 3863+ if (lite_tensor_impl == nullptr || lite_tensor_impl->lite_tensor() == nullptr) { 3864 MS_LOG(ERROR) << "Failed to allocate tensor impl."; 3865 return nullptr; 3866 } 3867- impl->set_from_session(false); 3868+ lite_tensor_impl->set_from_session(false); 3869+ auto impl = new (std::nothrow) mindspore::MSTensor(lite_tensor_impl); 3870+ if (impl == nullptr) { 3871+ MS_LOG(ERROR) << "Failed to allocate MSTensor."; 3872+ return nullptr; 3873+ } 3874 return impl; 3875 } 3876 3877 void OH_AI_TensorDestroy(OH_AI_TensorHandle *tensor) { 3878 if (tensor != nullptr && *tensor != nullptr) { 3879- auto impl = static_cast<mindspore::LiteTensorImpl *>(*tensor); 3880+ auto impl = static_cast<mindspore::MSTensor *>(*tensor); 3881 delete impl; 3882 *tensor = nullptr; 3883 } 3884@@ -53,20 +57,14 @@ OH_AI_TensorHandle OH_AI_TensorClone(OH_AI_TensorHandle tensor) { 3885 MS_LOG(ERROR) << "param is nullptr."; 3886 return nullptr; 3887 } 3888- auto impl = static_cast<mindspore::LiteTensorImpl *>(tensor); 3889- auto lite_tensor = static_cast<mindspore::lite::Tensor *>(impl->lite_tensor()); 3890- auto clone = mindspore::lite::Tensor::CopyTensor(*lite_tensor, true, lite_tensor->allocator()); 3891- if (clone == nullptr) { 3892- MS_LOG(ERROR) << "Failed to allocate tensor."; 3893- return nullptr; 3894- } 3895- auto clone_impl = new (std::nothrow) mindspore::LiteTensorImpl(clone); 3896+ auto impl = static_cast<mindspore::MSTensor *>(tensor); 3897+ auto clone_impl = impl->Clone(); 3898 if (clone_impl == nullptr) { 3899- delete clone; 3900 MS_LOG(ERROR) << "Failed to allocate tensor impl."; 3901 return nullptr; 3902 } 3903- clone_impl->set_from_session(false); 3904+ std::static_pointer_cast<mindspore::LiteTensorImpl>(clone_impl->impl())->set_own_data(false); 3905+ clone_impl->SetTensorName(impl->Name() + "_duplicate"); 3906 return clone_impl; 3907 } 3908 3909@@ -75,8 +73,8 @@ void OH_AI_TensorSetName(OH_AI_TensorHandle tensor, const char *name) { 3910 
MS_LOG(ERROR) << "param is nullptr."; 3911 return; 3912 } 3913- auto impl = static_cast<mindspore::LiteTensorImpl *>(tensor); 3914- impl->SetName(name); 3915+ auto impl = static_cast<mindspore::MSTensor *>(tensor); 3916+ impl->SetTensorName(name); 3917 } 3918 3919 const char *OH_AI_TensorGetName(const OH_AI_TensorHandle tensor) { 3920@@ -84,8 +82,8 @@ const char *OH_AI_TensorGetName(const OH_AI_TensorHandle tensor) { 3921 MS_LOG(ERROR) << "param is nullptr."; 3922 return nullptr; 3923 } 3924- auto impl = static_cast<mindspore::LiteTensorImpl *>(tensor); 3925- return impl->Name().c_str(); 3926+ auto ms_tensor = static_cast<mindspore::MSTensor *>(tensor); 3927+ return std::static_pointer_cast<mindspore::LiteTensorImpl>(ms_tensor->impl())->Name().c_str(); 3928 } 3929 3930 void OH_AI_TensorSetDataType(OH_AI_TensorHandle tensor, OH_AI_DataType type) { 3931@@ -93,7 +91,7 @@ void OH_AI_TensorSetDataType(OH_AI_TensorHandle tensor, OH_AI_DataType type) { 3932 MS_LOG(ERROR) << "param is nullptr."; 3933 return; 3934 } 3935- auto impl = static_cast<mindspore::LiteTensorImpl *>(tensor); 3936+ auto impl = static_cast<mindspore::MSTensor *>(tensor); 3937 impl->SetDataType(static_cast<mindspore::DataType>(type)); 3938 } 3939 3940@@ -102,7 +100,7 @@ OH_AI_DataType OH_AI_TensorGetDataType(const OH_AI_TensorHandle tensor) { 3941 MS_LOG(ERROR) << "param is nullptr."; 3942 return OH_AI_DATATYPE_UNKNOWN; 3943 } 3944- auto impl = static_cast<mindspore::LiteTensorImpl *>(tensor); 3945+ auto impl = static_cast<mindspore::MSTensor *>(tensor); 3946 auto dtype = impl->DataType(); 3947 return static_cast<OH_AI_DataType>(dtype); 3948 } 3949@@ -112,7 +110,7 @@ void OH_AI_TensorSetShape(OH_AI_TensorHandle tensor, const int64_t *shape, size_ 3950 MS_LOG(ERROR) << "param is nullptr."; 3951 return; 3952 } 3953- auto impl = static_cast<mindspore::LiteTensorImpl *>(tensor); 3954+ auto impl = static_cast<mindspore::MSTensor *>(tensor); 3955 std::vector<int64_t> vec_shape(shape_num); 3956 for (size_t i = 0; i < shape_num; i++) { 3957 vec_shape[i] = shape[i]; 3958@@ -125,7 +123,7 @@ const int64_t *OH_AI_TensorGetShape(const OH_AI_TensorHandle tensor, size_t *sha 3959 MS_LOG(ERROR) << "param is nullptr."; 3960 return nullptr; 3961 } 3962- auto impl = static_cast<mindspore::LiteTensorImpl *>(tensor); 3963+ auto impl = static_cast<mindspore::MSTensor *>(tensor); 3964 *shape_num = impl->Shape().size(); 3965 return impl->Shape().data(); 3966 } 3967@@ -135,7 +133,7 @@ void OH_AI_TensorSetFormat(OH_AI_TensorHandle tensor, OH_AI_Format format) { 3968 MS_LOG(ERROR) << "param is nullptr."; 3969 return; 3970 } 3971- auto impl = static_cast<mindspore::LiteTensorImpl *>(tensor); 3972+ auto impl = static_cast<mindspore::MSTensor *>(tensor); 3973 return impl->SetFormat(static_cast<mindspore::Format>(format)); 3974 } 3975 3976@@ -144,8 +142,8 @@ OH_AI_Format OH_AI_TensorGetFormat(const OH_AI_TensorHandle tensor) { 3977 MS_LOG(ERROR) << "param is nullptr."; 3978 return OH_AI_FORMAT_NHWC; 3979 } 3980- auto impl = static_cast<mindspore::LiteTensorImpl *>(tensor); 3981- return static_cast<OH_AI_Format>(impl->Format()); 3982+ auto impl = static_cast<mindspore::MSTensor *>(tensor); 3983+ return static_cast<OH_AI_Format>(impl->format()); 3984 } 3985 3986 void OH_AI_TensorSetData(OH_AI_TensorHandle tensor, void *data) { 3987@@ -153,16 +151,34 @@ void OH_AI_TensorSetData(OH_AI_TensorHandle tensor, void *data) { 3988 MS_LOG(ERROR) << "param is nullptr."; 3989 return; 3990 } 3991- auto impl = static_cast<mindspore::LiteTensorImpl *>(tensor); 3992+ auto 
impl = static_cast<mindspore::MSTensor *>(tensor); 3993 return impl->SetData(data, true); 3994 } 3995 3996+OH_AI_Status OH_AI_TensorSetUserData(OH_AI_TensorHandle tensor, void *data, size_t data_size) { 3997+ if (tensor == nullptr) { 3998+ MS_LOG(ERROR) << "param is nullptr."; 3999+ return OH_AI_STATUS_LITE_NULLPTR; 4000+ } 4001+ 4002+ auto impl = static_cast<mindspore::MSTensor *>(tensor); 4003+ if ((impl->DataSize() > 0) && (data_size != impl->DataSize())) { 4004+ MS_LOG(ERROR) << "input data size does not match inner data size"; 4005+ return OH_AI_STATUS_LITE_PARAM_INVALID; 4006+ } 4007+ 4008+ // This is one tricky way to represent that the inner data is not owned by tensor itself. 4009+ impl->SetAllocator(nullptr); 4010+ impl->SetData(data, false); 4011+ return OH_AI_STATUS_SUCCESS; 4012+} 4013+ 4014 const void *OH_AI_TensorGetData(const OH_AI_TensorHandle tensor) { 4015 if (tensor == nullptr) { 4016 MS_LOG(ERROR) << "param is nullptr."; 4017 return nullptr; 4018 } 4019- auto impl = static_cast<mindspore::LiteTensorImpl *>(tensor); 4020+ auto impl = static_cast<mindspore::MSTensor *>(tensor); 4021 return impl->Data().get(); 4022 } 4023 4024@@ -171,7 +187,7 @@ void *OH_AI_TensorGetMutableData(const OH_AI_TensorHandle tensor) { 4025 MS_LOG(ERROR) << "param is nullptr."; 4026 return nullptr; 4027 } 4028- auto impl = static_cast<mindspore::LiteTensorImpl *>(tensor); 4029+ auto impl = static_cast<mindspore::MSTensor *>(tensor); 4030 return impl->MutableData(); 4031 } 4032 4033@@ -180,7 +196,7 @@ int64_t OH_AI_TensorGetElementNum(const OH_AI_TensorHandle tensor) { 4034 MS_LOG(ERROR) << "param is nullptr."; 4035 return 0; 4036 } 4037- auto impl = static_cast<mindspore::LiteTensorImpl *>(tensor); 4038+ auto impl = static_cast<mindspore::MSTensor *>(tensor); 4039 return impl->ElementNum(); 4040 } 4041 4042@@ -189,6 +205,6 @@ size_t OH_AI_TensorGetDataSize(const OH_AI_TensorHandle tensor) { 4043 MS_LOG(ERROR) << "param is nullptr."; 4044 return 0; 4045 } 4046- auto impl = static_cast<mindspore::LiteTensorImpl *>(tensor); 4047+ auto impl = static_cast<mindspore::MSTensor *>(tensor); 4048 return impl->DataSize(); 4049 } 4050diff --git a/mindspore/lite/src/litert/c_api/type_c_private.h b/mindspore/lite/src/litert/c_api/type_c_private.h 4051new file mode 100644 4052index 00000000..2d3b3883 4053--- /dev/null 4054+++ b/mindspore/lite/src/litert/c_api/type_c_private.h 4055@@ -0,0 +1,40 @@ 4056+/** 4057+ * Copyright 2023 Huawei Technologies Co., Ltd 4058+ * 4059+ * Licensed under the Apache License, Version 2.0 (the "License"); 4060+ * you may not use this file except in compliance with the License. 4061+ * You may obtain a copy of the License at 4062+ * 4063+ * http://www.apache.org/licenses/LICENSE-2.0 4064+ * 4065+ * Unless required by applicable law or agreed to in writing, software 4066+ * distributed under the License is distributed on an "AS IS" BASIS, 4067+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 4068+ * See the License for the specific language governing permissions and 4069+ * limitations under the License. 
4070+ */ 4071+#ifndef MINDSPORE_LITE_SRC_LITERT_C_API_TYPE_C_PRIVATE_H_ 4072+#define MINDSPORE_LITE_SRC_LITERT_C_API_TYPE_C_PRIVATE_H_ 4073+ 4074+#include <string> 4075+#include <vector> 4076+#include <memory> 4077+#include <stddef.h> 4078+#include "include/c_api/types_c.h" 4079+ 4080+#ifdef __cplusplus 4081+extern "C" { 4082+#endif 4083+ 4084+#define NNRT_DEVICE_NAME_MAX (128) 4085+ 4086+struct NNRTDeviceDesc { 4087+ size_t device_id; 4088+ OH_AI_NNRTDeviceType device_type; 4089+ char device_name[NNRT_DEVICE_NAME_MAX]; 4090+}; 4091+ 4092+#ifdef __cplusplus 4093+} 4094+#endif 4095+#endif // MINDSPORE_LITE_SRC_LITERT_C_API_TYPE_C_PRIVATE_H_ 4096diff --git a/mindspore/lite/src/litert/cxx_api/context.cc b/mindspore/lite/src/litert/cxx_api/context.cc 4097index 1371bcf0..e5f19d28 100644 4098--- a/mindspore/lite/src/litert/cxx_api/context.cc 4099+++ b/mindspore/lite/src/litert/cxx_api/context.cc 4100@@ -50,6 +50,11 @@ constexpr auto kModelOptionAscendDynamicBatchSize = "mindspore.option.ascend.dyn 4101 constexpr auto kModelOptionAscendDynamicImageSize = "mindspore.option.ascend.dynamic_image_size"; 4102 constexpr auto kModelOptionAscendBufferOptimize = "mindspore.option.ascend.buffer_optimize"; 4103 constexpr auto kModelOptionAscendRankID = "mindspore.option.ascend.rank_id"; 4104+constexpr auto kModelOptionNNRTDeviceID = "mindspore.option.nnrt.device_id"; 4105+constexpr auto kModelOptionNNRTPerformanceMode = "mindspore.option.nnrt.performance_mode"; 4106+constexpr auto kModelOptionNNRTPriority = "mindspore.option.nnrt.priority"; 4107+constexpr auto kModelOptionNNRTEnableFP16 = "mindspore.option.nnrt.enable_fp16"; 4108+constexpr auto kModelOptionNNRTExtensions = "mindspore.option.nnrt.extensions"; 4109 #ifdef USE_GLOG 4110 extern "C" { 4111 extern void mindspore_log_init(); 4112@@ -684,4 +689,84 @@ std::vector<char> AscendDeviceInfo::GetBufferOptimizeModeChar() const { 4113 const std::string &ref = GetValue<std::string>(data_, kModelOptionAscendBufferOptimize); 4114 return StringToChar(ref); 4115 } 4116+ 4117+void NNRTDeviceInfo::SetDeviceID(size_t device_id) { 4118+ if (data_ == nullptr) { 4119+ MS_LOG(ERROR) << "Invalid context."; 4120+ return; 4121+ } 4122+ data_->params[kModelOptionNNRTDeviceID] = device_id; 4123+} 4124+ 4125+size_t NNRTDeviceInfo::GetDeviceID() const { 4126+ if (data_ == nullptr) { 4127+ MS_LOG(ERROR) << "Invalid context."; 4128+ return 0; 4129+ } 4130+ return GetValue<size_t>(data_, kModelOptionNNRTDeviceID); 4131+} 4132+ 4133+void NNRTDeviceInfo::SetPerformanceMode(int performance_mode) { 4134+ if (data_ == nullptr) { 4135+ MS_LOG(ERROR) << "Invalid context."; 4136+ return; 4137+ } 4138+ data_->params[kModelOptionNNRTPerformanceMode] = performance_mode; 4139+} 4140+ 4141+int NNRTDeviceInfo::GetPerformanceMode() const { 4142+ if (data_ == nullptr) { 4143+ MS_LOG(ERROR) << "Invalid context."; 4144+ return 0; 4145+ } 4146+ return GetValue<int>(data_, kModelOptionNNRTPerformanceMode); 4147+} 4148+ 4149+void NNRTDeviceInfo::SetPriority(int priority) { 4150+ if (data_ == nullptr) { 4151+ MS_LOG(ERROR) << "Invalid context."; 4152+ return; 4153+ } 4154+ data_->params[kModelOptionNNRTPriority] = priority; 4155+} 4156+ 4157+int NNRTDeviceInfo::GetPriority() const { 4158+ if (data_ == nullptr) { 4159+ MS_LOG(ERROR) << "Invalid context."; 4160+ return 0; 4161+ } 4162+ return GetValue<int>(data_, kModelOptionNNRTPriority); 4163+} 4164+ 4165+void NNRTDeviceInfo::SetEnableFP16(bool is_fp16) { 4166+ if (data_ == nullptr) { 4167+ MS_LOG(ERROR) << "Invalid context."; 4168+ return; 4169+ } 
4170+ data_->params[kModelOptionNNRTEnableFP16] = is_fp16; 4171+} 4172+ 4173+bool NNRTDeviceInfo::GetEnableFP16() const { 4174+ if (data_ == nullptr) { 4175+ MS_LOG(ERROR) << "Invalid context."; 4176+ return false; 4177+ } 4178+ return GetValue<bool>(data_, kModelOptionNNRTEnableFP16); 4179+} 4180+ 4181+void NNRTDeviceInfo::SetExtensions(const std::vector<Extension> &extensions) { 4182+ if (data_ == nullptr) { 4183+ MS_LOG(ERROR) << "Invalid context."; 4184+ return; 4185+ } 4186+ data_->params[kModelOptionNNRTExtensions] = extensions; 4187+} 4188+ 4189+std::vector<Extension> NNRTDeviceInfo::GetExtensions() const { 4190+ if (data_ == nullptr) { 4191+ MS_LOG(ERROR) << "Invalid context."; 4192+ return {}; 4193+ } 4194+ return GetValue<std::vector<Extension>>(data_, kModelOptionNNRTExtensions); 4195+} 4196 } // namespace mindspore 4197diff --git a/mindspore/lite/src/litert/cxx_api/converters.cc b/mindspore/lite/src/litert/cxx_api/converters.cc 4198index 0ff345cc..e54a36ee 100644 4199--- a/mindspore/lite/src/litert/cxx_api/converters.cc 4200+++ b/mindspore/lite/src/litert/cxx_api/converters.cc 4201@@ -86,6 +86,23 @@ Status ContextUtils::AddCustomDevice(lite::InnerContext *inner_context, 4202 return kSuccess; 4203 } 4204 4205+Status ContextUtils::AddNNRtDevice(lite::InnerContext *inner_context, size_t device_id, int performance_mode, 4206+ int priority, bool enable_fp16, const std::vector<Extension> &extensions) { 4207+ lite::DeviceInfo device_info = {0}; 4208+ device_info.nnrt_device_info_.device_id_ = device_id; 4209+ device_info.nnrt_device_info_.performance_mode_ = performance_mode; 4210+ device_info.nnrt_device_info_.priority_ = priority; 4211+ device_info.nnrt_device_info_.enable_fp16_ = enable_fp16; 4212+ for (auto src_extension: extensions) { 4213+ lite::Extension dest_extension; 4214+ dest_extension.name = src_extension.name; 4215+ dest_extension.value = src_extension.value; 4216+ device_info.nnrt_device_info_.extensions_.push_back(dest_extension); 4217+ } 4218+ inner_context->device_list_.push_back({lite::DT_NNRT, device_info}); 4219+ return kSuccess; 4220+} 4221+ 4222 void ContextUtils::ResetContextDefaultParam(Context *context) { 4223 if (context->GetInterOpParallelNum() == 0) { 4224 context->SetInterOpParallelNum(kDefaultInterOpParallelNum); 4225@@ -163,44 +180,11 @@ std::shared_ptr<lite::InnerContext> ContextUtils::Convert(Context *context) { 4226 ret = AddAscendDevice(inner_context.get(), device.get()); 4227 } else if (device->GetDeviceType() == kCustomDevice) { 4228 ret = AddCustomDevice(inner_context.get(), device); 4229- } 4230- if (ret != kSuccess) { 4231- MS_LOG(ERROR) << "Add device failed!"; 4232- return nullptr; 4233- } 4234- } 4235- return inner_context; 4236-} 4237- 4238-std::shared_ptr<lite::InnerContext> ContextUtils::Convert(const ContextC *context_c) { 4239- auto inner_context = std::make_shared<lite::InnerContext>(); 4240- if ((context_c == nullptr) || (inner_context == nullptr)) { 4241- MS_LOG(ERROR) << "Invalid context pointers."; 4242- return nullptr; 4243- } 4244- auto device_list = context_c->device_info_list; 4245- if (device_list.size() == 0 || device_list.size() > kMaxNumOfDevices) { 4246- MS_LOG(ERROR) << "Device num, support min: 1, max: " << kMaxNumOfDevices; 4247- return nullptr; 4248- } 4249- SetContextAttr(context_c->thread_num, 1, context_c->enable_parallel, context_c->affinity_core_list, 4250- context_c->delegate_mode, context_c->delegate, inner_context.get()); 4251- inner_context->device_list_.clear(); 4252- Status ret = kLiteError; 4253- for (auto 
&device_info_c : device_list) {
-    MS_CHECK_TRUE_RET(device_info_c != nullptr, nullptr);
-    lite::DeviceInfo device_info = {{0}};
-    if (device_info_c->device_type == OH_AI_DEVICETYPE_CPU) {
-      if (device_info_c->allocator == nullptr) {
-        device_info_c->allocator = Allocator::Create();
-      }
-      ret = AddCpuDevice(device_info_c->allocator, context_c->affinity_mode, device_info_c->enable_fp16,
-                         device_info_c->provider, device_info_c->provider_device, inner_context.get());
-    } else if (device_info_c->device_type == OH_AI_DEVICETYPE_GPU) {
-      ret = AddGpuDevice(device_info_c->enable_fp16, 0, 0, 0, false, nullptr, nullptr, device_info_c->provider,
-                         device_info_c->provider_device, device_info_c->allocator, inner_context.get());
-    } else if (device_info_c->device_type == OH_AI_DEVICETYPE_KIRIN_NPU) {
-      ret = AddNpuDevice(device_info_c->enable_fp16, device_info_c->frequency, inner_context.get());
+    } else if (device->GetDeviceType() == kNNRt) {
+      auto nnrt_device_info = device->Cast<NNRTDeviceInfo>();
+      ret = AddNNRtDevice(inner_context.get(), nnrt_device_info->GetDeviceID(),
+                          nnrt_device_info->GetPerformanceMode(), nnrt_device_info->GetPriority(),
+                          nnrt_device_info->GetEnableFP16(), nnrt_device_info->GetExtensions());
     }
     if (ret != kSuccess) {
       MS_LOG(ERROR) << "Add device failed!";
diff --git a/mindspore/lite/src/litert/cxx_api/converters.h b/mindspore/lite/src/litert/cxx_api/converters.h
index 0c043fc3..1af7c7df 100644
--- a/mindspore/lite/src/litert/cxx_api/converters.h
+++ b/mindspore/lite/src/litert/cxx_api/converters.h
@@ -24,14 +24,12 @@
 #include "include/api/cfg.h"
 #include "include/train/train_cfg.h"
 #include "src/litert/inner_context.h"
-#include "src/litert/c_api/context_c.h"
 #include "src/common/log_adapter.h"
 
 namespace mindspore {
 class MS_API ContextUtils {
  public:
   static std::shared_ptr<lite::InnerContext> Convert(Context *context);
-  static std::shared_ptr<lite::InnerContext> Convert(const ContextC *context_c);
 
  private:
   static void SetContextAttr(int32_t thread_num, int32_t inter_op_parallel_num, bool enable_parallel,
@@ -48,6 +46,8 @@ class MS_API ContextUtils {
   static Status AddNpuDevice(bool enable_fp16, int frequency, lite::InnerContext *inner_context);
   static Status AddAscendDevice(lite::InnerContext *inner_context, DeviceInfoContext *device);
   static Status AddCustomDevice(lite::InnerContext *inner_context, const std::shared_ptr<DeviceInfoContext> &device);
+  static Status AddNNRtDevice(lite::InnerContext *inner_context, size_t device_id, int performance_mode, int priority,
+                              bool enable_fp16, const std::vector<Extension> &extensions);
   static bool IsAffinityModeValid(int affinity_mode) {
     return affinity_mode >= lite::NO_BIND && affinity_mode <= lite::MID_CPU;
   }
diff --git a/mindspore/lite/src/litert/delegate/nnrt/CMakeLists.txt b/mindspore/lite/src/litert/delegate/nnrt/CMakeLists.txt
index 70aa63f3..625459e2 100644
--- a/mindspore/lite/src/litert/delegate/nnrt/CMakeLists.txt
+++ b/mindspore/lite/src/litert/delegate/nnrt/CMakeLists.txt
@@ -1,30 +1,13 @@
 include_directories(${DDK_PATH})
 include_directories($(CCSRC_DIR)/plugin/device/cpu/kernel)
+include_directories(${CMAKE_SOURCE_DIR}/../../../../../../foundation/ai/neural_network_runtime/)
 
 include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include)
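Before continuing with the delegate's CMake changes, a usage illustration (a sketch, not part of the patch): the NNRTDeviceInfo options above are what ContextUtils::AddNNRtDevice() copies field-for-field into lite::InnerContext. Assuming the public Context/NNRTDeviceInfo/Extension declarations match the signatures shown in this patch, and with placeholder values for the device id, performance mode, priority and cache path, a caller could configure NNRT roughly like this; "CachePath" and "CacheVersion" are the extension names that NNRTDelegate::InitCachePath() parses further down in this patch.

#include <memory>
#include <string>
#include <vector>
#include "include/api/context.h"

// Sketch: build a Context that schedules inference onto an NNRT device.
std::shared_ptr<mindspore::Context> BuildNNRTContext(size_t nnrt_device_id) {
  auto context = std::make_shared<mindspore::Context>();
  auto nnrt_info = std::make_shared<mindspore::NNRTDeviceInfo>();
  nnrt_info->SetDeviceID(nnrt_device_id);  // an id reported by the NNRT runtime (placeholder here)
  nnrt_info->SetPerformanceMode(0);        // forwarded later to OH_NNCompilation_SetPerformanceMode
  nnrt_info->SetPriority(0);               // forwarded later to OH_NNCompilation_SetPriority
  nnrt_info->SetEnableFP16(false);         // forwarded later to OH_NNCompilation_EnableFloat16

  // Extensions are free-form name/value pairs; "CachePath" and "CacheVersion" are the
  // names NNRTDelegate::InitCachePath() looks for when a compile cache should be used.
  const std::string cache_path = "/data/local/tmp/nnrt_cache";  // placeholder path
  const std::string cache_version = "1";
  std::vector<mindspore::Extension> extensions;
  extensions.push_back({"CachePath", std::vector<uint8_t>(cache_path.begin(), cache_path.end())});
  extensions.push_back({"CacheVersion", std::vector<uint8_t>(cache_version.begin(), cache_version.end())});
  nnrt_info->SetExtensions(extensions);

  context->MutableDeviceInfos().push_back(nnrt_info);
  return context;
}

Because AddNNRtDevice() copies these values through unchanged, whatever integer codes the NNRT headers define for performance mode and priority are passed straight to the compilation stage.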
4313-#include_directories(/home/tony/wty/workspace/ohos/third_party/mindspore/mindspore/lite/mindir/include/inner) 4314-#include_directories(/home/tony/wty/workspace/ohos/third_party/mindspore/mindspore/lite/mindir/include) 4315+ 4316 file(GLOB_RECURSE NNRT_SRC 4317 ${CMAKE_CURRENT_SOURCE_DIR}/*.cc 4318 ) 4319- 4320-#add_library(hiai SHARED IMPORTED) 4321-#set_target_properties(hiai PROPERTIES IMPORTED_LOCATION 4322-# ${DDK_LIB_PATH}/libhiai.so) 4323-#add_library(hiai_ir SHARED IMPORTED) 4324-#set_target_properties(hiai_ir PROPERTIES IMPORTED_LOCATION 4325-# ${DDK_LIB_PATH}/libhiai_ir.so) 4326-#add_library(hiai_ir_build SHARED IMPORTED) 4327-#set_target_properties(hiai_ir_build PROPERTIES IMPORTED_LOCATION 4328-# ${DDK_LIB_PATH}/libhiai_ir_build.so) 4329-#add_library(npu_kernel_mid OBJECT ${NPU_RUNTIME_SRC}) 4330-#add_dependencies(npu_kernel_mid fbs_src) 4331-#target_link_libraries( 4332-# npu_kernel_mid 4333-# hiai 4334-# hiai_ir 4335-# hiai_ir_build 4336-#) 4337- 4338 file(GLOB convert_source checker/*.cc) 4339-add_library(nnr_mid OBJECT ${NNRT_SRC} ${convert_source} ) 4340\ No newline at end of file 4341+ 4342+add_library(nnrt_mid OBJECT ${NNRT_SRC} ${convert_source}) 4343+target_include_directories(nnrt_mid PUBLIC ${CMAKE_SOURCE_DIR}/../../../../../../foundation/ai/neural_network_runtime/) 4344\ No newline at end of file 4345diff --git a/mindspore/lite/src/litert/delegate/nnrt/checker/primitive_check.cc b/mindspore/lite/src/litert/delegate/nnrt/checker/primitive_check.cc 4346index 4df7e477..6b191c8e 100644 4347--- a/mindspore/lite/src/litert/delegate/nnrt/checker/primitive_check.cc 4348+++ b/mindspore/lite/src/litert/delegate/nnrt/checker/primitive_check.cc 4349@@ -109,6 +109,8 @@ Status CheckPrimitiveSupported(const schema::Primitive *primitive) { 4350 return mindspore::kSuccess; 4351 case schema::PrimitiveType_Unsqueeze: 4352 return mindspore::kSuccess; 4353+ case schema::PrimitiveType_Custom: 4354+ return mindspore::kSuccess; 4355 default: { 4356 MS_LOG(WARNING) << "No primitive type :" << (int)(type); 4357 return mindspore::kLiteSuccessExit; 4358diff --git a/mindspore/lite/src/litert/delegate/nnrt/nnrt_delegate.cc b/mindspore/lite/src/litert/delegate/nnrt/nnrt_delegate.cc 4359index 34897331..9f012e76 100644 4360--- a/mindspore/lite/src/litert/delegate/nnrt/nnrt_delegate.cc 4361+++ b/mindspore/lite/src/litert/delegate/nnrt/nnrt_delegate.cc 4362@@ -13,144 +13,637 @@ 4363 * See the License for the specific language governing permissions and 4364 * limitations under the License. 
4365 */ 4366+ 4367+#include <unordered_set> 4368+#include <numeric> 4369 #include "nnrt_delegate.h" 4370 #include "checker/primitive_check.h" 4371 #include "src/common/log_adapter.h" 4372-#include "interfaces/kits/c/neural_network_runtime.h" 4373+#include "interfaces/kits/c/neural_network_runtime/neural_network_runtime.h" 4374 #include "interfaces/innerkits/c/neural_network_runtime_inner.h" 4375 #include "nnrt_model_kernel.h" 4376+#include "schema/model_generated.h" 4377+#include "schema/ops_generated.h" 4378+#include "flatbuffers/flatbuffers.h" 4379+#include "litert/tensor_category.h" 4380+ 4381+namespace mindspore { 4382+namespace lite { 4383+void NNRTDelegate::InitCachePath() { 4384+ static const std::string kCachePathName = "CachePath"; 4385+ static const std::string kCacheVersion = "CacheVersion"; 4386+ 4387+ const auto &extensions = nnrt_device_info_.extensions_; 4388 4389-mindspore::Status mindspore::NNRTDelegate::Build(DelegateModel<schema::Primitive> *model) { 4390- if (this->nnrt_lite_graph == nullptr) { 4391- MS_LOG(ERROR) << "nnrt_lite_graph is nullptr."; 4392- return mindspore::kLiteError; 4393+ auto iter_path = std::find_if(extensions.begin(), extensions.end(), [](const Extension &extension) { 4394+ return extension.name == kCachePathName; 4395+ }); 4396+ if (iter_path != extensions.end()) { 4397+ cache_path_ = std::string(iter_path->value.begin(), iter_path->value.end()); 4398 } 4399- if (this->nnrt_lite_graph->sub_graphs_.empty()) { 4400- // must have at lease one subgraph 4401- MS_LOG(ERROR) << "must have at lease one subgraph"; 4402- return mindspore::kLiteError; 4403+ 4404+ auto iter_version = std::find_if(extensions.begin(), extensions.end(), [](const Extension &extension) { 4405+ return extension.name == kCacheVersion; 4406+ }); 4407+ if (iter_version != extensions.end()) { 4408+ std::string version_str = std::string(iter_version->value.begin(), iter_version->value.end()); 4409+ cache_version_ = static_cast<uint32_t>(std::atol(version_str.c_str())); 4410 } 4411- OH_NN_ReturnCode ret_code; 4412- OH_NNModel *oh_nnmodel = OH_NNModel_Construct(); 4413- if (oh_nnmodel == nullptr) { 4414- MS_LOG(ERROR) << "Construct NNModel failed, oh_nnmodel is nullptr."; 4415- return mindspore::kLiteError; 4416+} 4417+ 4418+Status NNRTDelegate::Build(DelegateModel<schema::Primitive> *model) { 4419+#ifdef SUPPORT_NNRT_METAGRAPH 4420+ if (IsKirinNPU()) { 4421+ MS_LOG(DEBUG) << "Choose to build nnrt model with Metagraph"; 4422+ InitCachePath(); 4423+ return BuildKirinNPUModel(model); 4424 } 4425+#endif 4426 4427- ret_code = OH_NNModel_BuildFromLiteGraph(oh_nnmodel, this->nnrt_lite_graph); 4428- if (ret_code != OH_NN_SUCCESS) { 4429- MS_LOG(ERROR) << "Build NNModel failed, OH_NN_ReturnCode = " << ret_code; 4430- OH_NNModel_Destroy(&oh_nnmodel); 4431- return mindspore::kLiteError; 4432+ return BuildNormalModel(model); 4433+} 4434+ 4435+bool NNRTDelegate::IsCustomModel() const { 4436+ // check if there is only one Cutsom kernel in LiteModel. 
4437+ if (lite_graph_ == nullptr) { 4438+ return false; 4439+ } 4440+ if (lite_graph_->all_nodes_.size() != 1) { 4441+ return false; 4442+ } 4443+ auto node = lite_graph_->all_nodes_[0]; 4444+ if (node == nullptr) { 4445+ return false; 4446+ } 4447+ if (node->node_type_ != mindspore::schema::PrimitiveType_Custom) { 4448+ return false; 4449+ } 4450+ return true; 4451+} 4452+ 4453+#ifdef SUPPORT_NNRT_METAGRAPH 4454+bool NNRTDelegate::IsKirinNPU() const { 4455+ const std::string kirin_npu_name_prefix = "NPU_"; 4456+ auto device_id = nnrt_device_info_.device_id_; 4457+ const char *device_name; 4458+ auto ret = OH_NNDevice_GetName(device_id, &device_name); 4459+ if (ret != OH_NN_SUCCESS) { 4460+ MS_LOG(WARNING) << "Get name of device: " << device_id << " failed, error: " << ret; 4461+ return false; 4462+ } 4463+ 4464+ if (strncmp(kirin_npu_name_prefix.c_str(), device_name, kirin_npu_name_prefix.size()) != 0) { 4465+ MS_LOG(WARNING) << "strncmp: " << device_id << " failed, device_name: " << device_name; 4466+ return false; 4467+ } 4468+ return true; 4469+} 4470+ 4471+Status NNRTDelegate::BuildKirinNPUModel(DelegateModel<schema::Primitive> *model) { 4472+ OH_NNModel *nn_model = OH_NNModel_Construct(); 4473+ if (nn_model == nullptr) { 4474+ MS_LOG(ERROR) << "Create NNModel failed, result is nullptr"; 4475+ return kLiteNullptr; 4476+ } 4477+ 4478+ size_t extension_size = nnrt_device_info_.extensions_.size(); 4479+ std::vector<OH_NN_Extension> extensions; 4480+ MS_LOG_DEBUG << "set extensions, item number: " << extension_size; 4481+ const size_t kExtensionNameMax = 128; // This is a length limitation in NNRT API. 4482+ for (size_t i = 0; i < extension_size; i++) { 4483+ auto &src_extension = nnrt_device_info_.extensions_[i]; 4484+ OH_NN_Extension dst_extension; 4485+ dst_extension.name[kExtensionNameMax - 1] = '\0'; 4486+ strncpy(dst_extension.name, src_extension.name.c_str(), kExtensionNameMax - 1); 4487+ dst_extension.value = (char *)((void *)src_extension.value.data()); 4488+ dst_extension.valueSize = src_extension.value.size(); 4489+ extensions.push_back(dst_extension); 4490+ MS_LOG_DEBUG << "set extension, item name: " << dst_extension.name << ", value size: " << dst_extension.valueSize; 4491+ } 4492+ 4493+ if (IsCustomModel()) { 4494+ auto ret = OH_NNModel_BuildFromLiteGraph(nn_model, lite_graph_); 4495+ if (ret != OH_NN_SUCCESS) { 4496+ MS_LOG(ERROR) << "Build NNModel failed, ret: " << ret; 4497+ OH_NNModel_Destroy(&nn_model); 4498+ return kLiteError; 4499+ } 4500+ } else { 4501+ SetKirinModelInputsAndOutputs(nn_model); 4502+ auto ret = OH_NNModel_BuildFromMetaGraph(nn_model, meta_graph_, extensions.data(), extensions.size()); 4503+ if (ret != OH_NN_SUCCESS) { 4504+ MS_LOG(ERROR) << "Build NNModel failed, ret: " << ret; 4505+ OH_NNModel_Destroy(&nn_model); 4506+ return kLiteError; 4507+ } 4508+ } 4509+ 4510+ auto ret2 = CreateFullModelKernel(model, nn_model); 4511+ if (ret2 != kSuccess) { 4512+ MS_LOG(ERROR) << "Create full model kernel failed, ret: " << ret2; 4513+ return kLiteError; 4514 } 4515- MS_LOG(INFO) << "NNRTDelegate creates NNModel success."; 4516+ return kSuccess; 4517+} 4518+ 4519+std::vector<OH_NN_TensorInfo> NNRTDelegate::CreateNNTensorInfos(const std::vector<uint32_t> &indices) const { 4520+ std::vector<OH_NN_TensorInfo> nn_tensor_infos; 4521+ for (auto index: indices) { 4522+ auto tensor = lite_graph_->all_tensors_[index]; 4523+ auto shape = tensor->dims(); 4524+ auto data_type = tensor->dataType(); 4525+ auto name = tensor->name(); 4526+ auto format = tensor->format(); 4527 
4528- OH_NNCompilation *oh_nn_compilation = nullptr; 4529- oh_nn_compilation = OH_NNCompilation_Construct(oh_nnmodel); 4530+ OH_NN_TensorInfo info; 4531+ info.dataType = CastToNNRTDataType(static_cast<mindspore::DataType>(data_type)); 4532+ info.dimensions = shape->data(); 4533+ info.dimensionCount = shape->size(); 4534+ strcpy(info.name, name->c_str()); 4535+ info.format = CastToNNRTFormat(static_cast<Format>(format)); 4536+ nn_tensor_infos.push_back(info); 4537+ } 4538+ return nn_tensor_infos; 4539+} 4540 4541- if (oh_nn_compilation == nullptr) { 4542+Status NNRTDelegate::SetKirinModelInputsAndOutputs(OH_NNModel *nn_model) { 4543+ std::vector<OH_NN_TensorInfo> inputInfos; 4544+ std::vector<OH_NN_TensorInfo> outputInfos; 4545+ auto input_infos = CreateNNTensorInfos(lite_graph_->input_indices_); 4546+ auto output_infos = CreateNNTensorInfos(lite_graph_->output_indices_); 4547+ OH_NNModel_SetInputsAndOutputsInfo(nn_model, input_infos.data(), input_infos.size(), output_infos.data(), 4548+ output_infos.size()); 4549+ return kSuccess; 4550+} 4551+ 4552+Status NNRTDelegate::CreateFullModelKernel(DelegateModel<schema::Primitive> *model, OH_NNModel *nn_model) { 4553+ OH_NNCompilation *nn_compilation = OH_NNCompilation_Construct(nn_model); 4554+ if (nn_compilation == nullptr) { 4555 MS_LOG(ERROR) << "Construct NNCompilation failed"; 4556- OH_NNModel_Destroy(&oh_nnmodel); 4557- return mindspore::kLiteError; 4558+ OH_NNModel_Destroy(&nn_model); 4559+ return kLiteError; 4560 } 4561- MS_LOG(INFO) << "NNRTDelegate creates NNCompilation success."; 4562+ MS_LOG(DEBUG) << "NNRTDelegate creates NNCompilation success."; 4563 4564- const size_t *allDevicesID = nullptr; 4565- uint32_t device_count = 0; 4566- ret_code = OH_NNDevice_GetAllDevicesID(&allDevicesID, &device_count); 4567- if (ret_code != OH_NN_SUCCESS) { 4568- MS_LOG(ERROR) << "NNModel GetAllDevicesID failed, OH_NN_ReturnCode = " << ret_code; 4569- OH_NNCompilation_Destroy(&oh_nn_compilation); 4570- OH_NNModel_Destroy(&oh_nnmodel); 4571- return mindspore::kLiteError; 4572+ auto ret_code = InitNNCompilation(nn_compilation); 4573+ if (ret_code != kSuccess) { 4574+ MS_LOG(ERROR) << "Init NNCompilation failed"; 4575+ OH_NNModel_Destroy(&nn_model); 4576+ OH_NNCompilation_Destroy(&nn_compilation); 4577+ return kLiteError; 4578 } 4579+ OH_NNModel_Destroy(&nn_model); 4580 4581- if (device_count <= 0) { 4582- MS_LOG(WARNING) << "No NNRt Device found, fall back to CPU. 
"; 4583- // OH_NNCompilation_Destroy(&oh_nn_compilation); 4584- // OH_NNModel_Destroy(&oh_nnmodel); 4585- return mindspore::kSuccess; 4586+ OH_NNExecutor *nn_executor = nullptr; 4587+ nn_executor = OH_NNExecutor_Construct(nn_compilation); 4588+ if (nn_executor == nullptr) { 4589+ MS_LOG(ERROR) << "Construct NNExecutor failed, ret: " << ret_code; 4590+ OH_NNCompilation_Destroy(&nn_compilation); 4591+ return kLiteError; 4592 } 4593- MS_LOG(INFO) << "NNRTDelegate GetAllDevicesID success."; 4594+ OH_NNCompilation_Destroy(&nn_compilation); 4595 4596- // check if model ops are supported 4597- const bool *issupported = nullptr; 4598+ auto nnrt_model_kernel = new (std::nothrow)NNRTModelKernel(nn_executor, model->inputs(), model->outputs()); 4599+ if (nnrt_model_kernel == nullptr) { 4600+ OH_NNExecutor_Destroy(&nn_executor); 4601+ MS_LOG(ERROR) << "new NNRTModelKernel failed"; 4602+ return kLiteError; 4603+ } 4604+ model->Replace(model->BeginKernelIterator(), model->EndKernelIterator(), nnrt_model_kernel); 4605+ return kSuccess; 4606+} 4607+#endif 4608+ 4609+Status NNRTDelegate::BuildNormalModel(DelegateModel<schema::Primitive> *model) { 4610+ MS_LOG(DEBUG) << "Start to build NNRT model."; 4611+ if ((lite_graph_ == nullptr) || (lite_graph_->sub_graphs_.size() > 1)) { 4612+ MS_LOG(WARNING) << "LiteGraph contains more than one subgraph. NNRT does not support control-flow model yet, fallback to CPU"; 4613+ return kSuccess; 4614+ } 4615+ 4616+ OH_NNModel *full_model = CreateFullNNModel(); 4617+ if (full_model == nullptr) { 4618+ MS_LOG(WARNING) << "Build full NNModel failed, fallback to CPU"; 4619+ return kSuccess; 4620+ } 4621+ std::vector<bool> op_supports = QueryOpSupports(full_model); 4622+ if (op_supports.empty()) { 4623+ MS_LOG(WARNING) << "Query no op supports for full model, fallback to CPU"; 4624+ OH_NNModel_Destroy(&full_model); 4625+ return kSuccess; 4626+ } 4627+ auto nnrt_subgraph_ranges = GetNNRTSubgraphRanges(model, op_supports); 4628+ MS_LOG(INFO) << "Found NNRT subgraph count: " << nnrt_subgraph_ranges.size(); 4629+ 4630+ std::vector<LiteGraph *> sub_lite_graphs; 4631+ auto ret = CreateLiteGraphForNNRTSubgraph(nnrt_subgraph_ranges, &sub_lite_graphs); 4632+ if (ret != kSuccess) { 4633+ OH_NNModel_Destroy(&full_model); 4634+ MS_LOG(WARNING) << "Create NNRT sub LiteGraph failed, fallback to CPU"; 4635+ return kSuccess; 4636+ } 4637+ 4638+ std::vector<NNRTModelKernel *> nnrt_subgraph_kernels; 4639+ ret = CreateNNRTSubgraphKernels(model, sub_lite_graphs, nnrt_subgraph_ranges, &nnrt_subgraph_kernels); 4640+ if (ret != kSuccess) { 4641+ OH_NNModel_Destroy(&full_model); 4642+ MS_LOG(WARNING) << "Create NNRT subgraph kernel failed, fallback to CPU"; 4643+ return kSuccess; 4644+ } 4645+ 4646+ ReplaceNNRTKernelsInDelegateModel(model, nnrt_subgraph_ranges, nnrt_subgraph_kernels); 4647+ OH_NNModel_Destroy(&full_model); 4648+ MS_LOG(INFO) << "NNRTDelegate build success."; 4649+ return kSuccess; 4650+} 4651+ 4652+OH_NNModel *NNRTDelegate::CreateFullNNModel() { 4653+ if (lite_graph_ == nullptr) { 4654+ MS_LOG(ERROR) << "Lite graph is null"; 4655+ return nullptr; 4656+ } 4657+ 4658+ if (lite_graph_->sub_graphs_.empty()) { 4659+ MS_LOG(ERROR) << "Lite graph must have at lease one subgraph"; 4660+ return nullptr; 4661+ } 4662+ 4663+ OH_NNModel *nn_model = OH_NNModel_Construct(); 4664+ if (nn_model == nullptr) { 4665+ MS_LOG(ERROR) << "Create NNModel failed, result is nullptr"; 4666+ return nullptr; 4667+ } 4668+ 4669+ auto ret = OH_NNModel_BuildFromLiteGraph(nn_model, lite_graph_); 4670+ if (ret != 
OH_NN_SUCCESS) { 4671+ MS_LOG(ERROR) << "Build NNModel failed, ret: " << ret; 4672+ OH_NNModel_Destroy(&nn_model); 4673+ return nullptr; 4674+ } 4675+ return nn_model; 4676+} 4677+ 4678+std::vector<bool> NNRTDelegate::QueryOpSupports(OH_NNModel *nn_model) { 4679+ const bool *is_supported = nullptr; // Note: this memory is owned by nn_model, don't free alone. 4680 uint32_t op_count = 0; 4681- ret_code = OH_NNModel_GetAvailableOperations(oh_nnmodel, allDevicesID[0], &issupported, &op_count); 4682- if (ret_code != OH_NN_SUCCESS) { 4683- MS_LOG(ERROR) << "NNModel GetAvailableOperations failed, OH_NN_ReturnCode = " << ret_code 4684- << ", maybe due to dataParcel data length limitaion. Fall back to CPU."; 4685- OH_NNCompilation_Destroy(&oh_nn_compilation); 4686- OH_NNModel_Destroy(&oh_nnmodel); 4687- return mindspore::kSuccess; 4688+ auto ret = OH_NNModel_GetAvailableOperations(nn_model, nnrt_device_info_.device_id_, &is_supported, &op_count); 4689+ if (ret != OH_NN_SUCCESS) { 4690+ MS_LOG(WARNING) << "NNModel GetAvailableOperations failed, ret: " << ret 4691+ << ", maybe caused by dataParcel data length limitation"; 4692+ return {}; 4693 } 4694- uint32_t supported_op_count = 0; 4695- for (uint32_t i = 0; i < op_count; i++) { 4696- if (issupported[i]) { 4697- supported_op_count++; 4698+ std::vector<bool> op_supports(is_supported, is_supported + op_count); 4699+ return op_supports; 4700+} 4701+ 4702+/* Find continuous sub-sequence in op_supports. */ 4703+std::vector<NNRTOpRange> NNRTDelegate::GetNNRTSubgraphRanges(DelegateModel<schema::Primitive> *model, 4704+ const std::vector<bool> &op_supports) { 4705+ std::vector<NNRTOpRange> nnrt_subgraph_ranges; 4706+ NNRTOpRange op_range; 4707+ bool start_count = false; 4708+ for (size_t i = 0; i < op_supports.size(); i++) { 4709+ if (op_supports[i]) { 4710+ if (start_count == false) { 4711+ start_count = true; 4712+ op_range.begin_index_ = i; 4713+ op_range.begin_iter_ = model->BeginKernelIterator() + i; 4714+ } 4715+ } else { 4716+ if (start_count == true) { 4717+ start_count = false; 4718+ op_range.end_index_ = i; 4719+ op_range.end_iter_ = model->BeginKernelIterator() + i; 4720+ nnrt_subgraph_ranges.push_back(op_range); 4721+ } 4722 } 4723 } 4724- if (op_count != supported_op_count) { 4725- MS_LOG(WARNING) << "this model has " << op_count << "ops, but NNRT only support " << supported_op_count 4726- << " ops, fall back to CPU."; 4727- // must support all op, else fall back to CPU 4728- OH_NNCompilation_Destroy(&oh_nn_compilation); 4729- OH_NNModel_Destroy(&oh_nnmodel); 4730- return mindspore::kSuccess; 4731+ // handle last true subsequence 4732+ if (start_count == true) { 4733+ op_range.end_index_ = op_supports.size(); 4734+ op_range.end_iter_ = model->EndKernelIterator(); 4735+ nnrt_subgraph_ranges.push_back(op_range); 4736+ MS_LOG(INFO) << "Schedule NNRT subgraph range: [" << op_range.begin_index_ << ", " << op_range.end_index_ << ")"; 4737 } 4738- MS_LOG(INFO) << "NNRtDelegate supports all op in this model."; 4739+ return nnrt_subgraph_ranges; 4740+} 4741+ 4742+/** 4743+ * This method ONLY works when the follow pre-conditions are satisfied: 4744+ * 1. The node order of lite_graph_->all_nodes should be consistent with DelegateModel sequence. 4745+ * This ensures the kernel replacement in DelegateModel based on the re-organizing info from lite_graph_ is correct. 4746+ * 2. The node indices of lite_graph_->sub_graphs[0].node_indices should be monotonically increasing from 0 to size - 1. 
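 *
 * Worked example of the partitioning that feeds this method: GetNNRTSubgraphRanges() scans the
 * op-support mask returned by OH_NNModel_GetAvailableOperations() and emits one half-open range
 * per maximal run of supported ops, e.g. op_supports = {true, true, false, true, true, true}
 * yields the ranges [0, 2) and [3, 6). Each range then becomes one sub-LiteGraph here and is
 * compiled into a single NNRTModelKernel, while unsupported ops (index 2 above) stay on the
 * original kernels of the DelegateModel (the CPU fallback path).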
4747+ */ 4748+Status NNRTDelegate::CreateLiteGraphForNNRTSubgraph( 4749+ const std::vector<NNRTOpRange> &nnrt_op_ranges, 4750+ std::vector<LiteGraph *> *sub_lite_graphs) { 4751+ MS_LOG(INFO) << "Start creating LiteGraph for NNRT subgraph"; 4752+ for (const auto &op_range: nnrt_op_ranges) { 4753+ MS_LOG(INFO) << "Process op range: [" << op_range.begin_index_ << ", " << op_range.end_index_ << ")"; 4754+ LiteGraph *sub_lite_graph = new (std::nothrow)LiteGraph; 4755+ if (sub_lite_graph == nullptr) { 4756+ MS_LOG(ERROR) << "Allocate LiteGraph failed"; 4757+ return kLiteError; 4758+ } 4759+ sub_lite_graph->name_ = lite_graph_->name_; 4760+ sub_lite_graph->version_ = lite_graph_->version_; 4761 4762- ret_code = OH_NNCompilation_SetDevice(oh_nn_compilation, allDevicesID[0]); 4763+ auto sub_graph = new (std::nothrow)LiteGraph::SubGraph; 4764+ if (sub_graph == nullptr) { 4765+ MS_LOG(ERROR) << "Allocate SubGraph failed"; 4766+ return kLiteError; 4767+ } 4768+ sub_graph->name_ = lite_graph_->name_; 4769+ sub_lite_graph->sub_graphs_.push_back(sub_graph); 4770 4771+ // deal with all_nodes 4772+ MS_LOG(INFO) << "Assemble all_nodes..."; 4773+ int new_node_index = 0; 4774+ std::map<uint32_t, schema::Tensor *> in_tensor_index_map; 4775+ std::map<uint32_t, schema::Tensor *> out_tensor_index_map; 4776+ for (size_t index = op_range.begin_index_; index < op_range.end_index_; index++) { 4777+ LiteGraph::Node *node = new (std::nothrow)LiteGraph::Node; 4778+ if (node == nullptr) { 4779+ MS_LOG(ERROR) << "Allocate Node failed"; 4780+ return kLiteError; 4781+ } 4782+ *node = *lite_graph_->all_nodes_[index]; 4783+ sub_lite_graph->all_nodes_.push_back(node); 4784+ sub_graph->node_indices_.push_back(new_node_index++); 4785+ 4786+ for (auto i: node->input_indices_) { 4787+ in_tensor_index_map.emplace(i, lite_graph_->all_tensors_[i]); 4788+ } 4789+ for (auto i: node->output_indices_) { 4790+ out_tensor_index_map.emplace(i, lite_graph_->all_tensors_[i]); 4791+ } 4792+ } 4793+ 4794+ // deal with all_tensors 4795+ MS_LOG(INFO) << "Assemble all_tensors..."; 4796+ std::set<schema::Tensor *> tensors; 4797+ for (auto iter: in_tensor_index_map) { 4798+ tensors.emplace(iter.second); 4799+ } 4800+ for (auto iter: out_tensor_index_map) { 4801+ tensors.emplace(iter.second); 4802+ } 4803+ 4804+ uint32_t new_index = 0; 4805+ std::map<schema::Tensor *, uint32_t> new_tensor_maps; 4806+ for (auto tensor: tensors) { 4807+ new_tensor_maps.emplace(tensor, new_index++); 4808+ } 4809+ 4810+ sub_lite_graph->all_tensors_ = std::vector<schema::Tensor *>(tensors.begin(), tensors.end()); 4811+ 4812+ // deal with every node's input/output indices 4813+ MS_LOG(INFO) << "Set input/output indices of each node..."; 4814+ for (auto node: sub_lite_graph->all_nodes_) { 4815+ for (auto &index : node->input_indices_) { 4816+ index = new_tensor_maps.at(in_tensor_index_map.at(index)); 4817+ } 4818+ for (auto &index : node->output_indices_) { 4819+ index = new_tensor_maps.at(out_tensor_index_map.at(index)); 4820+ } 4821+ } 4822+ 4823+ // deal with subgraph's input/output indices 4824+ MS_LOG(INFO) << "Set input/output indices of each subgraph..."; 4825+ sub_graph->tensor_indices_ = std::vector<uint32_t>(tensors.size()); 4826+ std::iota(sub_graph->tensor_indices_.begin(), sub_graph->tensor_indices_.end(), 0U); 4827+ 4828+ for (auto iter: in_tensor_index_map) { 4829+ auto new_tensor_index = new_tensor_maps[iter.second]; 4830+ MS_LOG(DEBUG) << "handle input: old: " << iter.first << ", new: " << new_tensor_index << std::endl; 4831+ if 
(IsConstTensor(*iter.second)) { 4832+ MS_LOG(DEBUG) << "- tensor: " << new_tensor_index << " is const." << std::endl; 4833+ continue; 4834+ } 4835+ 4836+ bool is_subgraph_input = true; 4837+ for (auto node: sub_lite_graph->all_nodes_) { 4838+ if (std::find(node->output_indices_.begin(), node->output_indices_.end(), new_tensor_index) != 4839+ node->output_indices_.end()) { 4840+ is_subgraph_input = false; 4841+ MS_LOG(DEBUG) << "- tensor: " << new_tensor_index << " is not subgraph input." << std::endl; 4842+ break; 4843+ } 4844+ } 4845+ if (is_subgraph_input) { 4846+ sub_graph->input_indices_.push_back(new_tensor_index); 4847+ MS_LOG(DEBUG) << "- select tensor: " << new_tensor_index << " as subgraph input." << std::endl; 4848+ } 4849+ } 4850+ 4851+ for (auto iter: out_tensor_index_map) { 4852+ int new_tensor_index = new_tensor_maps.at(iter.second); 4853+ MS_LOG(DEBUG) << "handle output: old: " << iter.first << ", new: " << new_tensor_index << std::endl; 4854+ if (IsConstTensor(*iter.second)) { 4855+ MS_LOG(DEBUG) << "- tensor: " << new_tensor_index << " is const." << std::endl; 4856+ continue; 4857+ } 4858+ 4859+ bool is_subgraph_output = false; 4860+ for (size_t i = 0; i < lite_graph_->all_nodes_.size(); i++) { 4861+ if ((i >= op_range.begin_index_) && (i < op_range.end_index_)) { 4862+ continue; 4863+ } 4864+ auto node = lite_graph_->all_nodes_[i]; 4865+ if (std::find(node->input_indices_.begin(), node->input_indices_.end(), iter.first) != 4866+ node->input_indices_.end()) { // As the input of node which does not belong to the subgraph. 4867+ is_subgraph_output = true; 4868+ MS_LOG(DEBUG) << "- tensor: " << new_tensor_index << " is original subgraph output. node: " << node->primitive_ << std::endl; 4869+ break; 4870+ } 4871+ } 4872+ bool is_graph_output = (std::find(lite_graph_->output_indices_.begin(),lite_graph_->output_indices_.end(), 4873+ iter.first) != lite_graph_->output_indices_.end()); 4874+ if (is_graph_output) { 4875+ MS_LOG(DEBUG) << "- tensor: " << new_tensor_index << " is graph output." << std::endl; 4876+ } 4877+ if (is_subgraph_output || is_graph_output) { 4878+ sub_graph->output_indices_.push_back(new_tensor_index); 4879+ MS_LOG(DEBUG) << "- select tensor: " << new_tensor_index << " as subgraph output." << std::endl; 4880+ } 4881+ } 4882+ 4883+ // deal with full-graph's input/output indices 4884+ sub_lite_graph->input_indices_ = sub_graph->input_indices_; 4885+ sub_lite_graph->output_indices_ = sub_graph->output_indices_; 4886+ sub_lite_graphs->push_back(sub_lite_graph); 4887+ } 4888+ MS_LOG(INFO) << "Finished creating LiteGraph for NNRT subgraph"; 4889+ return kSuccess; 4890+} 4891+ 4892+struct TensorLocation { 4893+ uint32_t node_index; // the index of node which the tensor belongs to. 4894+ uint32_t tensor_index; // the index of node in/out tensors which the tensor is located at. 
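  // Example (for illustration): if sub-LiteGraph input tensor 7 is found as the second entry of
  // node 3's input_indices_, the lookup below records {node_index = 3, tensor_index = 1} and then
  // takes the matching MSTensor from (*(begin_iter_ + 3))->inputs()[1] of the DelegateModel.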
4895+}; 4896+ 4897+Status NNRTDelegate::InitNNCompilation(OH_NNCompilation *nn_compilation) const { 4898+ auto ret_code = OH_NNCompilation_SetDevice(nn_compilation, nnrt_device_info_.device_id_); 4899 if (ret_code != OH_NN_SUCCESS) { 4900- MS_LOG(ERROR) << "NNCompilation SetDevice failed, OH_NN_ReturnCode = " << ret_code; 4901- OH_NNCompilation_Destroy(&oh_nn_compilation); 4902- OH_NNModel_Destroy(&oh_nnmodel); 4903- return mindspore::kLiteError; 4904+ MS_LOG(ERROR) << "NNCompilation set device id failed, ret: " << ret_code; 4905+ return kLiteError; 4906+ } 4907+ ret_code = OH_NNCompilation_SetPerformanceMode(nn_compilation, 4908+ (OH_NN_PerformanceMode)(nnrt_device_info_.performance_mode_)); 4909+ if ((ret_code != OH_NN_SUCCESS) && (ret_code != OH_NN_OPERATION_FORBIDDEN)) { 4910+ MS_LOG(ERROR) << "NNCompilation set performance mode failed, ret: " << ret_code; 4911+ return kLiteError; 4912+ } 4913+ ret_code = OH_NNCompilation_SetPriority(nn_compilation, (OH_NN_Priority)(nnrt_device_info_.priority_)); 4914+ if ((ret_code != OH_NN_SUCCESS) && (ret_code != OH_NN_OPERATION_FORBIDDEN)) { 4915+ MS_LOG(ERROR) << "NNCompilation set priority failed, ret: " << ret_code; 4916+ return kLiteError; 4917+ } 4918+ ret_code = OH_NNCompilation_EnableFloat16(nn_compilation, nnrt_device_info_.enable_fp16_); 4919+ if ((ret_code != OH_NN_SUCCESS) && (ret_code != OH_NN_OPERATION_FORBIDDEN)) { 4920+ MS_LOG(ERROR) << "NNCompilation enable fp16 failed, ret: " << ret_code; 4921+ return kLiteError; 4922 } 4923 4924- ret_code = OH_NNCompilation_Build(oh_nn_compilation); 4925+ if (!cache_path_.empty()) { // Set cache path if user indeed set it. 4926+ ret_code = OH_NNCompilation_SetCache(nn_compilation, cache_path_.c_str(), cache_version_); 4927+ if ((ret_code != OH_NN_SUCCESS) && (ret_code != OH_NN_OPERATION_FORBIDDEN)) { 4928+ MS_LOG(ERROR) << "NNCompilation set cache failed, ret: " << ret_code; 4929+ return kLiteError; 4930+ } 4931+ } 4932 4933+ ret_code = OH_NNCompilation_Build(nn_compilation); 4934 if (ret_code != OH_NN_SUCCESS) { 4935- MS_LOG(ERROR) << "Build NNCompilation failed, OH_NN_ReturnCode = " << ret_code; 4936- OH_NNCompilation_Destroy(&oh_nn_compilation); 4937- OH_NNModel_Destroy(&oh_nnmodel); 4938- return mindspore::kLiteError; 4939- } 4940- 4941- MS_LOG(DEBUG) << "NNRTDelegate SetDevice success."; 4942- 4943- OH_NNExecutor *oh_nn_executor = nullptr; 4944- oh_nn_executor = OH_NNExecutor_Construct(oh_nn_compilation); 4945- if (oh_nn_executor == nullptr) { 4946- MS_LOG(ERROR) << "Construct NNCompilation SetDevice failed, OH_NN_ReturnCode = " << ret_code; 4947- OH_NNCompilation_Destroy(&oh_nn_compilation); 4948- OH_NNModel_Destroy(&oh_nnmodel); 4949- return mindspore::kLiteError; 4950- } 4951- MS_LOG(DEBUG) << "NNRTDelegate creates NNExecutor success."; 4952- mindspore::Status prepare_data_ret; 4953- auto nnr_model_kernel = new (std::nothrow) NNRTModelKernel(oh_nn_executor, model->inputs(), model->outputs()); 4954- if (nnr_model_kernel == nullptr) { 4955- MS_LOG(ERROR) << "new NNRTModelKernel failed"; 4956- return mindspore::kLiteError; 4957+ MS_LOG(ERROR) << "Build NNCompilation failed, ret: " << ret_code; 4958+ return kLiteError; 4959 } 4960- OH_NNCompilation_Destroy(&oh_nn_compilation); 4961- OH_NNModel_Destroy(&oh_nnmodel); 4962- KernelIter from = model->BeginKernelIterator(); 4963- KernelIter end = model->EndKernelIterator(); 4964- model->Replace(from, end, nnr_model_kernel); 4965+ return kSuccess; 4966+} 4967+ 4968+Status NNRTDelegate::CreateNNRTSubgraphKernels(DelegateModel<schema::Primitive> 
*model, 4969+ const std::vector<LiteGraph *> &sub_lite_graphs, const std::vector<NNRTOpRange> &nnrt_subgraph_ranges, 4970+ std::vector<NNRTModelKernel *> *nnrt_subgraph_kernels) { 4971+ for (size_t i = 0; i < sub_lite_graphs.size(); i++) { 4972+ auto sub_lite_graph = sub_lite_graphs[i]; 4973+ 4974+ OH_NNModel *nn_model = OH_NNModel_Construct(); 4975+ auto ret = OH_NNModel_BuildFromLiteGraph(nn_model, sub_lite_graph); 4976+ if (ret != OH_NN_SUCCESS) { 4977+ MS_LOG(ERROR) << "Build NNModel failed, ret: " << ret; 4978+ OH_NNModel_Destroy(&nn_model); 4979+ return kLiteError; 4980+ } 4981 4982- MS_LOG(INFO) << "NNRTDelegate build success."; 4983- return mindspore::kSuccess; 4984+ OH_NNCompilation *nn_compilation = OH_NNCompilation_Construct(nn_model); 4985+ if (nn_compilation == nullptr) { 4986+ MS_LOG(ERROR) << "Construct NNCompilation failed"; 4987+ OH_NNModel_Destroy(&nn_model); 4988+ return kLiteError; 4989+ } 4990+ MS_LOG(DEBUG) << "NNRTDelegate creates NNCompilation success."; 4991+ 4992+ auto ret_code = InitNNCompilation(nn_compilation); 4993+ if (ret_code != kSuccess) { 4994+ MS_LOG(ERROR) << "Init NNCompilation failed"; 4995+ OH_NNCompilation_Destroy(&nn_compilation); 4996+ OH_NNModel_Destroy(&nn_model); 4997+ return kLiteError; 4998+ } 4999+ 5000+ OH_NNExecutor *nn_executor = nullptr; 5001+ nn_executor = OH_NNExecutor_Construct(nn_compilation); 5002+ if (nn_executor == nullptr) { 5003+ MS_LOG(ERROR) << "Construct NNExecutor failed, ret: " << ret_code; 5004+ OH_NNCompilation_Destroy(&nn_compilation); 5005+ OH_NNModel_Destroy(&nn_model); 5006+ return kLiteError; 5007+ } 5008+ MS_LOG(DEBUG) << "NNRTDelegate creates NNExecutor success."; 5009+ 5010+ bool format_not_support = false; 5011+ std::vector<MSTensor> in_tensors; 5012+ for (auto index: sub_lite_graph->sub_graphs_[0]->input_indices_) { 5013+ TensorLocation location; 5014+ for (auto node_index: sub_lite_graph->sub_graphs_[0]->node_indices_) { 5015+ auto node = sub_lite_graph->all_nodes_[node_index]; 5016+ auto iter = std::find(node->input_indices_.begin(), node->input_indices_.end(), index); 5017+ if (iter != node->input_indices_.end()) { 5018+ uint32_t tensor_index = iter - node->input_indices_.begin(); 5019+ location.node_index = node_index; 5020+ location.tensor_index = tensor_index; 5021+ MS_LOG(INFO) << "Found graph input index: " << index << " is the " << tensor_index << "th input of the node " << node->primitive_; 5022+ break; 5023+ } 5024+ } 5025+ KernelIter kernel_iter = nnrt_subgraph_ranges[i].begin_iter_ + location.node_index; 5026+ in_tensors.push_back((*kernel_iter)->inputs()[location.tensor_index]); 5027+ if (in_tensors.back().format() != Format::NHWC) { 5028+ format_not_support = true; 5029+ break ; 5030+ } 5031+ } 5032+ 5033+ std::vector<MSTensor> out_tensors; 5034+ for (auto index: sub_lite_graph->sub_graphs_[0]->output_indices_) { 5035+ TensorLocation location; 5036+ for (auto node_index: sub_lite_graph->sub_graphs_[0]->node_indices_) { 5037+ auto node = sub_lite_graph->all_nodes_[node_index]; 5038+ auto iter = std::find(node->output_indices_.begin(), node->output_indices_.end(), index); 5039+ if (iter != node->output_indices_.end()) { 5040+ uint32_t tensor_index = iter - node->output_indices_.begin(); 5041+ location.node_index = node_index; 5042+ location.tensor_index = tensor_index; 5043+ MS_LOG(INFO) << "Found graph output index: " << index << " is the " << tensor_index << "th output of the node " << node->primitive_; 5044+ break; 5045+ } 5046+ } 5047+ KernelIter kernel_iter = nnrt_subgraph_ranges[i].begin_iter_ 
+ location.node_index; 5048+ out_tensors.push_back((*kernel_iter)->outputs()[location.tensor_index]); 5049+ if (out_tensors.back().format() != Format::NHWC) { 5050+ format_not_support = true; 5051+ break ; 5052+ } 5053+ } 5054+ if (format_not_support) { 5055+ MS_LOG(WARNING) << "Not support in/out tensor format, skip this subgraph"; 5056+ OH_NNCompilation_Destroy(&nn_compilation); 5057+ OH_NNModel_Destroy(&nn_model); 5058+ nnrt_subgraph_kernels->push_back(nullptr); 5059+ continue ; 5060+ } 5061+ 5062+ auto nnrt_model_kernel = new (std::nothrow)NNRTModelKernel(nn_executor, in_tensors, out_tensors); 5063+ if (nnrt_model_kernel == nullptr) { 5064+ MS_LOG(ERROR) << "new NNRTModelKernel failed"; 5065+ return kLiteError; 5066+ } 5067+ OH_NNCompilation_Destroy(&nn_compilation); 5068+ OH_NNModel_Destroy(&nn_model); 5069+ nnrt_subgraph_kernels->push_back(nnrt_model_kernel); 5070+ } 5071+ return kSuccess; 5072 } 5073 5074-mindspore::Status mindspore::NNRTDelegate::Init() { 5075- MS_LOG(DEBUG) << "NNRTDelegate init success."; 5076- return mindspore::kSuccess; 5077+void NNRTDelegate::ReplaceNNRTKernelsInDelegateModel(DelegateModel<schema::Primitive> *model, 5078+ const std::vector<NNRTOpRange> &nnrt_subgraph_ranges, 5079+ const std::vector<NNRTModelKernel *> &nnrt_subgraph_kernels) { 5080+ // Here we perform the replacement from back to front intentionally! If replace from front to end, the kernel 5081+ // sequence would shrink and the later begin_iter_/end_iter_ may be erased already. 5082+ for (int i = nnrt_subgraph_ranges.size() - 1; i >= 0; i--) { 5083+ if (nnrt_subgraph_kernels[i] == nullptr) { 5084+ continue; 5085+ } 5086+ auto from = nnrt_subgraph_ranges[i].begin_iter_; 5087+ auto end = nnrt_subgraph_ranges[i].end_iter_; 5088+ (void)model->Replace(from, end, nnrt_subgraph_kernels[i]); 5089+ MS_LOG(INFO) << "Replace nnrt subgraph kernel in range: [" << (from - model->BeginKernelIterator()) 5090+ << ", " << (end - model->BeginKernelIterator()) << ")"; 5091+ } 5092 } 5093-mindspore::Status mindspore::NNRTDelegate::PrepareInputs(DelegateModel<schema::Primitive> *model, 5094- OH_NNExecutor *oh_nn_executor) { 5095+ 5096+Status NNRTDelegate::PrepareInputs(DelegateModel<schema::Primitive> *model, 5097+ OH_NNExecutor *oh_nn_executor) { 5098 auto input_tensors = model->inputs(); 5099 for (size_t i = 0; i < input_tensors.size(); i++) { 5100 auto tensor = input_tensors[i]; 5101@@ -161,10 +654,10 @@ mindspore::Status mindspore::NNRTDelegate::PrepareInputs(DelegateModel<schema::P 5102 std::vector<double> scale; 5103 std::vector<int32_t> zero_point; 5104 if (!tmp_quant_param.empty()) { 5105- quant_param = new (std::nothrow) OH_NN_QuantParam; 5106+ quant_param = new(std::nothrow) OH_NN_QuantParam; 5107 if (quant_param == nullptr) { 5108 MS_LOG(ERROR) << "new OH_NN_QuantParam failed."; 5109- return mindspore::kLiteError; 5110+ return kLiteError; 5111 } 5112 for (auto qparam : tmp_quant_param) { 5113 bit_num.emplace_back(qparam.bit_num); 5114@@ -176,12 +669,12 @@ mindspore::Status mindspore::NNRTDelegate::PrepareInputs(DelegateModel<schema::P 5115 quant_param->scale = scale.data(); 5116 quant_param->zeroPoint = zero_point.data(); 5117 } 5118- auto oprend = new (std::nothrow) OH_NN_Tensor; 5119+ auto oprend = new(std::nothrow) OH_NN_Tensor; 5120 if (oprend == nullptr) { 5121 MS_LOG(ERROR) << "new OH_NN_Tensor Failed"; 5122- return mindspore::kLiteError; 5123+ return kLiteError; 5124 } 5125- oprend->dataType = ConvertDataType(tensor.DataType()); 5126+ oprend->dataType = CastToNNRTDataType(tensor.DataType()); 5127 
oprend->dimensionCount = tensor_shape.size(); 5128 5129 std::vector<int32_t> dimensions_list; 5130@@ -191,14 +684,14 @@ mindspore::Status mindspore::NNRTDelegate::PrepareInputs(DelegateModel<schema::P 5131 } else { 5132 MS_LOG(ERROR) << "NNExecutor SetInput failed,tensor dimension is is too large, max dim = " << INT32_MAX 5133 << ", but get dimension = " << shape; 5134- return mindspore::kLiteError; 5135+ return kLiteError; 5136 } 5137 } 5138 oprend->dimensions = dimensions_list.data(); 5139 oprend->quantParam = quant_param; 5140 oprend->type = OH_NN_TENSOR; 5141 OH_NN_ReturnCode ret_code = 5142- OH_NNExecutor_SetInput(oh_nn_executor, i, oprend, tensor.MutableData(), tensor.DataSize()); 5143+ OH_NNExecutor_SetInput(oh_nn_executor, i, oprend, tensor.MutableData(), tensor.DataSize()); 5144 delete (oprend); 5145 5146 if (!tmp_quant_param.empty()) { 5147@@ -209,70 +702,41 @@ mindspore::Status mindspore::NNRTDelegate::PrepareInputs(DelegateModel<schema::P 5148 if (ret_code != OH_NN_SUCCESS) { 5149 MS_LOG(ERROR) << "NNExecutor SetInput failed, current input tensor is" << tensor.Name() 5150 << "OH_NN_ReturnCode = " << ret_code; 5151- return mindspore::kLiteError; 5152+ return kLiteError; 5153 } 5154 } 5155+ return kSuccess; 5156+} 5157+ 5158+OH_NN_DataType NNRTDelegate::CastToNNRTDataType(DataType data_type) { 5159+ const std::unordered_map<DataType, OH_NN_DataType> kDataTypeMap = { 5160+ {DataType::kNumberTypeBool, OH_NN_BOOL}, 5161+ {DataType::kNumberTypeInt8, OH_NN_INT8}, 5162+ {DataType::kNumberTypeInt16, OH_NN_INT16}, 5163+ {DataType::kNumberTypeInt32, OH_NN_INT32}, 5164+ {DataType::kNumberTypeInt64, OH_NN_INT64}, 5165+ {DataType::kNumberTypeUInt8, OH_NN_UINT8}, 5166+ {DataType::kNumberTypeUInt16, OH_NN_UINT16}, 5167+ {DataType::kNumberTypeUInt32, OH_NN_UINT32}, 5168+ {DataType::kNumberTypeUInt64, OH_NN_UINT64}, 5169+ {DataType::kNumberTypeFloat16, OH_NN_FLOAT16}, 5170+ {DataType::kNumberTypeFloat32, OH_NN_FLOAT32}, 5171+ {DataType::kNumberTypeFloat64, OH_NN_FLOAT64}, 5172+ }; 5173 5174- return mindspore::kSuccess; 5175+ auto iter = kDataTypeMap.find(data_type); 5176+ if (iter == kDataTypeMap.end()) { 5177+ return OH_NN_UNKNOWN; 5178+ } 5179+ return iter->second; 5180 } 5181-OH_NN_DataType mindspore::NNRTDelegate::ConvertDataType(mindspore::DataType data_type) { 5182- OH_NN_DataType oh_data_type; 5183- switch (data_type) { 5184- case mindspore::DataType::kTypeUnknown: 5185- case mindspore::DataType::kObjectTypeString: 5186- case mindspore::DataType::kObjectTypeList: 5187- case mindspore::DataType::kObjectTypeTuple: 5188- case mindspore::DataType::kObjectTypeTensorType: 5189- case mindspore::DataType::kNumberTypeBegin: 5190- case mindspore::DataType::kNumberTypeEnd: 5191- case mindspore::DataType::kInvalidType: 5192- oh_data_type = OH_NN_UNKNOWN; 5193- break; 5194- case mindspore::DataType::kNumberTypeBool: 5195- oh_data_type = OH_NN_BOOL; 5196- break; 5197- case mindspore::DataType::kNumberTypeInt8: 5198- oh_data_type = OH_NN_INT8; 5199- break; 5200- case mindspore::DataType::kNumberTypeInt16: 5201- oh_data_type = OH_NN_INT16; 5202- break; 5203- case mindspore::DataType::kNumberTypeInt32: 5204- oh_data_type = OH_NN_INT32; 5205- break; 5206- case mindspore::DataType::kNumberTypeInt64: 5207- oh_data_type = OH_NN_INT64; 5208- break; 5209- case mindspore::DataType::kNumberTypeUInt8: 5210- oh_data_type = OH_NN_UINT8; 5211- break; 5212- case mindspore::DataType::kNumberTypeUInt16: 5213- oh_data_type = OH_NN_UINT16; 5214- break; 5215- case mindspore::DataType::kNumberTypeUInt32: 5216- oh_data_type 
= OH_NN_UINT32; 5217- break; 5218- case mindspore::DataType::kNumberTypeUInt64: 5219- oh_data_type = OH_NN_UINT64; 5220- break; 5221- case mindspore::DataType::kNumberTypeFloat16: 5222- oh_data_type = OH_NN_FLOAT16; 5223- break; 5224- case mindspore::DataType::kNumberTypeFloat32: 5225- oh_data_type = OH_NN_FLOAT32; 5226- break; 5227- case mindspore::DataType::kNumberTypeFloat64: 5228- oh_data_type = OH_NN_FLOAT64; 5229- break; 5230- default: { 5231- oh_data_type = OH_NN_UNKNOWN; 5232- } 5233- } 5234- return oh_data_type; 5235+ 5236+OH_NN_Format NNRTDelegate::CastToNNRTFormat(Format format) { 5237+ return OH_NN_FORMAT_NHWC; 5238 } 5239 5240-mindspore::Status mindspore::NNRTDelegate::PrepareOutputs(DelegateModel<schema::Primitive> *model, 5241- OH_NNExecutor *oh_nn_executor) { 5242+Status NNRTDelegate::PrepareOutputs(DelegateModel<schema::Primitive> *model, 5243+ OH_NNExecutor *oh_nn_executor) { 5244 auto output_tensors = model->outputs(); 5245 for (size_t i = 0; i < output_tensors.size(); i++) { 5246 auto tensor = output_tensors[i]; 5247@@ -280,17 +744,17 @@ mindspore::Status mindspore::NNRTDelegate::PrepareOutputs(DelegateModel<schema:: 5248 if (ret_code != OH_NN_SUCCESS) { 5249 MS_LOG(ERROR) << "NNExecutor SetOutput failed, current out tensor is" << tensor.Name() 5250 << ", OH_NN_ReturnCode = " << ret_code; 5251- return mindspore::kLiteError; 5252+ return kLiteError; 5253 } 5254 } 5255- return mindspore::kSuccess; 5256+ return kSuccess; 5257 } 5258 5259-void mindspore::NNRTDelegate::ShallowCopyLiteGraph(const mindspore::lite::LiteGraph &lite_graph) { 5260+void NNRTDelegate::ShallowCopyLiteGraph(const lite::LiteGraph &lite_graph) { 5261 Status ret; 5262 for (auto node : lite_graph.all_nodes_) { 5263 ret = lite::CheckPrimitiveSupported(static_cast<const schema::Primitive *>(node->primitive_)); 5264- if (ret == mindspore::kLiteError) { 5265+ if (ret == kLiteError) { 5266 MS_LOG(ERROR) << " primitive supported check failed."; 5267 return; 5268 } 5269@@ -299,7 +763,7 @@ void mindspore::NNRTDelegate::ShallowCopyLiteGraph(const mindspore::lite::LiteGr 5270 node_list.reserve(lite_graph.all_nodes_.size()); 5271 // copy node 5272 for (auto node : lite_graph.all_nodes_) { 5273- auto new_node = new (std::nothrow) LiteGraph::Node; 5274+ auto new_node = new(std::nothrow) LiteGraph::Node; 5275 if (new_node == nullptr) { 5276 MS_LOG(ERROR) << " new LiteGraph::Node failed."; 5277 return; 5278@@ -318,7 +782,7 @@ void mindspore::NNRTDelegate::ShallowCopyLiteGraph(const mindspore::lite::LiteGr 5279 // copy subgraph 5280 std::vector<LiteGraph::SubGraph *> subgraph_list; 5281 for (auto subgraph : lite_graph.sub_graphs_) { 5282- auto new_subgraph = new (std::nothrow) LiteGraph::SubGraph; 5283+ auto new_subgraph = new(std::nothrow) LiteGraph::SubGraph; 5284 if (new_subgraph == nullptr) { 5285 MS_LOG(ERROR) << "new LiteGraph::Subgraph failed."; 5286 return; 5287@@ -331,30 +795,32 @@ void mindspore::NNRTDelegate::ShallowCopyLiteGraph(const mindspore::lite::LiteGr 5288 } 5289 for (auto tensor : lite_graph.all_tensors_) { 5290 ret = lite::CheckTensorSupported(static_cast<const schema::Tensor *>(tensor)); 5291- if (ret == mindspore::kLiteError) { 5292+ if (ret == kLiteError) { 5293 MS_LOG(ERROR) << "tensor supported check failed."; 5294 return; 5295 } 5296 } 5297 5298- nnrt_lite_graph = new (std::nothrow) lite::LiteGraph(); 5299- if (nnrt_lite_graph == nullptr) { 5300+ lite_graph_ = new(std::nothrow) lite::LiteGraph(); 5301+ if (lite_graph_ == nullptr) { 5302 MS_LOG(ERROR) << "new LiteGraph failed."; 5303 return; 
5304 } 5305 5306- nnrt_lite_graph->name_ = lite_graph.name_; 5307- nnrt_lite_graph->version_ = lite_graph.version_; 5308- nnrt_lite_graph->input_indices_ = lite_graph.input_indices_; 5309- nnrt_lite_graph->output_indices_ = lite_graph.output_indices_; 5310- nnrt_lite_graph->all_tensors_ = lite_graph.all_tensors_; 5311- nnrt_lite_graph->all_nodes_ = node_list; 5312- nnrt_lite_graph->sub_graphs_ = subgraph_list; 5313+ lite_graph_->name_ = lite_graph.name_; 5314+ lite_graph_->version_ = lite_graph.version_; 5315+ lite_graph_->input_indices_ = lite_graph.input_indices_; 5316+ lite_graph_->output_indices_ = lite_graph.output_indices_; 5317+ lite_graph_->all_tensors_ = lite_graph.all_tensors_; 5318+ lite_graph_->all_nodes_ = node_list; 5319+ lite_graph_->sub_graphs_ = subgraph_list; 5320 MS_LOG(INFO) << "ShallowCopyLiteGraph success."; 5321 } 5322 5323-mindspore::NNRTDelegate::~NNRTDelegate() { 5324- if (this->nnrt_lite_graph != nullptr) { 5325+NNRTDelegate::~NNRTDelegate() { 5326+ if (lite_graph_ != nullptr) { 5327 MS_LOG(ERROR) << "Delete NNRTDelegate."; 5328 } 5329-}; 5330+} 5331+} // namespace lite 5332+} // namespace mindspore 5333diff --git a/mindspore/lite/src/litert/delegate/nnrt/nnrt_delegate.h b/mindspore/lite/src/litert/delegate/nnrt/nnrt_delegate.h 5334index c2847704..52626339 100644 5335--- a/mindspore/lite/src/litert/delegate/nnrt/nnrt_delegate.h 5336+++ b/mindspore/lite/src/litert/delegate/nnrt/nnrt_delegate.h 5337@@ -15,37 +15,81 @@ 5338 */ 5339 #ifndef MINDSPORE_NNR_DELEGATE_H 5340 #define MINDSPORE_NNR_DELEGATE_H 5341+ 5342 #include <vector> 5343 #include <map> 5344 #include "include/api/delegate.h" 5345 #include "include/model.h" 5346-#include "interfaces/kits/c/neural_network_runtime_type.h" 5347-namespace mindspore { 5348+#include "src/litert/inner_context.h" 5349+#include "nnrt_model_kernel.h" 5350+#include "schema/model_generated.h" 5351+#include "interfaces/kits/c/neural_network_runtime/neural_network_runtime_type.h" 5352+#include "interfaces/kits/c/neural_network_runtime/neural_network_runtime.h" 5353+#include "interfaces/innerkits/c/neural_network_runtime_inner.h" 5354 5355-using namespace lite; 5356+namespace mindspore { 5357+namespace lite { 5358+struct NNRTOpRange { 5359+ /* NNRT kernel range in DelegateModel: [begin_iter_, end_iter_) */ 5360+ KernelIter begin_iter_; 5361+ KernelIter end_iter_; 5362+ /* NNRT node range in lite_graph_: [begin_index_, end_index_) */ 5363+ size_t begin_index_; 5364+ size_t end_index_; 5365+}; 5366 5367 class NNRTDelegate : public Delegate { 5368 public: 5369- NNRTDelegate() : Delegate(){}; 5370- 5371+ NNRTDelegate() = default; 5372+ NNRTDelegate(const NNRtDeviceInfo &nnrt_device_info) : nnrt_device_info_(nnrt_device_info) {} 5373 ~NNRTDelegate() override; 5374- 5375- Status Init() override; 5376- 5377+ Status Init() override { return kSuccess; } 5378 Status Build(DelegateModel<schema::Primitive> *model) override; 5379- 5380 void ShallowCopyLiteGraph(const lite::LiteGraph &liteGraph); 5381- 5382- protected: 5383- LiteGraph *nnrt_lite_graph = nullptr; 5384+ void SetMetaGraph(const void *meta_graph) { 5385+ meta_graph_ = meta_graph; 5386+ } 5387+ static std::vector<NNRTOpRange> GetNNRTSubgraphRanges(DelegateModel<schema::Primitive> *model, 5388+ const std::vector<bool> &op_supports); 5389 5390 private: 5391- // static LiteGraph* CreateLiteGraph(const LiteGraph &liteGraph); 5392+ void InitCachePath(); 5393+ Status BuildNormalModel(DelegateModel<schema::Primitive> *model); 5394+ OH_NNModel *CreateFullNNModel(); 5395+ std::vector<bool> 
QueryOpSupports(OH_NNModel *nn_model); 5396+ Status CreateLiteGraphForNNRTSubgraph( 5397+ const std::vector<NNRTOpRange> &nnrt_op_ranges, 5398+ std::vector<LiteGraph *> *sub_lite_graphs); 5399+ Status CreateNNRTSubgraphKernels( 5400+ DelegateModel<schema::Primitive> *model, 5401+ const std::vector<LiteGraph *> &sub_lite_graphs, 5402+ const std::vector<NNRTOpRange> &nnrt_subgraph_ranges, 5403+ std::vector<NNRTModelKernel *> *nnrt_subgraph_kernels); 5404+ void ReplaceNNRTKernelsInDelegateModel(DelegateModel<schema::Primitive> *model, 5405+ const std::vector<NNRTOpRange> &nnrt_subgraph_ranges, 5406+ const std::vector<NNRTModelKernel *> &nnrt_subgraph_kernels); 5407 Status PrepareInputs(DelegateModel<schema::Primitive> *model, OH_NNExecutor *oh_nn_executor); 5408 Status PrepareOutputs(DelegateModel<schema::Primitive> *model, OH_NNExecutor *oh_nn_executor); 5409- OH_NN_DataType ConvertDataType(mindspore::DataType data_type); 5410-}; 5411+ Status InitNNCompilation(OH_NNCompilation *nn_compilation) const; 5412+ static OH_NN_DataType CastToNNRTDataType(mindspore::DataType data_type); 5413+ static OH_NN_Format CastToNNRTFormat(Format format); 5414+ bool IsCustomModel() const; 5415+ 5416+#ifdef SUPPORT_NNRT_METAGRAPH 5417+ bool IsKirinNPU() const; 5418+ Status BuildKirinNPUModel(DelegateModel<schema::Primitive> *model); 5419+ Status SetKirinModelInputsAndOutputs(OH_NNModel *nn_model); 5420+ std::vector<OH_NN_TensorInfo> CreateNNTensorInfos(const std::vector<uint32_t> &indices) const; 5421+ Status CreateFullModelKernel(DelegateModel<schema::Primitive> *model, OH_NNModel *nn_model); 5422+#endif 5423 5424+ NNRtDeviceInfo nnrt_device_info_; 5425+ LiteGraph *lite_graph_ = nullptr; 5426+ const void *meta_graph_ = nullptr; 5427+ std::string cache_path_ = ""; 5428+ uint32_t cache_version_ = 0; 5429+}; 5430+} // namespace lite 5431 } // namespace mindspore 5432 5433 #endif // MINDSPORE_NNR_DELEGATE_H 5434diff --git a/mindspore/lite/src/litert/delegate/nnrt/nnrt_model_kernel.cc b/mindspore/lite/src/litert/delegate/nnrt/nnrt_model_kernel.cc 5435index 5acf2e9a..67443e08 100644 5436--- a/mindspore/lite/src/litert/delegate/nnrt/nnrt_model_kernel.cc 5437+++ b/mindspore/lite/src/litert/delegate/nnrt/nnrt_model_kernel.cc 5438@@ -97,7 +97,7 @@ OH_NN_DataType mindspore::NNRTModelKernel::ConvertDataType(mindspore::DataType d 5439 } 5440 int mindspore::NNRTModelKernel::PrepareInputs() { 5441 auto input_tensors = this->inputs(); 5442- for (int i = 0; i < input_tensors.size(); i++) { 5443+ for (size_t i = 0; i < input_tensors.size(); i++) { 5444 auto tensor = input_tensors[i]; 5445 auto tensor_shape = tensor.Shape(); 5446 auto tmp_quant_param = tensor.QuantParams(); 5447@@ -142,6 +142,7 @@ int mindspore::NNRTModelKernel::PrepareInputs() { 5448 oprend->dimensions = dimensions_list.data(); 5449 oprend->quantParam = quant_param; 5450 oprend->type = OH_NN_TENSOR; 5451+ MS_LOG_INFO << "input tensor: " << tensor.Name() << ", data: " << (void *)tensor.MutableData() << ", size: " << tensor.DataSize(); 5452 OH_NN_ReturnCode ret_code = 5453 OH_NNExecutor_SetInput(oh_nn_executor, i, oprend, tensor.MutableData(), tensor.DataSize()); 5454 delete (oprend); 5455diff --git a/mindspore/lite/src/litert/delegate/nnrt/nnrt_model_kernel.h b/mindspore/lite/src/litert/delegate/nnrt/nnrt_model_kernel.h 5456index cf9481df..ea15f7ca 100644 5457--- a/mindspore/lite/src/litert/delegate/nnrt/nnrt_model_kernel.h 5458+++ b/mindspore/lite/src/litert/delegate/nnrt/nnrt_model_kernel.h 5459@@ -20,7 +20,7 @@ 5460 #include <map> 5461 #include <utility> 5462 
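To show how the pieces of the refactored delegate interface above fit together, here is a minimal, hypothetical wiring sketch; in the real runtime the lite session and scheduler perform these steps, the option values below are placeholders, and the LiteGraph and DelegateModel are assumed to already exist.

#include <memory>
#include "src/litert/delegate/nnrt/nnrt_delegate.h"

// Hypothetical wiring of the refactored delegate; lite_graph and delegate_model come from the
// surrounding runtime and are assumed to be valid here.
void BuildWithNNRTDelegate(const mindspore::lite::LiteGraph &lite_graph,
                           mindspore::DelegateModel<mindspore::schema::Primitive> *delegate_model) {
  mindspore::lite::NNRtDeviceInfo device_info;
  device_info.device_id_ = 0;          // placeholder: an id reported by OH_NNDevice_GetAllDevicesID
  device_info.performance_mode_ = 0;   // placeholder values, forwarded by InitNNCompilation()
  device_info.priority_ = 0;
  device_info.enable_fp16_ = false;

  mindspore::lite::NNRTDelegate delegate(device_info);
  delegate.ShallowCopyLiteGraph(lite_graph);  // the delegate keeps its own shallow copy of the graph
  if (delegate.Init() != mindspore::kSuccess || delegate.Build(delegate_model) != mindspore::kSuccess) {
    // Build() already falls back to CPU (returning kSuccess) for unsupported graphs, so reaching
    // this branch means the delegate itself could not be set up.
    return;
  }
  // On success, the NNRT-capable kernel ranges in delegate_model have been replaced by
  // NNRTModelKernel instances; the remaining kernels still run on their original backends.
}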
#include "include/api/kernel.h" 5463-#include "interfaces/kits/c/neural_network_runtime.h" 5464+#include "interfaces/kits/c/neural_network_runtime/neural_network_runtime.h" 5465 #include "src/common/log_adapter.h" 5466 #include "include/errorcode.h" 5467 5468diff --git a/mindspore/lite/src/litert/delegate/nnrt/nnrt_stub.cc b/mindspore/lite/src/litert/delegate/nnrt/nnrt_stub.cc 5469new file mode 100644 5470index 00000000..8ac283af 5471--- /dev/null 5472+++ b/mindspore/lite/src/litert/delegate/nnrt/nnrt_stub.cc 5473@@ -0,0 +1,99 @@ 5474+/** 5475+* Copyright 2023 Huawei Technologies Co., Ltd 5476+* 5477+* Licensed under the Apache License, Version 2.0 (the "License"); 5478+* you may not use this file except in compliance with the License. 5479+* You may obtain a copy of the License at 5480+* 5481+* http://www.apache.org/licenses/LICENSE-2.0 5482+* 5483+* Unless required by applicable law or agreed to in writing, software 5484+* distributed under the License is distributed on an "AS IS" BASIS, 5485+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 5486+* See the License for the specific language governing permissions and 5487+* limitations under the License. 5488+*/ 5489+ 5490+#include "interfaces/kits/c/neural_network_runtime/neural_network_runtime.h" 5491+#include "interfaces/innerkits/c/neural_network_runtime_inner.h" 5492+ 5493+OH_NNModel *OH_NNModel_Construct(void) { 5494+ return NULL; 5495+} 5496+ 5497+OH_NN_ReturnCode OH_NNExecutor_Run(OH_NNExecutor *executor) { 5498+ return OH_NN_SUCCESS; 5499+} 5500+ 5501+OH_NN_ReturnCode OH_NNCompilation_Build(OH_NNCompilation *compilation) { 5502+ return OH_NN_SUCCESS; 5503+} 5504+ 5505+void OH_NNCompilation_Destroy(OH_NNCompilation **compilation) {} 5506+ 5507+OH_NNExecutor *OH_NNExecutor_Construct(OH_NNCompilation *compilation) { 5508+ return NULL; 5509+} 5510+ 5511+void OH_NNExecutor_Destroy(OH_NNExecutor **executor) {} 5512+ 5513+OH_NNCompilation *OH_NNCompilation_Construct(const OH_NNModel *model) { 5514+ return NULL; 5515+} 5516+ 5517+OH_NN_ReturnCode OH_NNDevice_GetAllDevicesID(const size_t **allDevicesID, uint32_t *deviceCount) { 5518+ return OH_NN_SUCCESS; 5519+} 5520+ 5521+OH_NN_ReturnCode OH_NNExecutor_SetOutput(OH_NNExecutor *executor, 5522+ uint32_t outputIndex, 5523+ void *dataBuffer, 5524+ size_t length) { 5525+ return OH_NN_SUCCESS; 5526+} 5527+ 5528+OH_NN_ReturnCode OH_NNCompilation_SetDevice(OH_NNCompilation *compilation, size_t deviceID) { 5529+ return OH_NN_SUCCESS; 5530+} 5531+ 5532+OH_NN_ReturnCode OH_NNExecutor_SetInput(OH_NNExecutor *executor, 5533+ uint32_t inputIndex, 5534+ const OH_NN_Tensor *tensor, 5535+ const void *dataBuffer, 5536+ size_t length) { 5537+ return OH_NN_SUCCESS; 5538+} 5539+ 5540+void OH_NNModel_Destroy(OH_NNModel **model) {} 5541+ 5542+OH_NN_ReturnCode OH_NNModel_GetAvailableOperations(OH_NNModel *model, 5543+ size_t deviceID, 5544+ const bool **isSupported, 5545+ uint32_t *opCount) { 5546+ return OH_NN_SUCCESS; 5547+} 5548+ 5549+OH_NN_ReturnCode OH_NNModel_BuildFromLiteGraph(OH_NNModel *model, const void *liteGraph) { 5550+ return OH_NN_SUCCESS; 5551+} 5552+ 5553+OH_NN_ReturnCode OH_NNDevice_GetName(size_t deviceID, const char **name) { 5554+ return OH_NN_SUCCESS; 5555+} 5556+ 5557+OH_NN_ReturnCode OH_NNDevice_GetType(size_t deviceID, OH_NN_DeviceType *deviceType) { 5558+ return OH_NN_SUCCESS; 5559+} 5560+ 5561+OH_NN_ReturnCode OH_NNCompilation_SetPriority(OH_NNCompilation *compilation, OH_NN_Priority priority) { 5562+ return OH_NN_SUCCESS; 5563+} 5564+ 5565+OH_NN_ReturnCode 
OH_NNCompilation_EnableFloat16(OH_NNCompilation *compilation, bool enableFloat16) { 5566+ return OH_NN_SUCCESS; 5567+} 5568+ 5569+OH_NN_ReturnCode OH_NNCompilation_SetPerformanceMode(OH_NNCompilation *compilation, 5570+ OH_NN_PerformanceMode performanceMode) { 5571+ return OH_NN_SUCCESS; 5572+} 5573\ No newline at end of file 5574diff --git a/mindspore/lite/src/litert/infer_manager.cc b/mindspore/lite/src/litert/infer_manager.cc 5575index 2b21d1ca..908ab122 100644 5576--- a/mindspore/lite/src/litert/infer_manager.cc 5577+++ b/mindspore/lite/src/litert/infer_manager.cc 5578@@ -162,7 +162,8 @@ int KernelInferShape(const std::vector<lite::Tensor *> &inputs, const std::vecto 5579 if (parameter->type_ == static_cast<int>(schema::PrimitiveType_PartialFusion) || 5580 parameter->type_ == static_cast<int>(schema::PrimitiveType_Switch) || 5581 parameter->type_ == static_cast<int>(schema::PrimitiveType_Call) || 5582- parameter->type_ == static_cast<int>(schema::PrimitiveType_SwitchLayer)) { 5583+ parameter->type_ == static_cast<int>(schema::PrimitiveType_SwitchLayer) || 5584+ parameter->type_ == static_cast<int>(PrimType_Inner_ThirdPartyModel)) { 5585 MS_LOG(INFO) << "no need infer shape."; 5586 return RET_OK; 5587 } 5588diff --git a/mindspore/lite/src/litert/inner_context.cc b/mindspore/lite/src/litert/inner_context.cc 5589index 7cbac8f7..bf585ff0 100644 5590--- a/mindspore/lite/src/litert/inner_context.cc 5591+++ b/mindspore/lite/src/litert/inner_context.cc 5592@@ -122,6 +122,10 @@ int InnerContext::Init() { 5593 #endif 5594 } 5595 5596+ if (IsDeviceTypeEnabled(DT_NNRT)) { 5597+ MS_LOG(DEBUG) << "NNRT enabled."; 5598+ } 5599+ 5600 if (CreateThreadPool(false)) { 5601 MS_LOG(ERROR) << "CreateThreadPool failed."; 5602 return RET_ERROR; 5603diff --git a/mindspore/lite/src/litert/inner_context.h b/mindspore/lite/src/litert/inner_context.h 5604index 88281eb1..8735961c 100644 5605--- a/mindspore/lite/src/litert/inner_context.h 5606+++ b/mindspore/lite/src/litert/inner_context.h 5607@@ -71,12 +71,26 @@ typedef struct CustomDeviceInfo { 5608 std::shared_ptr<DeviceInfoContext> user_defined_device_info_; 5609 } CustomDeviceInfo; 5610 5611+typedef struct Extension { 5612+ std::string name; // config name 5613+ std::vector<uint8_t> value; // config value 5614+} Extension; 5615+ 5616+typedef struct NNRtDeviceInfo { 5617+ size_t device_id_ = 0; 5618+ int priority_ = 0; 5619+ int performance_mode_ = 0; 5620+ bool enable_fp16_ = false; 5621+ std::vector<Extension> extensions_; 5622+} NNRtDeviceInfo; 5623+ 5624 struct DeviceInfo { 5625 CpuDeviceInfo cpu_device_info_; 5626 GpuDeviceInfo gpu_device_info_; 5627 NpuDeviceInfo npu_device_info_; 5628 AscendDeviceInfo ascend_device_info_; 5629 CustomDeviceInfo custom_device_info_; 5630+ NNRtDeviceInfo nnrt_device_info_; 5631 }; 5632 5633 struct DeviceContext { 5634diff --git a/mindspore/lite/src/litert/kernel/cpu/BUILD.gn b/mindspore/lite/src/litert/kernel/cpu/BUILD.gn 5635index 48308425..65065b5b 100644 5636--- a/mindspore/lite/src/litert/kernel/cpu/BUILD.gn 5637+++ b/mindspore/lite/src/litert/kernel/cpu/BUILD.gn 5638@@ -13,6 +13,10 @@ cpu_kernel_sources = [ 5639 "base/call.cc", 5640 "base/constant_of_shape.cc", 5641 "base/convolution_base.cc", 5642+ "base/custom_base.cc", 5643+ "base/custom_masked_fill.cc", 5644+ "base/custom_is_inf.cc", 5645+ "base/custom_tensor_scatter.cc", 5646 "base/detection_post_process_base.cc", 5647 "base/format_transpose.cc", 5648 "base/group_convolution_base.cc", 5649@@ -37,7 +41,6 @@ cpu_kernel_sources = [ 5650 "fp32/batchnorm_fp32.cc", 5651 
"fp32/batch_to_space_fp32.cc", 5652 "fp32/broadcast_to_fp32.cc", 5653- "fp32/cast_for_x86_fp16.cc", 5654 "fp32/cast_fp32.cc", 5655 "fp32/convolution_1x1_fp32.cc", 5656 "fp32/convolution_delegate_fp32.cc", 5657@@ -118,6 +121,10 @@ cpu_kernel_sources = [ 5658 "fp32/online_fusion/split_reduce_concat_fp32.cc", 5659 ] 5660 5661+if ((target_cpu != "arm") && (target_cpu != "arm64")) { 5662+ cpu_kernel_sources += [ "src/runtime/kernel/cpu/fp32/cast_for_x86_fp16.cc" ] 5663+} 5664+ 5665 arm64_cpu_kernel_sources = [ 5666 "fp32/convolution_im2col_arm64_fp32.cc", 5667 "fp32/matmul_fp32_arm64.cc", 5668@@ -142,6 +149,42 @@ sse_avx_avx512_kernel_sources = [ 5669 "fp32/matmul_fp32_avx512.cc", 5670 ] 5671 5672+fp16_kernel_sources = [ 5673+ "fp16/batchnorm_fp16.cc", 5674+ "fp16/biasadd_fp16.cc", 5675+ "fp16/cast_fp16.cc", 5676+ "fp16/common_fp16.cc", 5677+ "fp16/convolution_1x1_fp16.cc", 5678+ "fp16/convolution_delegate_fp16.cc", 5679+ "fp16/convolution_depthwise_3x3_fp16.cc", 5680+ "fp16/convolution_depthwise_fp16.cc", 5681+ "fp16/convolution_depthwise_slidewindow_fp16.cc", 5682+ "fp16/convolution_fp16.cc", 5683+ "fp16/convolution_winograd_fp16.cc", 5684+ "fp16/custom_gru_fp16.cc", 5685+ "fp16/deconvolution_depthwise_fp16.cc", 5686+ "fp16/deconvolution_fp16.cc", 5687+ "fp16/deconvolution_winograd_fp16.cc", 5688+ "fp16/depth_to_space_fp16.cc", 5689+ "fp16/dynamic_quant_fp16.cc", 5690+ "fp16/fullconnection_fp16.cc", 5691+ "fp16/fused_batchnorm_fp16.cc", 5692+ "fp16/group_convolution_fp16.cc", 5693+ "fp16/gru_fp16.cc", 5694+ "fp16/instance_norm_fp16.cc", 5695+ "fp16/layout_transform_fp16.cc", 5696+ "fp16/lstm_fp16.cc", 5697+ "fp16/matmul_base_fp16.cc", 5698+ "fp16/matmul_fp16.cc", 5699+ "fp16/power_fp16.cc", 5700+ "fp16/prelu_fp16.cc", 5701+ "fp16/quant_dtype_cast_fp16.cc", 5702+ "fp16/reduce_fp16.cc", 5703+ "fp16/resize_fp16.cc", 5704+ "fp16/slice_fp16.cc", 5705+ "fp16/where_fp16.cc", 5706+] 5707+ 5708 int8_kernel_sources = [ 5709 "int8/activation_int8.cc", 5710 "int8/add_int8.cc", 5711@@ -227,6 +270,12 @@ all_cpu_kernel_sources += int8_kernel_sources 5712 all_cpu_kernel_sources += string_kernel_sources 5713 all_cpu_kernel_sources += control_kernel_sources 5714 5715+if (target_cpu == "arm64") { 5716+ all_cpu_kernel_sources += fp16_kernel_sources 5717+} else { 5718+ not_needed(fp16_kernel_sources) 5719+} 5720+ 5721 if (target_cpu == "arm") { 5722 all_cpu_kernel_sources -= arm64_cpu_kernel_sources 5723 all_cpu_kernel_sources -= sse_avx_avx512_kernel_sources 5724diff --git a/mindspore/lite/src/litert/kernel/cpu/base/custom_base.cc b/mindspore/lite/src/litert/kernel/cpu/base/custom_base.cc 5725new file mode 100644 5726index 00000000..9921e063 5727--- /dev/null 5728+++ b/mindspore/lite/src/litert/kernel/cpu/base/custom_base.cc 5729@@ -0,0 +1,46 @@ 5730+/** 5731+ * Copyright 2022 Huawei Technologies Co., Ltd 5732+ * 5733+ * Licensed under the Apache License, Version 2.0 (the "License"); 5734+ * you may not use this file except in compliance with the License. 5735+ * You may obtain a copy of the License at 5736+ * 5737+ * http://www.apache.org/licenses/LICENSE-2.0 5738+ * 5739+ * Unless required by applicable law or agreed to in writing, software 5740+ * distributed under the License is distributed on an "AS IS" BASIS, 5741+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 5742+ * See the License for the specific language governing permissions and 5743+ * limitations under the License. 
5744+ */ 5745+ 5746+#include "src/litert/kernel/cpu/base/custom_base.h" 5747+#include <algorithm> 5748+#include <utility> 5749+#include <vector> 5750+#include "src/litert/kernel_registry.h" 5751+#include "nnacl/op_base.h" 5752+ 5753+using mindspore::kernel::KERNEL_ARCH; 5754+using mindspore::lite::KernelRegistrar; 5755+using mindspore::lite::RET_ERROR; 5756+using mindspore::lite::RET_OK; 5757+using mindspore::schema::PrimitiveType_Custom; 5758+ 5759+namespace mindspore::kernel { 5760+int CustomBaseCPUKernel::Prepare() { 5761+ return RET_OK; 5762+} 5763+ 5764+int CustomBaseCPUKernel::ReSize() { 5765+ return RET_OK; 5766+} 5767+ 5768+int CustomBaseCPUKernel::Run() { 5769+ return RET_OK; 5770+} 5771+ 5772+REG_KERNEL(kCPU, kNumberTypeInt32, PrimType_Inner_ThirdPartyModel, LiteKernelCreator<CustomBaseCPUKernel>) 5773+REG_KERNEL(kCPU, kNumberTypeFloat32, PrimType_Inner_ThirdPartyModel, LiteKernelCreator<CustomBaseCPUKernel>) 5774+REG_KERNEL(kCPU, kNumberTypeBool, PrimType_Inner_ThirdPartyModel, LiteKernelCreator<CustomBaseCPUKernel>) 5775+} // namespace mindspore::kernel 5776diff --git a/mindspore/lite/src/litert/kernel/cpu/base/custom_base.h b/mindspore/lite/src/litert/kernel/cpu/base/custom_base.h 5777new file mode 100644 5778index 00000000..ecb4c72d 5779--- /dev/null 5780+++ b/mindspore/lite/src/litert/kernel/cpu/base/custom_base.h 5781@@ -0,0 +1,43 @@ 5782+/** 5783+ * Copyright 2022 Huawei Technologies Co., Ltd 5784+ * 5785+ * Licensed under the Apache License, Version 2.0 (the "License"); 5786+ * you may not use this file except in compliance with the License. 5787+ * You may obtain a copy of the License at 5788+ * 5789+ * http://www.apache.org/licenses/LICENSE-2.0 5790+ * 5791+ * Unless required by applicable law or agreed to in writing, software 5792+ * distributed under the License is distributed on an "AS IS" BASIS, 5793+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 5794+ * See the License for the specific language governing permissions and 5795+ * limitations under the License. 
5796+ */ 5797+ 5798+#ifndef MINDSPORE_LITE_SRC_LITERT_KERNEL_CPU_BASE_CUSTOM_BASE_H_ 5799+#define MINDSPORE_LITE_SRC_LITERT_KERNEL_CPU_BASE_CUSTOM_BASE_H_ 5800+ 5801+#include <vector> 5802+#include "src/litert/lite_kernel.h" 5803+#include "nnacl/custom_parameter.h" 5804+ 5805+namespace mindspore::kernel { 5806+class CustomBaseCPUKernel : public LiteKernel { 5807+ public: 5808+ CustomBaseCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, 5809+ const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx) 5810+ : LiteKernel(parameter, inputs, outputs, ctx) { 5811+ custom_param_ = reinterpret_cast<CustomParameter *>(op_parameter_); 5812+ } 5813+ ~CustomBaseCPUKernel() override = default; 5814+ 5815+ int Prepare() override; 5816+ int ReSize() override; 5817+ int Run() override; 5818+ 5819+ private: 5820+ CustomParameter *custom_param_ = nullptr; 5821+}; 5822+} // namespace mindspore::kernel 5823+ 5824+#endif // MINDSPORE_LITE_SRC_LITERT_KERNEL_CPU_BASE_CUSTOM_BASE_H_ 5825diff --git a/mindspore/lite/src/litert/kernel/cpu/base/custom_is_inf.cc b/mindspore/lite/src/litert/kernel/cpu/base/custom_is_inf.cc 5826new file mode 100644 5827index 00000000..edffea42 5828--- /dev/null 5829+++ b/mindspore/lite/src/litert/kernel/cpu/base/custom_is_inf.cc 5830@@ -0,0 +1,61 @@ 5831+/** 5832+ * Copyright 2023 Huawei Technologies Co., Ltd 5833+ * 5834+ * Licensed under the Apache License, Version 2.0 (the "License"); 5835+ * you may not use this file except in compliance with the License. 5836+ * You may obtain a copy of the License at 5837+ * 5838+ * http://www.apache.org/licenses/LICENSE-2.0 5839+ * 5840+ * Unless required by applicable law or agreed to in writing, software 5841+ * distributed under the License is distributed on an "AS IS" BASIS, 5842+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 5843+ * See the License for the specific language governing permissions and 5844+ * limitations under the License. 
5845+ */ 5846+#include "src/litert/kernel_registry.h" 5847+#include "include/errorcode.h" 5848+#include "src/litert/kernel/cpu/base/custom_is_inf.h" 5849+#include "src/common/tensor_util.h" 5850+#include "nnacl/op_base.h" 5851+ 5852+using mindspore::lite::KernelRegistrar; 5853+using mindspore::lite::RET_ERROR; 5854+using mindspore::lite::RET_OK; 5855+ 5856+namespace mindspore::kernel { 5857+ 5858+int CustomIsInfCPUKernel::Prepare() { 5859+ CHECK_LESS_RETURN(in_tensors_.size(), C1NUM); 5860+ CHECK_LESS_RETURN(out_tensors_.size(), C1NUM); 5861+ return RET_OK; 5862+} 5863+ 5864+int CustomIsInfCPUKernel::ReSize() { return RET_OK; } 5865+ 5866+void CustomIsInfCPUKernel::LaunchKernelFloat(const float *input, bool *output) { 5867+ auto elem_num = in_tensors_[FIRST_INPUT]->ElementsNum(); 5868+ 5869+ for (int i = 0; i < elem_num; i++) { 5870+ output[i] = std::isinf(input[i]); 5871+ } 5872+} 5873+ 5874+int CustomIsInfCPUKernel::Run() { 5875+ auto input = in_tensors_[FIRST_INPUT]; 5876+ auto output = out_tensors_[FIRST_INPUT]; 5877+ CHECK_NULL_RETURN(input); 5878+ CHECK_NULL_RETURN(output); 5879+ 5880+ if (input->data_type() == kNumberTypeFloat32 || input->data_type() == kNumberTypeFloat) { 5881+ LaunchKernelFloat(reinterpret_cast<const float *>(input->data()), reinterpret_cast<bool *>(output->data())); 5882+ } else { 5883+ MS_LOG(ERROR) << "unsupported input data type " << input->data_type(); 5884+ return RET_ERROR; 5885+ } 5886+ 5887+ return RET_OK; 5888+} 5889+ 5890+REG_KERNEL(kCPU, kNumberTypeFloat32, PrimType_Inner_CustomIsInf, LiteKernelCreator<CustomIsInfCPUKernel>) 5891+} // namespace mindspore::kernel 5892diff --git a/mindspore/lite/src/litert/kernel/cpu/base/custom_is_inf.h b/mindspore/lite/src/litert/kernel/cpu/base/custom_is_inf.h 5893new file mode 100644 5894index 00000000..e63d8ec7 5895--- /dev/null 5896+++ b/mindspore/lite/src/litert/kernel/cpu/base/custom_is_inf.h 5897@@ -0,0 +1,38 @@ 5898+/** 5899+ * Copyright 2023 Huawei Technologies Co., Ltd 5900+ * 5901+ * Licensed under the Apache License, Version 2.0 (the "License"); 5902+ * you may not use this file except in compliance with the License. 5903+ * You may obtain a copy of the License at 5904+ * 5905+ * http://www.apache.org/licenses/LICENSE-2.0 5906+ * 5907+ * Unless required by applicable law or agreed to in writing, software 5908+ * distributed under the License is distributed on an "AS IS" BASIS, 5909+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 5910+ * See the License for the specific language governing permissions and 5911+ * limitations under the License. 
5912+ */ 5913+#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_IS_INF_CPU_H_ 5914+#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_IS_INF_CPU_H_ 5915+ 5916+#include <vector> 5917+#include "src/litert/lite_kernel.h" 5918+ 5919+namespace mindspore::kernel { 5920+class CustomIsInfCPUKernel : public LiteKernel { 5921+ public: 5922+ CustomIsInfCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, 5923+ const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx) 5924+ : LiteKernel(parameter, inputs, outputs, ctx) {} 5925+ ~CustomIsInfCPUKernel() override = default; 5926+ int Prepare() override; 5927+ int ReSize() override; 5928+ int Run() override; 5929+ 5930+ private: 5931+ void LaunchKernelFloat(const float *input, bool *output); 5932+}; 5933+} // namespace mindspore::kernel 5934+ 5935+#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_IS_INF_CPU_H_ 5936diff --git a/mindspore/lite/src/litert/kernel/cpu/base/custom_masked_fill.cc b/mindspore/lite/src/litert/kernel/cpu/base/custom_masked_fill.cc 5937new file mode 100644 5938index 00000000..9af1af5d 5939--- /dev/null 5940+++ b/mindspore/lite/src/litert/kernel/cpu/base/custom_masked_fill.cc 5941@@ -0,0 +1,84 @@ 5942+/** 5943+ * Copyright 2023 Huawei Technologies Co., Ltd 5944+ * 5945+ * Licensed under the Apache License, Version 2.0 (the "License"); 5946+ * you may not use this file except in compliance with the License. 5947+ * You may obtain a copy of the License at 5948+ * 5949+ * http://www.apache.org/licenses/LICENSE-2.0 5950+ * 5951+ * Unless required by applicable law or agreed to in writing, software 5952+ * distributed under the License is distributed on an "AS IS" BASIS, 5953+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 5954+ * See the License for the specific language governing permissions and 5955+ * limitations under the License. 
5956+ */ 5957+#include "src/litert/kernel_registry.h" 5958+#include "include/errorcode.h" 5959+#include "src/litert/kernel/cpu/base/custom_masked_fill.h" 5960+#include "src/common/tensor_util.h" 5961+#include "nnacl/op_base.h" 5962+ 5963+using mindspore::lite::KernelRegistrar; 5964+using mindspore::lite::RET_ERROR; 5965+using mindspore::lite::RET_OK; 5966+ 5967+namespace mindspore::kernel { 5968+ 5969+int CustomMaskedFillCPUKernel::Prepare() { 5970+ CHECK_LESS_RETURN(in_tensors_.size(), C3NUM); 5971+ CHECK_LESS_RETURN(out_tensors_.size(), C1NUM); 5972+ 5973+ // only support input value as a single float value 5974+ MS_CHECK_TRUE_MSG(in_tensors_[FIRST_INPUT]->data_type() == mindspore::TypeId::kNumberTypeFloat32 || 5975+ in_tensors_[FIRST_INPUT]->data_type() == mindspore::TypeId::kNumberTypeFloat, 5976+ RET_ERROR, "input dtype must be float32"); 5977+ if (in_tensors_[THIRD_INPUT]->ElementsNum() != 1) { 5978+ MS_LOG(ERROR) << "only support fill value as a single float"; 5979+ return RET_ERROR; 5980+ } 5981+ MS_CHECK_TRUE_MSG(in_tensors_[SECOND_INPUT]->data_type() == mindspore::TypeId::kNumberTypeBool, RET_ERROR, 5982+ "mask dtype must be bool"); 5983+ if (!InferShapeDone()) { 5984+ return RET_OK; 5985+ } 5986+ return ReSize(); 5987+} 5988+ 5989+int CustomMaskedFillCPUKernel::ReSize() { return RET_OK; } 5990+ 5991+int CustomMaskedFillCPUKernel::Run() { 5992+ auto input = in_tensors_[FIRST_INPUT]; 5993+ auto mask = in_tensors_[SECOND_INPUT]; 5994+ auto value = in_tensors_[THIRD_INPUT]; 5995+ auto output = out_tensors_[FIRST_INPUT]; 5996+ CHECK_NULL_RETURN(input); 5997+ CHECK_NULL_RETURN(mask); 5998+ CHECK_NULL_RETURN(value); 5999+ CHECK_NULL_RETURN(output); 6000+ 6001+ if (input->shape() != mask->shape()) { 6002+ MS_LOG(ERROR) << "Not support broadcast mask to input"; 6003+ return RET_ERROR; 6004+ } 6005+ 6006+ auto value_data = reinterpret_cast<float *>(value->data()); 6007+ auto fill_value = value_data[0]; 6008+ 6009+ auto data_num = input->ElementsNum(); 6010+ auto input_data = reinterpret_cast<float *>(input->data()); 6011+ auto mask_data = reinterpret_cast<bool *>(mask->data()); 6012+ auto output_data = reinterpret_cast<float *>(output->data()); 6013+ for (int64_t i = 0; i < data_num; i++) { 6014+ if (mask_data[i]) { 6015+ output_data[i] = fill_value; 6016+ } else { 6017+ output_data[i] = input_data[i]; 6018+ } 6019+ } 6020+ 6021+ return RET_OK; 6022+} 6023+ 6024+REG_KERNEL(kCPU, kNumberTypeFloat32, PrimType_Inner_CustomMaskedFill, LiteKernelCreator<CustomMaskedFillCPUKernel>) 6025+} // namespace mindspore::kernel 6026diff --git a/mindspore/lite/src/litert/kernel/cpu/base/custom_masked_fill.h b/mindspore/lite/src/litert/kernel/cpu/base/custom_masked_fill.h 6027new file mode 100644 6028index 00000000..04a2dcab 6029--- /dev/null 6030+++ b/mindspore/lite/src/litert/kernel/cpu/base/custom_masked_fill.h 6031@@ -0,0 +1,35 @@ 6032+/** 6033+ * Copyright 2023 Huawei Technologies Co., Ltd 6034+ * 6035+ * Licensed under the Apache License, Version 2.0 (the "License"); 6036+ * you may not use this file except in compliance with the License. 6037+ * You may obtain a copy of the License at 6038+ * 6039+ * http://www.apache.org/licenses/LICENSE-2.0 6040+ * 6041+ * Unless required by applicable law or agreed to in writing, software 6042+ * distributed under the License is distributed on an "AS IS" BASIS, 6043+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 6044+ * See the License for the specific language governing permissions and 6045+ * limitations under the License. 
6046+ */ 6047+#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_CUSTOM_MASKED_FILL_H_ 6048+#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_CUSTOM_MASKED_FILL_H_ 6049+ 6050+#include <vector> 6051+#include "src/litert/lite_kernel.h" 6052+ 6053+namespace mindspore::kernel { 6054+class CustomMaskedFillCPUKernel : public LiteKernel { 6055+ public: 6056+ CustomMaskedFillCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, 6057+ const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx) 6058+ : LiteKernel(parameter, inputs, outputs, ctx) {} 6059+ ~CustomMaskedFillCPUKernel() override = default; 6060+ int Prepare() override; 6061+ int ReSize() override; 6062+ int Run() override; 6063+}; 6064+} // namespace mindspore::kernel 6065+ 6066+#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_CUSTOM_MASKED_FILL_H_ 6067diff --git a/mindspore/lite/src/litert/kernel/cpu/base/custom_tensor_scatter.cc b/mindspore/lite/src/litert/kernel/cpu/base/custom_tensor_scatter.cc 6068new file mode 100644 6069index 00000000..d52d67d5 6070--- /dev/null 6071+++ b/mindspore/lite/src/litert/kernel/cpu/base/custom_tensor_scatter.cc 6072@@ -0,0 +1,75 @@ 6073+/** 6074+ * Copyright 2022 Huawei Technologies Co., Ltd 6075+ * 6076+ * Licensed under the Apache License, Version 2.0 (the "License"); 6077+ * you may not use this file except in compliance with the License. 6078+ * You may obtain a copy of the License at 6079+ * 6080+ * http://www.apache.org/licenses/LICENSE-2.0 6081+ * 6082+ * Unless required by applicable law or agreed to in writing, software 6083+ * distributed under the License is distributed on an "AS IS" BASIS, 6084+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 6085+ * See the License for the specific language governing permissions and 6086+ * limitations under the License. 6087+ */ 6088+ 6089+#include "src/litert/kernel/cpu/base/custom_tensor_scatter.h" 6090+#include <cstring> 6091+#include "schema/model_generated.h" 6092+#include "src/litert/kernel_registry.h" 6093+#include "include/errorcode.h" 6094+#include "nnacl/base/scatter_nd_binary.h" 6095+ 6096+using mindspore::kernel::KERNEL_ARCH; 6097+using mindspore::lite::KernelRegistrar; 6098+using mindspore::lite::RET_ERROR; 6099+using mindspore::lite::RET_OK; 6100+ 6101+namespace mindspore::kernel { 6102+namespace { 6103+int TensorScatterRun(void *cdata, int task_id, float, float) { 6104+ auto kernel = static_cast<CustomTensorScatterCPUKernel *>(cdata); 6105+ CHECK_NULL_RETURN(kernel); 6106+ return kernel->TensorScatterDispatch(task_id); 6107+} 6108+} // namespace 6109+ 6110+int CustomTensorScatterCPUKernel::TensorScatterDispatch(int task_id) { 6111+ auto data_type = in_tensors_[kScatterUpdateInputIndex]->data_type(); 6112+ if (data_type != kNumberTypeFloat32) { 6113+ MS_LOG(ERROR) << "TensorScatterMax only support float32 input tensor, but got " << data_type; 6114+ return RET_ERROR; 6115+ } 6116+ int type = data_type == kNumberTypeFloat32 ? 
0 : 1; 6117+  // multi-threading still has some problems to solve 6118+  param_->op_parameter.thread_num_ = 1; 6119+  auto ret = ScatterNDMax(in_tensors_[kScatterUpdateIndex]->data(), out_tensors_[kOutputIndex]->data(), 6120+                          output_unit_offsets_.data(), param_, type, task_id); 6121+  if (ret != RET_OK) { 6122+    MS_LOG(ERROR) << "ScatterNDMax failed, ret: " << ret; 6123+    return RET_ERROR; 6124+  } 6125+  return RET_OK; 6126+} 6127+ 6128+int CustomTensorScatterCPUKernel::Run() { 6129+  auto in_tensor = in_tensors().front(); 6130+  auto out_tensor = out_tensors().front(); 6131+  (void)memcpy(out_tensor->data(), in_tensor->data(), in_tensor->Size()); 6132+  auto indices = in_tensors_.at(kScatterIndicesIndex); 6133+  if (!indices->IsConst() && ReSize() != RET_OK) { 6134+    MS_LOG(ERROR) << "TensorScatterMax resize failed."; 6135+    return RET_ERROR; 6136+  } 6137+ 6138+  auto ret = ParallelLaunch(ms_context_, TensorScatterRun, this, op_parameter_->thread_num_); 6139+  if (ret != RET_OK) { 6140+    MS_LOG(ERROR) << "TensorScatterMax error, error_code[" << ret << "]"; 6141+  } 6142+  return ret; 6143+} 6144+ 6145+REG_KERNEL(kCPU, kNumberTypeFloat32, PrimType_Inner_CustomTensorScatterMax, 6146+           LiteKernelCreator<CustomTensorScatterCPUKernel>) 6147+}  // namespace mindspore::kernel 6148diff --git a/mindspore/lite/src/litert/kernel/cpu/base/custom_tensor_scatter.h b/mindspore/lite/src/litert/kernel/cpu/base/custom_tensor_scatter.h 6149new file mode 100644 6150index 00000000..e39733c5 6151--- /dev/null 6152+++ b/mindspore/lite/src/litert/kernel/cpu/base/custom_tensor_scatter.h 6153@@ -0,0 +1,36 @@ 6154+/** 6155+ * Copyright 2022 Huawei Technologies Co., Ltd 6156+ * 6157+ * Licensed under the Apache License, Version 2.0 (the "License"); 6158+ * you may not use this file except in compliance with the License. 6159+ * You may obtain a copy of the License at 6160+ * 6161+ * http://www.apache.org/licenses/LICENSE-2.0 6162+ * 6163+ * Unless required by applicable law or agreed to in writing, software 6164+ * distributed under the License is distributed on an "AS IS" BASIS, 6165+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 6166+ * See the License for the specific language governing permissions and 6167+ * limitations under the License.
6168+ */ 6169+ 6170+#ifndef MINDSPORE_LITE_SRC_LITERT_KERNEL_CPU_BASE_TENSOR_SCATTER_ADD_H_ 6171+#define MINDSPORE_LITE_SRC_LITERT_KERNEL_CPU_BASE_TENSOR_SCATTER_ADD_H_ 6172+ 6173+#include <vector> 6174+#include "src/litert/kernel/cpu/base/scatter_nd_binary.h" 6175+ 6176+namespace mindspore::kernel { 6177+class CustomTensorScatterCPUKernel : public ScatterNDBinaryCPUKernel { 6178+ public: 6179+ explicit CustomTensorScatterCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, 6180+ const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx) 6181+ : ScatterNDBinaryCPUKernel(parameter, inputs, outputs, ctx) {} 6182+ ~CustomTensorScatterCPUKernel() override = default; 6183+ 6184+ int Run() override; 6185+ int TensorScatterDispatch(int task_id); 6186+}; 6187+} // namespace mindspore::kernel 6188+ 6189+#endif // MINDSPORE_LITE_SRC_LITERT_KERNEL_CPU_BASE_TENSOR_SCATTER_ADD_H_ 6190diff --git a/mindspore/lite/src/litert/lite_model.cc b/mindspore/lite/src/litert/lite_model.cc 6191index 2c5bc658..13652633 100644 6192--- a/mindspore/lite/src/litert/lite_model.cc 6193+++ b/mindspore/lite/src/litert/lite_model.cc 6194@@ -98,6 +98,8 @@ int LiteModel::ConvertSubGraph(const schema::SubGraph &sub_graph) { 6195 if (sub_graph.name() == nullptr || sub_graph.inputIndices() == nullptr || sub_graph.outputIndices() == nullptr || 6196 sub_graph.tensorIndices() == nullptr) { 6197 MS_LOG(ERROR) << "sub_graph is invalid"; 6198+ MS_LOG(ERROR) << "sub_graph.name() = " << sub_graph.name() << ", sub_graph.inputIndices() = " << sub_graph.inputIndices() 6199+ << ", sub_graph.outputIndices() = " << sub_graph.outputIndices() << ", sub_graph.tensorIndices() = " << sub_graph.tensorIndices(); 6200 return RET_ERROR; 6201 } 6202 6203@@ -620,6 +622,33 @@ Model *ImportFromBuffer(const char *model_buf, size_t size, bool take_buf, minds 6204 return model; 6205 } 6206 6207+std::string LiteGraph::ToString() const { 6208+ std::stringstream ss; 6209+ ss << "all_nodes: " << all_nodes_.size() << std::endl; 6210+ for (size_t i = 0; i < all_nodes_.size(); i++) { 6211+ ss << "- node " << i << ": " << all_nodes_[i]->primitive_ << std::endl; 6212+ ss << "- node " << i << " input_indices_: " << all_nodes_[i]->input_indices_ << std::endl; 6213+ ss << "- node " << i << " output_indices_: " << all_nodes_[i]->output_indices_ << std::endl; 6214+ } 6215+ ss << "all_tensors: " << all_tensors_.size() << std::endl; 6216+ for (size_t i = 0; i < all_tensors_.size(); i++) { 6217+ ss << "- tensor " << i << ": " << all_tensors_[i] << std::endl; 6218+ } 6219+ ss << "input_indices: " << input_indices_<< std::endl; 6220+ ss << "output_indices: " << output_indices_ << std::endl; 6221+ 6222+ ss << "subgraphs: " << std::endl; 6223+ int count = 0; 6224+ for (auto subgraph: sub_graphs_) { 6225+ ss << "- subgraph " << count++ << std::endl; 6226+ ss << "--- subgraph input " << subgraph->input_indices_ << std::endl; 6227+ ss << "--- subgraph output " << subgraph->output_indices_ << std::endl; 6228+ ss << "--- subgraph node " << subgraph->node_indices_ << std::endl; 6229+ ss << "--- subgraph tensor " << subgraph->tensor_indices_ << std::endl; 6230+ } 6231+ return ss.str(); 6232+} 6233+ 6234 Model *Model::Import(const char *model_buf, size_t size) { return ImportFromBuffer(model_buf, size, false); } 6235 6236 Model *Model::Import(const char *filename) { return ImportFromPath(filename); } 6237diff --git a/mindspore/lite/src/litert/lite_session.cc b/mindspore/lite/src/litert/lite_session.cc 6238index 8f54879e..f635c8d2 100644 6239--- 
a/mindspore/lite/src/litert/lite_session.cc 6240+++ b/mindspore/lite/src/litert/lite_session.cc 6241@@ -67,6 +67,9 @@ 6242 #include "thread/parallel_thread_pool_manager.h" 6243 #endif 6244 #include "src/litert/runtime_packed_node_pass.h" 6245+#ifdef SUPPORT_NNRT 6246+#include "src/litert/delegate/nnrt/nnrt_delegate.h" 6247+#endif 6248 6249 using AbstractBaseModel = mindspore::infer::AbstractBaseModel; 6250 6251@@ -635,12 +638,6 @@ int LiteSession::CompileGraph(Model *model) { 6252 MarkSharedWeight(kernels_); 6253 FreePackOpWeight(kernels_); 6254 6255- ret = RuntimeAllocatorInit(); 6256- if (ret != RET_OK) { 6257- MS_LOG(ERROR) << "Runtime allocator init failed."; 6258- is_running_.store(false); 6259- return ret; 6260- } 6261 infer_along_running_ = infer_along_running_ && (runtime_allocator_ == nullptr); 6262 if (infer_along_running_) { 6263 this->context_->set_infer_checker(InferCheckerAll); 6264@@ -1092,6 +1089,27 @@ int LiteSession::CreateCoreMLDelegate() { 6265 return RET_OK; 6266 } 6267 6268+int LiteSession::CreateNNRTDelegate() { 6269+#if SUPPORT_NNRT 6270+ auto iter = std::find_if(context_->device_list_.begin(), context_->device_list_.end(), 6271+ [](DeviceContext &device) { return device.device_type_ == lite::DT_NNRT; }); 6272+ if(iter == context_->device_list_.end()) { 6273+ MS_LOG(ERROR) << "Found non NNRT device info"; 6274+ return RET_ERROR; 6275+ } 6276+ 6277+ delegate_ = std::make_shared<NNRTDelegate>(iter->device_info_.nnrt_device_info_); 6278+ if (delegate_ == nullptr) { 6279+ MS_LOG(ERROR) << "New NNRT delegate failed"; 6280+ return RET_ERROR; 6281+ } 6282+// ((NNRTDelegate *)(delegate_.get()))->SetMetaGraph(this->model_->buf); 6283+ delegate_device_type_ = DT_NNRT; 6284+ this->context_->delegate = delegate_; 6285+#endif 6286+ return RET_OK; 6287+}; 6288+ 6289 int LiteSession::DelegateInit() { 6290 #ifndef DELEGATE_CLIP 6291 int ret = RET_OK; 6292@@ -1115,6 +1133,8 @@ int LiteSession::DelegateInit() { 6293 ret = CreateNPUDelegate(); 6294 } else if (context_->IsDeviceTypeEnabled(DT_GPU)) { 6295 ret = CreateTensorRTDelegate(); 6296+ } else if (context_->IsDeviceTypeEnabled(DT_NNRT)) { 6297+ ret = CreateNNRTDelegate(); 6298 } 6299 } 6300 6301@@ -1496,12 +1516,6 @@ int LiteSession::Resize(const std::vector<mindspore::lite::Tensor *> &inputs, 6302 return ret; 6303 } 6304 6305- if (RuntimeAllocatorInit() != RET_OK) { 6306- MS_LOG(ERROR) << "Runtime allocator in resize failed."; 6307- is_running_.store(false); 6308- return RET_ERROR; 6309- } 6310- 6311 auto status = GraphOptimizePass(&kernels_); 6312 if (status != RET_OK) { 6313 MS_LOG(ERROR) << "GraphOptimizePass failed."; 6314@@ -2022,7 +2036,6 @@ int lite::LiteSession::LoadModelAndCompileByPath(const std::string &model_path, 6315 delete model; 6316 return RET_ERROR; 6317 } 6318- model->Free(); 6319 set_model(model); 6320 return RET_OK; 6321 } 6322diff --git a/mindspore/lite/src/litert/lite_session.h b/mindspore/lite/src/litert/lite_session.h 6323index f8f8fe08..64a5f6d3 100644 6324--- a/mindspore/lite/src/litert/lite_session.h 6325+++ b/mindspore/lite/src/litert/lite_session.h 6326@@ -178,6 +178,7 @@ class MS_API LiteSession { 6327 int CreateNPUDelegate(); 6328 int CreateNNAPIDelegate(); 6329 int CreateCoreMLDelegate(); 6330+ int CreateNNRTDelegate(); 6331 int DelegateInit(); 6332 int InitGPURuntime(); 6333 int InitSharedThreadPool(); 6334diff --git a/mindspore/lite/src/litert/scheduler.cc b/mindspore/lite/src/litert/scheduler.cc 6335index 11382b09..199b4361 100644 6336--- a/mindspore/lite/src/litert/scheduler.cc 6337+++ 
b/mindspore/lite/src/litert/scheduler.cc 6338@@ -60,6 +60,9 @@ 6339 #if defined(PARALLEL_INFERENCE) && defined(ENABLE_MINDRT) 6340 #include "thread/parallel_thread_pool_manager.h" 6341 #endif 6342+#ifdef SUPPORT_NNRT 6343+#include "src/litert/delegate/nnrt/nnrt_delegate.h" 6344+#endif 6345 6346 using AbstractBaseModel = mindspore::infer::AbstractBaseModel; 6347 6348@@ -368,6 +371,7 @@ STATUS Scheduler::DelQuantDTypeCastKernel(std::vector<kernel::KernelExec *> *ker 6349 } 6350 6351 int Scheduler::Schedule(std::vector<kernel::KernelExec *> *dst_kernels) { 6352+ MS_LOG(DEBUG) << "Start schedule."; 6353 int check_input_ret = CheckInputParam(dst_kernels); 6354 if (check_input_ret != RET_OK) { 6355 MS_LOG(ERROR) << "CheckInputParam failed! ret: " << check_input_ret; 6356@@ -404,11 +408,13 @@ int Scheduler::Schedule(std::vector<kernel::KernelExec *> *dst_kernels) { 6357 } 6358 shape_fusion_pass_->StoreStateAndReset(); 6359 6360+ MS_LOG(DEBUG) << "Start to init delegate kernels."; 6361 ret = InitDelegateKernels(dst_kernels); 6362 if (ret != RET_OK) { 6363 MS_LOG(ERROR) << "Repalce delegate kernels failed."; 6364 return ret; 6365 } 6366+ MS_LOG(DEBUG) << "Finish to init delegate kernels."; 6367 6368 ret = CheckCpuValid(dst_kernels); 6369 if (ret != RET_OK) { 6370@@ -500,6 +506,17 @@ int Scheduler::ReplaceDelegateKernels(std::vector<kernel::KernelExec *> *dst_ker 6371 MS_LOG(ERROR) << "New delegate model failed."; 6372 return RET_NULL_PTR; 6373 } 6374+ 6375+#ifdef SUPPORT_NNRT 6376+ if (context_->IsDeviceTypeEnabled(DT_NNRT)) { 6377+ auto delegate = static_cast<NNRTDelegate *>(delegate_.get()); 6378+ delegate->ShallowCopyLiteGraph(this->src_model_->graph_); 6379+ void *meta_graph = reinterpret_cast<void*>(const_cast<mindspore::schema::MetaGraph *>( 6380+ mindspore::schema::GetMetaGraph(this->src_model_->buf))); 6381+ delegate->SetMetaGraph(meta_graph); 6382+ } 6383+#endif 6384+ 6385 auto ret = delegate_->Build(model); 6386 if (ret != mindspore::kSuccess) { 6387 delete model; 6388diff --git a/mindspore/lite/src/litert/tensor_category.cc b/mindspore/lite/src/litert/tensor_category.cc 6389index 70d13865..e57cdb28 100644 6390--- a/mindspore/lite/src/litert/tensor_category.cc 6391+++ b/mindspore/lite/src/litert/tensor_category.cc 6392@@ -30,5 +30,9 @@ Category TensorCategory(const schema::Tensor &tensor) { 6393 auto data_size = tensor.data() == nullptr ? 
0 : tensor.data()->size(); 6394 return TensorCategory(tensor.nodeType(), shape_num, TypeId(tensor.dataType()), data_size); 6395 } 6396+ 6397+bool IsConstTensor(const schema::Tensor &tensor) { 6398+ return TensorCategory(tensor) != Category::VAR; 6399+} 6400 } // namespace lite 6401 } // namespace mindspore 6402diff --git a/mindspore/lite/src/litert/tensor_category.h b/mindspore/lite/src/litert/tensor_category.h 6403index 83273032..70e65b31 100644 6404--- a/mindspore/lite/src/litert/tensor_category.h 6405+++ b/mindspore/lite/src/litert/tensor_category.h 6406@@ -35,6 +35,7 @@ enum Category { 6407 6408 Category TensorCategory(const int node_type, const size_t shape_num, const TypeId data_type, const size_t data_size); 6409 Category TensorCategory(const schema::Tensor &tensor); 6410+bool IsConstTensor(const schema::Tensor &tensor); 6411 } // namespace lite 6412 } // namespace mindspore 6413 #endif // MINDSPORE_LITE_SRC_RUNTIME_TENSOR_CATEGORY_H_ 6414diff --git a/mindspore/lite/test/CMakeLists.txt b/mindspore/lite/test/CMakeLists.txt 6415index 60e240f0..78dab536 100644 6416--- a/mindspore/lite/test/CMakeLists.txt 6417+++ b/mindspore/lite/test/CMakeLists.txt 6418@@ -28,10 +28,14 @@ file(GLOB_RECURSE TEST_UT_SRC 6419 ${TEST_DIR}/ut/src/runtime/kernel/arm/common/*.cc 6420 ${TEST_DIR}/ut/src/runtime/kernel/arm/fp32/*.cc 6421 ${TEST_DIR}/ut/src/runtime/kernel/arm/string/*.cc 6422- ${TEST_DIR}/ut/src/api/context_c_test.cc 6423- ${TEST_DIR}/ut/src/api/model_c_test.cc 6424- ${TEST_DIR}/ut/src/api/tensor_c_test.cc` 6425+# ${TEST_DIR}/ut/src/api/context_c_test.cc 6426+# ${TEST_DIR}/ut/src/api/model_c_test.cc 6427+# ${TEST_DIR}/ut/src/api/tensor_c_test.cc` 6428 ) 6429+if(MSLITE_ENABLE_NNRT) 6430+ list(APPEND TEST_UT_SRC ${TEST_DIR}/ut/src/runtime/nnrt_delegate/nnrt_delegate_tests.cc) 6431+endif() 6432+ 6433 if(MSLITE_ENABLE_SERVER_INFERENCE) 6434 list(APPEND TEST_UT_SRC ${TEST_DIR}/ut/src/api/model_parallel_runner_test.cc) 6435 endif() 6436@@ -86,7 +90,7 @@ endif() 6437 6438 if(MSLITE_ENABLE_INT8) 6439 file(GLOB_RECURSE TEST_INT8_UT_SRC 6440- ${TEST_DIR}/ut/src/runtime/kernel/arm/int8/*.cc 6441+# ${TEST_DIR}/ut/src/runtime/kernel/arm/int8/*.cc 6442 ${TEST_DIR}/ut/nnacl/int8/*.cc 6443 ) 6444 list(APPEND TEST_UT_SRC ${TEST_INT8_UT_SRC}) 6445@@ -118,6 +122,7 @@ if(MSLITE_ENABLE_CONVERTER) 6446 ${TEST_DIR}/ut/tools/converter/registry/*.cc 6447 ${TEST_DIR}/ut/tools/converter/parser/tflite/*.cc 6448 ${TEST_DIR}/ut/tools/converter/api/*.cc 6449+ ${TEST_DIR}/ut/tools/converter/config_parser/*.cc 6450 ${TEST_DIR}/st/converter_test.cc 6451 ${TEST_DIR}/st/delegate_test.cc 6452 ${TEST_DIR}/st/mindrt_parallel_test.cc 6453@@ -232,7 +237,7 @@ endif() 6454 6455 if(MSLITE_ENABLE_CONVERTER) 6456 target_link_libraries(lite-test-converter tflite_parser_mid caffe_parser_mid 6457- onnx_parser_mid tf_parser_mid) 6458+ onnx_parser_mid tf_parser_mid third_party_parser_mid) 6459 endif() 6460 6461 if(MSLITE_ENABLE_MODEL_OBF) 6462diff --git a/mindspore/lite/test/runtest.sh b/mindspore/lite/test/runtest.sh 6463index c0d6d843..abdea6f4 100644 6464--- a/mindspore/lite/test/runtest.sh 6465+++ b/mindspore/lite/test/runtest.sh 6466@@ -80,6 +80,7 @@ if [ "$ENABLE_CONVERTER_TEST" = true ]; then 6467 ./lite-test-converter --gtest_filter="PassRegistryTest.TestRegistry" 6468 ./lite-test-converter --gtest_filter="TestConverterAPI.*" 6469 ./lite-test-converter --gtest_filter="SpecifyGraphOutputFormatTest*" 6470+ ./lite-test-converter --gtest_filter="TestThirdPartyParamParser.*" 6471 fi 6472 ./lite-test --gtest_filter="TestRegistry.TestAdd" 6473 
./lite-test --gtest_filter="TestRegistryCustomOp.TestCustomAdd" 6474diff --git a/mindspore/lite/test/ut/test_data/third_party_model.cfg b/mindspore/lite/test/ut/test_data/third_party_model.cfg 6475new file mode 100644 6476index 00000000..b5fcba75 6477--- /dev/null 6478+++ b/mindspore/lite/test/ut/test_data/third_party_model.cfg 6479@@ -0,0 +1,8 @@ 6480+[third_party_model] 6481+input_names=demo_in_0;demo_in_1;demo_in_2 6482+input_dtypes=float32;float16;float64 6483+input_shapes=1;2,3;4,5,6 6484+output_names=demo_out_0;demo_out_1;demo_out_2;demo_out_4 6485+output_dtypes=int32;int16;int8;uint8 6486+output_shapes=10;20,30;40;50,60,70 6487+extended_parameters=foo:foo_value;bar:bar_value 6488diff --git a/mindspore/lite/test/ut/tools/converter/api/converter_api_test.cc b/mindspore/lite/test/ut/tools/converter/api/converter_api_test.cc 6489index 549bdd72..e73afc0e 100644 6490--- a/mindspore/lite/test/ut/tools/converter/api/converter_api_test.cc 6491+++ b/mindspore/lite/test/ut/tools/converter/api/converter_api_test.cc 6492@@ -34,3 +34,13 @@ TEST(TestConverterAPI, ConvertCaffeWithNotExistWeight) { 6493 mindspore::Converter converter(mindspore::converter::FmkType::kFmkTypeCaffe, caffe_model, output_model, caffe_weight); 6494 ASSERT_FALSE(converter.Convert().IsOk()); 6495 } 6496+ 6497+TEST(TestConverterAPI, ConvertThirdParty) { 6498+ std::string third_party_model = "./relu.mindir"; 6499+ std::string config_model = "./third_party_model.cfg"; 6500+ std::string output_model = "./demo_third_party.ms"; 6501+ 6502+ mindspore::Converter converter(mindspore::converter::FmkType::kFmkTypeThirdParty, third_party_model, output_model); 6503+ converter.SetConfigFile(config_model); 6504+ ASSERT_TRUE(converter.Convert().IsOk()); 6505+} 6506\ No newline at end of file 6507diff --git a/mindspore/lite/test/ut/tools/converter/config_parser/third_party_param_parser_test.cc b/mindspore/lite/test/ut/tools/converter/config_parser/third_party_param_parser_test.cc 6508new file mode 100644 6509index 00000000..c8eb5536 6510--- /dev/null 6511+++ b/mindspore/lite/test/ut/tools/converter/config_parser/third_party_param_parser_test.cc 6512@@ -0,0 +1,176 @@ 6513+/** 6514+ * Copyright 2023 Huawei Technologies Co., Ltd 6515+ * 6516+ * Licensed under the Apache License, Version 2.0 (the "License"); 6517+ * you may not use this file except in compliance with the License. 6518+ * You may obtain a copy of the License at 6519+ * 6520+ * http://www.apache.org/licenses/LICENSE-2.0 6521+ * 6522+ * Unless required by applicable law or agreed to in writing, software 6523+ * distributed under the License is distributed on an "AS IS" BASIS, 6524+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 6525+ * See the License for the specific language governing permissions and 6526+ * limitations under the License. 6527+ */ 6528+ 6529+#include "gtest/gtest.h" 6530+#include "tools/converter/config_parser/third_party_param_parser.h" 6531+ 6532+using mindspore::ThirdPartyModelParam; 6533+using mindspore::TypeId; 6534+using mindspore::lite::RET_OK; 6535+using mindspore::lite::ThirdPartyModelString; 6536+using mindspore::lite::ThirdPartyParamParser; 6537+ 6538+const ThirdPartyModelString kDemoSISOParam = { 6539+ // SISO is short for single-input-single-output. 
6540+ .input_dtypes = "float32", 6541+ .input_shapes = "1,2,3,4", 6542+ .input_names = "siso_input", 6543+ .output_dtypes = "int32", 6544+ .output_shapes = "2", 6545+ .output_names = "siso_output", 6546+ .extended_parameters = "siso_foo:siso_foo_value;siso_bar:siso_bar_value", 6547+}; 6548+ 6549+const ThirdPartyModelString kDemoMIMOParam = { 6550+ // MIMO is short for multiple-input-multiple-output. 6551+ .input_dtypes = "float32;int8;float16", 6552+ .input_shapes = "1,2,3,4;5,6;7,8,9", 6553+ .input_names = "mimo_in_0;mimo_in_1;mimo_in_2", 6554+ .output_dtypes = "int32;float32", 6555+ .output_shapes = "2,4;10,20,30", 6556+ .output_names = "mimo_out_0;mimo_out_1", 6557+ .extended_parameters = "mimo_foo:mimo_foo_value;mimo_bar:mimo_bar_value", 6558+}; 6559+ 6560+TEST(TestThirdPartyParamParser, ParseSISOParam) { 6561+ ThirdPartyModelString param_string = kDemoSISOParam; 6562+ ThirdPartyModelParam result; 6563+ ASSERT_EQ(ThirdPartyParamParser::Parse(param_string, &result), RET_OK); 6564+ 6565+ ASSERT_EQ(result.input_names, std::vector<std::string>{"siso_input"}); 6566+ ASSERT_EQ(result.input_shapes.size(), 1U); 6567+ std::vector<int64_t> expect_in_shape = {1, 2, 3, 4}; 6568+ ASSERT_EQ(result.input_shapes[0], expect_in_shape); 6569+ ASSERT_EQ(result.input_dtypes, std::vector<TypeId>{TypeId::kNumberTypeFloat32}); 6570+ 6571+ ASSERT_EQ(result.output_names, std::vector<std::string>{"siso_output"}); 6572+ ASSERT_EQ(result.output_shapes.size(), 1U); 6573+ std::vector<int64_t> expect_out_shape = {2}; 6574+ ASSERT_EQ(result.output_shapes[0], expect_out_shape); 6575+ ASSERT_EQ(result.output_dtypes, std::vector<TypeId>{TypeId::kNumberTypeInt32}); 6576+ 6577+ const auto &ext_param = result.extended_parameters; 6578+ ASSERT_EQ(ext_param.size(), 2U); 6579+ ASSERT_TRUE(ext_param.find("siso_foo") != ext_param.end()); 6580+ auto expect_foo_value = ext_param.at("siso_foo"); 6581+ ASSERT_EQ(std::string(expect_foo_value.begin(), expect_foo_value.end()), "siso_foo_value"); 6582+ ASSERT_TRUE(ext_param.find("siso_bar") != ext_param.end()); 6583+ auto expect_bar_value = ext_param.at("siso_bar"); 6584+ ASSERT_EQ(std::string(expect_bar_value.begin(), expect_bar_value.end()), "siso_bar_value"); 6585+} 6586+ 6587+TEST(TestThirdPartyParamParser, ParseValidDtype) { 6588+ ThirdPartyModelString param_string = kDemoSISOParam; 6589+ const std::vector<std::string> kValidDtypeStrings = { 6590+ "float64", "float32", "float16", "int64", "int32", "int16", "int8", "uint8", "bool", 6591+ }; 6592+ 6593+ const std::vector<TypeId> kExpects = { 6594+ TypeId::kNumberTypeFloat64, TypeId::kNumberTypeFloat32, TypeId::kNumberTypeFloat16, 6595+ TypeId::kNumberTypeInt64, TypeId::kNumberTypeInt32, TypeId::kNumberTypeInt16, 6596+ TypeId::kNumberTypeInt8, TypeId::kNumberTypeUInt8, TypeId::kNumberTypeBool}; 6597+ 6598+ for (size_t i = 0; i < kValidDtypeStrings.size(); i++) { 6599+ param_string.input_dtypes = kValidDtypeStrings[i]; 6600+ ThirdPartyModelParam result; 6601+ ASSERT_EQ(ThirdPartyParamParser::Parse(param_string, &result), RET_OK); 6602+ ASSERT_EQ(result.input_dtypes[0], kExpects[i]); 6603+ } 6604+} 6605+ 6606+TEST(TestThirdPartyParamParser, ParseInvalidDtype) { 6607+ ThirdPartyModelParam result; 6608+ ThirdPartyModelString param_string = kDemoSISOParam; 6609+ ASSERT_EQ(ThirdPartyParamParser::Parse(param_string, &result), RET_OK); 6610+ param_string.input_dtypes = "bad_dtype"; 6611+ ASSERT_NE(ThirdPartyParamParser::Parse(param_string, &result), RET_OK); 6612+} 6613+ 6614+TEST(TestThirdPartyParamParser, ParseValidShape) { 6615+ 
ThirdPartyModelString param_string = kDemoSISOParam; 6616+ param_string.input_shapes = "256,256,1024,96"; // Only support fixed shape. 6617+ ThirdPartyModelParam result; 6618+ ASSERT_EQ(ThirdPartyParamParser::Parse(param_string, &result), RET_OK); 6619+ std::vector<int64_t> expect = {256, 256, 1024, 96}; 6620+ ASSERT_EQ(result.input_shapes[0], expect); 6621+} 6622+ 6623+TEST(TestThirdPartyParamParser, ParseInvalidShape) { 6624+ ThirdPartyModelParam result; 6625+ ThirdPartyModelString param_string = kDemoSISOParam; 6626+ ASSERT_EQ(ThirdPartyParamParser::Parse(param_string, &result), RET_OK); 6627+ 6628+ param_string.input_shapes = "256,256,1024,-1"; 6629+ ASSERT_NE(ThirdPartyParamParser::Parse(param_string, &result), RET_OK); 6630+ 6631+ param_string.input_shapes = "256,256,0,96"; 6632+ ASSERT_NE(ThirdPartyParamParser::Parse(param_string, &result), RET_OK); 6633+ 6634+ param_string.input_shapes = "256,-256,1024,96"; 6635+ ASSERT_NE(ThirdPartyParamParser::Parse(param_string, &result), RET_OK); 6636+ 6637+ param_string.input_shapes = "256,foo,1024,96"; 6638+ ASSERT_NE(ThirdPartyParamParser::Parse(param_string, &result), RET_OK); 6639+} 6640+ 6641+TEST(TestThirdPartyParamParser, ParseDefaultName) { 6642+ ThirdPartyModelParam result; 6643+ ThirdPartyModelString param_string = kDemoSISOParam; 6644+ param_string.input_names = ""; 6645+ param_string.output_names = ""; 6646+ ASSERT_EQ(ThirdPartyParamParser::Parse(param_string, &result), RET_OK); 6647+ ASSERT_EQ(result.input_names[0], "in_0"); 6648+ ASSERT_EQ(result.output_names[0], "out_0"); 6649+} 6650+ 6651+TEST(TestThirdPartyParamParser, ParseMIMOParam) { 6652+ ThirdPartyModelString param_string = kDemoMIMOParam; 6653+ ThirdPartyModelParam result; 6654+ ASSERT_EQ(ThirdPartyParamParser::Parse(param_string, &result), RET_OK); 6655+ 6656+ std::vector<std::string> expect_input_names = {"mimo_in_0", "mimo_in_1", "mimo_in_2"}; 6657+ ASSERT_EQ(result.input_names, expect_input_names); 6658+ std::vector<std::vector<int64_t>> expect_input_shapes = {{1, 2, 3, 4}, {5, 6}, {7, 8, 9}}; 6659+ ASSERT_EQ(result.input_shapes, expect_input_shapes); 6660+ std::vector<TypeId> expect_input_dtypes = {TypeId::kNumberTypeFloat32, TypeId::kNumberTypeInt8, 6661+ TypeId::kNumberTypeFloat16}; 6662+ ASSERT_EQ(result.input_dtypes, expect_input_dtypes); 6663+ 6664+ std::vector<std::string> expect_output_names = {"mimo_out_0", "mimo_out_1"}; 6665+ ASSERT_EQ(result.output_names, expect_output_names); 6666+ std::vector<std::vector<int64_t>> expect_output_shapes = {{2, 4}, {10, 20, 30}}; 6667+ ASSERT_EQ(result.output_shapes, expect_output_shapes); 6668+ std::vector<TypeId> expect_output_dtypes = {TypeId::kNumberTypeInt32, TypeId::kNumberTypeFloat32}; 6669+ ASSERT_EQ(result.output_dtypes, expect_output_dtypes); 6670+} 6671+ 6672+TEST(TestThirdPartyParamParser, ParseMismatchedShapeAndDtypeSize) { 6673+ ThirdPartyModelString param_string = kDemoMIMOParam; 6674+ ThirdPartyModelParam result; 6675+ ASSERT_EQ(ThirdPartyParamParser::Parse(param_string, &result), RET_OK); 6676+ 6677+ param_string.input_shapes = "1,2,3,4;5,6"; // shape size is 2 while dtype size is 3. 
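  // The parser is expected to reject configs whose per-tensor counts disagree; here two shapes are
  // paired with three dtypes, and the next test checks the analogous name/dtype mismatch.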
6678+ ASSERT_NE(ThirdPartyParamParser::Parse(param_string, &result), RET_OK); 6679+} 6680+ 6681+TEST(TestThirdPartyParamParser, ParseMismatchedNameAndDtypeSize) { 6682+ ThirdPartyModelString param_string = kDemoMIMOParam; 6683+ ThirdPartyModelParam result; 6684+ ASSERT_EQ(ThirdPartyParamParser::Parse(param_string, &result), RET_OK); 6685+ 6686+ param_string.input_names = "mimo_in_0;mimo_in_1"; // name size is 2 while dtype size is 3. 6687+ ASSERT_NE(ThirdPartyParamParser::Parse(param_string, &result), RET_OK); 6688+} 6689diff --git a/mindspore/lite/tools/benchmark/benchmark_base.cc b/mindspore/lite/tools/benchmark/benchmark_base.cc 6690index 16b1e218..ebaa9212 100644 6691--- a/mindspore/lite/tools/benchmark/benchmark_base.cc 6692+++ b/mindspore/lite/tools/benchmark/benchmark_base.cc 6693@@ -323,7 +323,7 @@ int BenchmarkBase::CheckThreadNumValid() { 6694 6695 int BenchmarkBase::CheckDeviceTypeValid() { 6696 if (flags_->device_ != "CPU" && flags_->device_ != "GPU" && flags_->device_ != "NPU" && 6697- flags_->device_ != "Ascend310" && flags_->device_ != "Ascend310P") { 6698+ flags_->device_ != "Ascend310" && flags_->device_ != "Ascend310P" && flags_->device_ != "NNRT") { 6699 MS_LOG(ERROR) << "Device type:" << flags_->device_ << " is not supported."; 6700 std::cerr << "Device type:" << flags_->device_ << " is not supported." << std::endl; 6701 return RET_ERROR; 6702diff --git a/mindspore/lite/tools/benchmark/benchmark_base.h b/mindspore/lite/tools/benchmark/benchmark_base.h 6703index acdea21a..f818270c 100644 6704--- a/mindspore/lite/tools/benchmark/benchmark_base.h 6705+++ b/mindspore/lite/tools/benchmark/benchmark_base.h 6706@@ -122,7 +122,7 @@ class MS_API BenchmarkFlags : public virtual FlagParser { 6707 AddFlag(&BenchmarkFlags::in_data_file_, "inDataFile", "Input data file, if not set, use random input", ""); 6708 AddFlag(&BenchmarkFlags::group_info_file_, "GroupInfoFile", "Communication group info file", ""); 6709 AddFlag(&BenchmarkFlags::config_file_, "configFile", "Config file", ""); 6710- AddFlag(&BenchmarkFlags::device_, "device", "CPU | GPU | NPU | Ascend310 | Ascend310P | Auto", "CPU"); 6711+ AddFlag(&BenchmarkFlags::device_, "device", "CPU | GPU | NPU | Ascend310 | Ascend310P | NNRT | Auto", "CPU"); 6712 AddFlag(&BenchmarkFlags::provider_, "provider", "device provider litert | tensorrt | mindrt", "litert"); 6713 AddFlag(&BenchmarkFlags::cpu_bind_mode_, "cpuBindMode", "Input 0 for NO_BIND, 1 for HIGHER_CPU, 2 for MID_CPU.", 1); 6714 // MarkPerformance 6715diff --git a/mindspore/lite/tools/benchmark/benchmark_c_api.cc b/mindspore/lite/tools/benchmark/benchmark_c_api.cc 6716index 252e65c6..cb0c56b0 100644 6717--- a/mindspore/lite/tools/benchmark/benchmark_c_api.cc 6718+++ b/mindspore/lite/tools/benchmark/benchmark_c_api.cc 6719@@ -125,6 +125,10 @@ int BenchmarkCApi::InitContext() { 6720 OH_AI_DeviceInfoSetFrequency(npu_device_info, kFrequencyDefault); 6721 OH_AI_ContextAddDeviceInfo(context_, npu_device_info); 6722 } 6723+ if (flags_->device_ == "NNRT") { 6724+ OH_AI_DeviceInfoHandle nnrt_device_info = OH_AI_DeviceInfoCreate(OH_AI_DEVICETYPE_NNRT); 6725+ OH_AI_ContextAddDeviceInfo(context_, nnrt_device_info); 6726+ } 6727 OH_AI_DeviceInfoHandle cpu_device_info = OH_AI_DeviceInfoCreate(OH_AI_DEVICETYPE_CPU); 6728 OH_AI_DeviceInfoSetEnableFP16(cpu_device_info, flags_->enable_fp16_); 6729 OH_AI_ContextAddDeviceInfo(context_, cpu_device_info); 6730diff --git a/mindspore/lite/tools/benchmark/benchmark_unified_api.cc b/mindspore/lite/tools/benchmark/benchmark_unified_api.cc 6731index 
bb36c168..c18111b6 100644 6732--- a/mindspore/lite/tools/benchmark/benchmark_unified_api.cc 6733+++ b/mindspore/lite/tools/benchmark/benchmark_unified_api.cc 6734@@ -521,6 +521,11 @@ int BenchmarkUnifiedApi::InitMSContext(const std::shared_ptr<mindspore::Context> 6735 // InitMSContextForAscend(context, &device_list); 6736 } 6737 6738+ if (flags_->device_ == "NNRT" || flags_->device_ == "Auto") { 6739+ std::shared_ptr<NNRTDeviceInfo> nnrt_device_info = std::make_shared<NNRTDeviceInfo>(); 6740+ device_list.push_back(nnrt_device_info); 6741+ } 6742+ 6743 // CPU priority is behind GPU and NPU 6744 std::shared_ptr<CPUDeviceInfo> device_info = std::make_shared<CPUDeviceInfo>(); 6745 device_info->SetEnableFP16(flags_->enable_fp16_); 6746diff --git a/mindspore/lite/tools/benchmark_train/CMakeLists.txt b/mindspore/lite/tools/benchmark_train/CMakeLists.txt 6747index 0c558524..1b9fc347 100644 6748--- a/mindspore/lite/tools/benchmark_train/CMakeLists.txt 6749+++ b/mindspore/lite/tools/benchmark_train/CMakeLists.txt 6750@@ -9,6 +9,9 @@ set(COMMON_SRC 6751 set(TEST_SRC 6752 ${CMAKE_CURRENT_SOURCE_DIR}/main.cc 6753 ${CMAKE_CURRENT_SOURCE_DIR}/net_train.cc 6754+ ${CMAKE_CURRENT_SOURCE_DIR}/net_train_base.cc 6755+ ${CMAKE_CURRENT_SOURCE_DIR}/run_net_train.cc 6756+ ${CMAKE_CURRENT_SOURCE_DIR}/net_train_c_api.cc 6757 ) 6758 6759 # add static securec link library 6760diff --git a/mindspore/lite/tools/benchmark_train/main.cc b/mindspore/lite/tools/benchmark_train/main.cc 6761index abf3d9dd..76f85aa7 100644 6762--- a/mindspore/lite/tools/benchmark_train/main.cc 6763+++ b/mindspore/lite/tools/benchmark_train/main.cc 6764@@ -17,7 +17,8 @@ 6765 #include <malloc.h> 6766 #include <unistd.h> 6767 #include <fstream> 6768-#include "tools/benchmark_train/net_train.h" 6769+#include <iostream> 6770+#include "tools/benchmark_train/run_net_train.h" 6771 6772 void PrintMem() { 6773 std::string proc_file = "/proc/" + std::to_string(getpid()) + "/status"; 6774diff --git a/mindspore/lite/tools/benchmark_train/net_runner.cc b/mindspore/lite/tools/benchmark_train/net_runner.cc 6775index 9b63d29f..edf3e964 100644 6776--- a/mindspore/lite/tools/benchmark_train/net_runner.cc 6777+++ b/mindspore/lite/tools/benchmark_train/net_runner.cc 6778@@ -15,7 +15,7 @@ 6779 */ 6780 6781 #include "tools/benchmark_train/net_runner.h" 6782-#include "tools/benchmark_train/net_train.h" 6783+#include "tools/benchmark_train/net_train_base.h" 6784 #include <getopt.h> 6785 #include <malloc.h> 6786 #include <cmath> 6787@@ -187,7 +187,7 @@ int NetRunner::CompareOutput(const std::vector<mindspore::MSTensor> &outputs) { 6788 auto output = tensor.Data(); 6789 size_t size; 6790 std::string output_file = flags_->data_file_ + std::to_string(i) + ".bin"; 6791- auto bin_buf = std::unique_ptr<float[]>(mindspore::lite::NetTrain::ReadFileBuf(output_file.c_str(), &size)); 6792+ auto bin_buf = std::unique_ptr<float[]>(mindspore::lite::NetTrainBase::ReadFileBuf(output_file.c_str(), &size)); 6793 if (bin_buf == nullptr) { 6794 MS_LOG(ERROR) << "ReadFile return nullptr"; 6795 std::cout << "ReadFile return nullptr" << std::endl; 6796@@ -200,7 +200,7 @@ int NetRunner::CompareOutput(const std::vector<mindspore::MSTensor> &outputs) { 6797 << ", read size: " << size << std::endl; 6798 return mindspore::kLiteError; 6799 } 6800- float bias = mindspore::lite::NetTrain::CompareData<float>(bin_buf.get(), tensor.ElementNum(), 6801+ float bias = mindspore::lite::NetTrainBase::CompareData<float>(bin_buf.get(), tensor.ElementNum(), 6802 reinterpret_cast<const float *>(output.get())); 
6803 if (bias >= 0) { 6804 total_bias += bias; 6805@@ -332,7 +332,7 @@ int NetRunner::ReadInputFile(std::vector<mindspore::MSTensor> *ms_inputs) { 6806 } 6807 size_t size; 6808 std::string file_name = flags_->in_data_file_ + std::to_string(i + 1) + ".bin"; 6809- auto bin_buf = std::unique_ptr<float[]>(mindspore::lite::NetTrain::ReadFileBuf(file_name.c_str(), &size)); 6810+ auto bin_buf = std::unique_ptr<float[]>(mindspore::lite::NetTrainBase::ReadFileBuf(file_name.c_str(), &size)); 6811 if (bin_buf == nullptr) { 6812 MS_LOG(ERROR) << "ReadFile return nullptr"; 6813 std::cout << "ReadFile return nullptr" << std::endl; 6814@@ -368,4 +368,4 @@ int CallBack(mindspore::lite::NetTrainFlags *flags) { 6815 return nr.Main(); 6816 } 6817 6818-int init = mindspore::lite::NetTrain::SetNr(CallBack); 6819+int init = mindspore::lite::NetTrainBase::SetNr(CallBack); 6820diff --git a/mindspore/lite/tools/benchmark_train/net_train.cc b/mindspore/lite/tools/benchmark_train/net_train.cc 6821index d1150043..514bba53 100644 6822--- a/mindspore/lite/tools/benchmark_train/net_train.cc 6823+++ b/mindspore/lite/tools/benchmark_train/net_train.cc 6824@@ -31,74 +31,11 @@ 6825 6826 namespace mindspore { 6827 namespace lite { 6828-static const char *DELIM_SLASH = "/"; 6829-constexpr const char *DELIM_COLON = ":"; 6830-constexpr const char *DELIM_COMMA = ","; 6831-constexpr int RET_TOO_BIG = -9; 6832 constexpr int kField0 = 0; 6833 constexpr int kField1 = 1; 6834 constexpr int kField2 = 2; 6835 constexpr int kField3 = 3; 6836 constexpr int kField4 = 4; 6837-constexpr int kFieldsToPrint = 5; 6838-constexpr int kPrintOffset = 4; 6839-static const int kTHOUSAND = 1000; 6840-constexpr int kDumpInputsAndOutputs = 0; 6841-constexpr int kDumpOutputs = 2; 6842- 6843-const std::unordered_map<int, std::string> kTypeIdMap{ 6844- {kNumberTypeFloat16, "Float16"}, {kNumberTypeFloat, "Float32"}, {kNumberTypeFloat32, "Float32"}, 6845- {kNumberTypeInt8, "Int8"}, {kNumberTypeInt16, "Int16"}, {kNumberTypeInt, "Int32"}, 6846- {kNumberTypeInt32, "Int32"}, {kNumberTypeUInt8, "UInt8"}, {kNumberTypeUInt16, "UInt16"}, 6847- {kNumberTypeUInt, "UInt32"}, {kNumberTypeUInt32, "UInt32"}, {kObjectTypeString, "String"}, 6848- {kNumberTypeBool, "Bool"}, {kObjectTypeTensorType, "Tensor"}}; 6849- 6850-const std::unordered_map<mindspore::Format, std::string> kTensorFormatMap{ 6851- {mindspore::NCHW, "NCHW"}, {mindspore::NHWC, "NHWC"}, {mindspore::NHWC4, "NHWC4"}, {mindspore::HWKC, "HWKC"}, 6852- {mindspore::HWCK, "HWCK"}, {mindspore::KCHW, "KCHW"}, {mindspore::CKHW, "CKHW"}, {mindspore::KHWC, "KHWC"}, 6853- {mindspore::CHWK, "CHWK"}, {mindspore::HW, "HW"}, {mindspore::HW4, "HW4"}, {mindspore::NC, "NC"}, 6854- {mindspore::NC4, "NC4"}, {mindspore::NC4HW4, "NC4HW4"}, {mindspore::NCDHW, "NCDHW"}}; 6855- 6856-std::function<int(NetTrainFlags *)> NetTrain::nr_cb_ = nullptr; 6857- 6858-int NetTrain::SetNr(std::function<int(NetTrainFlags *)> param) { 6859- nr_cb_ = param; 6860- return 0; 6861-} 6862- 6863-float *NetTrain::ReadFileBuf(const std::string file, size_t *size) { 6864- if (file.empty()) { 6865- MS_LOG(ERROR) << "file is nullptr"; 6866- return nullptr; 6867- } 6868- MS_ASSERT(size != nullptr); 6869- std::string real_path = RealPath(file.c_str()); 6870- std::ifstream ifs(real_path); 6871- if (!ifs.good()) { 6872- MS_LOG(ERROR) << "file: " << real_path << " is not exist"; 6873- return nullptr; 6874- } 6875- 6876- if (!ifs.is_open()) { 6877- MS_LOG(ERROR) << "file: " << real_path << " open failed"; 6878- return nullptr; 6879- } 6880- 6881- ifs.seekg(0, 
std::ios::end); 6882- *size = ifs.tellg(); 6883- std::unique_ptr<float[]> buf = std::make_unique<float[]>(*size / sizeof(float) + 1); 6884- if (buf == nullptr) { 6885- MS_LOG(ERROR) << "malloc buf failed, file: " << real_path; 6886- ifs.close(); 6887- return nullptr; 6888- } 6889- 6890- ifs.seekg(0, std::ios::beg); 6891- ifs.read(reinterpret_cast<char *>(buf.get()), *size); 6892- ifs.close(); 6893- 6894- return buf.release(); 6895-} 6896 6897 int NetTrain::GenerateInputData() { 6898 for (auto tensor : ms_inputs_for_api_) { 6899@@ -120,28 +57,6 @@ int NetTrain::GenerateInputData() { 6900 return RET_OK; 6901 } 6902 6903-int NetTrain::LoadInput() { 6904- inputs_buf_.clear(); 6905- inputs_size_.clear(); 6906- batch_num_ = 0; 6907- if (flags_->in_data_file_.empty()) { 6908- auto status = GenerateInputData(); 6909- if (status != RET_OK) { 6910- std::cerr << "Generate input data error " << status << std::endl; 6911- MS_LOG(ERROR) << "Generate input data error " << status; 6912- return status; 6913- } 6914- } else { 6915- auto status = ReadInputFile(); 6916- if (status != RET_OK) { 6917- std::cerr << "Read Input File error, " << status << std::endl; 6918- MS_LOG(ERROR) << "Read Input File error, " << status; 6919- return status; 6920- } 6921- } 6922- return RET_OK; 6923-} 6924- 6925 int NetTrain::LoadStepInput(size_t step) { 6926 if (step >= batch_num_) { 6927 auto cur_batch = step + 1; 6928@@ -269,30 +184,6 @@ int NetTrain::CompareOutput() { 6929 } 6930 } 6931 6932-std::string GenerateOutputFileName(mindspore::MSTensor *tensor, const std::string &op_name, 6933- const std::string &file_type, const size_t &idx) { 6934- std::string file_name = op_name; 6935- auto pos = file_name.find_first_of('/'); 6936- while (pos != std::string::npos) { 6937- file_name.replace(pos, 1, "."); 6938- pos = file_name.find_first_of('/'); 6939- } 6940- file_name += "_" + file_type + "_" + std::to_string(idx) + "_shape_"; 6941- for (const auto &dim : tensor->Shape()) { 6942- file_name += std::to_string(dim) + "_"; 6943- } 6944- if (kTypeIdMap.find(static_cast<int>(tensor->DataType())) != kTypeIdMap.end()) { 6945- file_name += kTypeIdMap.at(static_cast<int>(tensor->DataType())); 6946- } 6947- auto tensor_format = tensor->format(); 6948- if (kTensorFormatMap.find(tensor_format) != kTensorFormatMap.end()) { 6949- file_name += "_" + kTensorFormatMap.at(tensor_format) + ".bin"; 6950- } 6951- 6952- file_name += ".bin"; 6953- return file_name; 6954-} 6955- 6956 int NetTrain::MarkPerformance() { 6957 MS_LOG(INFO) << "Running train loops..."; 6958 std::cout << "Running train loops..." << std::endl; 6959@@ -574,26 +465,6 @@ int NetTrain::CreateAndRunNetwork(const std::string &filename, const std::string 6960 return RET_OK; 6961 } 6962 6963-int NetTrain::RunNetTrain() { 6964- auto file_name = flags_->model_file_.substr(flags_->model_file_.find_last_of(DELIM_SLASH) + 1); 6965- bool is_train = (file_name.find("train") != std::string::npos) || !flags_->bb_model_file_.empty(); 6966- auto status = CreateAndRunNetwork(flags_->model_file_, flags_->bb_model_file_, is_train, flags_->epochs_); 6967- if (status != RET_OK) { 6968- MS_LOG(ERROR) << "CreateAndRunNetwork failed for model " << flags_->model_file_ << ". Status is " << status; 6969- std::cout << "CreateAndRunNetwork failed for model " << flags_->model_file_ << ". 
Status is " << status 6970- << std::endl; 6971- return status; 6972- } 6973- 6974- status = CheckExecutionOfSavedModels(); // re-initialize sessions according to flags 6975- if (status != RET_OK) { 6976- MS_LOG(ERROR) << "Run CheckExecute error: " << status; 6977- std::cout << "Run CheckExecute error: " << status << std::endl; 6978- return status; 6979- } 6980- return RET_OK; 6981-} 6982- 6983 int NetTrain::SaveModels() { 6984 if (!flags_->export_file_.empty()) { 6985 if (flags_->bb_model_file_.empty()) { 6986@@ -635,77 +506,6 @@ int NetTrain::SaveModels() { 6987 return RET_OK; 6988 } 6989 6990-int NetTrain::CheckExecutionOfSavedModels() { 6991- int status = RET_OK; 6992- if (!flags_->export_file_.empty()) { 6993- status = NetTrain::CreateAndRunNetwork(flags_->export_file_, flags_->bb_model_file_, true, 0); 6994- if (status != RET_OK) { 6995- MS_LOG(ERROR) << "Run Exported model " << flags_->export_file_ << " error: " << status; 6996- std::cout << "Run Exported model " << flags_->export_file_ << " error: " << status << std::endl; 6997- return status; 6998- } 6999- if (flags_->bb_model_file_.empty()) { 7000- status = NetTrain::CreateAndRunNetwork(flags_->export_file_ + "_qt", "", true, 0, false); 7001- if (status != RET_OK) { 7002- MS_LOG(ERROR) << "Run Exported model " << flags_->export_file_ << "_qt.ms error: " << status; 7003- std::cout << "Run Exported model " << flags_->export_file_ << "_qt.ms error: " << status << std::endl; 7004- return status; 7005- } 7006- } 7007- } 7008- if (!flags_->inference_file_.empty()) { 7009- status = NetTrain::CreateAndRunNetwork(flags_->inference_file_, "", false, 0); 7010- if (status != RET_OK) { 7011- MS_LOG(ERROR) << "Running saved model " << flags_->inference_file_ << ".ms error: " << status; 7012- std::cout << "Running saved model " << flags_->inference_file_ << ".ms error: " << status << std::endl; 7013- return status; 7014- } 7015- status = NetTrain::CreateAndRunNetwork(flags_->inference_file_ + "_qt", "", false, 0, false); 7016- if (status != RET_OK) { 7017- MS_LOG(ERROR) << "Running saved model " << flags_->inference_file_ << "_qt.ms error: " << status; 7018- std::cout << "Running saved model " << flags_->inference_file_ << "_qt.ms error: " << status << std::endl; 7019- return status; 7020- } 7021- } 7022- return status; 7023-} 7024- 7025-void NetTrain::CheckSum(MSTensor *tensor, const std::string &node_type, int id, const std::string &in_out) { 7026- if (tensor == nullptr) { 7027- MS_LOG(ERROR) << "input tensor is nullptr."; 7028- return; 7029- } 7030- int tensor_size = tensor->ElementNum(); 7031- void *data = tensor->MutableData(); 7032- auto *fdata = reinterpret_cast<float *>(tensor->MutableData()); 7033- auto type = tensor->DataType(); 7034- std::cout << node_type << " " << in_out << id << " shape=" << tensor->Shape() << " sum="; 7035- switch (type) { 7036- case mindspore::DataType::kNumberTypeFloat32: 7037- TensorNan(reinterpret_cast<float *>(data), tensor_size); 7038- std::cout << TensorSum<float>(data, tensor_size) << std::endl; 7039- std::cout << "tensor name: " << tensor->Name() << std::endl; 7040- std::cout << "data: "; 7041- for (int i = 0; i <= kPrintOffset && i < tensor_size; i++) { 7042- std::cout << static_cast<float>(fdata[i]) << ", "; 7043- } 7044- std::cout << std::endl; 7045- break; 7046- case mindspore::DataType::kNumberTypeInt32: 7047- std::cout << TensorSum<int>(data, tensor_size) << std::endl; 7048- break; 7049-#ifdef ENABLE_FP16 7050- case mindspore::DataType::kNumberTypeFloat16: 7051- std::cout << 
TensorSum<float16_t>(data, tensor_size) << std::endl; 7052- TensorNan(reinterpret_cast<float16_t *>(data), tensor_size); 7053- break; 7054-#endif 7055- default: 7056- std::cout << "unsupported type:" << static_cast<int>(type) << std::endl; 7057- break; 7058- } 7059-} 7060- 7061 int NetTrain::InitDumpTensorDataCallbackParameter() { 7062 // before callback 7063 before_call_back_ = [&](const std::vector<mindspore::MSTensor> &before_inputs, 7064@@ -815,178 +615,6 @@ int NetTrain::InitTimeProfilingCallbackParameter() { 7065 return RET_OK; 7066 } 7067 7068-int NetTrain::InitCallbackParameter() { 7069- int ret = RET_OK; 7070- if (flags_->dump_tensor_data_) { 7071- ret = InitDumpTensorDataCallbackParameter(); 7072- } else if (flags_->time_profiling_) { 7073- ret = InitTimeProfilingCallbackParameter(); 7074- } 7075- return ret; 7076-} 7077- 7078-void NetTrainFlags::InitResizeDimsList() { 7079- std::string content = this->resize_dims_in_; 7080- std::vector<int> shape; 7081- auto shape_strs = StrSplit(content, std::string(DELIM_COLON)); 7082- for (const auto &shape_str : shape_strs) { 7083- shape.clear(); 7084- auto dim_strs = StrSplit(shape_str, std::string(DELIM_COMMA)); 7085- std::cout << "Resize Dims: "; 7086- for (const auto &dim_str : dim_strs) { 7087- std::cout << dim_str << " "; 7088- shape.emplace_back(static_cast<int>(std::stoi(dim_str))); 7089- } 7090- std::cout << std::endl; 7091- this->resize_dims_.emplace_back(shape); 7092- } 7093-} 7094- 7095-int NetTrain::Init() { 7096- if (this->flags_ == nullptr) { 7097- return 1; 7098- } 7099- MS_LOG(INFO) << "ModelPath = " << this->flags_->model_file_; 7100- MS_LOG(INFO) << "InDataPath = " << this->flags_->in_data_file_; 7101- MS_LOG(INFO) << "InDataType = " << this->flags_->in_data_type_in_; 7102- MS_LOG(INFO) << "Epochs = " << this->flags_->epochs_; 7103- MS_LOG(INFO) << "AccuracyThreshold = " << this->flags_->accuracy_threshold_; 7104- MS_LOG(INFO) << "WarmUpLoopCount = " << this->flags_->warm_up_loop_count_; 7105- MS_LOG(INFO) << "NumThreads = " << this->flags_->num_threads_; 7106- MS_LOG(INFO) << "expectedDataFile = " << this->flags_->data_file_; 7107- MS_LOG(INFO) << "exportDataFile = " << this->flags_->export_file_; 7108- MS_LOG(INFO) << "enableFp16 = " << this->flags_->enable_fp16_; 7109- MS_LOG(INFO) << "virtualBatch = " << this->flags_->virtual_batch_; 7110- 7111- if (this->flags_->epochs_ < 0) { 7112- MS_LOG(ERROR) << "epochs:" << this->flags_->epochs_ << " must be equal/greater than 0"; 7113- std::cerr << "epochs:" << this->flags_->epochs_ << " must be equal/greater than 0" << std::endl; 7114- return RET_ERROR; 7115- } 7116- 7117- if (this->flags_->num_threads_ < 1) { 7118- MS_LOG(ERROR) << "numThreads:" << this->flags_->num_threads_ << " must be greater than 0"; 7119- std::cerr << "numThreads:" << this->flags_->num_threads_ << " must be greater than 0" << std::endl; 7120- return RET_ERROR; 7121- } 7122- 7123- this->flags_->in_data_type_ = this->flags_->in_data_type_in_ == "img" ? 
kImage : kBinary; 7124- 7125- if (flags_->in_data_file_.empty() && !flags_->data_file_.empty()) { 7126- MS_LOG(ERROR) << "expectedDataFile not supported in case that inDataFile is not provided"; 7127- std::cerr << "expectedDataFile is not supported in case that inDataFile is not provided" << std::endl; 7128- return RET_ERROR; 7129- } 7130- 7131- if (flags_->in_data_file_.empty() && !flags_->export_file_.empty()) { 7132- MS_LOG(ERROR) << "exportDataFile not supported in case that inDataFile is not provided"; 7133- std::cerr << "exportDataFile is not supported in case that inDataFile is not provided" << std::endl; 7134- return RET_ERROR; 7135- } 7136- 7137- if (flags_->model_file_.empty()) { 7138- MS_LOG(ERROR) << "modelPath is required"; 7139- std::cerr << "modelPath is required" << std::endl; 7140- return 1; 7141- } 7142- 7143- // get dump data output path 7144- auto dump_cfg_path = std::getenv(dump::kConfigPath); 7145- if (dump_cfg_path != nullptr) { 7146- flags_->dump_tensor_data_ = true; 7147- if (InitDumpConfigFromJson(dump_cfg_path) != RET_OK) { 7148- MS_LOG(ERROR) << "parse dump config file failed."; 7149- return RET_ERROR; 7150- } 7151- } else { 7152- MS_LOG(INFO) << "No MINDSPORE_DUMP_CONFIG in env, don't need to dump data"; 7153- } 7154- 7155- auto status = InitCallbackParameter(); 7156- if (status != RET_OK) { 7157- MS_LOG(ERROR) << "Init callback Parameter failed."; 7158- std::cerr << "Init callback Parameter failed." << std::endl; 7159- return RET_ERROR; 7160- } 7161- 7162- flags_->InitResizeDimsList(); 7163- if (!flags_->resize_dims_.empty() && !flags_->input_data_list_.empty() && 7164- flags_->resize_dims_.size() != flags_->input_data_list_.size()) { 7165- MS_LOG(ERROR) << "Size of input resizeDims should be equal to size of input inDataPath"; 7166- std::cerr << "Size of input resizeDims should be equal to size of input inDataPath" << std::endl; 7167- return RET_ERROR; 7168- } 7169- return RET_OK; 7170-} 7171- 7172-namespace { 7173-constexpr int kNumToPrint = 5; 7174-} 7175- 7176-int NetTrain::InitDumpConfigFromJson(std::string path) { 7177- auto real_path = RealPath(path.c_str()); 7178- std::ifstream ifs(real_path); 7179- if (!ifs.good()) { 7180- MS_LOG(ERROR) << "file: " << real_path << " is not exist"; 7181- return RET_ERROR; 7182- } 7183- if (!ifs.is_open()) { 7184- MS_LOG(ERROR) << "file: " << real_path << " open failed"; 7185- return RET_ERROR; 7186- } 7187- 7188- try { 7189- dump_cfg_json_ = nlohmann::json::parse(ifs); 7190- } catch (const nlohmann::json::parse_error &error) { 7191- MS_LOG(ERROR) << "parse json file failed, please check your file."; 7192- return RET_ERROR; 7193- } 7194- if (dump_cfg_json_[dump::kSettings] == nullptr) { 7195- MS_LOG(ERROR) << "\"common_dump_settings\" is required."; 7196- return RET_ERROR; 7197- } 7198- if (dump_cfg_json_[dump::kSettings][dump::kMode] == nullptr) { 7199- MS_LOG(ERROR) << "\"dump_mode\" is required."; 7200- return RET_ERROR; 7201- } 7202- if (dump_cfg_json_[dump::kSettings][dump::kPath] == nullptr) { 7203- MS_LOG(ERROR) << "\"path\" is required."; 7204- return RET_ERROR; 7205- } 7206- if (dump_cfg_json_[dump::kSettings][dump::kNetName] == nullptr) { 7207- dump_cfg_json_[dump::kSettings][dump::kNetName] = "default"; 7208- } 7209- if (dump_cfg_json_[dump::kSettings][dump::kInputOutput] == nullptr) { 7210- dump_cfg_json_[dump::kSettings][dump::kInputOutput] = 0; 7211- } 7212- if (dump_cfg_json_[dump::kSettings][dump::kKernels] != nullptr && 7213- !dump_cfg_json_[dump::kSettings][dump::kKernels].empty()) { 7214- if 
(dump_cfg_json_[dump::kSettings][dump::kMode] == 0) { 7215- MS_LOG(ERROR) << R"("dump_mode" should be 1 when "kernels" isn't empty.)"; 7216- return RET_ERROR; 7217- } 7218- } 7219- 7220- auto abs_path = dump_cfg_json_[dump::kSettings][dump::kPath].get<std::string>(); 7221- auto net_name = dump_cfg_json_[dump::kSettings][dump::kNetName].get<std::string>(); 7222- if (abs_path.back() == '\\' || abs_path.back() == '/') { 7223- dump_file_output_dir_ = abs_path + net_name; 7224- } else { 7225-#ifdef _WIN32 7226- dump_file_output_dir_ = abs_path + "\\" + net_name; 7227-#else 7228- dump_file_output_dir_ = abs_path + "/" + net_name; 7229-#endif 7230- } 7231- 7232- auto status = CreateOutputDir(&dump_file_output_dir_); 7233- if (status != RET_OK) { 7234- MS_LOG(ERROR) << "create data output directory failed."; 7235- return RET_ERROR; 7236- } 7237- return RET_OK; 7238-} 7239- 7240 int NetTrain::PrintResult(const std::vector<std::string> &title, 7241 const std::map<std::string, std::pair<int, float>> &result) { 7242 std::vector<size_t> columnLenMax(kFieldsToPrint); 7243@@ -1035,7 +663,7 @@ int NetTrain::PrintResult(const std::vector<std::string> &title, 7244 } 7245 7246 printf("-------------------------------------------------------------------------\n"); 7247- for (int i = 0; i < kNumToPrint; i++) { 7248+ for (int i = 0; i < kFieldsToPrint; i++) { 7249 auto printBuf = title[i]; 7250 if (printBuf.size() > columnLenMax.at(i)) { 7251 columnLenMax.at(i) = printBuf.size(); 7252@@ -1045,7 +673,7 @@ int NetTrain::PrintResult(const std::vector<std::string> &title, 7253 } 7254 printf("\n"); 7255 for (auto &row : rows) { 7256- for (int j = 0; j < kNumToPrint; j++) { 7257+ for (int j = 0; j < kFieldsToPrint; j++) { 7258 auto printBuf = row[j]; 7259 printBuf.resize(columnLenMax.at(j), ' '); 7260 printf("%s\t", printBuf.c_str()); 7261@@ -1054,47 +682,5 @@ int NetTrain::PrintResult(const std::vector<std::string> &title, 7262 } 7263 return RET_OK; 7264 } 7265- 7266-int RunNetTrain(int argc, const char **argv) { 7267- NetTrainFlags flags; 7268- Option<std::string> err = flags.ParseFlags(argc, argv); 7269- 7270- if (err.IsSome()) { 7271- std::cerr << err.Get() << std::endl; 7272- std::cerr << flags.Usage() << std::endl; 7273- return RET_ERROR; 7274- } 7275- 7276- if (flags.help) { 7277- std::cerr << flags.Usage() << std::endl; 7278- return RET_OK; 7279- } 7280- if (flags.unified_api_) { 7281- return NetTrain::RunNr(&flags); 7282- } 7283- NetTrain net_trainer(&flags); 7284- auto status = net_trainer.Init(); 7285- if (status != RET_OK) { 7286- MS_LOG(ERROR) << "NetTrain init Error : " << status; 7287- std::cerr << "NetTrain init Error : " << status << std::endl; 7288- return RET_ERROR; 7289- } 7290- 7291- status = net_trainer.RunNetTrain(); 7292- if (status != RET_OK) { 7293- MS_LOG(ERROR) << "Run NetTrain " 7294- << flags.model_file_.substr(flags.model_file_.find_last_of(DELIM_SLASH) + 1).c_str() 7295- << " Failed : " << status; 7296- std::cerr << "Run NetTrain " << flags.model_file_.substr(flags.model_file_.find_last_of(DELIM_SLASH) + 1).c_str() 7297- << " Failed : " << status << std::endl; 7298- return RET_ERROR; 7299- } 7300- 7301- MS_LOG(INFO) << "Run NetTrain " << flags.model_file_.substr(flags.model_file_.find_last_of(DELIM_SLASH) + 1).c_str() 7302- << " Success."; 7303- std::cout << "Run NetTrain " << flags.model_file_.substr(flags.model_file_.find_last_of(DELIM_SLASH) + 1).c_str() 7304- << " Success." 
<< std::endl; 7305- return RET_OK; 7306-} 7307 } // namespace lite 7308 } // namespace mindspore 7309diff --git a/mindspore/lite/tools/benchmark_train/net_train.h b/mindspore/lite/tools/benchmark_train/net_train.h 7310index 67e58a04..bdf0ec88 100644 7311--- a/mindspore/lite/tools/benchmark_train/net_train.h 7312+++ b/mindspore/lite/tools/benchmark_train/net_train.h 7313@@ -42,183 +42,22 @@ 7314 #include "tools/common/flag_parser.h" 7315 #include "src/common/file_utils.h" 7316 #include "src/common/utils.h" 7317- 7318-#ifdef ENABLE_FP16 7319-static __attribute__((always_inline)) inline bool MS_ISNAN_FP16(float16_t var) { 7320- volatile float16_t d = var; 7321- return d != d; 7322-} 7323-#endif 7324+#include "tools/benchmark_train/net_train_base.h" 7325 7326 namespace mindspore::lite { 7327-enum MS_API DataType { kImage = 0, kBinary = 1 }; 7328- 7329-constexpr float relativeTolerance = 1e-5; 7330-constexpr float absoluteTolerance = 1e-8; 7331 extern const std::unordered_map<int, std::string> kTypeIdMap; 7332 extern const std::unordered_map<mindspore::Format, std::string> kTensorFormatMap; 7333 7334-namespace dump { 7335-constexpr auto kConfigPath = "MINDSPORE_DUMP_CONFIG"; 7336-constexpr auto kSettings = "common_dump_settings"; 7337-constexpr auto kMode = "dump_mode"; 7338-constexpr auto kPath = "path"; 7339-constexpr auto kNetName = "net_name"; 7340-constexpr auto kInputOutput = "input_output"; 7341-constexpr auto kKernels = "kernels"; 7342-} // namespace dump 7343- 7344-template <typename T> 7345-float TensorSum(const void *data, int size) { 7346- const T *typed_data = reinterpret_cast<const T *>(data); 7347- float sum = 0.f; 7348- for (int i = 0; i < size; i++) { 7349- sum += static_cast<float>(typed_data[i]); 7350- } 7351- return sum; 7352-} 7353- 7354-class MS_API NetTrainFlags : public virtual FlagParser { 7355+class MS_API NetTrain : public NetTrainBase { 7356 public: 7357- NetTrainFlags() { 7358- // common 7359- AddFlag(&NetTrainFlags::model_file_, "modelFile", "Input model file", ""); 7360- AddFlag(&NetTrainFlags::bb_model_file_, "bbModelFile", "Backboine model for transfer session", ""); 7361- AddFlag(&NetTrainFlags::in_data_file_, "inDataFile", "Input data file, if not set, use random input", ""); 7362- // MarkPerformance 7363- AddFlag(&NetTrainFlags::warm_up_loop_count_, "warmUpLoopCount", "Run warm up loop", 0); 7364- AddFlag(&NetTrainFlags::time_profiling_, "timeProfiling", "Run time profiling", false); 7365- AddFlag(&NetTrainFlags::epochs_, "epochs", "Number of training epochs to run", 1); 7366- AddFlag(&NetTrainFlags::num_threads_, "numThreads", "Run threads number", 1); 7367- // MarkAccuracy 7368- AddFlag(&NetTrainFlags::data_file_, "expectedDataFile", "Expected results data file path", ""); 7369- AddFlag(&NetTrainFlags::export_file_, "exportFile", "MS File to export trained model into", ""); 7370- AddFlag(&NetTrainFlags::accuracy_threshold_, "accuracyThreshold", "Threshold of accuracy", 0.5); 7371- AddFlag(&NetTrainFlags::layer_checksum_, "layerCheckSum", "layer output checksum print (debug)", false); 7372- AddFlag(&NetTrainFlags::enable_fp16_, "enableFp16", "Enable float16", false); 7373- AddFlag(&NetTrainFlags::loss_name_, "lossName", "loss layer name", ""); 7374- AddFlag(&NetTrainFlags::inference_file_, "inferenceFile", "MS file to export inference model", ""); 7375- AddFlag(&NetTrainFlags::virtual_batch_, "virtualBatch", "use virtual batch", false); 7376- AddFlag(&NetTrainFlags::resize_dims_in_, "inputShapes", 7377- "Shape of input data, the format should be NHWC. e.g. 
1,32,32,32:1,1,32,32,1", ""); 7378- AddFlag(&NetTrainFlags::unified_api_, "unifiedApi", "do unified api test", false); 7379- } 7380- 7381- ~NetTrainFlags() override = default; 7382- void InitResizeDimsList(); 7383+ explicit NetTrain(NetTrainFlags *flags) : NetTrainBase(flags) {} 7384+ virtual ~NetTrain() {} 7385 7386- public: 7387- // common 7388- std::string model_file_; 7389- std::string in_data_file_; 7390- std::string bb_model_file_; 7391- std::vector<std::string> input_data_list_; 7392- DataType in_data_type_; 7393- std::string in_data_type_in_ = "bin"; 7394- int cpu_bind_mode_ = 1; 7395- bool enable_fp16_ = false; 7396- bool virtual_batch_ = false; 7397- // MarkPerformance 7398- int num_threads_ = 1; 7399- int warm_up_loop_count_ = 0; 7400- bool time_profiling_; 7401- int epochs_ = 1; 7402- // MarkAccuracy 7403- std::string data_file_; 7404- std::string data_type_ = "FLOAT"; 7405- float accuracy_threshold_; 7406- // Resize 7407- std::string export_file_ = ""; 7408- std::string resize_dims_in_ = ""; 7409- bool layer_checksum_ = false; 7410- std::vector<std::vector<int>> resize_dims_; 7411- std::string loss_name_ = ""; 7412- std::string inference_file_ = ""; 7413- bool unified_api_ = false; 7414- bool dump_tensor_data_ = false; 7415-}; 7416- 7417-class MS_API NetTrain { 7418- public: 7419- explicit NetTrain(NetTrainFlags *flags) : flags_(flags) {} 7420- virtual ~NetTrain() = default; 7421- 7422- int Init(); 7423- int RunNetTrain(); 7424- static float *ReadFileBuf(const std::string file, size_t *size); 7425- static int SetNr(std::function<int(NetTrainFlags *)> param); 7426- static int RunNr(NetTrainFlags *flags) { 7427- if (nr_cb_ != nullptr) { 7428- return nr_cb_(flags); 7429- } 7430- MS_LOG(WARNING) << "unified api was not tested"; 7431- std::cout << "unified api was not tested"; 7432- return RET_OK; 7433- } 7434- // tensorData need to be converter first 7435- template <typename T> 7436- static float CompareData(const float *refOutput, int size, const T *msTensorData) { 7437- size_t errorCount = 0; 7438- float meanError = 0; 7439- std::cout << "Out tensor size is: " << size << std::endl; 7440- std::cout << "Data of model output: "; 7441- for (int j = 0; j < std::min(50, size); j++) { 7442- std::cout << static_cast<float>(msTensorData[j]) << " "; 7443- } 7444- std::cout << std::endl; 7445- std::cout << "Data of Ref output : "; 7446- for (int j = 0; j < std::min(50, size); j++) { 7447- std::cout << refOutput[j] << " "; 7448- } 7449- std::cout << std::endl; 7450- for (int j = 0; j < size; j++) { 7451- if (std::isnan(msTensorData[j]) || std::isinf(msTensorData[j])) { 7452- std::cerr << "Output tensor has nan or inf data, compare fail" << std::endl; 7453- MS_LOG(ERROR) << "Output tensor has nan or inf data, compare fail"; 7454- return RET_ERROR; 7455- } 7456- 7457- auto tolerance = absoluteTolerance + relativeTolerance * fabs(refOutput[j]); 7458- auto absoluteError = std::fabs(static_cast<float>(msTensorData[j]) - refOutput[j]); 7459- if (absoluteError > tolerance) { 7460- if (fabs(refOutput[j]) == 0) { 7461- if (absoluteError > 1e-5) { 7462- meanError += absoluteError; 7463- errorCount++; 7464- } else { 7465- continue; 7466- } 7467- } else { 7468- // just assume that atol = rtol 7469- meanError += absoluteError / (fabs(refOutput[j]) + FLT_MIN); 7470- errorCount++; 7471- } 7472- } 7473- } 7474- std::cout << std::endl; 7475- if (meanError > 0.0f) { 7476- meanError /= errorCount; 7477- } 7478- 7479- if (meanError <= 0.0000001) { 7480- std::cout << "Mean bias of tensor: 0%" << std::endl; 
7481- } else { 7482- std::cout << "Mean bias of tensor: " << meanError * 100 << "%" << std::endl; 7483- } 7484- return meanError; 7485- } 7486- int InitDumpConfigFromJson(std::string path); 7487- 7488- private: 7489- // call GenerateInputData or ReadInputFile to init inputTensors 7490- int LoadInput(); 7491- void CheckSum(MSTensor *tensor, const std::string &node_type, int id, const std::string &in_out); 7492+ protected: 7493 // call GenerateRandomData to fill inputTensors 7494- int GenerateInputData(); 7495+ int GenerateInputData() override; 7496 7497- int GenerateRandomData(mindspore::MSTensor *tensor); 7498- 7499- int ReadInputFile(); 7500+ int ReadInputFile() override; 7501 7502 int LoadStepInput(size_t step); 7503 7504@@ -227,20 +66,19 @@ class MS_API NetTrain { 7505 void InitTrainCfg(const std::shared_ptr<TrainCfg> &train_cfg); 7506 7507 int CreateAndRunNetwork(const std::string &filename, const std::string &bb_filename, bool is_train, int epochs, 7508- bool check_accuracy = true); 7509+ bool check_accuracy = true) override; 7510 7511 int CreateAndRunNetworkForInference(const std::string &filename, const std::shared_ptr<mindspore::Context> &context); 7512 7513 int CreateAndRunNetworkForTrain(const std::string &filename, const std::string &bb_filename, 7514 const std::shared_ptr<mindspore::Context> &context, 7515 const std::shared_ptr<TrainCfg> &train_cfg, int epochs); 7516- int InitCallbackParameter(); 7517 7518- int InitDumpTensorDataCallbackParameter(); 7519+ int InitDumpTensorDataCallbackParameter() override; 7520 7521- int InitTimeProfilingCallbackParameter(); 7522+ int InitTimeProfilingCallbackParameter() override; 7523 7524- int PrintResult(const std::vector<std::string> &title, const std::map<std::string, std::pair<int, float>> &result); 7525+ int PrintResult(const std::vector<std::string> &title, const std::map<std::string, std::pair<int, float>> &result) override; 7526 7527 template <typename T> 7528 void PrintInputData(mindspore::MSTensor *input) { 7529@@ -256,39 +94,11 @@ class MS_API NetTrain { 7530 std::cout << std::endl; 7531 } 7532 7533- template <typename T> 7534- std::vector<int64_t> ConverterToInt64Vector(const std::vector<T> &srcDims) { 7535- std::vector<int64_t> dims; 7536- for (auto shape : srcDims) { 7537- dims.push_back(static_cast<int64_t>(shape)); 7538- } 7539- return dims; 7540- } 7541- int MarkPerformance(); 7542- int MarkAccuracy(bool enforce_accuracy = true); 7543- int CompareOutput(); 7544- int SaveModels(); 7545- int CheckExecutionOfSavedModels(); 7546- void TensorNan(const float *data, int size) { 7547- for (int i = 0; i < size; i++) { 7548- if (std::isnan(data[i])) { 7549- std::cout << "nan value of index=" << i << ", " << data[i] << std::endl; 7550- break; 7551- } 7552- } 7553- } 7554-#ifdef ENABLE_FP16 7555- void TensorNan(float16_t *data, int size) { 7556- for (int i = 0; i < size; i++) { 7557- if (MS_ISNAN_FP16(data[i]) || std::isinf(data[i])) { 7558- std::cout << "nan or inf value of index=" << i << ", " << data[i] << std::endl; 7559- break; 7560- } 7561- } 7562- } 7563-#endif 7564- NetTrainFlags *flags_{nullptr}; 7565- static std::function<int(NetTrainFlags *)> nr_cb_; 7566+ int MarkPerformance() override; 7567+ int MarkAccuracy(bool enforce_accuracy = true) override; 7568+ int CompareOutput() override; 7569+ int SaveModels() override; 7570+ 7571 // callback parameters 7572 uint64_t op_begin_ = 0; 7573 int op_call_times_total_ = 0; 7574@@ -301,13 +111,6 @@ class MS_API NetTrain { 7575 7576 mindspore::MSKernelCallBack before_call_back_{nullptr}; 
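For illustration only — a minimal sketch (class name hypothetical, not from the patch) of the override surface a concrete runner implements after this split into NetTrainBase and backend subclasses; net_train_c_api.cc further below does the same for the OH_AI C API:

#include "tools/benchmark_train/net_train_base.h"

namespace mindspore::lite {
// Hypothetical backend, shown only to make the NetTrainBase hook set explicit.
class NetTrainDemoBackend : public NetTrainBase {
 public:
  explicit NetTrainDemoBackend(NetTrainFlags *flags) : NetTrainBase(flags) {}
  ~NetTrainDemoBackend() override = default;

 protected:
  int GenerateInputData() override { return RET_OK; }
  int ReadInputFile() override { return RET_OK; }
  int CreateAndRunNetwork(const std::string &filename, const std::string &bb_filename, bool is_train, int epochs,
                          bool check_accuracy = true) override { return RET_OK; }
  int InitDumpTensorDataCallbackParameter() override { return RET_OK; }
  int InitTimeProfilingCallbackParameter() override { return RET_OK; }
  int PrintResult(const std::vector<std::string> &title,
                  const std::map<std::string, std::pair<int, float>> &result) override { return RET_OK; }
  int MarkPerformance() override { return RET_OK; }
  int MarkAccuracy(bool enforce_accuracy = true) override { return RET_OK; }
  int CompareOutput() override { return RET_OK; }
  int SaveModels() override { return RET_OK; }
};
}  // namespace mindspore::lite

A driver would construct such a backend with parsed NetTrainFlags and call Init() followed by RunNetTrain(), both of which remain non-virtual on NetTrainBase.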
7577 mindspore::MSKernelCallBack after_call_back_{nullptr}; 7578- nlohmann::json dump_cfg_json_; 7579- std::string dump_file_output_dir_; 7580- std::vector<std::shared_ptr<char>> inputs_buf_; 7581- std::vector<size_t> inputs_size_; 7582- size_t batch_num_ = 0; 7583 }; 7584- 7585-int MS_API RunNetTrain(int argc, const char **argv); 7586 } // namespace mindspore::lite 7587 #endif // MINDSPORE_LITE_TOOLS_BENCHMARK_TRAIN_NET_TRAIN_H_ 7588diff --git a/mindspore/lite/tools/benchmark_train/net_train_base.cc b/mindspore/lite/tools/benchmark_train/net_train_base.cc 7589new file mode 100644 7590index 00000000..8d3c75de 7591--- /dev/null 7592+++ b/mindspore/lite/tools/benchmark_train/net_train_base.cc 7593@@ -0,0 +1,410 @@ 7594+/** 7595+ * Copyright 2020-2023 Huawei Technologies Co., Ltd 7596+ * 7597+ * Licensed under the Apache License, Version 2.0 (the "License"); 7598+ * you may not use this file except in compliance with the License. 7599+ * You may obtain a copy of the License at 7600+ * 7601+ * http://www.apache.org/licenses/LICENSE-2.0 7602+ * 7603+ * Unless required by applicable law or agreed to in writing, software 7604+ * distributed under the License is distributed on an "AS IS" BASIS, 7605+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 7606+ * See the License for the specific language governing permissions and 7607+ * limitations under the License. 7608+ */ 7609+ 7610+#include "tools/benchmark_train/net_train_base.h" 7611+#define __STDC_FORMAT_MACROS 7612+#undef __STDC_FORMAT_MACROS 7613+#include <algorithm> 7614+#include <cstring> 7615+#ifdef ENABLE_NEON 7616+#include <arm_neon.h> 7617+#endif 7618+#include "src/common/common.h" 7619+#include "include/api/serialization.h" 7620+ 7621+namespace mindspore { 7622+namespace lite { 7623+const std::unordered_map<int, std::string> kTypeIdMap{ 7624+ {kNumberTypeFloat16, "Float16"}, {kNumberTypeFloat, "Float32"}, {kNumberTypeFloat32, "Float32"}, 7625+ {kNumberTypeInt8, "Int8"}, {kNumberTypeInt16, "Int16"}, {kNumberTypeInt, "Int32"}, 7626+ {kNumberTypeInt32, "Int32"}, {kNumberTypeUInt8, "UInt8"}, {kNumberTypeUInt16, "UInt16"}, 7627+ {kNumberTypeUInt, "UInt32"}, {kNumberTypeUInt32, "UInt32"}, {kObjectTypeString, "String"}, 7628+ {kNumberTypeBool, "Bool"}, {kObjectTypeTensorType, "Tensor"}}; 7629+ 7630+const std::unordered_map<mindspore::Format, std::string> kTensorFormatMap{ 7631+ {mindspore::NCHW, "NCHW"}, {mindspore::NHWC, "NHWC"}, {mindspore::NHWC4, "NHWC4"}, {mindspore::HWKC, "HWKC"}, 7632+ {mindspore::HWCK, "HWCK"}, {mindspore::KCHW, "KCHW"}, {mindspore::CKHW, "CKHW"}, {mindspore::KHWC, "KHWC"}, 7633+ {mindspore::CHWK, "CHWK"}, {mindspore::HW, "HW"}, {mindspore::HW4, "HW4"}, {mindspore::NC, "NC"}, 7634+ {mindspore::NC4, "NC4"}, {mindspore::NC4HW4, "NC4HW4"}, {mindspore::NCDHW, "NCDHW"}}; 7635+ 7636+std::function<int(NetTrainFlags *)> NetTrainBase::nr_cb_ = nullptr; 7637+ 7638+int NetTrainBase::SetNr(std::function<int(NetTrainFlags *)> param) { 7639+ nr_cb_ = param; 7640+ return 0; 7641+} 7642+ 7643+float *NetTrainBase::ReadFileBuf(const std::string file, size_t *size) { 7644+ if (file.empty()) { 7645+ MS_LOG(ERROR) << "file is nullptr"; 7646+ return nullptr; 7647+ } 7648+ MS_ASSERT(size != nullptr); 7649+ std::string real_path = RealPath(file.c_str()); 7650+ std::ifstream ifs(real_path); 7651+ if (!ifs.good()) { 7652+ MS_LOG(ERROR) << "file: " << real_path << " is not exist"; 7653+ return nullptr; 7654+ } 7655+ 7656+ if (!ifs.is_open()) { 7657+ MS_LOG(ERROR) << "file: " << real_path << " open failed"; 7658+ return 
nullptr; 7659+ } 7660+ 7661+ ifs.seekg(0, std::ios::end); 7662+ *size = ifs.tellg(); 7663+ std::unique_ptr<float[]> buf = std::make_unique<float[]>(*size / sizeof(float) + 1); 7664+ if (buf == nullptr) { 7665+ MS_LOG(ERROR) << "malloc buf failed, file: " << real_path; 7666+ ifs.close(); 7667+ return nullptr; 7668+ } 7669+ 7670+ ifs.seekg(0, std::ios::beg); 7671+ ifs.read(reinterpret_cast<char *>(buf.get()), *size); 7672+ ifs.close(); 7673+ 7674+ return buf.release(); 7675+} 7676+ 7677+int NetTrainBase::GenerateRandomData(mindspore::MSTensor *tensor) { 7678+ auto input_data = tensor->MutableData(); 7679+ if (input_data == nullptr) { 7680+ MS_LOG(ERROR) << "MallocData for inTensor failed"; 7681+ return RET_ERROR; 7682+ } 7683+ auto tensor_byte_size = tensor->DataSize(); 7684+ char *casted_data = static_cast<char *>(input_data); 7685+ for (size_t i = 0; i < tensor_byte_size; i++) { 7686+ casted_data[i] = 7687+ (tensor->DataType() == mindspore::DataType::kNumberTypeFloat32) ? static_cast<char>(i) : static_cast<char>(0); 7688+ } 7689+ return RET_OK; 7690+} 7691+ 7692+int NetTrainBase::LoadInput() { 7693+ inputs_buf_.clear(); 7694+ inputs_size_.clear(); 7695+ batch_num_ = 0; 7696+ if (flags_->in_data_file_.empty()) { 7697+ auto status = GenerateInputData(); 7698+ if (status != RET_OK) { 7699+ std::cerr << "Generate input data error " << status << std::endl; 7700+ MS_LOG(ERROR) << "Generate input data error " << status; 7701+ return status; 7702+ } 7703+ } else { 7704+ auto status = ReadInputFile(); 7705+ if (status != RET_OK) { 7706+ std::cerr << "Read Input File error, " << status << std::endl; 7707+ MS_LOG(ERROR) << "Read Input File error, " << status; 7708+ return status; 7709+ } 7710+ } 7711+ return RET_OK; 7712+} 7713+ 7714+int NetTrainBase::RunNetTrain() { 7715+ auto file_name = flags_->model_file_.substr(flags_->model_file_.find_last_of(DELIM_SLASH) + 1); 7716+ bool is_train = (file_name.find("train") != std::string::npos) || !flags_->bb_model_file_.empty(); 7717+ auto status = CreateAndRunNetwork(flags_->model_file_, flags_->bb_model_file_, is_train, flags_->epochs_); 7718+ if (status != RET_OK) { 7719+ MS_LOG(ERROR) << "CreateAndRunNetwork failed for model " << flags_->model_file_ << ". Status is " << status; 7720+ std::cout << "CreateAndRunNetwork failed for model " << flags_->model_file_ << ". 
Status is " << status 7721+ << std::endl; 7722+ return status; 7723+ } 7724+ 7725+ status = CheckExecutionOfSavedModels(); // re-initialize sessions according to flags 7726+ if (status != RET_OK) { 7727+ MS_LOG(ERROR) << "Run CheckExecute error: " << status; 7728+ std::cout << "Run CheckExecute error: " << status << std::endl; 7729+ return status; 7730+ } 7731+ return RET_OK; 7732+} 7733+ 7734+int NetTrainBase::CheckExecutionOfSavedModels() { 7735+ int status = RET_OK; 7736+ if (!flags_->export_file_.empty()) { 7737+ status = CreateAndRunNetwork(flags_->export_file_, flags_->bb_model_file_, true, 0); 7738+ if (status != RET_OK) { 7739+ MS_LOG(ERROR) << "Run Exported model " << flags_->export_file_ << " error: " << status; 7740+ std::cout << "Run Exported model " << flags_->export_file_ << " error: " << status << std::endl; 7741+ return status; 7742+ } 7743+ if (flags_->bb_model_file_.empty()) { 7744+ status = CreateAndRunNetwork(flags_->export_file_ + "_qt", "", true, 0, false); 7745+ if (status != RET_OK) { 7746+ MS_LOG(ERROR) << "Run Exported model " << flags_->export_file_ << "_qt.ms error: " << status; 7747+ std::cout << "Run Exported model " << flags_->export_file_ << "_qt.ms error: " << status << std::endl; 7748+ return status; 7749+ } 7750+ } 7751+ } 7752+ if (!flags_->inference_file_.empty()) { 7753+ status = CreateAndRunNetwork(flags_->inference_file_, "", false, 0); 7754+ if (status != RET_OK) { 7755+ MS_LOG(ERROR) << "Running saved model " << flags_->inference_file_ << ".ms error: " << status; 7756+ std::cout << "Running saved model " << flags_->inference_file_ << ".ms error: " << status << std::endl; 7757+ return status; 7758+ } 7759+ status = CreateAndRunNetwork(flags_->inference_file_ + "_qt", "", false, 0, false); 7760+ if (status != RET_OK) { 7761+ MS_LOG(ERROR) << "Running saved model " << flags_->inference_file_ << "_qt.ms error: " << status; 7762+ std::cout << "Running saved model " << flags_->inference_file_ << "_qt.ms error: " << status << std::endl; 7763+ return status; 7764+ } 7765+ } 7766+ return status; 7767+} 7768+ 7769+void NetTrainBase::CheckSum(MSTensor *tensor, const std::string &node_type, int id, const std::string &in_out) { 7770+ if (tensor == nullptr) { 7771+ MS_LOG(ERROR) << "input tensor is nullptr."; 7772+ return; 7773+ } 7774+ int tensor_size = tensor->ElementNum(); 7775+ void *data = tensor->MutableData(); 7776+ auto *fdata = reinterpret_cast<float *>(tensor->MutableData()); 7777+ auto type = tensor->DataType(); 7778+ std::cout << node_type << " " << in_out << id << " shape=" << tensor->Shape() << " sum="; 7779+ switch (type) { 7780+ case mindspore::DataType::kNumberTypeFloat32: 7781+ TensorNan(reinterpret_cast<float *>(data), tensor_size); 7782+ std::cout << TensorSum<float>(data, tensor_size) << std::endl; 7783+ std::cout << "tensor name: " << tensor->Name() << std::endl; 7784+ std::cout << "data: "; 7785+ for (int i = 0; i <= kPrintOffset && i < tensor_size; i++) { 7786+ std::cout << static_cast<float>(fdata[i]) << ", "; 7787+ } 7788+ std::cout << std::endl; 7789+ break; 7790+ case mindspore::DataType::kNumberTypeInt32: 7791+ std::cout << TensorSum<int>(data, tensor_size) << std::endl; 7792+ break; 7793+#ifdef ENABLE_FP16 7794+ case mindspore::DataType::kNumberTypeFloat16: 7795+ std::cout << TensorSum<float16_t>(data, tensor_size) << std::endl; 7796+ TensorNan(reinterpret_cast<float16_t *>(data), tensor_size); 7797+ break; 7798+#endif 7799+ default: 7800+ std::cout << "unsupported type:" << static_cast<int>(type) << std::endl; 7801+ break; 7802+ } 
7803+} 7804+ 7805+std::string NetTrainBase::GenerateOutputFileName(mindspore::MSTensor *tensor, const std::string &op_name, 7806+ const std::string &file_type, const size_t &idx) { 7807+ std::string file_name = op_name; 7808+ auto pos = file_name.find_first_of('/'); 7809+ while (pos != std::string::npos) { 7810+ file_name.replace(pos, 1, "."); 7811+ pos = file_name.find_first_of('/'); 7812+ } 7813+ file_name += "_" + file_type + "_" + std::to_string(idx) + "_shape_"; 7814+ for (const auto &dim : tensor->Shape()) { 7815+ file_name += std::to_string(dim) + "_"; 7816+ } 7817+ if (kTypeIdMap.find(static_cast<int>(tensor->DataType())) != kTypeIdMap.end()) { 7818+ file_name += kTypeIdMap.at(static_cast<int>(tensor->DataType())); 7819+ } 7820+ auto tensor_format = tensor->format(); 7821+ if (kTensorFormatMap.find(tensor_format) != kTensorFormatMap.end()) { 7822+ file_name += "_" + kTensorFormatMap.at(tensor_format) + ".bin"; 7823+ } 7824+ 7825+ file_name += ".bin"; 7826+ return file_name; 7827+} 7828+ 7829+int NetTrainBase::InitCallbackParameter() { 7830+ int ret = RET_OK; 7831+ if (flags_->dump_tensor_data_) { 7832+ ret = InitDumpTensorDataCallbackParameter(); 7833+ } else if (flags_->time_profiling_) { 7834+ ret = InitTimeProfilingCallbackParameter(); 7835+ } 7836+ return ret; 7837+} 7838+ 7839+void NetTrainFlags::InitResizeDimsList() { 7840+ std::string content = this->resize_dims_in_; 7841+ if (content.empty()) { 7842+ return; 7843+ } 7844+ std::vector<int> shape; 7845+ auto shape_strs = StrSplit(content, std::string(DELIM_COLON)); 7846+ for (const auto &shape_str : shape_strs) { 7847+ shape.clear(); 7848+ auto dim_strs = StrSplit(shape_str, std::string(DELIM_COMMA)); 7849+ std::cout << "Resize Dims: "; 7850+ for (const auto &dim_str : dim_strs) { 7851+ std::cout << dim_str << " "; 7852+ shape.emplace_back(static_cast<int>(std::stoi(dim_str))); 7853+ } 7854+ std::cout << std::endl; 7855+ this->resize_dims_.emplace_back(shape); 7856+ } 7857+} 7858+ 7859+int NetTrainBase::Init() { 7860+ if (this->flags_ == nullptr) { 7861+ return 1; 7862+ } 7863+ MS_LOG(INFO) << "ModelPath = " << this->flags_->model_file_; 7864+ MS_LOG(INFO) << "InDataPath = " << this->flags_->in_data_file_; 7865+ MS_LOG(INFO) << "InDataType = " << this->flags_->in_data_type_in_; 7866+ MS_LOG(INFO) << "Epochs = " << this->flags_->epochs_; 7867+ MS_LOG(INFO) << "AccuracyThreshold = " << this->flags_->accuracy_threshold_; 7868+ MS_LOG(INFO) << "WarmUpLoopCount = " << this->flags_->warm_up_loop_count_; 7869+ MS_LOG(INFO) << "NumThreads = " << this->flags_->num_threads_; 7870+ MS_LOG(INFO) << "expectedDataFile = " << this->flags_->data_file_; 7871+ MS_LOG(INFO) << "exportDataFile = " << this->flags_->export_file_; 7872+ MS_LOG(INFO) << "enableFp16 = " << this->flags_->enable_fp16_; 7873+ MS_LOG(INFO) << "virtualBatch = " << this->flags_->virtual_batch_; 7874+ 7875+ if (this->flags_->epochs_ < 0) { 7876+ MS_LOG(ERROR) << "epochs:" << this->flags_->epochs_ << " must be equal/greater than 0"; 7877+ std::cerr << "epochs:" << this->flags_->epochs_ << " must be equal/greater than 0" << std::endl; 7878+ return RET_ERROR; 7879+ } 7880+ 7881+ if (this->flags_->num_threads_ < 1) { 7882+ MS_LOG(ERROR) << "numThreads:" << this->flags_->num_threads_ << " must be greater than 0"; 7883+ std::cerr << "numThreads:" << this->flags_->num_threads_ << " must be greater than 0" << std::endl; 7884+ return RET_ERROR; 7885+ } 7886+ 7887+ this->flags_->in_data_type_ = this->flags_->in_data_type_in_ == "img" ? 
kImage : kBinary; 7888+ 7889+ if (flags_->in_data_file_.empty() && !flags_->data_file_.empty()) { 7890+ MS_LOG(ERROR) << "expectedDataFile not supported in case that inDataFile is not provided"; 7891+ std::cerr << "expectedDataFile is not supported in case that inDataFile is not provided" << std::endl; 7892+ return RET_ERROR; 7893+ } 7894+ 7895+ if (flags_->in_data_file_.empty() && !flags_->export_file_.empty()) { 7896+ MS_LOG(ERROR) << "exportDataFile not supported in case that inDataFile is not provided"; 7897+ std::cerr << "exportDataFile is not supported in case that inDataFile is not provided" << std::endl; 7898+ return RET_ERROR; 7899+ } 7900+ 7901+ if (flags_->model_file_.empty()) { 7902+ MS_LOG(ERROR) << "modelPath is required"; 7903+ std::cerr << "modelPath is required" << std::endl; 7904+ return 1; 7905+ } 7906+ 7907+ // get dump data output path 7908+ auto dump_cfg_path = std::getenv(dump::kConfigPath); 7909+ if (dump_cfg_path != nullptr) { 7910+ flags_->dump_tensor_data_ = true; 7911+ if (InitDumpConfigFromJson(dump_cfg_path) != RET_OK) { 7912+ MS_LOG(ERROR) << "parse dump config file failed."; 7913+ return RET_ERROR; 7914+ } 7915+ } else { 7916+ MS_LOG(INFO) << "No MINDSPORE_DUMP_CONFIG in env, don't need to dump data"; 7917+ } 7918+ 7919+ auto status = InitCallbackParameter(); 7920+ if (status != RET_OK) { 7921+ MS_LOG(ERROR) << "Init callback Parameter failed."; 7922+ std::cerr << "Init callback Parameter failed." << std::endl; 7923+ return RET_ERROR; 7924+ } 7925+ 7926+ flags_->InitResizeDimsList(); 7927+ if (!flags_->resize_dims_.empty() && !flags_->input_data_list_.empty() && 7928+ flags_->resize_dims_.size() != flags_->input_data_list_.size()) { 7929+ MS_LOG(ERROR) << "Size of input resizeDims should be equal to size of input inDataPath"; 7930+ std::cerr << "Size of input resizeDims should be equal to size of input inDataPath" << std::endl; 7931+ return RET_ERROR; 7932+ } 7933+ return RET_OK; 7934+} 7935+ 7936+int NetTrainBase::InitDumpConfigFromJson(std::string path) { 7937+ auto real_path = RealPath(path.c_str()); 7938+ std::ifstream ifs(real_path); 7939+ if (!ifs.good()) { 7940+ MS_LOG(ERROR) << "file: " << real_path << " is not exist"; 7941+ return RET_ERROR; 7942+ } 7943+ if (!ifs.is_open()) { 7944+ MS_LOG(ERROR) << "file: " << real_path << " open failed"; 7945+ return RET_ERROR; 7946+ } 7947+ 7948+ try { 7949+ dump_cfg_json_ = nlohmann::json::parse(ifs); 7950+ } catch (const nlohmann::json::parse_error &error) { 7951+ MS_LOG(ERROR) << "parse json file failed, please check your file."; 7952+ return RET_ERROR; 7953+ } 7954+ if (dump_cfg_json_[dump::kSettings] == nullptr) { 7955+ MS_LOG(ERROR) << "\"common_dump_settings\" is required."; 7956+ return RET_ERROR; 7957+ } 7958+ if (dump_cfg_json_[dump::kSettings][dump::kMode] == nullptr) { 7959+ MS_LOG(ERROR) << "\"dump_mode\" is required."; 7960+ return RET_ERROR; 7961+ } 7962+ if (dump_cfg_json_[dump::kSettings][dump::kPath] == nullptr) { 7963+ MS_LOG(ERROR) << "\"path\" is required."; 7964+ return RET_ERROR; 7965+ } 7966+ if (dump_cfg_json_[dump::kSettings][dump::kNetName] == nullptr) { 7967+ dump_cfg_json_[dump::kSettings][dump::kNetName] = "default"; 7968+ } 7969+ if (dump_cfg_json_[dump::kSettings][dump::kInputOutput] == nullptr) { 7970+ dump_cfg_json_[dump::kSettings][dump::kInputOutput] = 0; 7971+ } 7972+ if (dump_cfg_json_[dump::kSettings][dump::kKernels] != nullptr && 7973+ !dump_cfg_json_[dump::kSettings][dump::kKernels].empty()) { 7974+ if (dump_cfg_json_[dump::kSettings][dump::kMode] == 0) { 7975+ 
MS_LOG(ERROR) << R"("dump_mode" should be 1 when "kernels" isn't empty.)"; 7976+ return RET_ERROR; 7977+ } 7978+ } 7979+ 7980+ auto abs_path = dump_cfg_json_[dump::kSettings][dump::kPath].get<std::string>(); 7981+ auto net_name = dump_cfg_json_[dump::kSettings][dump::kNetName].get<std::string>(); 7982+ if (abs_path.back() == '\\' || abs_path.back() == '/') { 7983+ dump_file_output_dir_ = abs_path + net_name; 7984+ } else { 7985+#ifdef _WIN32 7986+ dump_file_output_dir_ = abs_path + "\\" + net_name; 7987+#else 7988+ dump_file_output_dir_ = abs_path + "/" + net_name; 7989+#endif 7990+ } 7991+ 7992+ auto status = CreateOutputDir(&dump_file_output_dir_); 7993+ if (status != RET_OK) { 7994+ MS_LOG(ERROR) << "create data output directory failed."; 7995+ return RET_ERROR; 7996+ } 7997+ return RET_OK; 7998+} 7999+ 8000+NetTrainBase:: ~NetTrainBase() { 8001+} 8002+} // namespace lite 8003+} // namespace mindspore 8004diff --git a/mindspore/lite/tools/benchmark_train/net_train_base.h b/mindspore/lite/tools/benchmark_train/net_train_base.h 8005new file mode 100644 8006index 00000000..e3d5f39a 8007--- /dev/null 8008+++ b/mindspore/lite/tools/benchmark_train/net_train_base.h 8009@@ -0,0 +1,288 @@ 8010+/** 8011+ * Copyright 2020-2023 Huawei Technologies Co., Ltd 8012+ * 8013+ * Licensed under the Apache License, Version 2.0 (the "License"); 8014+ * you may not use this file except in compliance with the License. 8015+ * You may obtain a copy of the License at 8016+ * 8017+ * http://www.apache.org/licenses/LICENSE-2.0 8018+ * 8019+ * Unless required by applicable law or agreed to in writing, software 8020+ * distributed under the License is distributed on an "AS IS" BASIS, 8021+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 8022+ * See the License for the specific language governing permissions and 8023+ * limitations under the License. 
8024+ */ 8025+ 8026+#ifndef MINDSPORE_LITE_TOOLS_BENCHMARK_TRAIN_NET_TRAIN_BASE_H_ 8027+#define MINDSPORE_LITE_TOOLS_BENCHMARK_TRAIN_NET_TRAIN_BASE_H_ 8028+ 8029+#include <getopt.h> 8030+#include <csignal> 8031+#include <unordered_map> 8032+#include <fstream> 8033+#include <iostream> 8034+#include <map> 8035+#include <cmath> 8036+#include <string> 8037+#include <vector> 8038+#include <memory> 8039+#include <cfloat> 8040+#include <utility> 8041+#include <algorithm> 8042+#include <nlohmann/json.hpp> 8043+#include "include/api/model.h" 8044+#include "include/api/types.h" 8045+#include "include/api/context.h" 8046+#include "include/api/cfg.h" 8047+ 8048+#ifdef ENABLE_FP16 8049+#include <arm_neon.h> 8050+#endif 8051+#include "tools/common/flag_parser.h" 8052+#include "src/common/file_utils.h" 8053+#include "src/common/utils.h" 8054+ 8055+#ifdef ENABLE_FP16 8056+static __attribute__((always_inline)) inline bool MS_ISNAN_FP16(float16_t var) { 8057+ volatile float16_t d = var; 8058+ return d != d; 8059+} 8060+#endif 8061+ 8062+namespace mindspore::lite { 8063+enum MS_API DataType { kImage = 0, kBinary = 1 }; 8064+ 8065+constexpr float relativeTolerance = 1e-5; 8066+constexpr float absoluteTolerance = 1e-8; 8067+extern const std::unordered_map<int, std::string> kTypeIdMap; 8068+extern const std::unordered_map<mindspore::Format, std::string> kTensorFormatMap; 8069+ 8070+constexpr const char *DELIM_SLASH = "/"; 8071+constexpr const char *DELIM_COLON = ":"; 8072+constexpr const char *DELIM_COMMA = ","; 8073+ 8074+constexpr int RET_TOO_BIG = -9; 8075+constexpr int kFieldsToPrint = 5; 8076+constexpr int kPrintOffset = 4; 8077+constexpr int kDumpInputsAndOutputs = 0; 8078+constexpr int kDumpOutputs = 2; 8079+constexpr int kTHOUSAND = 1000; 8080+ 8081+namespace dump { 8082+constexpr auto kConfigPath = "MINDSPORE_DUMP_CONFIG"; 8083+constexpr auto kSettings = "common_dump_settings"; 8084+constexpr auto kMode = "dump_mode"; 8085+constexpr auto kPath = "path"; 8086+constexpr auto kNetName = "net_name"; 8087+constexpr auto kInputOutput = "input_output"; 8088+constexpr auto kKernels = "kernels"; 8089+} // namespace dump 8090+ 8091+template <typename T> 8092+float TensorSum(const void *data, int size) { 8093+ const T *typed_data = reinterpret_cast<const T *>(data); 8094+ float sum = 0.f; 8095+ for (int i = 0; i < size; i++) { 8096+ sum += static_cast<float>(typed_data[i]); 8097+ } 8098+ return sum; 8099+} 8100+ 8101+class MS_API NetTrainFlags : public virtual FlagParser { 8102+ public: 8103+ NetTrainFlags() { 8104+ // common 8105+ AddFlag(&NetTrainFlags::model_file_, "modelFile", "Input model file", ""); 8106+ AddFlag(&NetTrainFlags::bb_model_file_, "bbModelFile", "Backboine model for transfer session", ""); 8107+ AddFlag(&NetTrainFlags::in_data_file_, "inDataFile", "Input data file, if not set, use random input", ""); 8108+ // MarkPerformance 8109+ AddFlag(&NetTrainFlags::warm_up_loop_count_, "warmUpLoopCount", "Run warm up loop", 0); 8110+ AddFlag(&NetTrainFlags::time_profiling_, "timeProfiling", "Run time profiling", false); 8111+ AddFlag(&NetTrainFlags::epochs_, "epochs", "Number of training epochs to run", 1); 8112+ AddFlag(&NetTrainFlags::num_threads_, "numThreads", "Run threads number", 1); 8113+ // MarkAccuracy 8114+ AddFlag(&NetTrainFlags::data_file_, "expectedDataFile", "Expected results data file path", ""); 8115+ AddFlag(&NetTrainFlags::export_file_, "exportFile", "MS File to export trained model into", ""); 8116+ AddFlag(&NetTrainFlags::accuracy_threshold_, "accuracyThreshold", "Threshold of 
accuracy", 0.5); 8117+ AddFlag(&NetTrainFlags::layer_checksum_, "layerCheckSum", "layer output checksum print (debug)", false); 8118+ AddFlag(&NetTrainFlags::enable_fp16_, "enableFp16", "Enable float16", false); 8119+ AddFlag(&NetTrainFlags::loss_name_, "lossName", "loss layer name", ""); 8120+ AddFlag(&NetTrainFlags::inference_file_, "inferenceFile", "MS file to export inference model", ""); 8121+ AddFlag(&NetTrainFlags::virtual_batch_, "virtualBatch", "use virtual batch", false); 8122+ AddFlag(&NetTrainFlags::resize_dims_in_, "inputShapes", 8123+ "Shape of input data, the format should be NHWC. e.g. 1,32,32,32:1,1,32,32,1", ""); 8124+ AddFlag(&NetTrainFlags::unified_api_, "unifiedApi", "do unified api test", false); 8125+ } 8126+ 8127+ ~NetTrainFlags() override = default; 8128+ void InitResizeDimsList(); 8129+ 8130+ public: 8131+ // common 8132+ std::string model_file_; 8133+ std::string in_data_file_; 8134+ std::string bb_model_file_; 8135+ std::vector<std::string> input_data_list_; 8136+ DataType in_data_type_; 8137+ std::string in_data_type_in_ = "bin"; 8138+ int cpu_bind_mode_ = 1; 8139+ bool enable_fp16_ = false; 8140+ bool virtual_batch_ = false; 8141+ // MarkPerformance 8142+ int num_threads_ = 1; 8143+ int warm_up_loop_count_ = 0; 8144+ bool time_profiling_; 8145+ int epochs_ = 1; 8146+ // MarkAccuracy 8147+ std::string data_file_; 8148+ std::string data_type_ = "FLOAT"; 8149+ float accuracy_threshold_; 8150+ // Resize 8151+ std::string export_file_ = ""; 8152+ std::string resize_dims_in_ = ""; 8153+ bool layer_checksum_ = false; 8154+ std::vector<std::vector<int>> resize_dims_; 8155+ std::string loss_name_ = ""; 8156+ std::string inference_file_ = ""; 8157+ bool unified_api_ = false; 8158+ bool dump_tensor_data_ = false; 8159+}; 8160+ 8161+class MS_API NetTrainBase { 8162+ public: 8163+ explicit NetTrainBase(NetTrainFlags *flags) : flags_(flags) {} 8164+ virtual ~NetTrainBase(); 8165+ 8166+ int Init(); 8167+ int RunNetTrain(); 8168+ static float *ReadFileBuf(const std::string file, size_t *size); 8169+ static int SetNr(std::function<int(NetTrainFlags *)> param); 8170+ static int RunNr(NetTrainFlags *flags) { 8171+ if (nr_cb_ != nullptr) { 8172+ return nr_cb_(flags); 8173+ } 8174+ MS_LOG(WARNING) << "unified api was not tested"; 8175+ std::cout << "unified api was not tested"; 8176+ return RET_OK; 8177+ } 8178+ // tensorData need to be converter first 8179+ template <typename T> 8180+ static float CompareData(const float *refOutput, int size, const T *msTensorData) { 8181+ size_t errorCount = 0; 8182+ float meanError = 0; 8183+ std::cout << "Out tensor size is: " << size << std::endl; 8184+ std::cout << "Data of model output: "; 8185+ for (int j = 0; j < std::min(50, size); j++) { 8186+ std::cout << static_cast<float>(msTensorData[j]) << " "; 8187+ } 8188+ std::cout << std::endl; 8189+ std::cout << "Data of Ref output : "; 8190+ for (int j = 0; j < std::min(50, size); j++) { 8191+ std::cout << refOutput[j] << " "; 8192+ } 8193+ std::cout << std::endl; 8194+ for (int j = 0; j < size; j++) { 8195+ if (std::isnan(msTensorData[j]) || std::isinf(msTensorData[j])) { 8196+ std::cerr << "Output tensor has nan or inf data, compare fail" << std::endl; 8197+ MS_LOG(ERROR) << "Output tensor has nan or inf data, compare fail"; 8198+ return RET_ERROR; 8199+ } 8200+ 8201+ auto tolerance = absoluteTolerance + relativeTolerance * fabs(refOutput[j]); 8202+ auto absoluteError = std::fabs(static_cast<float>(msTensorData[j]) - refOutput[j]); 8203+ if (absoluteError > tolerance) { 8204+ if 
(fabs(refOutput[j]) == 0) { 8205+ if (absoluteError > 1e-5) { 8206+ meanError += absoluteError; 8207+ errorCount++; 8208+ } else { 8209+ continue; 8210+ } 8211+ } else { 8212+ // just assume that atol = rtol 8213+ meanError += absoluteError / (fabs(refOutput[j]) + FLT_MIN); 8214+ errorCount++; 8215+ } 8216+ } 8217+ } 8218+ std::cout << std::endl; 8219+ if (meanError > 0.0f) { 8220+ meanError /= errorCount; 8221+ } 8222+ 8223+ if (meanError <= 0.0000001) { 8224+ std::cout << "Mean bias of tensor: 0%" << std::endl; 8225+ } else { 8226+ std::cout << "Mean bias of tensor: " << meanError * 100 << "%" << std::endl; 8227+ } 8228+ return meanError; 8229+ } 8230+ int InitDumpConfigFromJson(std::string path); 8231+ 8232+ protected: 8233+ // call GenerateInputData or ReadInputFile to init inputTensors 8234+ int LoadInput(); 8235+ void CheckSum(MSTensor *tensor, const std::string &node_type, int id, const std::string &in_out); 8236+ // call GenerateRandomData to fill inputTensors 8237+ virtual int GenerateInputData() = 0; 8238+ 8239+ int GenerateRandomData(mindspore::MSTensor *tensor); 8240+ 8241+ std::string GenerateOutputFileName(mindspore::MSTensor *tensor, const std::string &op_name, 8242+ const std::string &file_type, const size_t &idx); 8243+ virtual int ReadInputFile() = 0; 8244+ 8245+ virtual int CreateAndRunNetwork(const std::string &filename, const std::string &bb_filename, bool is_train, int epochs, 8246+ bool check_accuracy = true) = 0; 8247+ 8248+ int InitCallbackParameter(); 8249+ 8250+ virtual int InitDumpTensorDataCallbackParameter() = 0; 8251+ 8252+ virtual int InitTimeProfilingCallbackParameter() = 0; 8253+ 8254+ virtual int PrintResult(const std::vector<std::string> &title, const std::map<std::string, std::pair<int, float>> &result) = 0; 8255+ 8256+ template <typename T> 8257+ std::vector<int64_t> ConverterToInt64Vector(const std::vector<T> &srcDims) { 8258+ std::vector<int64_t> dims; 8259+ for (auto shape : srcDims) { 8260+ dims.push_back(static_cast<int64_t>(shape)); 8261+ } 8262+ return dims; 8263+ } 8264+ virtual int MarkPerformance() = 0; 8265+ virtual int MarkAccuracy(bool enforce_accuracy = true) = 0; 8266+ virtual int CompareOutput() = 0; 8267+ virtual int SaveModels() = 0; 8268+ int CheckExecutionOfSavedModels(); 8269+ void TensorNan(const float *data, int size) { 8270+ for (int i = 0; i < size; i++) { 8271+ if (std::isnan(data[i])) { 8272+ std::cout << "nan value of index=" << i << ", " << data[i] << std::endl; 8273+ break; 8274+ } 8275+ } 8276+ } 8277+#ifdef ENABLE_FP16 8278+ void TensorNan(float16_t *data, int size) { 8279+ for (int i = 0; i < size; i++) { 8280+ if (MS_ISNAN_FP16(data[i]) || std::isinf(data[i])) { 8281+ std::cout << "nan or inf value of index=" << i << ", " << data[i] << std::endl; 8282+ break; 8283+ } 8284+ } 8285+ } 8286+#endif 8287+ NetTrainFlags *flags_{nullptr}; 8288+ static std::function<int(NetTrainFlags *)> nr_cb_; 8289+ 8290+ nlohmann::json dump_cfg_json_; 8291+ std::string dump_file_output_dir_; 8292+ std::vector<std::shared_ptr<char>> inputs_buf_; 8293+ std::vector<size_t> inputs_size_; 8294+ size_t batch_num_ = 0; 8295+}; 8296+} // namespace mindspore::lite 8297+#endif // MINDSPORE_LITE_TOOLS_BENCHMARK_TRAIN_NET_TRAIN_BASE_H_ 8298diff --git a/mindspore/lite/tools/benchmark_train/net_train_c_api.cc b/mindspore/lite/tools/benchmark_train/net_train_c_api.cc 8299new file mode 100644 8300index 00000000..4dcf3af6 8301--- /dev/null 8302+++ b/mindspore/lite/tools/benchmark_train/net_train_c_api.cc 8303@@ -0,0 +1,659 @@ 8304+/** 8305+ * Copyright 
2023-2023 Huawei Technologies Co., Ltd 8306+ * 8307+ * Licensed under the Apache License, Version 2.0 (the "License"); 8308+ * you may not use this file except in compliance with the License. 8309+ * You may obtain a copy of the License at 8310+ * 8311+ * http://www.apache.org/licenses/LICENSE-2.0 8312+ * 8313+ * Unless required by applicable law or agreed to in writing, software 8314+ * distributed under the License is distributed on an "AS IS" BASIS, 8315+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 8316+ * See the License for the specific language governing permissions and 8317+ * limitations under the License. 8318+ */ 8319+ 8320+#include "net_train_c_api.h" 8321+#include "securec/include/securec.h" 8322+ 8323+namespace mindspore { 8324+namespace lite { 8325+uint64_t g_op_begin_ = 0; 8326+int g_op_call_times_total_ = 0; 8327+float g_op_cost_total_ = 0.0f; 8328+ 8329+int NetTrainCApi::GenerateInputData() { 8330+ for (size_t i = 0; i < ms_inputs_for_api_.handle_num; i++) { 8331+ OH_AI_TensorHandle tensor = ms_inputs_for_api_.handle_list[i]; 8332+ auto data_type = OH_AI_TensorGetDataType(tensor); 8333+ if (data_type == OH_AI_DATATYPE_OBJECTTYPE_STRING) { 8334+ MS_LOG(ERROR) << "Unsupported OH_AI_DATATYPE_OBJECTTYPE_STRING"; 8335+ return RET_ERROR; 8336+ } else { 8337+ (void)GenerateRandomData(static_cast<mindspore::MSTensor *>(tensor)); 8338+ } 8339+ } 8340+ return RET_OK; 8341+} 8342+ 8343+int NetTrainCApi::SaveModels() { 8344+ if (!flags_->export_file_.empty()) { 8345+ if (flags_->bb_model_file_.empty()) { 8346+ auto status = OH_AI_ExportModel(ms_model_, OH_AI_MODELTYPE_MINDIR, (flags_->export_file_ + "_qt").c_str(), OH_AI_WEIGHT_QUANT, false, 8347+ nullptr, 0); 8348+ if (status != OH_AI_STATUS_SUCCESS) { 8349+ MS_LOG(ERROR) << "Export quantized model error " << flags_->export_file_ + "_qt"; 8350+ std::cout << "Export quantized model error " << flags_->export_file_ + "_qt" << std::endl; 8351+ return RET_ERROR; 8352+ } 8353+ } 8354+ auto status = OH_AI_ExportModel(ms_model_, OH_AI_MODELTYPE_MINDIR, (flags_->export_file_).c_str(), OH_AI_NO_QUANT, false, 8355+ nullptr, 0); 8356+ 8357+ if (status != OH_AI_STATUS_SUCCESS) { 8358+ MS_LOG(ERROR) << "Export non quantized model error " << flags_->export_file_; 8359+ std::cout << "Export non quantized model error " << flags_->export_file_ << std::endl; 8360+ return RET_ERROR; 8361+ } 8362+ } 8363+ if (!flags_->inference_file_.empty()) { 8364+ auto status = OH_AI_ExportModel(ms_model_, OH_AI_MODELTYPE_MINDIR, (flags_->inference_file_ + "_qt").c_str(), OH_AI_WEIGHT_QUANT, true, 8365+ nullptr, 0); 8366+ if (status != OH_AI_STATUS_SUCCESS) { 8367+ MS_LOG(ERROR) << "Export quantized inference model error " << flags_->inference_file_ + "_qt"; 8368+ std::cout << "Export quantized inference model error " << flags_->inference_file_ + "_qt" << std::endl; 8369+ return RET_ERROR; 8370+ } 8371+ 8372+ auto tick = GetTimeUs(); 8373+ status = OH_AI_ExportModel(ms_model_, OH_AI_MODELTYPE_MINDIR, (flags_->inference_file_).c_str(), OH_AI_NO_QUANT, true, 8374+ nullptr, 0); 8375+ if (status != OH_AI_STATUS_SUCCESS) { 8376+ MS_LOG(ERROR) << "Export non quantized inference model error " << flags_->inference_file_; 8377+ std::cout << "Export non quantized inference model error " << flags_->inference_file_ << std::endl; 8378+ return RET_ERROR; 8379+ } 8380+ std::cout << "ExportInference() execution time is " << GetTimeUs() - tick << "us\n"; 8381+ } 8382+ return RET_OK; 8383+} 8384+ 8385+int NetTrainCApi::LoadStepInput(size_t step) { 8386+ 
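    // Descriptive note, inferred from ReadInputFile below: inputs_buf_[i] holds every batch of input i back to
    // back, so the slice for a given step starts at byte offset step * tensor_data_size and is copied into the
    // tensor's mutable data as a whole.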
if (step >= batch_num_) { 8387+ auto cur_batch = step + 1; 8388+ MS_LOG(ERROR) << "Max input Batch is:" << batch_num_ << " but got batch :" << cur_batch; 8389+ return RET_ERROR; 8390+ } 8391+ for (size_t i = 0; i < ms_inputs_for_api_.handle_num; i++) { 8392+ OH_AI_TensorHandle cur_tensor = ms_inputs_for_api_.handle_list[i]; 8393+ MS_ASSERT(cur_tensor != nullptr); 8394+ auto tensor_data_size = OH_AI_TensorGetDataSize(cur_tensor); 8395+ auto input_data = OH_AI_TensorGetMutableData(cur_tensor); 8396+ MS_ASSERT(input_data != nullptr); 8397+ memcpy_s(input_data, tensor_data_size, inputs_buf_[i].get() + step * tensor_data_size, tensor_data_size); 8398+ } 8399+ return RET_OK; 8400+} 8401+ 8402+int NetTrainCApi::ReadInputFile() { 8403+ if (this->flags_->in_data_type_ == lite::kImage) { 8404+ MS_LOG(ERROR) << "Unsupported image input"; 8405+ return RET_ERROR; 8406+ } else { 8407+ for (size_t i = 0; i < ms_inputs_for_api_.handle_num; i++) { 8408+ OH_AI_TensorHandle tensor = ms_inputs_for_api_.handle_list[i]; 8409+ MS_ASSERT(tensor != nullptr); 8410+ size_t size; 8411+ std::string file_name = flags_->in_data_file_ + std::to_string(i + 1) + ".bin"; 8412+ auto bin_buf = lite::ReadFile(file_name.c_str(), &size); 8413+ if (bin_buf == nullptr) { 8414+ MS_LOG(ERROR) << "ReadFile failed"; 8415+ return RET_ERROR; 8416+ } 8417+ auto tensor_data_size = OH_AI_TensorGetDataSize(tensor); 8418+ MS_ASSERT(tensor_data_size != 0); 8419+ if (size == 0 || size % tensor_data_size != 0 || (batch_num_ != 0 && size / tensor_data_size != batch_num_)) { 8420+ std::cerr << "Input binary file size error, required :N * " << tensor_data_size << ", in fact: " << size 8421+ << " ,file_name: " << file_name.c_str() << std::endl; 8422+ MS_LOG(ERROR) << "Input binary file size error, required: N * " << tensor_data_size << ", in fact: " << size 8423+ << " ,file_name: " << file_name.c_str(); 8424+ delete bin_buf; 8425+ return RET_ERROR; 8426+ } 8427+ inputs_buf_.emplace_back(bin_buf); 8428+ inputs_size_.emplace_back(size); 8429+ batch_num_ = size / tensor_data_size; 8430+ } 8431+ } 8432+ return RET_OK; 8433+} 8434+ 8435+int NetTrainCApi::InitDumpTensorDataCallbackParameter() { 8436+ MS_LOG(ERROR) << "Unsupported feature."; 8437+ return RET_ERROR; 8438+} 8439+ 8440+int NetTrainCApi::InitTimeProfilingCallbackParameter() { 8441+ before_call_back_ = TimeProfilingBeforeCallback; 8442+ after_call_back_ = TimeProfilingAfterCallback; 8443+ return RET_OK; 8444+} 8445+ 8446+int NetTrainCApi::InitMSContext() { 8447+ context_ = OH_AI_ContextCreate(); 8448+ if (context_ == nullptr) { 8449+ MS_LOG(INFO) << "OH_AI_ContextCreate failed"; 8450+ return RET_ERROR; 8451+ } 8452+ OH_AI_ContextSetThreadNum(context_, flags_->num_threads_); 8453+ OH_AI_ContextSetThreadAffinityMode(context_, flags_->cpu_bind_mode_); 8454+ 8455+ OH_AI_DeviceInfoHandle cpu_device_info = OH_AI_DeviceInfoCreate(OH_AI_DEVICETYPE_CPU); 8456+ OH_AI_DeviceInfoSetEnableFP16(cpu_device_info, flags_->enable_fp16_); 8457+ OH_AI_ContextAddDeviceInfo(context_, cpu_device_info); 8458+ return RET_OK; 8459+} 8460+ 8461+char **NetTrainCApi::TransStrVectorToCharArrays(const std::vector<std::string> &s) { 8462+ char **char_arr = static_cast<char **>(malloc(s.size() * sizeof(char *))); 8463+ for (size_t i = 0; i < s.size(); i++) { 8464+ char_arr[i] = static_cast<char *>(malloc((s[i].size() + 1))); 8465+ strcpy(char_arr[i], s[i].c_str()); 8466+ } 8467+ return char_arr; 8468+} 8469+ 8470+std::vector<std::string> NetTrainCApi::TransCharArraysToStrVector(char **c, const size_t &num) { 8471+ 
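    // Descriptive note: this helper only copies the strings; the char ** argument (for example the array
    // returned by OH_AI_TrainCfgGetLossName in InitTrainCfg below) remains owned by the caller, which frees
    // each entry afterwards.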
std::vector<std::string> str; 8472+ for (size_t i = 0; i < num; i++) { 8473+ str.push_back(std::string(c[i])); 8474+ } 8475+ return str; 8476+} 8477+ 8478+void NetTrainCApi::InitTrainCfg() { 8479+ if (flags_->loss_name_.empty()) { 8480+ return; 8481+ } 8482+ 8483+ std::string delimiter = ","; 8484+ size_t pos = 0; 8485+ std::string token; 8486+ train_cfg_ = OH_AI_TrainCfgCreate(); 8487+ size_t num = 0; 8488+ std::vector<std::string> train_cfg_loss_name; 8489+ OH_AI_TrainCfgSetLossName(train_cfg_, nullptr, train_cfg_loss_name.size()); 8490+ while ((pos = flags_->loss_name_.find(delimiter)) != std::string::npos) { 8491+ token = flags_->loss_name_.substr(0, pos); 8492+ flags_->loss_name_.erase(0, pos + delimiter.length()); // change to delim without deletion 8493+ char **name = OH_AI_TrainCfgGetLossName(train_cfg_, &num); 8494+ train_cfg_loss_name = TransCharArraysToStrVector(name, num); 8495+ train_cfg_loss_name.push_back(token); 8496+ char **loss_name = TransStrVectorToCharArrays(train_cfg_loss_name); 8497+ OH_AI_TrainCfgSetLossName(train_cfg_, const_cast<const char **>(loss_name), train_cfg_loss_name.size()); 8498+ for (size_t i = 0; i < train_cfg_loss_name.size(); i++) { 8499+ free(loss_name[i]); 8500+ } 8501+ free(loss_name); 8502+ for (size_t i = 0; i < num; i++) { 8503+ free(name[i]); 8504+ } 8505+ free(name); 8506+ } 8507+ if (!(flags_->loss_name_.empty())) { 8508+ char **name = OH_AI_TrainCfgGetLossName(train_cfg_, &num); 8509+ train_cfg_loss_name = TransCharArraysToStrVector(name, num); 8510+ train_cfg_loss_name.push_back(flags_->loss_name_); 8511+ char **loss_name = TransStrVectorToCharArrays(train_cfg_loss_name); 8512+ OH_AI_TrainCfgSetLossName(train_cfg_, const_cast<const char **>(loss_name), train_cfg_loss_name.size()); 8513+ for (size_t i = 0; i < train_cfg_loss_name.size(); i++) { 8514+ free(loss_name[i]); 8515+ } 8516+ free(loss_name); 8517+ for (size_t i = 0; i < num; i++) { 8518+ free(name[i]); 8519+ } 8520+ free(name); 8521+ } 8522+} 8523+ 8524+int NetTrainCApi::CreateAndRunNetworkForInference(const std::string &filename, 8525+ const OH_AI_ContextHandle &context) { 8526+ std::string model_name = filename.substr(filename.find_last_of(DELIM_SLASH) + 1); 8527+ std::string filenamems = filename; 8528+ if (filenamems.substr(filenamems.find_last_of('.') + 1) != "ms") { 8529+ filenamems = filenamems + ".ms"; 8530+ } 8531+ MS_LOG(INFO) << "start reading model file " << filenamems.c_str(); 8532+ std::cout << "start reading model file " << filenamems.c_str() << std::endl; 8533+ auto status = OH_AI_ModelBuildFromFile(ms_model_, filenamems.c_str(), 8534+ static_cast<OH_AI_ModelType>(mindspore::kMindIR), context); 8535+ if (status != OH_AI_STATUS_SUCCESS) { 8536+ MS_LOG(ERROR) << "ms model build failed. " << model_name; 8537+ return RET_ERROR; 8538+ } 8539+ return RET_OK; 8540+} 8541+ 8542+int NetTrainCApi::CreateAndRunNetworkForTrain(const std::string &filename, const std::string &bb_filename, 8543+ const OH_AI_ContextHandle &context, 8544+ const OH_AI_TrainCfgHandle &train_cfg, int epochs) { 8545+ std::string model_name = filename.substr(filename.find_last_of(DELIM_SLASH) + 1); 8546+ OH_AI_Status status; 8547+ if (!bb_filename.empty()) { 8548+ MS_LOG(ERROR) << "build transfer learning not supported. 
" << model_name; 8549+ return RET_ERROR; 8550+ } else { 8551+ MS_LOG(INFO) << "Build mindspore model from model file" << filename.c_str(); 8552+ std::cout << "Build mindspore model from model file" << filename.c_str() << std::endl; 8553+ status = OH_AI_TrainModelBuildFromFile(ms_model_, filename.c_str(), OH_AI_MODELTYPE_MINDIR, context, train_cfg); 8554+ if (status != OH_AI_STATUS_SUCCESS) { 8555+ MS_LOG(ERROR) << "build transfer learning failed. " << model_name; 8556+ return RET_ERROR; 8557+ } 8558+ } 8559+ if (epochs > 0) { 8560+ if (flags_->virtual_batch_) { 8561+ OH_AI_ModelSetupVirtualBatch(ms_model_, epochs, -1.0f, -1.0f); 8562+ } 8563+ status = OH_AI_ModelSetTrainMode(ms_model_, true); 8564+ if (status != OH_AI_STATUS_SUCCESS) { 8565+ MS_LOG(ERROR) << "set train mode failed. "; 8566+ return RET_ERROR; 8567+ } 8568+ } 8569+ return RET_OK; 8570+} 8571+ 8572+int NetTrainCApi::CompareOutput() { 8573+ std::cout << "================ Comparing Forward Output data ================" << std::endl; 8574+ float total_bias = 0; 8575+ int total_size = 0; 8576+ bool has_error = false; 8577+ auto output_tensors_handle = OH_AI_ModelGetOutputs(ms_model_); 8578+ 8579+ std::vector<mindspore::MSTensor> output_tensors; 8580+ for (size_t i = 0; i < output_tensors_handle.handle_num; i++) { 8581+ output_tensors.push_back(*static_cast<mindspore::MSTensor *>(output_tensors_handle.handle_list[i])); 8582+ } 8583+ if (output_tensors.empty()) { 8584+ MS_LOG(ERROR) << "Cannot find output tensors, get model output failed"; 8585+ return RET_ERROR; 8586+ } 8587+ std::map<std::string, MSTensor> ordered_outputs; 8588+ for (const auto &output_tensor : output_tensors) { 8589+ ordered_outputs.insert({output_tensor.Name(), output_tensor}); 8590+ } 8591+ int i = 1; 8592+ mindspore::MSTensor tensor; 8593+ for (auto &ordered_output : ordered_outputs) { 8594+ tensor = ordered_output.second; 8595+ std::cout << "output is tensor " << ordered_output.first << "\n"; 8596+ auto outputs = tensor.MutableData(); 8597+ size_t size; 8598+ std::string output_file = flags_->data_file_ + std::to_string(i) + ".bin"; 8599+ auto bin_buf = std::unique_ptr<float[]>(ReadFileBuf(output_file.c_str(), &size)); 8600+ if (bin_buf == nullptr) { 8601+ MS_LOG(ERROR) << "ReadFile return nullptr"; 8602+ std::cout << "ReadFile return nullptr" << std::endl; 8603+ return RET_ERROR; 8604+ } 8605+ if (size != tensor.DataSize()) { 8606+ MS_LOG(ERROR) << "Output buffer and output file differ by size. Tensor size: " << tensor.DataSize() 8607+ << ", read size: " << size; 8608+ std::cout << "Output buffer and output file differ by size. 
Tensor size: " << tensor.DataSize() 8609+ << ", read size: " << size << std::endl; 8610+ return RET_ERROR; 8611+ } 8612+ float bias = CompareData<float>(bin_buf.get(), tensor.ElementNum(), reinterpret_cast<float *>(outputs)); 8613+ if (bias >= 0) { 8614+ total_bias += bias; 8615+ total_size++; 8616+ } else { 8617+ has_error = true; 8618+ break; 8619+ } 8620+ i++; 8621+ } 8622+ 8623+ if (!has_error) { 8624+ float mean_bias; 8625+ if (total_size != 0) { 8626+ mean_bias = total_bias / total_size * 100; 8627+ } else { 8628+ mean_bias = 0; 8629+ } 8630+ 8631+ std::cout << "Mean bias of all nodes/tensors: " << mean_bias << "%" 8632+ << " threshold is:" << this->flags_->accuracy_threshold_ << std::endl; 8633+ std::cout << "=======================================================" << std::endl << std::endl; 8634+ 8635+ if (mean_bias > this->flags_->accuracy_threshold_) { 8636+ MS_LOG(INFO) << "Mean bias of all nodes/tensors is too big: " << mean_bias << "%"; 8637+ std::cout << "Mean bias of all nodes/tensors is too big: " << mean_bias << "%" << std::endl; 8638+ return RET_TOO_BIG; 8639+ } else { 8640+ return RET_OK; 8641+ } 8642+ } else { 8643+ MS_LOG(ERROR) << "Error in CompareData"; 8644+ std::cerr << "Error in CompareData" << std::endl; 8645+ std::cout << "=======================================================" << std::endl << std::endl; 8646+ return RET_ERROR; 8647+ } 8648+} 8649+ 8650+int NetTrainCApi::MarkPerformance() { 8651+ MS_LOG(INFO) << "Running train loops..."; 8652+ std::cout << "Running train loops..." << std::endl; 8653+ uint64_t time_min = 0xFFFFFFFFFFFFFFFF; 8654+ uint64_t time_max = 0; 8655+ uint64_t time_avg = 0; 8656+ std::vector<MSTensor> outputs; 8657+ 8658+ for (int i = 0; i < flags_->epochs_; i++) { 8659+ auto start = GetTimeUs(); 8660+ for (size_t step = 0; step < batch_num_; step++) { 8661+ MS_LOG(INFO) << "Run for epoch:" << i << " step:" << step; 8662+ auto ret = LoadStepInput(step); 8663+ if (ret != RET_OK) { 8664+ return ret; 8665+ } 8666+ auto status = OH_AI_RunStep(ms_model_, before_call_back_, after_call_back_); 8667+ if (status != OH_AI_STATUS_SUCCESS) { 8668+ MS_LOG(ERROR) << "Inference error " << status; 8669+ std::cerr << "Inference error " << status; 8670+ return RET_ERROR; 8671+ } 8672+ } 8673+ 8674+ auto end = GetTimeUs(); 8675+ auto time = end - start; 8676+ time_min = std::min(time_min, time); 8677+ time_max = std::max(time_max, time); 8678+ time_avg += time; 8679+ } 8680+ 8681+ if (flags_->time_profiling_) { 8682+ const std::vector<std::string> per_op_name = {"opName", "avg(ms)", "percent", "calledTimes", "opTotalTime"}; 8683+ const std::vector<std::string> per_op_type = {"opType", "avg(ms)", "percent", "calledTimes", "opTotalTime"}; 8684+ PrintResult(per_op_name, g_c_op_times_by_name_); 8685+ PrintResult(per_op_type, g_c_op_times_by_type_); 8686+ } 8687+ 8688+ if (flags_->epochs_ > 0) { 8689+ time_avg /= static_cast<size_t>(flags_->epochs_); 8690+ MS_LOG(INFO) << "Model = " << flags_->model_file_.substr(flags_->model_file_.find_last_of(DELIM_SLASH) + 1).c_str() 8691+ << ", NumThreads = " << flags_->num_threads_ << ", MinRunTime = " << time_min / 1000.0f 8692+ << ", MaxRuntime = " << time_max / 1000.0f << ", AvgRunTime = " << time_avg / 1000.0f; 8693+ printf("Model = %s, NumThreads = %d, MinRunTime = %f ms, MaxRuntime = %f ms, AvgRunTime = %f ms\n", 8694+ flags_->model_file_.substr(flags_->model_file_.find_last_of(DELIM_SLASH) + 1).c_str(), flags_->num_threads_, 8695+ time_min / 1000.0f, time_max / 1000.0f, time_avg / 1000.0f); 8696+ } 8697+ return 
RET_OK; 8698+} 8699+ 8700+int NetTrainCApi::MarkAccuracy(bool enforce_accuracy) { 8701+ MS_LOG(INFO) << "MarkAccuracy"; 8702+ auto load_ret = LoadStepInput(0); 8703+ if (load_ret != RET_OK) { 8704+ return load_ret; 8705+ } 8706+ auto status = PrintInputData(); 8707+ if (status != RET_OK) { 8708+ MS_LOG(ERROR) << "PrintInputData failed, ret: " << status; 8709+ return status; 8710+ } 8711+ status = OH_AI_RunStep(ms_model_, before_call_back_, after_call_back_); 8712+ if (status != OH_AI_STATUS_SUCCESS) { 8713+ MS_LOG(ERROR) << "Inference error " << status; 8714+ std::cerr << "Inference error " << status << std::endl; 8715+ return RET_ERROR; 8716+ } 8717+ 8718+ auto ret = CompareOutput(); 8719+ if (ret == RET_TOO_BIG && !enforce_accuracy) { 8720+ MS_LOG(INFO) << "Accuracy Error is big but not enforced"; 8721+ std::cout << "Accuracy Error is big but not enforced" << std::endl; 8722+ return RET_OK; 8723+ } 8724+ 8725+ if (ret != RET_OK) { 8726+ MS_LOG(ERROR) << "Compare output error " << ret; 8727+ std::cerr << "Compare output error " << ret << std::endl; 8728+ return ret; 8729+ } 8730+ return RET_OK; 8731+} 8732+ 8733+int NetTrainCApi::CreateAndRunNetwork(const std::string &filename, const std::string &bb_filename, bool is_train, 8734+ int epochs, bool check_accuracy) { 8735+ auto start_prepare_time = GetTimeUs(); 8736+ 8737+ int ret = InitMSContext(); 8738+ if (ret != RET_OK) { 8739+ MS_LOG(ERROR) << "InitContext failed, ret: " << ret; 8740+ return ret; 8741+ } 8742+ 8743+ InitTrainCfg(); 8744+ ms_model_ = OH_AI_ModelCreate(); 8745+ 8746+ if (is_train) { 8747+ ret = CreateAndRunNetworkForTrain(filename, bb_filename, context_ , train_cfg_, epochs); 8748+ if (ret != RET_OK) { 8749+ MS_LOG(ERROR) << "CreateAndRunNetworkForTrain failed."; 8750+ return RET_ERROR; 8751+ } 8752+ } else { 8753+ ret = CreateAndRunNetworkForInference(filename, context_); 8754+ if (ret != RET_OK) { 8755+ MS_LOG(ERROR) << "CreateAndRunNetworkForInference failed."; 8756+ return RET_ERROR; 8757+ } 8758+ } 8759+ 8760+ ms_inputs_for_api_ = OH_AI_ModelGetInputs(ms_model_); 8761+ if (ms_inputs_for_api_.handle_list == nullptr) { 8762+ MS_LOG(ERROR) << "OH_AI_ModelGetInputs failed, ret: "; 8763+ return RET_ERROR; 8764+ } 8765+ 8766+ if (!flags_->resize_dims_.empty()) { 8767+ std::vector<OH_AI_ShapeInfo> shape_infos; 8768+ std::transform(flags_->resize_dims_.begin(), flags_->resize_dims_.end(), std::back_inserter(shape_infos), 8769+ [&](auto &shapes) { 8770+ OH_AI_ShapeInfo shape_info; 8771+ shape_info.shape_num = shapes.size(); 8772+ for (size_t i = 0; i < shape_info.shape_num; i++) { 8773+ shape_info.shape[i] = shapes[i]; 8774+ } 8775+ return shape_info; 8776+ }); 8777+ auto status = OH_AI_ModelResize(ms_model_, ms_inputs_for_api_, shape_infos.data(), shape_infos.size()); 8778+ if (status != OH_AI_STATUS_SUCCESS) { 8779+ MS_LOG(ERROR) << "Input tensor resize failed."; 8780+ std::cout << "Input tensor resize failed."; 8781+ return RET_ERROR; 8782+ } 8783+ } 8784+ 8785+ auto end_prepare_time = GetTimeUs(); 8786+ MS_LOG(INFO) << "PrepareTime = " << ((end_prepare_time - start_prepare_time) / kTHOUSAND) << " ms"; 8787+ std::cout << "PrepareTime = " << ((end_prepare_time - start_prepare_time) / kTHOUSAND) << " ms" << std::endl; 8788+ // Load input 8789+ MS_LOG(INFO) << "Load input data"; 8790+ auto status = LoadInput(); 8791+ if (status != RET_OK) { 8792+ MS_LOG(ERROR) << "Load input data error"; 8793+ std::cout << "Load input data error" << std::endl; 8794+ return status; 8795+ } 8796+ 8797+ if ((epochs > 0) && is_train) { 8798+ 
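      // Training path, descriptive note: the timed train loop in MarkPerformance runs first, SaveModels then
      // exports the trained and inference graphs when the export/inference flags are set, and accuracy is
      // checked afterwards in eval mode only when a data_file_ is supplied.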
status = MarkPerformance(); 8799+ if (status != RET_OK) { 8800+ MS_LOG(ERROR) << "Run MarkPerformance error: " << status; 8801+ std::cout << "Run MarkPerformance error: " << status << std::endl; 8802+ return status; 8803+ } 8804+ SaveModels(); // save file if flags are on 8805+ } 8806+ if (!flags_->data_file_.empty()) { 8807+ auto res = OH_AI_ModelSetTrainMode(ms_model_, false); 8808+ if (res != OH_AI_STATUS_SUCCESS) { 8809+ MS_LOG(ERROR) << "set eval mode failed. "; 8810+ return RET_ERROR; 8811+ } 8812+ 8813+ status = MarkAccuracy(check_accuracy); 8814+ if (status != RET_OK) { 8815+ MS_LOG(ERROR) << "Run MarkAccuracy error: " << status; 8816+ std::cout << "Run MarkAccuracy error: " << status << std::endl; 8817+ return status; 8818+ } 8819+ } 8820+ return RET_OK; 8821+} 8822+ 8823+int NetTrainCApi::PrintInputData() { 8824+ constexpr int64_t kPrintDataNum = 20; 8825+ for (size_t i = 0; i < ms_inputs_for_api_.handle_num; i++) { 8826+ auto input = ms_inputs_for_api_.handle_list[i]; 8827+ std::cout << "InData" << i << ": "; 8828+ auto data_type = static_cast<TypeId>(OH_AI_TensorGetDataType(input)); 8829+ if (data_type == TypeId::kObjectTypeString) { 8830+ MS_LOG(ERROR) << "Unsupported OH_AI_DATATYPE_OBJECTTYPE_STRING."; 8831+ return RET_ERROR; 8832+ } 8833+ auto tensor_data = OH_AI_TensorGetData(input); 8834+ size_t print_num = std::min(OH_AI_TensorGetElementNum(input), kPrintDataNum); 8835+ for (size_t j = 0; j < print_num; j++) { 8836+ if (data_type == TypeId::kNumberTypeFloat32 || data_type == TypeId::kNumberTypeFloat) { 8837+ std::cout << static_cast<const float *>(tensor_data)[j] << " "; 8838+ } else if (data_type == TypeId::kNumberTypeInt8) { 8839+ std::cout << static_cast<const int8_t *>(tensor_data)[j] << " "; 8840+ } else if (data_type == TypeId::kNumberTypeUInt8) { 8841+ std::cout << static_cast<const uint8_t *>(tensor_data)[j] << " "; 8842+ } else if (data_type == TypeId::kNumberTypeInt32) { 8843+ std::cout << static_cast<const int32_t *>(tensor_data)[j] << " "; 8844+ } else if (data_type == TypeId::kNumberTypeInt64) { 8845+ std::cout << static_cast<const int64_t *>(tensor_data)[j] << " "; 8846+ } else if (data_type == TypeId::kNumberTypeBool) { 8847+ std::cout << static_cast<const bool *>(tensor_data)[j] << " "; 8848+ } else { 8849+ MS_LOG(ERROR) << "Datatype: " << data_type << " is not supported."; 8850+ return RET_ERROR; 8851+ } 8852+ } 8853+ std::cout << std::endl; 8854+ } 8855+ return RET_OK; 8856+} 8857+ 8858+int NetTrainCApi::PrintResult(const std::vector<std::string> &title, 8859+ const std::map<std::string, std::pair<int, float>> &result) { 8860+ std::vector<size_t> columnLenMax(kFieldsToPrint); 8861+ std::vector<std::vector<std::string>> rows; 8862+ 8863+ for (auto &iter : result) { 8864+ std::string stringBuf[kFieldsToPrint]; 8865+ std::vector<std::string> columns; 8866+ size_t len = 0; 8867+ int index = 0; 8868+ len = iter.first.size(); 8869+ if (len > columnLenMax.at(index)) { 8870+ columnLenMax.at(index) = len + kPrintOffset; 8871+ } 8872+ columns.push_back(iter.first); 8873+ 8874+ index++; 8875+ if (title[0] == "opName") { 8876+ stringBuf[index] = std::to_string(iter.second.second / flags_->epochs_); 8877+ } else { 8878+ stringBuf[index] = std::to_string(iter.second.second / iter.second.first); 8879+ } 8880+ len = stringBuf[index].length(); 8881+ if (len > columnLenMax.at(index)) { 8882+ columnLenMax.at(index) = len + kPrintOffset; 8883+ } 8884+ columns.emplace_back(stringBuf[index]); 8885+ 8886+ index++; 8887+ stringBuf[index] = std::to_string(iter.second.second / 
g_op_cost_total_); 8888+ len = stringBuf[index].length(); 8889+ if (len > columnLenMax.at(index)) { 8890+ columnLenMax.at(index) = len + kPrintOffset; 8891+ } 8892+ columns.emplace_back(stringBuf[index]); 8893+ 8894+ index++; 8895+ stringBuf[index] = std::to_string(iter.second.first); 8896+ len = stringBuf[index].length(); 8897+ if (len > columnLenMax.at(index)) { 8898+ columnLenMax.at(index) = len + kPrintOffset; 8899+ } 8900+ columns.emplace_back(stringBuf[index]); 8901+ 8902+ index++; 8903+ stringBuf[index] = std::to_string(iter.second.second); 8904+ len = stringBuf[index].length(); 8905+ if (len > columnLenMax.at(index)) { 8906+ columnLenMax.at(index) = len + kPrintOffset; 8907+ } 8908+ columns.emplace_back(stringBuf[index]); 8909+ 8910+ rows.push_back(columns); 8911+ } 8912+ 8913+ printf("-------------------------------------------------------------------------\n"); 8914+ for (int i = 0; i < kFieldsToPrint; i++) { 8915+ auto printBuf = title[i]; 8916+ if (printBuf.size() > columnLenMax.at(i)) { 8917+ columnLenMax.at(i) = printBuf.size(); 8918+ } 8919+ printBuf.resize(columnLenMax.at(i), ' '); 8920+ printf("%s\t", printBuf.c_str()); 8921+ } 8922+ printf("\n"); 8923+ for (auto &row : rows) { 8924+ for (int j = 0; j < kFieldsToPrint; j++) { 8925+ auto printBuf = row[j]; 8926+ printBuf.resize(columnLenMax.at(j), ' '); 8927+ printf("%s\t", printBuf.c_str()); 8928+ } 8929+ printf("\n"); 8930+ } 8931+ return RET_OK; 8932+} 8933+ 8934+bool TimeProfilingBeforeCallback(const OH_AI_TensorHandleArray inputs, const OH_AI_TensorHandleArray outputs, 8935+ const OH_AI_CallBackParam kernel_Info) { 8936+ if (g_c_op_times_by_type_.find(kernel_Info.node_type) == g_c_op_times_by_type_.end()) { 8937+ g_c_op_times_by_type_.insert(std::make_pair(kernel_Info.node_type, std::make_pair(0, 0.0f))); 8938+ } 8939+ if (g_c_op_times_by_name_.find(kernel_Info.node_name) == g_c_op_times_by_name_.end()) { 8940+ g_c_op_times_by_name_.insert(std::make_pair(kernel_Info.node_name, std::make_pair(0, 0.0f))); 8941+ } 8942+ 8943+ g_op_call_times_total_++; 8944+ g_op_begin_ = mindspore::lite::GetTimeUs(); 8945+ return true; 8946+} 8947+ 8948+bool TimeProfilingAfterCallback(const OH_AI_TensorHandleArray inputs, const OH_AI_TensorHandleArray outputs, 8949+ const OH_AI_CallBackParam kernel_Info) { 8950+ uint64_t opEnd = mindspore::lite::GetTimeUs(); 8951+ float cost = static_cast<float>(opEnd - g_op_begin_) / 1000.0f; 8952+ g_op_cost_total_ += cost; 8953+ g_c_op_times_by_type_[kernel_Info.node_type].first++; 8954+ g_c_op_times_by_type_[kernel_Info.node_type].second += cost; 8955+ g_c_op_times_by_name_[kernel_Info.node_name].first++; 8956+ g_c_op_times_by_name_[kernel_Info.node_name].second += cost; 8957+ return true; 8958+} 8959+} // namespace lite 8960+} // namespace mindspore 8961+ 8962+ 8963diff --git a/mindspore/lite/tools/benchmark_train/net_train_c_api.h b/mindspore/lite/tools/benchmark_train/net_train_c_api.h 8964new file mode 100644 8965index 00000000..bb84d3c1 8966--- /dev/null 8967+++ b/mindspore/lite/tools/benchmark_train/net_train_c_api.h 8968@@ -0,0 +1,121 @@ 8969+/** 8970+ * Copyright 2023-2023 Huawei Technologies Co., Ltd 8971+ * 8972+ * Licensed under the Apache License, Version 2.0 (the "License"); 8973+ * you may not use this file except in compliance with the License. 
8974+ * You may obtain a copy of the License at 8975+ * 8976+ * http://www.apache.org/licenses/LICENSE-2.0 8977+ * 8978+ * Unless required by applicable law or agreed to in writing, software 8979+ * distributed under the License is distributed on an "AS IS" BASIS, 8980+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 8981+ * See the License for the specific language governing permissions and 8982+ * limitations under the License. 8983+ */ 8984+ 8985+#ifndef MINDSPORE_LITE_TOOLS_BENCHMARK_NET_TRAIN_C_API_H 8986+#define MINDSPORE_LITE_TOOLS_BENCHMARK_NET_TRAIN_C_API_H 8987+ 8988+#include <getopt.h> 8989+#include <csignal> 8990+#include <unordered_map> 8991+#include <fstream> 8992+#include <iostream> 8993+#include <map> 8994+#include <cmath> 8995+#include <string> 8996+#include <vector> 8997+#include <memory> 8998+#include <cfloat> 8999+#include <utility> 9000+#include <algorithm> 9001+#include <nlohmann/json.hpp> 9002+#include "include/api/model.h" 9003+#include "include/api/types.h" 9004+#include "include/api/context.h" 9005+#include "include/api/cfg.h" 9006+ 9007+#include "include/c_api/model_c.h" 9008+#include "include/c_api/context_c.h" 9009+ 9010+#ifdef ENABLE_FP16 9011+#include <arm_neon.h> 9012+#endif 9013+#include "tools/common/flag_parser.h" 9014+#include "src/common/file_utils.h" 9015+#include "src/common/utils.h" 9016+#include "tools/benchmark_train/net_train_base.h" 9017+ 9018+namespace mindspore::lite { 9019+ namespace { 9020+ std::map<std::string, std::pair<int, float>> g_c_op_times_by_type_; 9021+ std::map<std::string, std::pair<int, float>> g_c_op_times_by_name_; 9022+ } 9023+#ifdef __cplusplus 9024+ extern "C" { 9025+#endif 9026+ bool TimeProfilingBeforeCallback(const OH_AI_TensorHandleArray inputs, const OH_AI_TensorHandleArray outputs, 9027+ const OH_AI_CallBackParam kernel_Info); 9028+ bool TimeProfilingAfterCallback(const OH_AI_TensorHandleArray inputs, const OH_AI_TensorHandleArray outputs, 9029+ const OH_AI_CallBackParam kernel_Info); 9030+#ifdef __cplusplus 9031+ } 9032+#endif 9033+ 9034+class MS_API NetTrainCApi : public NetTrainBase { 9035+ public: 9036+ explicit NetTrainCApi(NetTrainFlags *flags) : NetTrainBase(flags) {} 9037+ virtual ~NetTrainCApi() {}; 9038+ 9039+ protected: 9040+ // call GenerateRandomData to fill inputTensors 9041+ int GenerateInputData() override; 9042+ 9043+ int ReadInputFile() override; 9044+ 9045+ int LoadStepInput(size_t step); 9046+ 9047+ int InitMSContext(); 9048+ 9049+ void InitTrainCfg(); 9050+ 9051+ char **TransStrVectorToCharArrays(const std::vector<std::string> &s); 9052+ 9053+ std::vector<std::string> TransCharArraysToStrVector(char **c, const size_t &num); 9054+ 9055+ int CreateAndRunNetwork(const std::string &filename, const std::string &bb_filename, bool is_train, int epochs, 9056+ bool check_accuracy = true) override; 9057+ 9058+ int CreateAndRunNetworkForInference(const std::string &filename, const OH_AI_ContextHandle &context); 9059+ 9060+ int CreateAndRunNetworkForTrain(const std::string &filename, const std::string &bb_filename, 9061+ const OH_AI_ContextHandle &context, 9062+ const OH_AI_TrainCfgHandle &train_cfg, int epochs); 9063+ 9064+ int InitDumpTensorDataCallbackParameter() override; 9065+ 9066+ int InitTimeProfilingCallbackParameter() override; 9067+ 9068+ int PrintResult(const std::vector<std::string> &title, const std::map<std::string, std::pair<int, float>> &result) override; 9069+ 9070+ int PrintInputData(); 9071+ 9072+ int MarkPerformance() override; 9073+ 9074+ int MarkAccuracy(bool 
enforce_accuracy = true) override; 9075+ 9076+ int CompareOutput() override; 9077+ 9078+ int SaveModels() override; 9079+ 9080+ OH_AI_ModelHandle ms_model_; 9081+ OH_AI_TensorHandleArray ms_inputs_for_api_; 9082+ OH_AI_ContextHandle context_ = nullptr; 9083+ OH_AI_TrainCfgHandle train_cfg_ = nullptr; 9084+ OH_AI_KernelCallBack before_call_back_{nullptr}; 9085+ OH_AI_KernelCallBack after_call_back_{nullptr}; 9086+}; 9087+} // namespace mindspore::lite 9088+ 9089+#endif //MINDSPORE_LITE_TOOLS_BENCHMARK_NET_TRAIN_C_API_H 9090diff --git a/mindspore/lite/tools/benchmark_train/run_net_train.cc b/mindspore/lite/tools/benchmark_train/run_net_train.cc 9091new file mode 100644 9092index 00000000..37a7e602 9093--- /dev/null 9094+++ b/mindspore/lite/tools/benchmark_train/run_net_train.cc 9095@@ -0,0 +1,86 @@ 9096+/** 9097+ * Copyright 2020 Huawei Technologies Co., Ltd 9098+ * 9099+ * Licensed under the Apache License, Version 2.0 (the "License"); 9100+ * you may not use this file except in compliance with the License. 9101+ * You may obtain a copy of the License at 9102+ * 9103+ * http://www.apache.org/licenses/LICENSE-2.0 9104+ * 9105+ * Unless required by applicable law or agreed to in writing, software 9106+ * distributed under the License is distributed on an "AS IS" BASIS, 9107+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9108+ * See the License for the specific language governing permissions and 9109+ * limitations under the License. 9110+ */ 9111+ 9112+#include "tools/benchmark_train/run_net_train.h" 9113+#include "tools/benchmark_train/net_train.h" 9114+#include "tools/benchmark_train/net_train_c_api.h" 9115+ 9116+namespace mindspore { 9117+namespace lite { 9118+int RunNetTrain(int argc, const char **argv) { 9119+ NetTrainFlags flags; 9120+ Option<std::string> err = flags.ParseFlags(argc, argv); 9121+ 9122+ if (err.IsSome()) { 9123+ std::cerr << err.Get() << std::endl; 9124+ std::cerr << flags.Usage() << std::endl; 9125+ return RET_ERROR; 9126+ } 9127+ 9128+ if (flags.help) { 9129+ std::cerr << flags.Usage() << std::endl; 9130+ return RET_OK; 9131+ } 9132+ if (flags.unified_api_) { 9133+ return NetTrain::RunNr(&flags); 9134+ } 9135+ 9136+ auto api_type = std::getenv("MSLITE_API_TYPE"); 9137+ if (api_type != nullptr) { 9138+ MS_LOG(INFO) << "MSLITE_API_TYPE = " << api_type; 9139+ std::cout << "MSLITE_API_TYPE = " << api_type << std::endl; 9140+ } 9141+ 9142+ NetTrainBase *net_trainer = nullptr; 9143+ if (api_type == nullptr || std::string(api_type) == "NEW") { 9144+ net_trainer = new (std::nothrow) NetTrain(&flags); 9145+ } else if (std::string(api_type) == "C") { 9146+ net_trainer = new (std::nothrow) NetTrainCApi(&flags); 9147+ } else { 9148+ MS_LOG(ERROR) << "Invalid MSLITE_API_TYPE, (NEW/C, default:NEW)"; 9149+ return RET_ERROR; 9150+ } 9151+ 9152+ if (net_trainer == nullptr) { 9153+ MS_LOG(ERROR) << "new net_trainer failed."; 9154+ return RET_ERROR; 9155+ } 9156+ auto status = net_trainer->Init(); 9157+ if (status != RET_OK) { 9158+ MS_LOG(ERROR) << "NetTrain init Error : " << status; 9159+ std::cerr << "NetTrain init Error : " << status << std::endl; 9160+ return RET_ERROR; 9161+ } 9162+ 9163+ status = net_trainer->RunNetTrain(); 9164+ if (status != RET_OK) { 9165+ MS_LOG(ERROR) << "Run NetTrain " 9166+ << flags.model_file_.substr(flags.model_file_.find_last_of("/") + 1).c_str() 9167+ << " Failed : " << status; 9168+ std::cerr << "Run NetTrain " << flags.model_file_.substr(flags.model_file_.find_last_of("/") + 1).c_str() 9169+ << " Failed : " << status << 
std::endl; 9170+ return RET_ERROR; 9171+ } 9172+ 9173+ MS_LOG(INFO) << "Run NetTrain " << flags.model_file_.substr(flags.model_file_.find_last_of("/") + 1).c_str() 9174+ << " Success."; 9175+ std::cout << "Run NetTrain " << flags.model_file_.substr(flags.model_file_.find_last_of("/") + 1).c_str() 9176+ << " Success." << std::endl; 9177+ delete net_trainer; 9178+ return RET_OK; 9179+} 9180+} // namespace lite 9181+} // namespace mindspore 9182\ No newline at end of file 9183diff --git a/mindspore/lite/tools/benchmark_train/run_net_train.h b/mindspore/lite/tools/benchmark_train/run_net_train.h 9184new file mode 100644 9185index 00000000..9ca2d73c 9186--- /dev/null 9187+++ b/mindspore/lite/tools/benchmark_train/run_net_train.h 9188@@ -0,0 +1,22 @@ 9189+/** 9190+ * Copyright 2023-2023 Huawei Technologies Co., Ltd 9191+ * 9192+ * Licensed under the Apache License, Version 2.0 (the "License"); 9193+ * you may not use this file except in compliance with the License. 9194+ * You may obtain a copy of the License at 9195+ * 9196+ * http://www.apache.org/licenses/LICENSE-2.0 9197+ * 9198+ * Unless required by applicable law or agreed to in writing, software 9199+ * distributed under the License is distributed on an "AS IS" BASIS, 9200+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9201+ * See the License for the specific language governing permissions and 9202+ * limitations under the License. 9203+ */ 9204+ 9205+#ifndef MINDSPORE_LITE_TOOLS_BENCHMARK_RUN_NET_TRAIN_H 9206+#define MINDSPORE_LITE_TOOLS_BENCHMARK_RUN_NET_TRAIN_H 9207+namespace mindspore::lite { 9208+int RunNetTrain(int argc, const char **argv); 9209+} // namespace mindspore::lite 9210+#endif // MINDSPORE_LITE_TOOLS_BENCHMARK_RUN_NET_TRAIN_H 9211diff --git a/mindspore/lite/tools/converter/CMakeLists.txt b/mindspore/lite/tools/converter/CMakeLists.txt 9212index 1e09d2ed..f854620f 100644 9213--- a/mindspore/lite/tools/converter/CMakeLists.txt 9214+++ b/mindspore/lite/tools/converter/CMakeLists.txt 9215@@ -7,6 +7,8 @@ endif() 9216 9217 set(SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../../src) 9218 9219+include_directories(${CMAKE_SOURCE_DIR}/mindspore/lite/) 9220+ 9221 if(ENABLE_GPU) 9222 add_compile_definitions(ENABLE_GPU) 9223 endif() 9224@@ -70,6 +72,7 @@ add_subdirectory(parser/caffe) 9225 add_subdirectory(parser/tflite) 9226 add_subdirectory(parser/onnx) 9227 add_subdirectory(parser/tf) 9228+add_subdirectory(parser/third_party) 9229 if(ENABLE_CONVERT_PYTORCH_MODEL) 9230 add_subdirectory(parser/pytorch) 9231 endif() 9232@@ -363,6 +366,7 @@ target_link_libraries(mindspore_converter 9233 tf_parser_mid 9234 caffe_parser_mid 9235 onnx_parser_mid 9236+ third_party_parser_mid 9237 lite_exporter_mid 9238 graph_pass_mid 9239 fusion_mid 9240diff --git a/mindspore/lite/tools/converter/config_parser/config_file_parser.cc b/mindspore/lite/tools/converter/config_parser/config_file_parser.cc 9241index fecc56d9..2e7ca749 100644 9242--- a/mindspore/lite/tools/converter/config_parser/config_file_parser.cc 9243+++ b/mindspore/lite/tools/converter/config_parser/config_file_parser.cc 9244@@ -34,6 +34,7 @@ constexpr auto kMixedBitWeightQuantParam = "mixed_bit_weight_quant_param"; 9245 constexpr auto kDataPreprocessParam = "data_preprocess_param"; 9246 constexpr auto kRegistry = "registry"; 9247 constexpr auto kMicroParam = "micro_param"; 9248+constexpr auto kThirdPartyModelParam = "third_party_model"; 9249 constexpr auto kCpuOptionParam = "cpu_option_cfg_param"; 9250 constexpr auto kCustomOppPath = "custom_opp_path"; 9251 constexpr 
auto kTransformQuantParam = "transform_quant_param"; 9252@@ -330,6 +331,12 @@ int ConfigFileParser::ParseConfigParam(std::map<std::string, std::map<std::strin 9253 MS_LOG(ERROR) << "ParseMicroParamString failed."; 9254 return ret; 9255 } 9256+ ret = ParseThirdPartyParamString(*maps); 9257+ (void)maps->erase(kThirdPartyModelParam); 9258+ if (ret != RET_OK) { 9259+ MS_LOG(ERROR) << "ParseThirdPartyParamString failed."; 9260+ return ret; 9261+ } 9262 ret = ParseWeightQuantString(*maps); 9263 (void)maps->erase(kWeightQuantParam); 9264 if (ret != RET_OK) { 9265@@ -594,5 +601,25 @@ int ConfigFileParser::ParseGraphKernelString(const std::map<std::string, std::ma 9266 } 9267 return RET_OK; 9268 } 9269+ 9270+int ConfigFileParser::ParseThirdPartyParamString( 9271+ const std::map<std::string, std::map<std::string, std::string>> &sections) { 9272+ if (sections.find(kThirdPartyModelParam) == sections.end()) { 9273+ return RET_OK; 9274+ } 9275+ const auto &input_args = sections.at(kThirdPartyModelParam); 9276+ const std::map<std::string, std::string &> kValidArgs = { 9277+ {"input_shapes", third_party_model_string_.input_shapes}, 9278+ {"input_dtypes", third_party_model_string_.input_dtypes}, 9279+ {"input_names", third_party_model_string_.input_names}, 9280+ {"input_formats", third_party_model_string_.input_formats}, 9281+ {"output_shapes", third_party_model_string_.output_shapes}, 9282+ {"output_dtypes", third_party_model_string_.output_dtypes}, 9283+ {"output_names", third_party_model_string_.output_names}, 9284+ {"output_formats", third_party_model_string_.output_formats}, 9285+ {"extended_parameters", third_party_model_string_.extended_parameters}, 9286+ }; 9287+ return SetMapData(input_args, kValidArgs, kThirdPartyModelParam); 9288+} 9289 } // namespace lite 9290 } // namespace mindspore 9291diff --git a/mindspore/lite/tools/converter/config_parser/config_file_parser.h b/mindspore/lite/tools/converter/config_parser/config_file_parser.h 9292index 31269816..6997bac8 100644 9293--- a/mindspore/lite/tools/converter/config_parser/config_file_parser.h 9294+++ b/mindspore/lite/tools/converter/config_parser/config_file_parser.h 9295@@ -110,6 +110,18 @@ struct MicroParamString { 9296 std::string changeable_weights_name; 9297 }; 9298 9299+struct ThirdPartyModelString { 9300+ std::string input_dtypes; 9301+ std::string input_shapes; 9302+ std::string input_names; // optional, default: "" 9303+ std::string input_formats; // optional, default: NHWC 9304+ std::string output_dtypes; 9305+ std::string output_shapes; 9306+ std::string output_names; // optional, default: "" 9307+ std::string output_formats; // optional, default: NHWC 9308+ std::string extended_parameters; // format: {key1:value1;key2:value2} 9309+}; 9310+ 9311 struct CpuOptionCfgString { 9312 std::string architecture; 9313 std::string instruction; 9314@@ -144,6 +156,7 @@ class ConfigFileParser { 9315 RegistryInfoString GetRegistryInfoString() const { return this->registry_info_string_; } 9316 AclOptionCfgString GetAclOptionCfgString() { return this->acl_option_cfg_string_; } 9317 MicroParamString GetMicroParamString() { return this->micro_param_string_; } 9318+ lite::ThirdPartyModelString GetThirdPartyModelString() const { return this->third_party_model_string_; } 9319 CpuOptionCfgString GetCpuOptionCfgString() { return this->cpu_option_cfg_string_; } 9320 TransformQuantString GetTransformQuantString() const { return this->transform_quant_string_; } 9321 AscendQuantString GetAscendQuantString() const { return this->ascend_quant_string_; } 9322@@ -161,6
+174,7 @@ class ConfigFileParser { 9323 int SetMapData(const std::map<std::string, std::string> &input_map, 9324 const std::map<std::string, std::string &> &parse_map, const std::string §ion); 9325 int ParseMicroParamString(const std::map<std::string, std::map<std::string, std::string>> &maps); 9326+ int ParseThirdPartyParamString(const std::map<std::string, std::map<std::string, std::string>> §ions); 9327 int ParseCpuOptionCfgString(const std::map<std::string, std::map<std::string, std::string>> &maps); 9328 int ParseTransformQuantString(const std::map<std::string, std::map<std::string, std::string>> &maps); 9329 int ParseAscendQuantString(const std::map<std::string, std::map<std::string, std::string>> &maps); 9330@@ -176,6 +190,7 @@ class ConfigFileParser { 9331 RegistryInfoString registry_info_string_; 9332 AclOptionCfgString acl_option_cfg_string_; 9333 MicroParamString micro_param_string_; 9334+ lite::ThirdPartyModelString third_party_model_string_; 9335 CpuOptionCfgString cpu_option_cfg_string_; 9336 TransformQuantString transform_quant_string_; 9337 AscendQuantString ascend_quant_string_; 9338diff --git a/mindspore/lite/tools/converter/config_parser/third_party_param_parser.cc b/mindspore/lite/tools/converter/config_parser/third_party_param_parser.cc 9339new file mode 100644 9340index 00000000..aee6a29c 9341--- /dev/null 9342+++ b/mindspore/lite/tools/converter/config_parser/third_party_param_parser.cc 9343@@ -0,0 +1,299 @@ 9344+/** 9345+ * Copyright 2023 Huawei Technologies Co., Ltd 9346+ * 9347+ * Licensed under the Apache License, Version 2.0 (the "License"); 9348+ * you may not use this file except in compliance with the License. 9349+ * You may obtain a copy of the License at 9350+ * 9351+ * http://www.apache.org/licenses/LICENSE-2.0 9352+ * 9353+ * Unless required by applicable law or agreed to in writing, software 9354+ * distributed under the License is distributed on an "AS IS" BASIS, 9355+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9356+ * See the License for the specific language governing permissions and 9357+ * limitations under the License. 9358+ */ 9359+ 9360+#include "tools/converter/config_parser/third_party_param_parser.h" 9361+#include <vector> 9362+#include <string> 9363+#include <map> 9364+#include "include/errorcode.h" 9365+#include "src/common/log_adapter.h" 9366+#include "nnacl/op_base.h" 9367+#include "tools/common/string_util.h" 9368+ 9369+namespace mindspore { 9370+namespace lite { 9371+namespace { 9372+const std::map<std::string, TypeId> kDataTypeMap = { 9373+ {"float64", TypeId::kNumberTypeFloat64}, {"float32", TypeId::kNumberTypeFloat32}, 9374+ {"float16", TypeId::kNumberTypeFloat16}, {"int64", TypeId::kNumberTypeInt64}, 9375+ {"int32", TypeId::kNumberTypeInt32}, {"int16", TypeId::kNumberTypeInt16}, 9376+ {"int8", TypeId::kNumberTypeInt8}, {"uint8", TypeId::kNumberTypeUInt8}, 9377+ {"bool", TypeId::kNumberTypeBool}, 9378+}; 9379+ 9380+TypeId ConvertDataType(const std::string &type) { 9381+ auto iter = kDataTypeMap.find(type); 9382+ if (iter == kDataTypeMap.end()) { 9383+ return TypeId::kTypeUnknown; 9384+ } 9385+ return iter->second; 9386+} 9387+} // namespace 9388+ 9389+/** 9390+ * Parse shapes like "1,256,256,3;3,96;96,96", and return like [[1,256,256,3], [3,96], [96,96]]. 
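 *
 * For illustration only, a hypothetical snippet of the converter config section these strings come from
 * (the section and key names are the ones registered in ConfigFileParser::ParseThirdPartyParamString above;
 * the value syntax follows the formats documented in this file):
 *   [third_party_model]
 *   input_shapes=1,256,256,3;3,96
 *   input_dtypes=float32;int32
 *   output_shapes=1,1000
 *   output_dtypes=float32
 *   extended_parameters=key_1:value_1;key_2:value_2
 * input_names/input_formats and output_names/output_formats are optional; missing names are generated from
 * the "in"/"out" prefixes and formats default to NHWC, as handled by DoParseNames and DoParseFormats below.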
9391+ */ 9392+int ThirdPartyParamParser::DoParseShape(const std::string &src, std::vector<std::vector<int64_t>> *dst_shapes) { 9393+ MS_CHECK_TRUE_RET(dst_shapes != nullptr, RET_ERROR); 9394+ dst_shapes->clear(); 9395+ 9396+ auto tmp_shapes = SplitStringToVector(src, ";"); 9397+ for (auto tmp_shape : tmp_shapes) { 9398+ auto tmp = SplitStringToVector(tmp_shape, ","); 9399+ std::vector<int64_t> shape = {}; 9400+ for (auto t : tmp) { 9401+ int value = 0; 9402+ if (!ConvertIntNum(t, &value)) { 9403+ MS_LOG(ERROR) << "Found error when converting shape string to integer"; 9404+ return RET_ERROR; 9405+ } 9406+ if (value <= 0) { // Valid shape value should be greater than 0. 9407+ MS_LOG(ERROR) << "Only support fixed shapes in third party param"; 9408+ return RET_ERROR; 9409+ } 9410+ shape.push_back(value); 9411+ } 9412+ dst_shapes->push_back(shape); 9413+ } 9414+ return RET_OK; 9415+} 9416+ 9417+/** 9418+ * Parse extended parameter like "key_1:value_1;key_2:value_2" and get {{"key_1", "value_1"}, {"key_2", "value_2"}}. 9419+ */ 9420+int ThirdPartyParamParser::DoParseExtendedParameters(const std::string &src, 9421+ std::map<std::string, std::vector<uint8_t>> *dst_ext_param) { 9422+ MS_CHECK_TRUE_RET(dst_ext_param != nullptr, RET_ERROR); 9423+ constexpr size_t kKeyIndex = 0U; 9424+ constexpr size_t kValueIndex = 1U; 9425+ constexpr size_t kKeyValueSize = 2U; 9426+ 9427+ if (src == "") { // Just return if 'extended_parameters' is not configured. 9428+ return RET_OK; 9429+ } 9430+ 9431+ auto tmp_list = SplitStringToVector(src, ";"); 9432+ std::map<std::string, std::vector<uint8_t>> tmp_map = {}; 9433+ for (auto tmp : tmp_list) { 9434+ auto key_and_value = SplitStringToVector(tmp, ":"); 9435+ if (key_and_value.size() != kKeyValueSize) { 9436+ MS_LOG(ERROR) << "Parse extended parameters failed, should keep key:value format"; 9437+ return RET_ERROR; 9438+ } 9439+ auto key = key_and_value[kKeyIndex]; 9440+ auto value = key_and_value[kValueIndex]; 9441+ if (tmp_map.find(key) != tmp_map.end()) { 9442+ MS_LOG(ERROR) << "Parse extended parameters failed, key should not be duplicated"; 9443+ return RET_ERROR; 9444+ } 9445+ tmp_map.emplace(key, std::vector<uint8_t>(value.begin(), value.end())); 9446+ } 9447+ 9448+ *dst_ext_param = tmp_map; 9449+ return RET_OK; 9450+} 9451+ 9452+/** 9453+ * Parse dtypes like "float32;float32;int32" and return [kNumberTypeFloat32, kNumberTypeFloat32, kNumberTypeInt32] 9454+ */ 9455+int ThirdPartyParamParser::DoParseDtypes(const std::string &src, std::vector<TypeId> *dst_dtypes) { 9456+ MS_CHECK_TRUE_RET(dst_dtypes != nullptr, RET_ERROR); 9457+ dst_dtypes->clear(); 9458+ auto tmp_dtypes = SplitStringToVector(src, ";"); 9459+ for (auto tmp_dtype : tmp_dtypes) { 9460+ TypeId type = ConvertDataType(tmp_dtype); 9461+ if (type == kTypeUnknown) { 9462+ MS_LOG(ERROR) << "Parse dtypes in third party model config failed"; 9463+ return RET_ERROR; 9464+ } 9465+ dst_dtypes->push_back(type); 9466+ } 9467+ return RET_OK; 9468+} 9469+ 9470+/** 9471+ * Parse names like "foo;bar;boo" and get ["foo", "bar", "boo"] 9472+ * If input names are not provided in config, use the default prefix to generate like: "in_0;in_1;..;in_n" 9473+ */ 9474+int ThirdPartyParamParser::DoParseNames(const std::string &src, size_t num, const std::string &default_prefix, 9475+ std::vector<std::string> *dst_names) { 9476+ MS_CHECK_TRUE_RET(dst_names != nullptr, RET_ERROR); 9477+ std::string tmp_names = src; 9478+ if (tmp_names.empty()) { 9479+ std::string tmp = ""; 9480+ for (size_t i = 0; i < num; i++) { 9481+ tmp += 
default_prefix + "_" + std::to_string(i); 9482+ if (i + 1 < num) { 9483+ tmp += ";"; 9484+ } 9485+ } 9486+ tmp_names = tmp; 9487+ } 9488+ 9489+ *dst_names = SplitStringToVector(tmp_names, ";"); 9490+ if (dst_names->size() != num) { 9491+ MS_LOG(ERROR) << "Name number " << dst_names->size() << " and input number: " << num << " are not equal"; 9492+ return RET_ERROR; 9493+ } 9494+ return RET_OK; 9495+} 9496+ 9497+/** 9498+ * Parse formats like "NCHW;NHWC" and get [NCHW, NHWC] 9499+ */ 9500+namespace { 9501+ int StringToFormat(const std::string &format_string, schema::Format *format) { 9502+ static const std::unordered_map<std::string, schema::Format> kFormatTable = { 9503+ {"NCHW", schema::Format::Format_NCHW}, 9504+ {"NHWC", schema::Format::Format_NHWC}, 9505+ {"NHWC4", schema::Format::Format_NHWC4}, 9506+ {"HWKC", schema::Format::Format_HWKC}, 9507+ {"HWCK", schema::Format::Format_HWCK}, 9508+ {"KCHW", schema::Format::Format_KCHW}, 9509+ {"CKHW", schema::Format::Format_CKHW}, 9510+ {"KHWC", schema::Format::Format_KHWC}, 9511+ {"CHWK", schema::Format::Format_CHWK}, 9512+ {"HW", schema::Format::Format_HW}, 9513+ {"HW4", schema::Format::Format_HW4}, 9514+ {"NC", schema::Format::Format_NC}, 9515+ {"NC4", schema::Format::Format_NC4}, 9516+ {"NC4HW4", schema::Format::Format_NC4HW4}, 9517+ {"NUM_OF_FORMAT", schema::Format::Format_NUM_OF_FORMAT}, 9518+ {"NCDHW", schema::Format::Format_NCDHW}, 9519+ {"NWC", schema::Format::Format_NWC}, 9520+ {"NCW", schema::Format::Format_NCW}, 9521+ }; 9522+ 9523+ if (format == nullptr) { 9524+ return RET_NULL_PTR; 9525+ } 9526+ 9527+ auto iter = kFormatTable.find(format_string); 9528+ if (iter == kFormatTable.end()) { 9529+ return RET_PARAM_INVALID; 9530+ } 9531+ 9532+ *format = iter->second; 9533+ return RET_OK; 9534+ } 9535+} 9536+ 9537+int ThirdPartyParamParser::DoParseFormats(const std::string &src, size_t num, 9538+ std::vector<schema::Format> *result_formats) { 9539+ MS_CHECK_TRUE_RET(result_formats != nullptr, RET_ERROR); 9540+ std::string tmp_names = src; 9541+ if (tmp_names.empty()) { 9542+ std::vector<schema::Format> default_formats(num, schema::Format::Format_NHWC); 9543+ *result_formats = default_formats; 9544+ return RET_OK; 9545+ } 9546+ 9547+ auto format_strings = SplitStringToVector(tmp_names, ";"); 9548+ if (format_strings.size() != num) { 9549+ MS_LOG(ERROR) << "Number of format: " << format_strings.size() << " and number of tensor: " << num << " are not equal"; 9550+ return RET_ERROR; 9551+ } 9552+ 9553+ std::vector<schema::Format> result(num); 9554+ for (size_t i = 0; i < num; i++) { 9555+ if (StringToFormat(format_strings[i], &result[i]) != RET_OK) { 9556+ MS_LOG(ERROR) << "Tensor format:" << format_strings[i] << " is invalid"; 9557+ return RET_PARAM_INVALID; 9558+ } 9559+ } 9560+ *result_formats = result; 9561+ return RET_OK; 9562+} 9563+ 9564+int ThirdPartyParamParser::Parse(const ThirdPartyModelString ¶m_string, ThirdPartyModelParam *param) { 9565+ MS_CHECK_TRUE_RET(param != nullptr, RET_ERROR); 9566+ 9567+ auto ret = DoParseShape(param_string.input_shapes, &(param->input_shapes)); 9568+ if (ret != RET_OK) { 9569+ MS_LOG(ERROR) << "Parse input shapes of third party param failed"; 9570+ return RET_ERROR; 9571+ } 9572+ 9573+ ret = DoParseDtypes(param_string.input_dtypes, &(param->input_dtypes)); 9574+ if (ret != RET_OK) { 9575+ MS_LOG(ERROR) << "Parse input dtypes of third party param failed"; 9576+ return RET_ERROR; 9577+ } 9578+ 9579+ auto input_shape_num = param->input_shapes.size(); 9580+ auto input_dtype_num = 
param->input_dtypes.size(); 9581+ if (input_shape_num != input_dtype_num) { 9582+ MS_LOG(ERROR) << "Input shape number: " << input_shape_num << " and dtype number: " << input_dtype_num 9583+ << " are not equal"; 9584+ return RET_ERROR; 9585+ } 9586+ 9587+ ret = DoParseFormats(param_string.input_formats, input_shape_num, &(param->input_formats)); 9588+ if (ret != RET_OK) { 9589+ MS_LOG(ERROR) << "Parse input formats of third party param failed"; 9590+ return RET_ERROR; 9591+ } 9592+ 9593+ const std::string kInputNamePrefix = "in"; 9594+ ret = DoParseNames(param_string.input_names, input_shape_num, kInputNamePrefix, &(param->input_names)); 9595+ if (ret != RET_OK) { 9596+ MS_LOG(ERROR) << "Parse input names of third party param failed"; 9597+ return RET_ERROR; 9598+ } 9599+ 9600+ ret = DoParseShape(param_string.output_shapes, &(param->output_shapes)); 9601+ if (ret != RET_OK) { 9602+ MS_LOG(ERROR) << "Parse output shapes of third party param failed"; 9603+ return RET_ERROR; 9604+ } 9605+ 9606+ ret = DoParseDtypes(param_string.output_dtypes, &(param->output_dtypes)); 9607+ if (ret != RET_OK) { 9608+ MS_LOG(ERROR) << "Parse output dtypes of third party param failed"; 9609+ return RET_ERROR; 9610+ } 9611+ 9612+ auto output_shape_num = param->output_shapes.size(); 9613+ auto output_dtype_num = param->output_dtypes.size(); 9614+ if (output_shape_num != output_dtype_num) { 9615+ MS_LOG(ERROR) << "Output shape number: " << output_shape_num << " and dtype number: " << output_dtype_num 9616+ << " are not equal"; 9617+ return RET_ERROR; 9618+ } 9619+ 9620+ ret = DoParseFormats(param_string.output_formats, output_shape_num, &(param->output_formats)); 9621+ if (ret != RET_OK) { 9622+ MS_LOG(ERROR) << "Parse output formats of third party param failed"; 9623+ return RET_ERROR; 9624+ } 9625+ 9626+ const std::string kOutputNamePrefix = "out"; 9627+ ret = DoParseNames(param_string.output_names, output_shape_num, kOutputNamePrefix, &(param->output_names)); 9628+ if (ret != RET_OK) { 9629+ MS_LOG(ERROR) << "Parse output names of third party param failed"; 9630+ return RET_ERROR; 9631+ } 9632+ 9633+ ret = DoParseExtendedParameters(param_string.extended_parameters, &(param->extended_parameters)); 9634+ if (ret != RET_OK) { 9635+ MS_LOG(ERROR) << "Parse extended parameter of third party param failed"; 9636+ return RET_ERROR; 9637+ } 9638+ 9639+ return RET_OK; 9640+} 9641+} // namespace lite 9642+} // namespace mindspore 9643diff --git a/mindspore/lite/tools/converter/config_parser/third_party_param_parser.h b/mindspore/lite/tools/converter/config_parser/third_party_param_parser.h 9644new file mode 100644 9645index 00000000..5cf6e8fb 9646--- /dev/null 9647+++ b/mindspore/lite/tools/converter/config_parser/third_party_param_parser.h 9648@@ -0,0 +1,44 @@ 9649+/** 9650+ * Copyright 2023 Huawei Technologies Co., Ltd 9651+ * 9652+ * Licensed under the Apache License, Version 2.0 (the "License"); 9653+ * you may not use this file except in compliance with the License. 9654+ * You may obtain a copy of the License at 9655+ * 9656+ * http://www.apache.org/licenses/LICENSE-2.0 9657+ * 9658+ * Unless required by applicable law or agreed to in writing, software 9659+ * distributed under the License is distributed on an "AS IS" BASIS, 9660+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9661+ * See the License for the specific language governing permissions and 9662+ * limitations under the License.
9663+ */ 9664+ 9665+#ifndef MINDSPORE_LITE_TOOLS_CONVERTER_CONFIG_PARSER_THIRD_PARTY_PARAM_PARSER_H_ 9666+#define MINDSPORE_LITE_TOOLS_CONVERTER_CONFIG_PARSER_THIRD_PARTY_PARAM_PARSER_H_ 9667+#include <string> 9668+#include <vector> 9669+#include <map> 9670+#include "include/errorcode.h" 9671+#include "tools/converter/cxx_api/converter_para.h" 9672+#include "tools/converter/config_parser/config_file_parser.h" 9673+ 9674+namespace mindspore { 9675+namespace lite { 9676+class ThirdPartyParamParser { 9677+ public: 9678+ static int Parse(const lite::ThirdPartyModelString ¶m_string, ThirdPartyModelParam *param); 9679+ 9680+ private: 9681+ static int DoParseShape(const std::string &src, std::vector<std::vector<int64_t>> *dst_shapes); 9682+ static int DoParseExtendedParameters(const std::string &src, 9683+ std::map<std::string, std::vector<uint8_t>> *dst_ext_param); 9684+ static int DoParseDtypes(const std::string &src, std::vector<TypeId> *dst_dtypes); 9685+ static int DoParseNames(const std::string &src, size_t num, const std::string &default_prefix, 9686+ std::vector<std::string> *dst_names); 9687+ static int DoParseFormats(const std::string &src, size_t num, std::vector<schema::Format> *result_formats); 9688+}; 9689+} // namespace lite 9690+} // namespace mindspore 9691+ 9692+#endif // MINDSPORE_LITE_TOOLS_CONVERTER_CONFIG_PARSER_THIRD_PARTY_PARAM_PARSER_H_ 9693diff --git a/mindspore/lite/tools/converter/converter.cc b/mindspore/lite/tools/converter/converter.cc 9694index df3176c2..a61bd51c 100644 9695--- a/mindspore/lite/tools/converter/converter.cc 9696+++ b/mindspore/lite/tools/converter/converter.cc 9697@@ -49,6 +49,7 @@ 9698 #include "tools/converter/config_parser/preprocess_parser.h" 9699 #include "tools/converter/config_parser/quant_param_parser.h" 9700 #include "tools/converter/config_parser/graph_kernel_param_parser.h" 9701+#include "tools/converter/config_parser/third_party_param_parser.h" 9702 #include "tools/converter/converter_funcgraph.h" 9703 #include "tools/converter/converter_metagraph.h" 9704 #include "tools/common/string_util.h" 9705@@ -472,6 +473,12 @@ int ConverterImpl::ParseParam(lite::ConfigFileParser *config_parser, const std:: 9706 MS_LOG(ERROR) << "Parse mixed bit weight quant param failed."; 9707 return ret; 9708 } 9709+ ret = lite::ThirdPartyParamParser::Parse(config_parser->GetThirdPartyModelString(), 9710+ ¶m->thirdPartyModelParam); 9711+ if (ret != RET_OK) { 9712+ MS_LOG(ERROR) << "Parse third party param failed."; 9713+ return ret; 9714+ } 9715 ret = InitExtendedIntegrationInfo(param, *config_parser); 9716 if (ret != RET_OK) { 9717 MS_LOG(ERROR) << "Parse extended integration info failed."; 9718@@ -699,19 +706,20 @@ std::string ConverterImpl::GetStrFromConfigFile(const std::string &file, const s 9719 9720 int CheckFmkType(const std::shared_ptr<ConverterPara> ¶m) { 9721 if (param != nullptr) { 9722- std::set valid_values = {FmkType::kFmkTypeTf, FmkType::kFmkTypeCaffe, FmkType::kFmkTypeOnnx, 9723- FmkType::kFmkTypeMs, FmkType::kFmkTypeTflite, FmkType::kFmkTypePytorch, 9724- FmkType::kFmkTypeMsLite}; 9725- if (std::find(valid_values.begin(), valid_values.end(), param->fmk_type) == valid_values.end()) { 9726- MS_LOG(ERROR) << "INPUT ILLEGAL: fmk_type must be " 9727- "kFmkTypeTf|kFmkTypeCaffe|kFmkTypeOnnx|kFmkTypeMs|kFmkTypeTflite|kFmkTypeMsLite" 9728- << ", but got " << param->fmk_type; 9729- return RET_INPUT_PARAM_INVALID; 9730- } 9731- if (param->fmk_type != converter::kFmkTypeCaffe && !param->weight_file.empty()) { 9732- MS_LOG(ERROR) << "INPUT ILLEGAL: 
weight_file is not a valid flag"; 9733- return RET_INPUT_PARAM_INVALID; 9734- } 9735+ return RET_OK; 9736+ } 9737+ std::set kValidFmkTypes = {FmkType::kFmkTypeTf, FmkType::kFmkTypeCaffe, FmkType::kFmkTypeOnnx, 9738+ FmkType::kFmkTypeMs, FmkType::kFmkTypeTflite, FmkType::kFmkTypePytorch, 9739+ FmkType::kFmkTypeMsLite, FmkType::kFmkTypeThirdParty}; 9740+ if (kValidFmkTypes.find(param->fmk_type) == kValidFmkTypes.end()) { 9741+ MS_LOG(ERROR) << "INPUT ILLEGAL: fmk_type must be " 9742+ "TF|CAFFE|ONNX|MS|TFLITE|PYTORCH|MSLITE|THIRDPARTY" 9743+ << ", but got " << param->fmk_type; 9744+ return RET_INPUT_PARAM_INVALID; 9745+ } 9746+ if (param->fmk_type != converter::kFmkTypeCaffe && !param->weight_file.empty()) { 9747+ MS_LOG(ERROR) << "INPUT ILLEGAL: weight_file is not a valid flag"; 9748+ return RET_INPUT_PARAM_INVALID; 9749 } 9750 return RET_OK; 9751 } 9752diff --git a/mindspore/lite/tools/converter/converter_funcgraph.cc b/mindspore/lite/tools/converter/converter_funcgraph.cc 9753index f03f995c..61d5c463 100644 9754--- a/mindspore/lite/tools/converter/converter_funcgraph.cc 9755+++ b/mindspore/lite/tools/converter/converter_funcgraph.cc 9756@@ -90,6 +90,7 @@ FuncGraphPtr ConverterFuncGraph::Load3rdModelToFuncgraph(const std::shared_ptr<C 9757 converter_parameters.save_type = param->save_type; 9758 converter_parameters.model_file = param->model_file; 9759 converter_parameters.weight_file = param->weight_file; 9760+ converter_parameters.attrs.emplace("config_file", param->config_file); 9761 func_graph_base = model_parser->Parse(converter_parameters); 9762 if (func_graph_base == nullptr) { 9763 delete model_parser; 9764@@ -447,11 +448,13 @@ STATUS ConverterFuncGraph::Optimize(const std::shared_ptr<ConverterPara> ¶m, 9765 return status; 9766 } 9767 9768- AnfTransform funcgraph_transform; 9769- status = funcgraph_transform.Transform(func_graph, param); 9770- if (status != RET_OK) { 9771- MS_LOG(ERROR) << "Transform anf graph failed."; 9772- return status; 9773+ if (param->fmk_type != converter::FmkType::kFmkTypeThirdParty) { 9774+ AnfTransform funcgraph_transform; 9775+ status = funcgraph_transform.Transform(func_graph, param); 9776+ if (status != RET_OK) { 9777+ MS_LOG(ERROR) << "Transform anf graph failed."; 9778+ return status; 9779+ } 9780 } 9781 9782 status = UnifyFuncGraphOutputFormat(param, func_graph); 9783diff --git a/mindspore/lite/tools/converter/converter_lite/converter_flags.cc b/mindspore/lite/tools/converter/converter_lite/converter_flags.cc 9784index 4883c48d..024e209f 100644 9785--- a/mindspore/lite/tools/converter/converter_lite/converter_flags.cc 9786+++ b/mindspore/lite/tools/converter/converter_lite/converter_flags.cc 9787@@ -138,11 +138,11 @@ int Flags::InitFmk() { 9788 // value check not here, it is in converter c++ API's CheckValueParam method. 
9789 std::map<std::string, FmkType> StrToEnumFmkTypeMap = { 9790 {"CAFFE", kFmkTypeCaffe}, {"MINDIR", kFmkTypeMs}, {"TFLITE", kFmkTypeTflite}, {"ONNX", kFmkTypeOnnx}, 9791- {"TF", kFmkTypeTf}, {"PYTORCH", kFmkTypePytorch}, {"MSLITE", kFmkTypeMsLite}}; 9792+ {"TF", kFmkTypeTf}, {"PYTORCH", kFmkTypePytorch}, {"MSLITE", kFmkTypeMsLite}, {"THIRDPARTY", kFmkTypeThirdParty}}; 9793 if (StrToEnumFmkTypeMap.find(this->fmkIn) != StrToEnumFmkTypeMap.end()) { 9794 this->fmk = StrToEnumFmkTypeMap.at(this->fmkIn); 9795 } else { 9796- std::cerr << "INPUT ILLEGAL: fmk must be TF|TFLITE|CAFFE|MINDIR|ONNX" << std::endl; 9797+ std::cerr << "INPUT ILLEGAL: fmk must be TF|TFLITE|CAFFE|MINDIR|ONNX|PYTORCH|THIRDPARTY" << std::endl; 9798 return RET_INPUT_PARAM_INVALID; 9799 } 9800 9801diff --git a/mindspore/lite/tools/converter/cxx_api/converter_para.h b/mindspore/lite/tools/converter/cxx_api/converter_para.h 9802index a4f72a69..33210fd0 100644 9803--- a/mindspore/lite/tools/converter/cxx_api/converter_para.h 9804+++ b/mindspore/lite/tools/converter/cxx_api/converter_para.h 9805@@ -21,6 +21,7 @@ 9806 #include <vector> 9807 #include <set> 9808 #include "include/converter.h" 9809+#include "mindapi/base/type_id.h" 9810 #include "tools/converter/quantizer/quant_params.h" 9811 #include "tools/converter/preprocess/preprocess_param.h" 9812 #include "tools/converter/adapter/acl/common/acl_types.h" 9813@@ -35,6 +36,18 @@ struct ParallelSplitConfig { 9814 std::vector<std::string> parallel_devices_; 9815 }; 9816 9817+struct ThirdPartyModelParam { 9818+ std::vector<TypeId> input_dtypes; 9819+ std::vector<std::vector<int64_t>> input_shapes; 9820+ std::vector<std::string> input_names; 9821+ std::vector<schema::Format> input_formats; 9822+ std::vector<TypeId> output_dtypes; 9823+ std::vector<std::vector<int64_t>> output_shapes; 9824+ std::vector<std::string> output_names; 9825+ std::vector<schema::Format> output_formats; 9826+ std::map<std::string, std::vector<uint8_t>> extended_parameters; 9827+}; 9828+ 9829 struct CpuOptionCfg { 9830 std::string architecture; 9831 std::string instruction; 9832@@ -97,6 +110,7 @@ struct ConverterPara { 9833 lite::acl::AclModelOptionCfg aclModelOptionCfgParam; 9834 lite::micro::MicroParam microParam; 9835 ParallelSplitConfig parallel_split_config; 9836+ ThirdPartyModelParam thirdPartyModelParam; 9837 AscendGeOptionCfg ascendGeOptionCfg; 9838 std::string device; 9839 std::string provider; 9840diff --git a/mindspore/lite/tools/converter/graphdef_transform.cc b/mindspore/lite/tools/converter/graphdef_transform.cc 9841index 90b744e5..bf1a82ae 100644 9842--- a/mindspore/lite/tools/converter/graphdef_transform.cc 9843+++ b/mindspore/lite/tools/converter/graphdef_transform.cc 9844@@ -76,11 +76,55 @@ int QuantTransform(const std::shared_ptr<ConverterPara> ¶m, schema::MetaGrap 9845 } 9846 return RET_OK; 9847 } 9848+ 9849+int FillGraphOutputShape(MetaGraphT *meta_graph, const std::vector<std::vector<int64_t>> output_shapes) { 9850+ const auto &out_indices = meta_graph->outputIndex; 9851+ for (size_t i = 0; i < out_indices.size(); i++) { 9852+ auto &out_tensor = meta_graph->allTensors[out_indices[i]]; 9853+ out_tensor->dims = {}; 9854+ for (size_t k = 0; k < output_shapes[i].size(); k++) { 9855+ out_tensor->dims.push_back(static_cast<int32_t>(output_shapes[i][k])); 9856+ } 9857+ } 9858+ return RET_OK; 9859+} 9860+ 9861+void FillGraphInputAndOutputFormats(MetaGraphT *meta_graph, const ConverterPara ¶) { 9862+ const auto &in_indices = meta_graph->inputIndex; 9863+ for (size_t i = 0; i < in_indices.size(); 
i++) { 9864+ auto &in_tensor = meta_graph->allTensors[in_indices[i]]; 9865+ in_tensor->format = para.thirdPartyModelParam.input_formats[i]; 9866+ MS_LOG_DEBUG << "input " << i << " format: " << EnumNameFormat(in_tensor->format); 9867+ } 9868+ 9869+ const auto &out_indices = meta_graph->outputIndex; 9870+ for (size_t i = 0; i < out_indices.size(); i++) { 9871+ auto &out_tensor = meta_graph->allTensors[out_indices[i]]; 9872+ out_tensor->format = para.thirdPartyModelParam.output_formats[i]; 9873+ MS_LOG_DEBUG << "output " << i << " format: " << EnumNameFormat(out_tensor->format); 9874+ } 9875+} 9876 } // namespace 9877 9878 int GraphDefTransform::Transform(const std::shared_ptr<ConverterPara> ¶m) { 9879 MS_ASSERT(param != nullptr); 9880 STATUS status; 9881+ 9882+ if (param->fmk_type == converter::kFmkTypeThirdParty) { 9883+ 9884+ // Legacy optimizer infer shape, but op Custom which wraps third party model has no infer-shape function. 9885+ // So we don't perform legacy optimization for kFmkTypeThirdParty case. 9886+ auto ret = FillGraphOutputShape(graph_defT_, param->thirdPartyModelParam.output_shapes); 9887+ if (ret != RET_OK) { 9888+ MS_LOG(ERROR) << "Fill output shape of third party model failed, ret:" << ret; 9889+ return ret; 9890+ } 9891+ 9892+ // Tensor of FuncGraph has no attribute of format, so set format in MetaGraph. 9893+ FillGraphInputAndOutputFormats(graph_defT_, *param); 9894+ return RET_OK; 9895+ } 9896+ 9897 { 9898 auto old_nodes = GetGraphNodes(*graph_defT_); 9899 Optimizer unused_op_remove_optimizer; 9900diff --git a/mindspore/lite/tools/converter/parser/third_party/CMakeLists.txt b/mindspore/lite/tools/converter/parser/third_party/CMakeLists.txt 9901new file mode 100644 9902index 00000000..b55e0194 9903--- /dev/null 9904+++ b/mindspore/lite/tools/converter/parser/third_party/CMakeLists.txt 9905@@ -0,0 +1,4 @@ 9906+add_library(third_party_parser_mid OBJECT third_party_model_parser.cc) 9907+add_dependencies(third_party_parser_mid proto_mid) 9908+add_dependencies(third_party_parser_mid fbs_src) 9909+add_dependencies(third_party_parser_mid fbs_inner_src) 9910\ No newline at end of file 9911diff --git a/mindspore/lite/tools/converter/parser/third_party/third_party_model_parser.cc b/mindspore/lite/tools/converter/parser/third_party/third_party_model_parser.cc 9912new file mode 100644 9913index 00000000..652db4af 9914--- /dev/null 9915+++ b/mindspore/lite/tools/converter/parser/third_party/third_party_model_parser.cc 9916@@ -0,0 +1,277 @@ 9917+/** 9918+ * Copyright 2023 Huawei Technologies Co., Ltd 9919+ * 9920+ * Licensed under the Apache License, Version 2.0 (the "License"); 9921+ * you may not use this file except in compliance with the License. 9922+ * You may obtain a copy of the License at 9923+ * 9924+ * http://www.apache.org/licenses/LICENSE-2.0 9925+ * 9926+ * Unless required by applicable law or agreed to in writing, software 9927+ * distributed under the License is distributed on an "AS IS" BASIS, 9928+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9929+ * See the License for the specific language governing permissions and 9930+ * limitations under the License. 
9931+ */ 9932+#include "tools/converter/parser/third_party/third_party_model_parser.h" 9933+#include <string> 9934+#include <vector> 9935+#include <memory> 9936+#include "ir/value.h" 9937+#include "mindapi/base/type_id.h" 9938+#include "src/common/log_util.h" 9939+#include "src/common/file_utils.h" 9940+#include "nnacl/op_base.h" 9941+#include "ops/primitive_c.h" 9942+#include "ops/custom.h" 9943+#include "ops/tuple_get_item.h" 9944+#include "ops/make_tuple.h" 9945+#include "ops/return.h" 9946+#include "tools/converter/config_parser/config_file_parser.h" 9947+#include "include/registry/model_parser_registry.h" 9948+#include "tools/common/graph_util.h" 9949+#include "tools/common/tensor_util.h" 9950+#include "tools/converter/converter_context.h" 9951+#include "tools/converter/parser/lite_model_parser_creator.h" 9952+ 9953+using mindspore::converter::kFmkTypeThirdParty; 9954+ 9955+namespace mindspore { 9956+namespace lite { 9957+api::FuncGraphPtr ThirdPartyModelParser::Parse(const converter::ConverterParameters &flag) { 9958+ model_file_ = flag.model_file; 9959+ auto &attrs = flag.attrs; 9960+ auto iter = attrs.find("config_file"); 9961+ if (iter == attrs.end()) { 9962+ return nullptr; 9963+ } 9964+ auto config_file = iter->second; 9965+ 9966+ auto ret = InitConfig(config_file); 9967+ if (ret != RET_OK) { 9968+ MS_LOG(ERROR) << "Init config for third party model parsing failed"; 9969+ return nullptr; 9970+ } 9971+ 9972+ return CreateFuncGraph(); 9973+} 9974+ 9975+STATUS ThirdPartyModelParser::InitConfig(const std::string &config_file) { 9976+ lite::ConfigFileParser config_parser; 9977+ if (config_file.empty()) { 9978+ MS_LOG(ERROR) << "Missing config file in converting third party model"; 9979+ return RET_ERROR; 9980+ } 9981+ auto ret = config_parser.ParseConfigFile(config_file); 9982+ if (ret != RET_OK) { 9983+ MS_LOG(ERROR) << "Get third party model section from config file failed"; 9984+ return RET_ERROR; 9985+ } 9986+ 9987+ ret = ThirdPartyParamParser::Parse(config_parser.GetThirdPartyModelString(), ¶m_); 9988+ if (ret != RET_OK) { 9989+ MS_LOG(ERROR) << "Parse third party model param failed."; 9990+ return ret; 9991+ } 9992+ return RET_OK; 9993+} 9994+ 9995+api::FuncGraphPtr ThirdPartyModelParser::CreateFuncGraph() { 9996+ auto func_graph = std::make_shared<FuncGraph>(); 9997+ MS_CHECK_TRUE_RET(func_graph != nullptr, nullptr); 9998+ auto type_value = MakeValue(static_cast<int>(converter::kFmkTypeThirdParty)); 9999+ MS_CHECK_TRUE_RET(type_value != nullptr, nullptr); 10000+ func_graph->set_attr("fmk", type_value); 10001+ auto attr_value = MakeValue("third_party"); 10002+ MS_CHECK_TRUE_RET(attr_value != nullptr, nullptr); 10003+ func_graph->set_attr("graph_name", attr_value); 10004+ 10005+ std::vector<AnfNodePtr> input_nodes = {}; 10006+ auto ret = BuildGraphInputs(func_graph, &input_nodes); 10007+ if (ret != RET_OK) { 10008+ MS_LOG(ERROR) << "Create func graph input nodes failed"; 10009+ return nullptr; 10010+ } 10011+ 10012+ CNodePtr custom_node = nullptr; 10013+ ret = BuildCustomOp(func_graph, input_nodes, &custom_node); 10014+ if (ret != RET_OK) { 10015+ MS_LOG(ERROR) << "Create func graph custom op node failed"; 10016+ return nullptr; 10017+ } 10018+ 10019+ ret = BuildGraphOutputs(func_graph, custom_node); 10020+ if (ret != RET_OK) { 10021+ MS_LOG(ERROR) << "Create func graph output nodes failed"; 10022+ return nullptr; 10023+ } 10024+ 10025+ static auto manager = Manage(func_graph); 10026+ func_graph->set_manager(manager); 10027+ 10028+ auto result_graph = 
api::MakeShared<api::FuncGraph>(func_graph); 10029+ return result_graph; 10030+} 10031+ 10032+STATUS ThirdPartyModelParser::BuildGraphInputs(const FuncGraphPtr &func_graph, std::vector<AnfNodePtr> *op_inputs) { 10033+ MS_ASSERT(anf_node_map != nullptr && func_graph != nullptr); 10034+ auto &dtypes = param_.input_dtypes; 10035+ auto &shapes = param_.input_shapes; 10036+ auto &names = param_.input_names; 10037+ 10038+ auto input_size = dtypes.size(); 10039+ 10040+ // Create parameter nodes for graph inputs 10041+ for (size_t i = 0; i < input_size; i++) { 10042+ auto parameter = func_graph->add_parameter(); 10043+ MSLITE_CHECK_PTR(parameter); 10044+ auto abstract_tensor = CreateTensorAbstract(shapes[i], dtypes[i]); 10045+ if (abstract_tensor == nullptr) { 10046+ MS_LOG(ERROR) << "Create tensor abstract failed"; 10047+ return RET_ERROR; 10048+ } 10049+ parameter->set_abstract(abstract_tensor); 10050+ parameter->set_name(names[i]); 10051+ op_inputs->push_back(parameter); 10052+ } 10053+ 10054+ // Create parameter nodes for const tensor which wrapped third model buffer. 10055+ size_t model_size = 0U; 10056+ auto model_data = ReadFile(model_file_.c_str(), &model_size); 10057+ std::vector<int64_t> model_shape = {static_cast<int64_t>(model_size)}; 10058+ auto tensor_info = CreateTensorInfo(nullptr, 0, model_shape, kNumberTypeUInt8); 10059+ if (tensor_info == nullptr) { 10060+ MS_LOG(ERROR) << "init tensor info failed"; 10061+ delete model_data; 10062+ return RET_NULL_PTR; 10063+ } 10064+ auto tensor_data = reinterpret_cast<uint8_t *>(tensor_info->data_c()); 10065+ if (memcpy_s(tensor_data, tensor_info->Size(), model_data, model_size) != EOK) { 10066+ MS_LOG(ERROR) << "memcpy failed."; 10067+ delete model_data; 10068+ return RET_ERROR; 10069+ } 10070+ delete model_data; 10071+ auto parameter = func_graph->add_parameter(); 10072+ MSLITE_CHECK_PTR(parameter); 10073+ auto status = InitParameterFromTensorInfo(parameter, tensor_info); 10074+ if (status != RET_OK) { 10075+ MS_LOG(ERROR) << "init parameter from tensor info failed."; 10076+ return RET_ERROR; 10077+ } 10078+ parameter->set_name("ThirdPartyModel"); 10079+ op_inputs->push_back(parameter); 10080+ return RET_OK; 10081+} 10082+ 10083+STATUS ThirdPartyModelParser::BuildCustomOp(const FuncGraphPtr &func_graph, const std::vector<AnfNodePtr> &op_inputs, 10084+ CNodePtr *operator_node) { 10085+ MS_ASSERT(anf_node_map != nullptr && func_graph != nullptr); 10086+ NotSupportOp::GetInstance()->set_fmk_type("THIRDPARTY"); 10087+ STATUS status = RET_OK; 10088+ 10089+ // create primitive and build CNode of CUSTOM operator 10090+ ops::PrimitiveCPtr primitive_c; 10091+ auto prim = std::make_unique<ops::Custom>(); 10092+ MS_CHECK_TRUE_RET(prim != nullptr, RET_ERROR); 10093+ prim->set_type("ThirdPartyModel"); 10094+ 10095+ const auto &attr = param_.extended_parameters; 10096+ prim->set_attr(attr); 10097+ primitive_c = prim->GetPrim(); 10098+ if (primitive_c == nullptr) { 10099+ MS_LOG(ERROR) << "failed to create primitive: custom"; 10100+ return RET_ERROR; 10101+ } 10102+ 10103+ auto operator_cnode = func_graph->NewCNode(primitive_c, op_inputs); 10104+ MSLITE_CHECK_PTR(operator_cnode); 10105+ operator_cnode->set_fullname_with_scope("Custom"); 10106+ *operator_node = operator_cnode; 10107+ return status; 10108+} 10109+ 10110+STATUS ThirdPartyModelParser::BuildGraphOutputs(const FuncGraphPtr &func_graph, const CNodePtr &operator_node) { 10111+ MS_ASSERT(anf_node_map != nullptr && func_graph != nullptr); 10112+ 10113+ auto dtypes = param_.output_dtypes; 10114+ auto 
shapes = param_.output_shapes; 10115+ auto names = param_.output_names; 10116+ 10117+ auto output_size = dtypes.size(); 10118+ std::vector<AnfNodePtr> output_nodes = {}; 10119+ 10120+ // Use TupleGetItem to wrap op outputs. 10121+ AbstractBasePtrList abstract_list; 10122+ for (size_t i = 0; i < output_size; i++) { 10123+ auto abstract_tensor = CreateTensorAbstract(shapes[i], dtypes[i]); 10124+ if (abstract_tensor == nullptr) { 10125+ MS_LOG(ERROR) << "Create tensor abstract failed"; 10126+ return RET_ERROR; 10127+ } 10128+ abstract_list.emplace_back(abstract_tensor); 10129+ auto tuple_get_item_prim_ptr = std::make_shared<ops::TupleGetItem>(); 10130+ if (tuple_get_item_prim_ptr == nullptr) { 10131+ MS_LOG(ERROR) << "new TupleGetItem failed"; 10132+ return RET_NULL_PTR; 10133+ } 10134+ auto tuple_get_item_prim_c = tuple_get_item_prim_ptr->GetPrim(); 10135+ MSLITE_CHECK_PTR(tuple_get_item_prim_c); 10136+ auto tuple_get_item_prim = NewValueNode(tuple_get_item_prim_c); 10137+ MSLITE_CHECK_PTR(tuple_get_item_prim); 10138+ auto get_item_value = NewValueNode(MakeValue<int>(i)); 10139+ MSLITE_CHECK_PTR(get_item_value); 10140+ std::vector<AnfNodePtr> inputs = {tuple_get_item_prim, operator_node, get_item_value}; 10141+ CNodePtr get_item_cnode = func_graph->NewCNode(inputs); 10142+ MSLITE_CHECK_PTR(get_item_cnode); 10143+ std::string output_item_name = operator_node->fullname_with_scope() + "_getitem_" + std::to_string(i); 10144+ auto get_item_abstract = CreateTensorAbstract({}, kNumberTypeFloat32); 10145+ if (get_item_abstract == nullptr) { 10146+ MS_LOG(ERROR) << "Create tensor abstract failed"; 10147+ return RET_ERROR; 10148+ } 10149+ get_item_cnode->set_fullname_with_scope(output_item_name); 10150+ get_item_cnode->set_abstract(get_item_abstract); 10151+ output_nodes.push_back(get_item_cnode); 10152+ } 10153+ auto abstract_tuple = std::make_shared<abstract::AbstractTuple>(abstract_list); 10154+ MSLITE_CHECK_PTR(abstract_tuple); 10155+ operator_node->set_abstract(abstract_tuple); 10156+ 10157+ // Use a MakeTuple node to wrap all outputs as a single input of the Return node. 10158+ auto make_tuple_prim_ptr = std::make_shared<ops::MakeTuple>(); 10159+ if (make_tuple_prim_ptr == nullptr) { 10160+ MS_LOG(ERROR) << "new MakeTuple failed"; 10161+ return RET_NULL_PTR; 10162+ } 10163+ auto make_tuple_prim_c = make_tuple_prim_ptr->GetPrim(); 10164+ MSLITE_CHECK_PTR(make_tuple_prim_c); 10165+ auto make_tuple_prim = NewValueNode(make_tuple_prim_c); 10166+ MSLITE_CHECK_PTR(make_tuple_prim); 10167+ std::vector<AnfNodePtr> make_tuple_inputs = output_nodes; 10168+ make_tuple_inputs.insert(make_tuple_inputs.begin(), make_tuple_prim); 10169+ auto make_tuple_cnode = func_graph->NewCNode(make_tuple_inputs); 10170+ MSLITE_CHECK_PTR(make_tuple_cnode); 10171+ make_tuple_cnode->set_fullname_with_scope("return_tuple"); 10172+ 10173+ auto return_prim_ptr = std::make_shared<ops::Return>(); 10174+ if (return_prim_ptr == nullptr) { 10175+ MS_LOG(ERROR) << "new Return failed"; 10176+ return RET_NULL_PTR; 10177+ } 10178+ auto return_prim_c = return_prim_ptr->GetPrim(); 10179+ MSLITE_CHECK_PTR(return_prim_c); 10180+ std::vector<AnfNodePtr> op_inputs{make_tuple_cnode}; 10181+ auto cnode = func_graph->NewCNode(return_prim_c, op_inputs); 10182+ MSLITE_CHECK_PTR(cnode); 10183+ cnode->set_fullname_with_scope("Return"); 10184+ func_graph->set_return(cnode); 10185+ 10186+ // Save original output tensor names.
10187+ ConverterInnerContext::GetInstance()->SetGraphOutputTensorNames(names); 10188+ return RET_OK; 10189+} 10190+ 10191+REG_MODEL_PARSER(kFmkTypeThirdParty, LiteModelParserCreator<ThirdPartyModelParser>) 10192+} // namespace lite 10193+} // namespace mindspore 10194diff --git a/mindspore/lite/tools/converter/parser/third_party/third_party_model_parser.h b/mindspore/lite/tools/converter/parser/third_party/third_party_model_parser.h 10195new file mode 100644 10196index 00000000..c4b197b8 10197--- /dev/null 10198+++ b/mindspore/lite/tools/converter/parser/third_party/third_party_model_parser.h 10199@@ -0,0 +1,50 @@ 10200+/** 10201+ * Copyright 2023 Huawei Technologies Co., Ltd 10202+ * 10203+ * Licensed under the Apache License, Version 2.0 (the "License"); 10204+ * you may not use this file except in compliance with the License. 10205+ * You may obtain a copy of the License at 10206+ * 10207+ * http://www.apache.org/licenses/LICENSE-2.0 10208+ * 10209+ * Unless required by applicable law or agreed to in writing, software 10210+ * distributed under the License is distributed on an "AS IS" BASIS, 10211+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10212+ * See the License for the specific language governing permissions and 10213+ * limitations under the License. 10214+ */ 10215+ 10216+#ifndef MINDSPORE_LITE_TOOLS_CONVERTER_PARSER_THIRDPARTY_THIRDPARTY_MODEL_PARSER_H_ 10217+#define MINDSPORE_LITE_TOOLS_CONVERTER_PARSER_THIRDPARTY_THIRDPARTY_MODEL_PARSER_H_ 10218+ 10219+#include <string> 10220+#include <vector> 10221+#include "schema/inner/model_generated.h" 10222+#include "base/base.h" 10223+#include "ir/anf.h" 10224+#include "ir/func_graph.h" 10225+#include "include/errorcode.h" 10226+#include "include/registry/model_parser.h" 10227+#include "tools/converter/config_parser/third_party_param_parser.h" 10228+ 10229+namespace mindspore { 10230+namespace lite { 10231+class ThirdPartyModelParser : public converter::ModelParser { 10232+ public: 10233+ api::FuncGraphPtr Parse(const converter::ConverterParameters &flag) override; 10234+ 10235+ private: 10236+ STATUS InitConfig(const std::string &config_file); 10237+ api::FuncGraphPtr CreateFuncGraph(); 10238+ STATUS BuildGraphInputs(const FuncGraphPtr &func_graph, std::vector<AnfNodePtr> *op_inputs); 10239+ STATUS BuildCustomOp(const FuncGraphPtr &func_graph, const std::vector<AnfNodePtr> &op_inputs, 10240+ CNodePtr *operator_node); 10241+ STATUS BuildGraphOutputs(const FuncGraphPtr &func_graph, const CNodePtr &operator_node); 10242+ 10243+ std::string model_file_ = ""; 10244+ ThirdPartyModelParam param_; 10245+}; 10246+} // namespace lite 10247+} // namespace mindspore 10248+ 10249+#endif // MINDSPORE_LITE_TOOLS_CONVERTER_PARSER_THIRDPARTY_THIRDPARTY_MODEL_PARSER_H_ 10250diff --git a/mindspore/lite/tools/converter/registry/model_parser_registry.cc b/mindspore/lite/tools/converter/registry/model_parser_registry.cc 10251index 832fb92d..6bc2d4d3 100644 10252--- a/mindspore/lite/tools/converter/registry/model_parser_registry.cc 10253+++ b/mindspore/lite/tools/converter/registry/model_parser_registry.cc 10254@@ -26,7 +26,7 @@ std::map<FmkType, ModelParserCreator> model_parser_room; 10255 } // namespace 10256 10257 ModelParserRegistry::ModelParserRegistry(FmkType fmk, ModelParserCreator creator) { 10258- if (fmk < converter::kFmkTypeTf || fmk > converter::kFmkTypePytorch) { 10259+ if (fmk < converter::kFmkTypeTf || fmk >= converter::kFmkTypeEnd) { 10260 MS_LOG(ERROR) << "ILLEGAL FMK: fmk must be in FmkType."; 10261 return; 
10262 } 10263@@ -38,7 +38,7 @@ ModelParserRegistry::ModelParserRegistry(FmkType fmk, ModelParserCreator creator 10264 } 10265 10266 converter::ModelParser *ModelParserRegistry::GetModelParser(FmkType fmk) { 10267- if (fmk < converter::kFmkTypeTf || fmk > converter::kFmkTypePytorch) { 10268+ if (fmk < converter::kFmkTypeTf || fmk >= converter::kFmkTypeEnd) { 10269 MS_LOG(ERROR) << "ILLEGAL FMK: fmk must be in FmkType."; 10270 return nullptr; 10271 } 10272-- 102732.17.1 10274 10275
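Usage sketch (illustrative only, not part of the patch): the hunks above add a THIRDPARTY framework type to the converter. ThirdPartyParamParser turns the textual third-party entries of the converter config file into a typed ThirdPartyModelParam, and ThirdPartyModelParser then builds a FuncGraph whose single Custom node carries the raw model bytes together with the declared input and output tensors. The snippet below shows roughly how the parser stage could be driven on its own; only the Parse signature and the ThirdPartyModelParam fields come from the patch, while the input_shapes/input_dtypes fields of ThirdPartyModelString (assumed to mirror the output_* fields used above) and the exact textual value syntax are assumptions made for illustration.

    // Minimal sketch of driving ThirdPartyParamParser directly. Assumes
    // ThirdPartyModelString exposes input_shapes/input_dtypes fields that mirror
    // the output_* fields above, and assumes the value syntax shown here.
    #include "include/errorcode.h"
    #include "tools/converter/config_parser/config_file_parser.h"
    #include "tools/converter/config_parser/third_party_param_parser.h"

    int ParseThirdPartyParamExample() {
      mindspore::lite::ThirdPartyModelString param_string;
      param_string.input_shapes = "1,3,224,224";  // assumed textual syntax
      param_string.input_dtypes = "float32";      // assumed textual syntax
      param_string.input_names = "img";
      param_string.output_shapes = "1,1000";
      param_string.output_dtypes = "float32";
      param_string.output_names = "prob";

      mindspore::lite::ThirdPartyModelParam param;
      int ret = mindspore::lite::ThirdPartyParamParser::Parse(param_string, &param);
      if (ret != mindspore::lite::RET_OK) {
        return ret;  // e.g. shape/dtype counts do not match, or a field failed to parse
      }
      // param.input_shapes, param.output_dtypes, ... are now typed vectors that
      // ThirdPartyModelParser consumes when it creates the graph inputs and the
      // Custom node wrapping the model buffer.
      return mindspore::lite::RET_OK;
    }

In the converter itself the same call is made from ConverterImpl::ParseParam (converter.cc hunk above) and from ThirdPartyModelParser::InitConfig once the parser has been selected through REG_MODEL_PARSER(kFmkTypeThirdParty, ...).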