1be168c0dSopenharmony_ciFrom f0daa7ef13e1741f8bcd1dfad7517a4a8ae4a209 Mon Sep 17 00:00:00 2001
2be168c0dSopenharmony_ciFrom: xuanyue <xuanyue@huawei.com>
3be168c0dSopenharmony_ciDate: Thu, 21 Mar 2024 19:38:34 +0800
4be168c0dSopenharmony_ciSubject: [PATCH] DynamicQuant strategy optimization
5be168c0dSopenharmony_ci
6be168c0dSopenharmony_ci---
7be168c0dSopenharmony_ci .../kernel/nnacl/dynamic_quant_parameter.h    |   7 +-
8be168c0dSopenharmony_ci mindspore/core/ops/dynamic_quant.cc           |  12 +
9be168c0dSopenharmony_ci mindspore/core/ops/dynamic_quant.h            |  10 +
10be168c0dSopenharmony_ci mindspore/core/ops/op_name.h                  |   1 +
11be168c0dSopenharmony_ci mindspore/lite/schema/inner/ops_generated.h   |  53 +++-
12be168c0dSopenharmony_ci mindspore/lite/schema/ops.fbs                 |   1 +
13be168c0dSopenharmony_ci mindspore/lite/schema/ops_generated.h         |  34 +-
14be168c0dSopenharmony_ci mindspore/lite/src/common/ops/ops_def.cc      |   1 +
15be168c0dSopenharmony_ci .../ops/populate/dynamic_quant_populate.cc    |  24 +-
16be168c0dSopenharmony_ci .../litert/kernel/cpu/int8/dynamic_quant.cc   | 299 +++++++++++-------
17be168c0dSopenharmony_ci .../litert/kernel/cpu/int8/dynamic_quant.h    |  59 ++--
18be168c0dSopenharmony_ci .../cpu/int8/matmul_dynamic_base_int8.cc      |  43 ++-
19be168c0dSopenharmony_ci .../cpu/int8/matmul_dynamic_base_int8.h       |   7 +-
20be168c0dSopenharmony_ci .../quantizer/insert_quant_node_manager.cc    |  27 +-
21be168c0dSopenharmony_ci .../quantizer/insert_quant_node_manager.h     |   5 +-
22be168c0dSopenharmony_ci 15 files changed, 395 insertions(+), 188 deletions(-)
23be168c0dSopenharmony_ci
24be168c0dSopenharmony_cidiff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/dynamic_quant_parameter.h b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/dynamic_quant_parameter.h
25be168c0dSopenharmony_ciindex aaabe041..1fc166cb 100644
26be168c0dSopenharmony_ci--- a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/dynamic_quant_parameter.h
27be168c0dSopenharmony_ci+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/dynamic_quant_parameter.h
28be168c0dSopenharmony_ci@@ -21,10 +21,9 @@
29be168c0dSopenharmony_ci typedef struct DynamicQuantParameter {
30be168c0dSopenharmony_ci   OpParameter op_parameter_;
31be168c0dSopenharmony_ci   bool symmetric_;
32be168c0dSopenharmony_ci-  int64_t dst_type_;
33be168c0dSopenharmony_ci-  bool activation_perchannel_;
34be168c0dSopenharmony_ci-  int64_t prefer_axis_;
35be168c0dSopenharmony_ci-  bool transpose_;
36be168c0dSopenharmony_ci+  int dst_type_;
37be168c0dSopenharmony_ci+  int axis_num_;
38be168c0dSopenharmony_ci+  int prefer_axes_[MAX_SHAPE_SIZE];
39be168c0dSopenharmony_ci } DynamicQuantParameter;
40be168c0dSopenharmony_ci 
41be168c0dSopenharmony_ci #endif  // NNACL_DYNAMIC_QUANT_PARAMETER_H_
42be168c0dSopenharmony_cidiff --git a/mindspore/core/ops/dynamic_quant.cc b/mindspore/core/ops/dynamic_quant.cc
43be168c0dSopenharmony_ciindex 63ea0be5..1949f809 100644
44be168c0dSopenharmony_ci--- a/mindspore/core/ops/dynamic_quant.cc
45be168c0dSopenharmony_ci+++ b/mindspore/core/ops/dynamic_quant.cc
46be168c0dSopenharmony_ci@@ -48,6 +48,18 @@ bool DynamicQuant::get_transpose() const {
47be168c0dSopenharmony_ci   auto value_ptr = this->GetAttr(kTrans);
48be168c0dSopenharmony_ci   return GetValue<bool>(value_ptr);
49be168c0dSopenharmony_ci }
50be168c0dSopenharmony_ci+
51be168c0dSopenharmony_ci+void DynamicQuant::set_prefer_axes(const std::vector<int> &prefer_axes) {
52be168c0dSopenharmony_ci+  (void)AddAttr(kPreferAxes, api::MakeValue(prefer_axes));
53be168c0dSopenharmony_ci+}
54be168c0dSopenharmony_ci+
55be168c0dSopenharmony_ci+std::vector<int> DynamicQuant::get_prefer_axes() const {
56be168c0dSopenharmony_ci+  auto value_ptr = GetAttr(kPreferAxes);
57be168c0dSopenharmony_ci+  auto tmp = GetValue<std::vector<int64_t>>(value_ptr);
58be168c0dSopenharmony_ci+  std::vector<int> res(tmp.begin(), tmp.end());
59be168c0dSopenharmony_ci+  return res;
60be168c0dSopenharmony_ci+}
61be168c0dSopenharmony_ci+
62be168c0dSopenharmony_ci void DynamicQuant::Init(const bool symmetric, const int64_t dst_type) {
63be168c0dSopenharmony_ci   this->set_symmetric(symmetric);
64be168c0dSopenharmony_ci   this->set_dst_type(dst_type);
65be168c0dSopenharmony_cidiff --git a/mindspore/core/ops/dynamic_quant.h b/mindspore/core/ops/dynamic_quant.h
66be168c0dSopenharmony_ciindex 4cb446c3..963dfb37 100644
67be168c0dSopenharmony_ci--- a/mindspore/core/ops/dynamic_quant.h
68be168c0dSopenharmony_ci+++ b/mindspore/core/ops/dynamic_quant.h
69be168c0dSopenharmony_ci@@ -91,6 +91,16 @@ class MIND_API DynamicQuant : public BaseOperator {
70be168c0dSopenharmony_ci   ///
71be168c0dSopenharmony_ci   /// \return Whether transpose matrix.
72be168c0dSopenharmony_ci   bool get_transpose() const;
73be168c0dSopenharmony_ci+
74be168c0dSopenharmony_ci+  /// \brief Method to set prefer_axes attribute.
75be168c0dSopenharmony_ci+  ///
76be168c0dSopenharmony_ci+  /// \param[in] prefer_axes Define the preferred axes.
77be168c0dSopenharmony_ci+  void set_prefer_axes(const std::vector<int> &prefer_axes);
78be168c0dSopenharmony_ci+
79be168c0dSopenharmony_ci+  /// \brief Method to get prefer_axes attribute.
80be168c0dSopenharmony_ci+  ///
81be168c0dSopenharmony_ci+  /// \return the preferred axes.
82be168c0dSopenharmony_ci+  std::vector<int> get_prefer_axes() const;
83be168c0dSopenharmony_ci };
84be168c0dSopenharmony_ci MIND_API abstract::AbstractBasePtr DynamicQuantInfer(const abstract::AnalysisEnginePtr &, const PrimitivePtr &primitive,
85be168c0dSopenharmony_ci                                                      const std::vector<abstract::AbstractBasePtr> &input_args);
86be168c0dSopenharmony_cidiff --git a/mindspore/core/ops/op_name.h b/mindspore/core/ops/op_name.h
87be168c0dSopenharmony_ciindex ad9066e7..1282e6ea 100644
88be168c0dSopenharmony_ci--- a/mindspore/core/ops/op_name.h
89be168c0dSopenharmony_ci+++ b/mindspore/core/ops/op_name.h
90be168c0dSopenharmony_ci@@ -410,6 +410,7 @@ constexpr auto KCurrChunkIndex = "curr_chunk_index";
91be168c0dSopenharmony_ci constexpr auto KCurrBitCount = "curr_bit_count";
92be168c0dSopenharmony_ci constexpr auto KTableLog = "table_log";
93be168c0dSopenharmony_ci constexpr auto kIgnoreIndex = "ignore_index";
94be168c0dSopenharmony_ci+constexpr auto kPreferAxes = "prefer_axes";
95be168c0dSopenharmony_ci 
96be168c0dSopenharmony_ci constexpr size_t kInputIndex0 = 0;
97be168c0dSopenharmony_ci constexpr size_t kInputIndex1 = 1;
98be168c0dSopenharmony_cidiff --git a/mindspore/lite/schema/inner/ops_generated.h b/mindspore/lite/schema/inner/ops_generated.h
99be168c0dSopenharmony_ciindex 6c861aa5..b595f4b2 100644
100be168c0dSopenharmony_ci--- a/mindspore/lite/schema/inner/ops_generated.h
101be168c0dSopenharmony_ci+++ b/mindspore/lite/schema/inner/ops_generated.h
102be168c0dSopenharmony_ci@@ -19790,6 +19790,7 @@ struct DynamicQuantT : public flatbuffers::NativeTable {
103be168c0dSopenharmony_ci   bool activation_channel = false;
104be168c0dSopenharmony_ci   int64_t prefer_axis = 0;
105be168c0dSopenharmony_ci   bool transpose = false;
106be168c0dSopenharmony_ci+  std::vector<int32_t> prefer_axes{};
107be168c0dSopenharmony_ci };
108be168c0dSopenharmony_ci 
109be168c0dSopenharmony_ci struct DynamicQuant FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
110be168c0dSopenharmony_ci@@ -19803,7 +19804,8 @@ struct DynamicQuant FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
111be168c0dSopenharmony_ci     VT_DST_TYPE = 6,
112be168c0dSopenharmony_ci     VT_ACTIVATION_CHANNEL = 8,
113be168c0dSopenharmony_ci     VT_PREFER_AXIS = 10,
114be168c0dSopenharmony_ci-    VT_TRANSPOSE = 12
115be168c0dSopenharmony_ci+    VT_TRANSPOSE = 12,
116be168c0dSopenharmony_ci+    VT_PREFER_AXES = 14
117be168c0dSopenharmony_ci   };
118be168c0dSopenharmony_ci   bool symmetric() const {
119be168c0dSopenharmony_ci     return GetField<uint8_t>(VT_SYMMETRIC, 0) != 0;
120be168c0dSopenharmony_ci@@ -19835,6 +19837,12 @@ struct DynamicQuant FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
121be168c0dSopenharmony_ci   bool mutate_transpose(bool _transpose) {
122be168c0dSopenharmony_ci     return SetField<uint8_t>(VT_TRANSPOSE, static_cast<uint8_t>(_transpose), 0);
123be168c0dSopenharmony_ci   }
124be168c0dSopenharmony_ci+  const flatbuffers::Vector<int32_t> *prefer_axes() const {
125be168c0dSopenharmony_ci+    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_PREFER_AXES);
126be168c0dSopenharmony_ci+  }
127be168c0dSopenharmony_ci+  flatbuffers::Vector<int32_t> *mutable_prefer_axes() {
128be168c0dSopenharmony_ci+    return GetPointer<flatbuffers::Vector<int32_t> *>(VT_PREFER_AXES);
129be168c0dSopenharmony_ci+  }
130be168c0dSopenharmony_ci   bool Verify(flatbuffers::Verifier &verifier) const {
131be168c0dSopenharmony_ci     return VerifyTableStart(verifier) &&
132be168c0dSopenharmony_ci            VerifyField<uint8_t>(verifier, VT_SYMMETRIC) &&
133be168c0dSopenharmony_ci@@ -19842,6 +19850,8 @@ struct DynamicQuant FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
134be168c0dSopenharmony_ci            VerifyField<uint8_t>(verifier, VT_ACTIVATION_CHANNEL) &&
135be168c0dSopenharmony_ci            VerifyField<int64_t>(verifier, VT_PREFER_AXIS) &&
136be168c0dSopenharmony_ci            VerifyField<uint8_t>(verifier, VT_TRANSPOSE) &&
137be168c0dSopenharmony_ci+           VerifyOffset(verifier, VT_PREFER_AXES) &&
138be168c0dSopenharmony_ci+           verifier.VerifyVector(prefer_axes()) &&
139be168c0dSopenharmony_ci            verifier.EndTable();
140be168c0dSopenharmony_ci   }
141be168c0dSopenharmony_ci   DynamicQuantT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
142be168c0dSopenharmony_ci@@ -19868,6 +19878,9 @@ struct DynamicQuantBuilder {
143be168c0dSopenharmony_ci   void add_transpose(bool transpose) {
144be168c0dSopenharmony_ci     fbb_.AddElement<uint8_t>(DynamicQuant::VT_TRANSPOSE, static_cast<uint8_t>(transpose), 0);
145be168c0dSopenharmony_ci   }
146be168c0dSopenharmony_ci+  void add_prefer_axes(flatbuffers::Offset<flatbuffers::Vector<int32_t>> prefer_axes) {
147be168c0dSopenharmony_ci+    fbb_.AddOffset(DynamicQuant::VT_PREFER_AXES, prefer_axes);
148be168c0dSopenharmony_ci+  }
149be168c0dSopenharmony_ci   explicit DynamicQuantBuilder(flatbuffers::FlatBufferBuilder &_fbb)
150be168c0dSopenharmony_ci         : fbb_(_fbb) {
151be168c0dSopenharmony_ci     start_ = fbb_.StartTable();
152be168c0dSopenharmony_ci@@ -19885,16 +19898,37 @@ inline flatbuffers::Offset<DynamicQuant> CreateDynamicQuant(
153be168c0dSopenharmony_ci     int64_t dst_type = 32LL,
154be168c0dSopenharmony_ci     bool activation_channel = false,
155be168c0dSopenharmony_ci     int64_t prefer_axis = 0,
156be168c0dSopenharmony_ci-    bool transpose = false) {
157be168c0dSopenharmony_ci+    bool transpose = false,
158be168c0dSopenharmony_ci+    flatbuffers::Offset<flatbuffers::Vector<int32_t>> prefer_axes = 0) {
159be168c0dSopenharmony_ci   DynamicQuantBuilder builder_(_fbb);
160be168c0dSopenharmony_ci   builder_.add_prefer_axis(prefer_axis);
161be168c0dSopenharmony_ci   builder_.add_dst_type(dst_type);
162be168c0dSopenharmony_ci+  builder_.add_prefer_axes(prefer_axes);
163be168c0dSopenharmony_ci   builder_.add_transpose(transpose);
164be168c0dSopenharmony_ci   builder_.add_activation_channel(activation_channel);
165be168c0dSopenharmony_ci   builder_.add_symmetric(symmetric);
166be168c0dSopenharmony_ci   return builder_.Finish();
167be168c0dSopenharmony_ci }
168be168c0dSopenharmony_ci 
169be168c0dSopenharmony_ci+inline flatbuffers::Offset<DynamicQuant> CreateDynamicQuantDirect(
170be168c0dSopenharmony_ci+    flatbuffers::FlatBufferBuilder &_fbb,
171be168c0dSopenharmony_ci+    bool symmetric = false,
172be168c0dSopenharmony_ci+    int64_t dst_type = 32LL,
173be168c0dSopenharmony_ci+    bool activation_channel = false,
174be168c0dSopenharmony_ci+    int64_t prefer_axis = 0,
175be168c0dSopenharmony_ci+    bool transpose = false,
176be168c0dSopenharmony_ci+    const std::vector<int32_t> *prefer_axes = nullptr) {
177be168c0dSopenharmony_ci+  auto prefer_axes__ = prefer_axes ? _fbb.CreateVector<int32_t>(*prefer_axes) : 0;
178be168c0dSopenharmony_ci+  return mindspore::schema::CreateDynamicQuant(
179be168c0dSopenharmony_ci+      _fbb,
180be168c0dSopenharmony_ci+      symmetric,
181be168c0dSopenharmony_ci+      dst_type,
182be168c0dSopenharmony_ci+      activation_channel,
183be168c0dSopenharmony_ci+      prefer_axis,
184be168c0dSopenharmony_ci+      transpose,
185be168c0dSopenharmony_ci+      prefer_axes__);
186be168c0dSopenharmony_ci+}
187be168c0dSopenharmony_ci+
188be168c0dSopenharmony_ci flatbuffers::Offset<DynamicQuant> CreateDynamicQuant(flatbuffers::FlatBufferBuilder &_fbb, const DynamicQuantT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
189be168c0dSopenharmony_ci 
190be168c0dSopenharmony_ci struct LSTMGradDataT : public flatbuffers::NativeTable {
191be168c0dSopenharmony_ci@@ -26903,6 +26937,7 @@ inline void DynamicQuant::UnPackTo(DynamicQuantT *_o, const flatbuffers::resolve
192be168c0dSopenharmony_ci   { auto _e = activation_channel(); _o->activation_channel = _e; }
193be168c0dSopenharmony_ci   { auto _e = prefer_axis(); _o->prefer_axis = _e; }
194be168c0dSopenharmony_ci   { auto _e = transpose(); _o->transpose = _e; }
195be168c0dSopenharmony_ci+  { auto _e = prefer_axes(); if (_e) { _o->prefer_axes.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->prefer_axes[_i] = _e->Get(_i); } } }
196be168c0dSopenharmony_ci }
197be168c0dSopenharmony_ci 
198be168c0dSopenharmony_ci inline flatbuffers::Offset<DynamicQuant> DynamicQuant::Pack(flatbuffers::FlatBufferBuilder &_fbb, const DynamicQuantT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
199be168c0dSopenharmony_ci@@ -26918,13 +26953,15 @@ inline flatbuffers::Offset<DynamicQuant> CreateDynamicQuant(flatbuffers::FlatBuf
200be168c0dSopenharmony_ci   auto _activation_channel = _o->activation_channel;
201be168c0dSopenharmony_ci   auto _prefer_axis = _o->prefer_axis;
202be168c0dSopenharmony_ci   auto _transpose = _o->transpose;
203be168c0dSopenharmony_ci+  auto _prefer_axes = _o->prefer_axes.size() ? _fbb.CreateVector(_o->prefer_axes) : 0;
204be168c0dSopenharmony_ci   return mindspore::schema::CreateDynamicQuant(
205be168c0dSopenharmony_ci       _fbb,
206be168c0dSopenharmony_ci       _symmetric,
207be168c0dSopenharmony_ci       _dst_type,
208be168c0dSopenharmony_ci       _activation_channel,
209be168c0dSopenharmony_ci       _prefer_axis,
210be168c0dSopenharmony_ci-      _transpose);
211be168c0dSopenharmony_ci+      _transpose,
212be168c0dSopenharmony_ci+      _prefer_axes);
213be168c0dSopenharmony_ci }
214be168c0dSopenharmony_ci 
215be168c0dSopenharmony_ci inline LSTMGradDataT *LSTMGradData::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
216be168c0dSopenharmony_ci@@ -33509,10 +33546,11 @@ inline const flatbuffers::TypeTable *LSTMTypeTable() {
217be168c0dSopenharmony_ci     { flatbuffers::ET_LONG, 0, -1 },
218be168c0dSopenharmony_ci     { flatbuffers::ET_FLOAT, 0, -1 },
219be168c0dSopenharmony_ci     { flatbuffers::ET_FLOAT, 0, -1 },
220be168c0dSopenharmony_ci-    { flatbuffers::ET_FLOAT, 0, -1 }
221be168c0dSopenharmony_ci+    { flatbuffers::ET_FLOAT, 0, -1 },
222be168c0dSopenharmony_ci+    { flatbuffers::ET_LONG, 0, -1 }
223be168c0dSopenharmony_ci   };
224be168c0dSopenharmony_ci   static const flatbuffers::TypeTable tt = {
225be168c0dSopenharmony_ci-    flatbuffers::ST_TABLE, 9, type_codes, nullptr, nullptr, nullptr, nullptr
226be168c0dSopenharmony_ci+    flatbuffers::ST_TABLE, 10, type_codes, nullptr, nullptr, nullptr, nullptr
227be168c0dSopenharmony_ci   };
228be168c0dSopenharmony_ci   return &tt;
229be168c0dSopenharmony_ci }
230be168c0dSopenharmony_ci@@ -34744,10 +34782,11 @@ inline const flatbuffers::TypeTable *DynamicQuantTypeTable() {
231be168c0dSopenharmony_ci     { flatbuffers::ET_LONG, 0, -1 },
232be168c0dSopenharmony_ci     { flatbuffers::ET_BOOL, 0, -1 },
233be168c0dSopenharmony_ci     { flatbuffers::ET_LONG, 0, -1 },
234be168c0dSopenharmony_ci-    { flatbuffers::ET_BOOL, 0, -1 }
235be168c0dSopenharmony_ci+    { flatbuffers::ET_BOOL, 0, -1 },
236be168c0dSopenharmony_ci+    { flatbuffers::ET_INT, 1, -1 }
237be168c0dSopenharmony_ci   };
238be168c0dSopenharmony_ci   static const flatbuffers::TypeTable tt = {
239be168c0dSopenharmony_ci-    flatbuffers::ST_TABLE, 5, type_codes, nullptr, nullptr, nullptr, nullptr
240be168c0dSopenharmony_ci+    flatbuffers::ST_TABLE, 6, type_codes, nullptr, nullptr, nullptr, nullptr
241be168c0dSopenharmony_ci   };
242be168c0dSopenharmony_ci   return &tt;
243be168c0dSopenharmony_ci }
244be168c0dSopenharmony_cidiff --git a/mindspore/lite/schema/ops.fbs b/mindspore/lite/schema/ops.fbs
245be168c0dSopenharmony_ciindex 920c0d31..153a21d0 100644
246be168c0dSopenharmony_ci--- a/mindspore/lite/schema/ops.fbs
247be168c0dSopenharmony_ci+++ b/mindspore/lite/schema/ops.fbs
248be168c0dSopenharmony_ci@@ -1250,6 +1250,7 @@ table DynamicQuant {
249be168c0dSopenharmony_ci     activation_channel: bool = false;
250be168c0dSopenharmony_ci     prefer_axis: long = 0;
251be168c0dSopenharmony_ci     transpose: bool = false;
252be168c0dSopenharmony_ci+    prefer_axes: [int];
253be168c0dSopenharmony_ci }
254be168c0dSopenharmony_ci 
255be168c0dSopenharmony_ci table LSTMGradData {
256be168c0dSopenharmony_cidiff --git a/mindspore/lite/schema/ops_generated.h b/mindspore/lite/schema/ops_generated.h
257be168c0dSopenharmony_ciindex 8d387e9d..d2d89bff 100644
258be168c0dSopenharmony_ci--- a/mindspore/lite/schema/ops_generated.h
259be168c0dSopenharmony_ci+++ b/mindspore/lite/schema/ops_generated.h
260be168c0dSopenharmony_ci@@ -13118,7 +13118,8 @@ struct DynamicQuant FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
261be168c0dSopenharmony_ci     VT_DST_TYPE = 6,
262be168c0dSopenharmony_ci     VT_ACTIVATION_CHANNEL = 8,
263be168c0dSopenharmony_ci     VT_PREFER_AXIS = 10,
264be168c0dSopenharmony_ci-    VT_TRANSPOSE = 12
265be168c0dSopenharmony_ci+    VT_TRANSPOSE = 12,
266be168c0dSopenharmony_ci+    VT_PREFER_AXES = 14
267be168c0dSopenharmony_ci   };
268be168c0dSopenharmony_ci   bool symmetric() const {
269be168c0dSopenharmony_ci     return GetField<uint8_t>(VT_SYMMETRIC, 0) != 0;
270be168c0dSopenharmony_ci@@ -13135,6 +13136,9 @@ struct DynamicQuant FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
271be168c0dSopenharmony_ci   bool transpose() const {
272be168c0dSopenharmony_ci     return GetField<uint8_t>(VT_TRANSPOSE, 0) != 0;
273be168c0dSopenharmony_ci   }
274be168c0dSopenharmony_ci+  const flatbuffers::Vector<int32_t> *prefer_axes() const {
275be168c0dSopenharmony_ci+    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_PREFER_AXES);
276be168c0dSopenharmony_ci+  }
277be168c0dSopenharmony_ci   bool Verify(flatbuffers::Verifier &verifier) const {
278be168c0dSopenharmony_ci     return VerifyTableStart(verifier) &&
279be168c0dSopenharmony_ci            VerifyField<uint8_t>(verifier, VT_SYMMETRIC) &&
280be168c0dSopenharmony_ci@@ -13142,6 +13146,8 @@ struct DynamicQuant FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
281be168c0dSopenharmony_ci            VerifyField<uint8_t>(verifier, VT_ACTIVATION_CHANNEL) &&
282be168c0dSopenharmony_ci            VerifyField<int64_t>(verifier, VT_PREFER_AXIS) &&
283be168c0dSopenharmony_ci            VerifyField<uint8_t>(verifier, VT_TRANSPOSE) &&
284be168c0dSopenharmony_ci+           VerifyOffset(verifier, VT_PREFER_AXES) &&
285be168c0dSopenharmony_ci+           verifier.VerifyVector(prefer_axes()) &&
286be168c0dSopenharmony_ci            verifier.EndTable();
287be168c0dSopenharmony_ci   }
288be168c0dSopenharmony_ci };
289be168c0dSopenharmony_ci@@ -13165,6 +13171,9 @@ struct DynamicQuantBuilder {
290be168c0dSopenharmony_ci   void add_transpose(bool transpose) {
291be168c0dSopenharmony_ci     fbb_.AddElement<uint8_t>(DynamicQuant::VT_TRANSPOSE, static_cast<uint8_t>(transpose), 0);
292be168c0dSopenharmony_ci   }
293be168c0dSopenharmony_ci+  void add_prefer_axes(flatbuffers::Offset<flatbuffers::Vector<int32_t>> prefer_axes) {
294be168c0dSopenharmony_ci+    fbb_.AddOffset(DynamicQuant::VT_PREFER_AXES, prefer_axes);
295be168c0dSopenharmony_ci+  }
296be168c0dSopenharmony_ci   explicit DynamicQuantBuilder(flatbuffers::FlatBufferBuilder &_fbb)
297be168c0dSopenharmony_ci         : fbb_(_fbb) {
298be168c0dSopenharmony_ci     start_ = fbb_.StartTable();
299be168c0dSopenharmony_ci@@ -13182,16 +13191,37 @@ inline flatbuffers::Offset<DynamicQuant> CreateDynamicQuant(
300be168c0dSopenharmony_ci     int64_t dst_type = 32LL,
301be168c0dSopenharmony_ci     bool activation_channel = false,
302be168c0dSopenharmony_ci     int64_t prefer_axis = 0,
303be168c0dSopenharmony_ci-    bool transpose = false) {
304be168c0dSopenharmony_ci+    bool transpose = false,
305be168c0dSopenharmony_ci+    flatbuffers::Offset<flatbuffers::Vector<int32_t>> prefer_axes = 0) {
306be168c0dSopenharmony_ci   DynamicQuantBuilder builder_(_fbb);
307be168c0dSopenharmony_ci   builder_.add_prefer_axis(prefer_axis);
308be168c0dSopenharmony_ci   builder_.add_dst_type(dst_type);
309be168c0dSopenharmony_ci+  builder_.add_prefer_axes(prefer_axes);
310be168c0dSopenharmony_ci   builder_.add_transpose(transpose);
311be168c0dSopenharmony_ci   builder_.add_activation_channel(activation_channel);
312be168c0dSopenharmony_ci   builder_.add_symmetric(symmetric);
313be168c0dSopenharmony_ci   return builder_.Finish();
314be168c0dSopenharmony_ci }
315be168c0dSopenharmony_ci 
316be168c0dSopenharmony_ci+inline flatbuffers::Offset<DynamicQuant> CreateDynamicQuantDirect(
317be168c0dSopenharmony_ci+    flatbuffers::FlatBufferBuilder &_fbb,
318be168c0dSopenharmony_ci+    bool symmetric = false,
319be168c0dSopenharmony_ci+    int64_t dst_type = 32LL,
320be168c0dSopenharmony_ci+    bool activation_channel = false,
321be168c0dSopenharmony_ci+    int64_t prefer_axis = 0,
322be168c0dSopenharmony_ci+    bool transpose = false,
323be168c0dSopenharmony_ci+    const std::vector<int32_t> *prefer_axes = nullptr) {
324be168c0dSopenharmony_ci+  auto prefer_axes__ = prefer_axes ? _fbb.CreateVector<int32_t>(*prefer_axes) : 0;
325be168c0dSopenharmony_ci+  return mindspore::schema::CreateDynamicQuant(
326be168c0dSopenharmony_ci+      _fbb,
327be168c0dSopenharmony_ci+      symmetric,
328be168c0dSopenharmony_ci+      dst_type,
329be168c0dSopenharmony_ci+      activation_channel,
330be168c0dSopenharmony_ci+      prefer_axis,
331be168c0dSopenharmony_ci+      transpose,
332be168c0dSopenharmony_ci+      prefer_axes__);
333be168c0dSopenharmony_ci+}
334be168c0dSopenharmony_ci+
335be168c0dSopenharmony_ci struct LSTMGradData FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
336be168c0dSopenharmony_ci   typedef LSTMGradDataBuilder Builder;
337be168c0dSopenharmony_ci   enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
338be168c0dSopenharmony_cidiff --git a/mindspore/lite/src/common/ops/ops_def.cc b/mindspore/lite/src/common/ops/ops_def.cc
339be168c0dSopenharmony_ciindex baa2497a..1e973362 100644
340be168c0dSopenharmony_ci--- a/mindspore/lite/src/common/ops/ops_def.cc
341be168c0dSopenharmony_ci+++ b/mindspore/lite/src/common/ops/ops_def.cc
342be168c0dSopenharmony_ci@@ -1254,6 +1254,7 @@ OP_ATTR_WITH_VALUE(dst_type, long, 32)
343be168c0dSopenharmony_ci OP_ATTR_WITH_VALUE(activation_channel, bool, false)
344be168c0dSopenharmony_ci OP_ATTR_WITH_VALUE(prefer_axis, long, 0)
345be168c0dSopenharmony_ci OP_ATTR_WITH_VALUE(transpose, bool, false)
346be168c0dSopenharmony_ci+OP_ATTR(prefer_axes, [int])
347be168c0dSopenharmony_ci OP_SCHEMA_DEF_END(DynamicQuant)
348be168c0dSopenharmony_ci 
349be168c0dSopenharmony_ci OP_SCHEMA_DEF(LSTMGradData)
350be168c0dSopenharmony_cidiff --git a/mindspore/lite/src/common/ops/populate/dynamic_quant_populate.cc b/mindspore/lite/src/common/ops/populate/dynamic_quant_populate.cc
351be168c0dSopenharmony_ciindex 3566f082..8e393320 100644
352be168c0dSopenharmony_ci--- a/mindspore/lite/src/common/ops/populate/dynamic_quant_populate.cc
353be168c0dSopenharmony_ci+++ b/mindspore/lite/src/common/ops/populate/dynamic_quant_populate.cc
354be168c0dSopenharmony_ci@@ -36,11 +36,27 @@ OpParameter *PopulateDynamicQuantParameter(const void *prim) {
355be168c0dSopenharmony_ci   memset(param, 0, sizeof(DynamicQuantParameter));
356be168c0dSopenharmony_ci 
357be168c0dSopenharmony_ci   param->op_parameter_.type_ = primitive->value_type();
358be168c0dSopenharmony_ci-  param->dst_type_ = value->dst_type();
359be168c0dSopenharmony_ci+  param->dst_type_ = static_cast<int>(value->dst_type());
360be168c0dSopenharmony_ci   param->symmetric_ = value->symmetric();
361be168c0dSopenharmony_ci-  param->activation_perchannel_ = value->activation_channel();
362be168c0dSopenharmony_ci-  param->prefer_axis_ = value->prefer_axis();
363be168c0dSopenharmony_ci-  param->transpose_ = value->transpose();
364be168c0dSopenharmony_ci+  auto prefer_axes = value->prefer_axes();
365be168c0dSopenharmony_ci+  if (prefer_axes != nullptr) {
366be168c0dSopenharmony_ci+    param->axis_num_ = static_cast<int>(prefer_axes->size());
367be168c0dSopenharmony_ci+    if (param->axis_num_ > MAX_SHAPE_SIZE) {
368be168c0dSopenharmony_ci+      MS_LOG(ERROR) << "Dynamic quant's prefer_axes's number is more than 8.";
369be168c0dSopenharmony_ci+      free(param);
370be168c0dSopenharmony_ci+      return nullptr;
371be168c0dSopenharmony_ci+    }
372be168c0dSopenharmony_ci+    for (int i = 0; i < param->axis_num_; ++i) {
373be168c0dSopenharmony_ci+      param->prefer_axes_[i] = prefer_axes->Get(i);
374be168c0dSopenharmony_ci+    }
375be168c0dSopenharmony_ci+    return reinterpret_cast<OpParameter *>(param);
376be168c0dSopenharmony_ci+  }
377be168c0dSopenharmony_ci+  auto activation_channel = value->activation_channel();
378be168c0dSopenharmony_ci+  if (!activation_channel) {
379be168c0dSopenharmony_ci+    return reinterpret_cast<OpParameter *>(param);
380be168c0dSopenharmony_ci+  }
381be168c0dSopenharmony_ci+  param->axis_num_ = 1;
382be168c0dSopenharmony_ci+  param->prefer_axes_[0] = static_cast<int>(value->prefer_axis());
383be168c0dSopenharmony_ci   return reinterpret_cast<OpParameter *>(param);
384be168c0dSopenharmony_ci }
385be168c0dSopenharmony_ci REG_POPULATE(PrimitiveType_DynamicQuant, PopulateDynamicQuantParameter, SCHEMA_CUR);
386be168c0dSopenharmony_cidiff --git a/mindspore/lite/src/litert/kernel/cpu/int8/dynamic_quant.cc b/mindspore/lite/src/litert/kernel/cpu/int8/dynamic_quant.cc
387be168c0dSopenharmony_ciindex e9404ef2..acc43c97 100644
388be168c0dSopenharmony_ci--- a/mindspore/lite/src/litert/kernel/cpu/int8/dynamic_quant.cc
389be168c0dSopenharmony_ci+++ b/mindspore/lite/src/litert/kernel/cpu/int8/dynamic_quant.cc
390be168c0dSopenharmony_ci@@ -14,14 +14,16 @@
391be168c0dSopenharmony_ci  * limitations under the License.
392be168c0dSopenharmony_ci  */
393be168c0dSopenharmony_ci #include "src/litert/kernel/cpu/int8/dynamic_quant.h"
394be168c0dSopenharmony_ci+#include <set>
395be168c0dSopenharmony_ci #include <vector>
396be168c0dSopenharmony_ci #include <algorithm>
397be168c0dSopenharmony_ci #include "src/litert/kernel_registry.h"
398be168c0dSopenharmony_ci #include "schema/model_generated.h"
399be168c0dSopenharmony_ci #include "include/errorcode.h"
400be168c0dSopenharmony_ci-#include "nnacl/dynamic_quant_parameter.h"
401be168c0dSopenharmony_ci #include "nnacl/int8/dynamic_quant_int8.h"
402be168c0dSopenharmony_ci #include "nnacl/int8/quant_dtype_cast_int8.h"
403be168c0dSopenharmony_ci+#include "nnacl/fp32/transpose_fp32.h"
404be168c0dSopenharmony_ci+#include "nnacl/int8/transpose_int8.h"
405be168c0dSopenharmony_ci 
406be168c0dSopenharmony_ci using mindspore::kernel::KERNEL_ARCH;
407be168c0dSopenharmony_ci using mindspore::lite::KernelRegistrar;
408be168c0dSopenharmony_ci@@ -44,19 +46,10 @@ int DynamicQuantCPUKernel::Prepare() {
409be168c0dSopenharmony_ci   CHECK_NULL_RETURN(in_tensor);
410be168c0dSopenharmony_ci   auto out_tensor = out_tensors_.front();
411be168c0dSopenharmony_ci   CHECK_NULL_RETURN(out_tensor);
412be168c0dSopenharmony_ci-  auto param = reinterpret_cast<DynamicQuantParameter *>(op_parameter_);
413be168c0dSopenharmony_ci-  CHECK_NULL_RETURN(param);
414be168c0dSopenharmony_ci-  src_dtype_ = in_tensor->data_type();
415be168c0dSopenharmony_ci-  dst_dtype_ = param->dst_type_;
416be168c0dSopenharmony_ci-  symmetric_ = param->symmetric_;
417be168c0dSopenharmony_ci-  activation_perchannel_ = param->activation_perchannel_;
418be168c0dSopenharmony_ci-  prefer_axis_ = param->prefer_axis_;
419be168c0dSopenharmony_ci-  transpose_ = param->transpose_;
420be168c0dSopenharmony_ci-  if (out_tensor->data_type() != dst_dtype_) {
421be168c0dSopenharmony_ci-    MS_LOG(ERROR) << "param data type and tensor data type do not match.";
422be168c0dSopenharmony_ci-    return RET_ERROR;
423be168c0dSopenharmony_ci-  }
424be168c0dSopenharmony_ci-
425be168c0dSopenharmony_ci+  param_ = reinterpret_cast<DynamicQuantParameter *>(op_parameter_);
426be168c0dSopenharmony_ci+  CHECK_NULL_RETURN(param_);
427be168c0dSopenharmony_ci+  MS_CHECK_TRUE_MSG(param_->dst_type_ == out_tensor->data_type(), lite::RET_ERROR,
428be168c0dSopenharmony_ci+                    "param data type and tensor data type do not match.");
429be168c0dSopenharmony_ci   if (!InferShapeDone()) {
430be168c0dSopenharmony_ci     return RET_OK;
431be168c0dSopenharmony_ci   }
432be168c0dSopenharmony_ci@@ -65,71 +58,86 @@ int DynamicQuantCPUKernel::Prepare() {
433be168c0dSopenharmony_ci 
434be168c0dSopenharmony_ci int DynamicQuantCPUKernel::ReSize() {
435be168c0dSopenharmony_ci   auto in_tensor = in_tensors_.front();
436be168c0dSopenharmony_ci-  num_unit_ = static_cast<int>(in_tensor->ElementsNum());
437be168c0dSopenharmony_ci-  if (num_unit_ < kMinNums) {
438be168c0dSopenharmony_ci-    thread_n_num_ = 1;
439be168c0dSopenharmony_ci+  auto ele_num = static_cast<int>(in_tensor->ElementsNum());
440be168c0dSopenharmony_ci+  auto shape = in_tensor->shape();
441be168c0dSopenharmony_ci+  int segment_num = 1;
442be168c0dSopenharmony_ci+  if (param_->axis_num_ == 0) {
443be168c0dSopenharmony_ci+    segment_num = MSMIN(kBucketNums, ele_num / kMinNums);
444be168c0dSopenharmony_ci   } else {
445be168c0dSopenharmony_ci-    thread_n_num_ = MSMIN(thread_num_, num_unit_);
446be168c0dSopenharmony_ci-    // Limit for 8 thread
447be168c0dSopenharmony_ci-    thread_n_num_ = MSMIN(thread_n_num_, kBucketNums);
448be168c0dSopenharmony_ci+    std::set<int> prefer_axes;
449be168c0dSopenharmony_ci+    for (int i = 0; i < param_->axis_num_; ++i) {
450be168c0dSopenharmony_ci+      int axis = param_->prefer_axes_[i] < 0 ? param_->prefer_axes_[i] + static_cast<int>(shape.size())
451be168c0dSopenharmony_ci+                                             : param_->prefer_axes_[i];
452be168c0dSopenharmony_ci+      MS_CHECK_TRUE_MSG(axis >= 0 && axis < static_cast<int>(shape.size()), lite::RET_ERROR,
453be168c0dSopenharmony_ci+                        "The prefer axis is out of range.");
454be168c0dSopenharmony_ci+      if (prefer_axes.find(axis) != prefer_axes.end()) {
455be168c0dSopenharmony_ci+        continue;
456be168c0dSopenharmony_ci+      }
457be168c0dSopenharmony_ci+      segment_num *= shape[axis];
458be168c0dSopenharmony_ci+      (void)prefer_axes.insert(axis);
459be168c0dSopenharmony_ci+    }
460be168c0dSopenharmony_ci+    pre_perm_.resize(shape.size());
461be168c0dSopenharmony_ci+    post_perm_.resize(shape.size());
462be168c0dSopenharmony_ci+    int pre_point0 = 0;
463be168c0dSopenharmony_ci+    int pre_point1 = param_->axis_num_;
464be168c0dSopenharmony_ci+    for (int i = 0; i < static_cast<int>(shape.size()); ++i) {
465be168c0dSopenharmony_ci+      if (prefer_axes.find(i) != prefer_axes.end()) {
466be168c0dSopenharmony_ci+        pre_perm_[pre_point0] = i;
467be168c0dSopenharmony_ci+        post_perm_[i] = pre_point0;
468be168c0dSopenharmony_ci+        ++pre_point0;
469be168c0dSopenharmony_ci+      } else {
470be168c0dSopenharmony_ci+        pre_perm_[pre_point1] = i;
471be168c0dSopenharmony_ci+        post_perm_[i] = pre_point1;
472be168c0dSopenharmony_ci+        ++pre_point1;
473be168c0dSopenharmony_ci+      }
474be168c0dSopenharmony_ci+    }
475be168c0dSopenharmony_ci   }
476be168c0dSopenharmony_ci-
477be168c0dSopenharmony_ci-  int min_max_array_size = 0;
478be168c0dSopenharmony_ci-  if (activation_perchannel_) {
479be168c0dSopenharmony_ci-    auto dims = in_tensor->shape();
480be168c0dSopenharmony_ci-    prefer_axis_ = (prefer_axis_ < 0) ? prefer_axis_ + dims.size() : prefer_axis_;
481be168c0dSopenharmony_ci-    channel_num_ = dims[prefer_axis_];
482be168c0dSopenharmony_ci-    MS_CHECK_GT(channel_num_, 0, RET_ERROR);
483be168c0dSopenharmony_ci-    scale_ = reinterpret_cast<float *>(malloc(channel_num_ * sizeof(float)));
484be168c0dSopenharmony_ci-    MS_CHECK_TRUE_MSG(scale_ != nullptr, RET_ERROR, "Malloc scale_ failed.");
485be168c0dSopenharmony_ci-    zero_point_ = reinterpret_cast<int32_t *>(malloc(channel_num_ * sizeof(int32_t)));
486be168c0dSopenharmony_ci-    MS_CHECK_TRUE_MSG(zero_point_ != nullptr, RET_ERROR, "Malloc zero_point_ failed.");
487be168c0dSopenharmony_ci-    size_t last_axis = dims.size() - 1;
488be168c0dSopenharmony_ci-    row_length_ = dims[last_axis];
489be168c0dSopenharmony_ci-    channel_length_ = num_unit_ / channel_num_;
490be168c0dSopenharmony_ci-    thread_n_stride_ = UP_DIV(num_unit_, thread_n_num_);
491be168c0dSopenharmony_ci-    if (!transpose_ && channel_length_ > thread_n_stride_) {
492be168c0dSopenharmony_ci-      thread_n_num_ = 1;
493be168c0dSopenharmony_ci+  need_transpose_ = false;
494be168c0dSopenharmony_ci+  for (size_t i = 0; i < pre_perm_.size(); ++i) {
495be168c0dSopenharmony_ci+    if (pre_perm_[i] != static_cast<int>(i)) {
496be168c0dSopenharmony_ci+      need_transpose_ = true;
497be168c0dSopenharmony_ci     }
498be168c0dSopenharmony_ci-    min_max_array_size = channel_num_;
499be168c0dSopenharmony_ci-  } else {
500be168c0dSopenharmony_ci-    min_max_array_size = kBucketNums;
501be168c0dSopenharmony_ci   }
502be168c0dSopenharmony_ci-  real_min_ = reinterpret_cast<float *>(malloc(min_max_array_size * sizeof(float)));
503be168c0dSopenharmony_ci-  real_max_ = reinterpret_cast<float *>(malloc(min_max_array_size * sizeof(float)));
504be168c0dSopenharmony_ci-  if (real_min_ == nullptr || real_max_ == nullptr) {
505be168c0dSopenharmony_ci-    return RET_NULL_PTR;
506be168c0dSopenharmony_ci+  if (segment_num <= 0) {
507be168c0dSopenharmony_ci+    segment_num = 1;
508be168c0dSopenharmony_ci   }
509be168c0dSopenharmony_ci-  for (int i = 0; i < min_max_array_size; ++i) {
510be168c0dSopenharmony_ci+  real_min_.resize(segment_num);
511be168c0dSopenharmony_ci+  real_max_.resize(segment_num);
512be168c0dSopenharmony_ci+  scale_.resize(segment_num);
513be168c0dSopenharmony_ci+  zero_point_.resize(segment_num);
514be168c0dSopenharmony_ci+  for (int i = 0; i < segment_num; ++i) {
515be168c0dSopenharmony_ci     real_min_[i] = FLT_MAX;
516be168c0dSopenharmony_ci     real_max_[i] = -FLT_MAX;
517be168c0dSopenharmony_ci   }
518be168c0dSopenharmony_ci-  MS_CHECK_GT(thread_n_num_, 0, RET_ERROR);
519be168c0dSopenharmony_ci-  thread_n_stride_ = UP_DIV(num_unit_, thread_n_num_);
520be168c0dSopenharmony_ci+  thread_num_ = MSMIN(segment_num, op_parameter_->thread_num_);
521be168c0dSopenharmony_ci+  unit_num_ = UP_DIV(ele_num, segment_num);
522be168c0dSopenharmony_ci+  unit_segment_num_ = UP_DIV(segment_num, thread_num_);
523be168c0dSopenharmony_ci   return RET_OK;
524be168c0dSopenharmony_ci }
525be168c0dSopenharmony_ci 
526be168c0dSopenharmony_ci int DynamicQuantCPUKernel::CalculateMinMax(int task_id) {
527be168c0dSopenharmony_ci-  int num_unit_thread = MSMIN(thread_n_stride_, num_unit_ - task_id * thread_n_stride_);
528be168c0dSopenharmony_ci-  if (num_unit_thread <= 0) {
529be168c0dSopenharmony_ci-    return RET_OK;
530be168c0dSopenharmony_ci-  }
531be168c0dSopenharmony_ci-  int thread_offset = task_id * thread_n_stride_;
532be168c0dSopenharmony_ci-  float *data = float32_ptr_ + thread_offset;
533be168c0dSopenharmony_ci-  if (activation_perchannel_) {
534be168c0dSopenharmony_ci-    if (transpose_) {
535be168c0dSopenharmony_ci-      MS_LOG(INFO) << "attribute transpose is true.";
536be168c0dSopenharmony_ci-      CalculateChannelColMinMax(data, num_unit_thread, real_min_, real_max_, row_length_);
537be168c0dSopenharmony_ci-    } else {
538be168c0dSopenharmony_ci-      int channel_offset = task_id * thread_n_stride_ / channel_length_;
539be168c0dSopenharmony_ci-      float *real_min = real_min_ + channel_offset;
540be168c0dSopenharmony_ci-      float *real_max = real_max_ + channel_offset;
541be168c0dSopenharmony_ci-      CalculateChannelRowMinMax(data, num_unit_thread, real_min, real_max, row_length_);
542be168c0dSopenharmony_ci+  int task_unit = unit_segment_num_ * unit_num_;
543be168c0dSopenharmony_ci+  int offset = task_id * task_unit;
544be168c0dSopenharmony_ci+  int ele_num = static_cast<int>(in_tensors_.front()->ElementsNum());
545be168c0dSopenharmony_ci+  int remain = ele_num - offset;
546be168c0dSopenharmony_ci+  if (task_unit <= remain) {
547be168c0dSopenharmony_ci+    for (int i = 0; i < unit_segment_num_; ++i) {
548be168c0dSopenharmony_ci+      CalculateMinMaxFp32(float32_ptr_ + offset + i * unit_num_, unit_num_, &real_min_[task_id * unit_segment_num_ + i],
549be168c0dSopenharmony_ci+                          &real_max_[task_id * unit_segment_num_ + i]);
550be168c0dSopenharmony_ci     }
551be168c0dSopenharmony_ci   } else {
552be168c0dSopenharmony_ci-    float *real_min = real_min_ + task_id;
553be168c0dSopenharmony_ci-    float *real_max = real_max_ + task_id;
554be168c0dSopenharmony_ci-    CalculateMinMaxFp32(data, num_unit_thread, real_min, real_max);
555be168c0dSopenharmony_ci+    int segment_num = remain / unit_num_;
556be168c0dSopenharmony_ci+    int remain_ele_num = remain - segment_num * unit_num_;
557be168c0dSopenharmony_ci+    for (int i = 0; i < segment_num; ++i) {
558be168c0dSopenharmony_ci+      CalculateMinMaxFp32(float32_ptr_ + offset + i * unit_num_, unit_num_, &real_min_[task_id * unit_segment_num_ + i],
559be168c0dSopenharmony_ci+                          &real_max_[task_id * unit_segment_num_ + i]);
560be168c0dSopenharmony_ci+    }
561be168c0dSopenharmony_ci+    if (remain_ele_num == 0) {
562be168c0dSopenharmony_ci+      return RET_OK;
563be168c0dSopenharmony_ci+    }
564be168c0dSopenharmony_ci+    CalculateMinMaxFp32(float32_ptr_ + offset + segment_num * unit_num_, remain_ele_num,
565be168c0dSopenharmony_ci+                        &real_min_[task_id * unit_segment_num_ + segment_num],
566be168c0dSopenharmony_ci+                        &real_max_[task_id * unit_segment_num_ + segment_num]);
567be168c0dSopenharmony_ci   }
568be168c0dSopenharmony_ci   return RET_OK;
569be168c0dSopenharmony_ci }
570be168c0dSopenharmony_ci@@ -148,7 +156,7 @@ int CalculateMinMaxRun(void *cdata, int task_id, float, float) {
571be168c0dSopenharmony_ci void DynamicQuantCPUKernel::CalculatePerlayerScaleZp() {
572be168c0dSopenharmony_ci   float real_min = FLT_MAX;
573be168c0dSopenharmony_ci   float real_max = -FLT_MAX;
574be168c0dSopenharmony_ci-  for (int i = 0; i < kBucketNums; i++) {
575be168c0dSopenharmony_ci+  for (size_t i = 0; i < real_min_.size(); ++i) {
576be168c0dSopenharmony_ci     real_min = (real_min_[i] < real_min) ? real_min_[i] : real_min;
577be168c0dSopenharmony_ci     real_max = (real_max_[i] > real_max) ? real_max_[i] : real_max;
578be168c0dSopenharmony_ci   }
579be168c0dSopenharmony_ci@@ -158,7 +166,7 @@ void DynamicQuantCPUKernel::CalculatePerlayerScaleZp() {
580be168c0dSopenharmony_ci   int zp = 0;
581be168c0dSopenharmony_ci   constexpr int kQSymmetricRange = 255;
582be168c0dSopenharmony_ci   constexpr int kQAsymmetricRange = 254;
583be168c0dSopenharmony_ci-  if (!symmetric_) {
584be168c0dSopenharmony_ci+  if (!param_->symmetric_) {
585be168c0dSopenharmony_ci     auto range = real_max - real_min;
586be168c0dSopenharmony_ci     if (range <= 0) {
587be168c0dSopenharmony_ci       range = kDefaultRange;
588be168c0dSopenharmony_ci@@ -175,12 +183,11 @@ void DynamicQuantCPUKernel::CalculatePerlayerScaleZp() {
589be168c0dSopenharmony_ci   quant_parm.bitNum = k8Bit;
590be168c0dSopenharmony_ci   quant_parm.inited = true;
591be168c0dSopenharmony_ci   this->out_tensors_.front()->set_quant_params({quant_parm});
592be168c0dSopenharmony_ci-  return;
593be168c0dSopenharmony_ci }
594be168c0dSopenharmony_ci 
595be168c0dSopenharmony_ci void DynamicQuantCPUKernel::CalculatePerChannelScaleZp() {
596be168c0dSopenharmony_ci   std::vector<lite::LiteQuantParam> quant_params;
597be168c0dSopenharmony_ci-  for (int i = 0; i < channel_num_; ++i) {
598be168c0dSopenharmony_ci+  for (size_t i = 0; i < real_min_.size(); ++i) {
599be168c0dSopenharmony_ci     float real_min = real_min_[i];
600be168c0dSopenharmony_ci     float real_max = real_max_[i];
601be168c0dSopenharmony_ci 
602be168c0dSopenharmony_ci@@ -189,7 +196,7 @@ void DynamicQuantCPUKernel::CalculatePerChannelScaleZp() {
603be168c0dSopenharmony_ci     int zp = 0;
604be168c0dSopenharmony_ci     constexpr int kQSymmetricRange = 255;
605be168c0dSopenharmony_ci     constexpr int kQAsymmetricRange = 254;
606be168c0dSopenharmony_ci-    if (!symmetric_) {
607be168c0dSopenharmony_ci+    if (!param_->symmetric_) {
608be168c0dSopenharmony_ci       auto range = real_max - real_min;
609be168c0dSopenharmony_ci       if (range <= 0) {
610be168c0dSopenharmony_ci         range = kDefaultRange;
611be168c0dSopenharmony_ci@@ -208,40 +215,34 @@ void DynamicQuantCPUKernel::CalculatePerChannelScaleZp() {
612be168c0dSopenharmony_ci     quant_params.push_back(quant_parm);
613be168c0dSopenharmony_ci   }
614be168c0dSopenharmony_ci   this->out_tensors_.front()->set_quant_params(quant_params);
615be168c0dSopenharmony_ci-  return;
616be168c0dSopenharmony_ci }
617be168c0dSopenharmony_ci+
618be168c0dSopenharmony_ci int DynamicQuantCPUKernel::QuantData(int task_id) {
619be168c0dSopenharmony_ci-  int num_unit_thread = MSMIN(thread_n_stride_, num_unit_ - task_id * thread_n_stride_);
620be168c0dSopenharmony_ci-  MS_CHECK_GT(num_unit_thread, 0, RET_ERROR);
621be168c0dSopenharmony_ci-  TypeId data_type = out_tensors_.front()->data_type();
622be168c0dSopenharmony_ci-  if (data_type != TypeId::kNumberTypeInt8) {
623be168c0dSopenharmony_ci-    MS_LOG(ERROR) << "Data type not supported:" << data_type;
624be168c0dSopenharmony_ci-    return RET_PARAM_INVALID;
625be168c0dSopenharmony_ci-  }
626be168c0dSopenharmony_ci-  int thread_offset = task_id * thread_n_stride_;
627be168c0dSopenharmony_ci-  int ret;
628be168c0dSopenharmony_ci-  if (activation_perchannel_) {
629be168c0dSopenharmony_ci-    MS_CHECK_EQ(out_tensors_.front()->quant_params().size(), static_cast<size_t>(channel_num_), RET_ERROR);
630be168c0dSopenharmony_ci-    for (int i = 0; i < channel_num_; i++) {
631be168c0dSopenharmony_ci-      auto quant_arg = out_tensors_.front()->quant_params().at(i);
632be168c0dSopenharmony_ci-      scale_[i] = quant_arg.scale;
633be168c0dSopenharmony_ci-      zero_point_[i] = quant_arg.zeroPoint;
634be168c0dSopenharmony_ci-    }
635be168c0dSopenharmony_ci-    if (transpose_) {
636be168c0dSopenharmony_ci-      ret = DoChannelColFp32ToInt8(float32_ptr_ + thread_offset, int8_ptr_ + thread_offset, scale_, zero_point_,
637be168c0dSopenharmony_ci-                                   num_unit_thread, row_length_, (int32_t)INT8_MIN, (int32_t)INT8_MAX);
638be168c0dSopenharmony_ci-    } else {
639be168c0dSopenharmony_ci-      ret = DoChannelRowFp32ToInt8(float32_ptr_ + thread_offset, int8_ptr_ + thread_offset, scale_, zero_point_,
640be168c0dSopenharmony_ci-                                   num_unit_thread, row_length_, (int32_t)INT8_MIN, (int32_t)INT8_MAX);
641be168c0dSopenharmony_ci-    }
642be168c0dSopenharmony_ci-  } else {
643be168c0dSopenharmony_ci+  int task_unit = unit_segment_num_ * unit_num_;
644be168c0dSopenharmony_ci+  int offset = task_id * task_unit;
645be168c0dSopenharmony_ci+  int ele_num = static_cast<int>(in_tensors_.front()->ElementsNum());
646be168c0dSopenharmony_ci+  int remain = ele_num - offset;
647be168c0dSopenharmony_ci+  task_unit = MSMIN(task_unit, remain);
648be168c0dSopenharmony_ci+  if (param_->axis_num_ == 0) {  // per-tensor
649be168c0dSopenharmony_ci     auto quant_arg = out_tensors_.front()->quant_params().front();
650be168c0dSopenharmony_ci-    ret = DoQuantizeFp32ToInt8(float32_ptr_ + thread_offset, int8_ptr_ + thread_offset, quant_arg.scale,
651be168c0dSopenharmony_ci-                               quant_arg.zeroPoint, num_unit_thread, (int32_t)INT8_MIN, (int32_t)INT8_MAX);
652be168c0dSopenharmony_ci+    auto ret = DoQuantizeFp32ToInt8(float32_ptr_ + offset, int8_ptr_ + offset, quant_arg.scale, quant_arg.zeroPoint,
653be168c0dSopenharmony_ci+                                    task_unit, (int32_t)INT8_MIN, (int32_t)INT8_MAX);
654be168c0dSopenharmony_ci+    if (ret != RET_OK) {
655be168c0dSopenharmony_ci+      MS_LOG(ERROR) << "QuantDTypeCast error task_id[" << task_id << "] error_code[" << ret << "]";
656be168c0dSopenharmony_ci+      return RET_ERROR;
657be168c0dSopenharmony_ci+    }
658be168c0dSopenharmony_ci+    return RET_OK;
659be168c0dSopenharmony_ci   }
660be168c0dSopenharmony_ci-  if (ret != RET_OK) {
661be168c0dSopenharmony_ci-    MS_LOG(ERROR) << "QuantDTypeCast error task_id[" << task_id << "] error_code[" << ret << "]";
662be168c0dSopenharmony_ci-    return RET_ERROR;
663be168c0dSopenharmony_ci+  int segment_num = task_unit / unit_num_;
664be168c0dSopenharmony_ci+  for (int i = 0; i < segment_num; ++i) {
665be168c0dSopenharmony_ci+    auto quant_arg = out_tensors_.front()->quant_params()[task_id * unit_segment_num_ + i];
666be168c0dSopenharmony_ci+    auto ret =
667be168c0dSopenharmony_ci+      DoQuantizeFp32ToInt8(float32_ptr_ + offset + i * unit_num_, int8_ptr_ + offset + i * unit_num_, quant_arg.scale,
668be168c0dSopenharmony_ci+                           quant_arg.zeroPoint, unit_num_, (int32_t)INT8_MIN, (int32_t)INT8_MAX);
669be168c0dSopenharmony_ci+    if (ret != RET_OK) {
670be168c0dSopenharmony_ci+      MS_LOG(ERROR) << "QuantDTypeCast error task_id[" << task_id << "] error_code[" << ret << "]";
671be168c0dSopenharmony_ci+      return RET_ERROR;
672be168c0dSopenharmony_ci+    }
673be168c0dSopenharmony_ci   }
674be168c0dSopenharmony_ci   return RET_OK;
675be168c0dSopenharmony_ci }
676be168c0dSopenharmony_ci@@ -257,26 +258,110 @@ int QuantDataRun(void *cdata, int task_id, float, float) {
677be168c0dSopenharmony_ci   return RET_OK;
678be168c0dSopenharmony_ci }
679be168c0dSopenharmony_ci 
680be168c0dSopenharmony_ci+int DynamicQuantCPUKernel::MallocTmpBuffer() {
681be168c0dSopenharmony_ci+  auto in_size = in_tensors_.front()->Size();
682be168c0dSopenharmony_ci+  auto out_size = out_tensors_.front()->Size();
683be168c0dSopenharmony_ci+  if (ms_context_ != nullptr && ms_context_->allocator != nullptr) {
684be168c0dSopenharmony_ci+    int8_ptr_ = static_cast<int8_t *>(ms_context_->allocator->Malloc(in_size + out_size));
685be168c0dSopenharmony_ci+  } else {
686be168c0dSopenharmony_ci+    int8_ptr_ = static_cast<int8_t *>(malloc(in_size + out_size));
687be168c0dSopenharmony_ci+  }
688be168c0dSopenharmony_ci+  MS_CHECK_TRUE_MSG(int8_ptr_ != nullptr, lite::RET_NULL_PTR, "DynamicQuant malloc tmp buffer failed.");
689be168c0dSopenharmony_ci+  float32_ptr_ = reinterpret_cast<float *>(int8_ptr_ + out_size);
690be168c0dSopenharmony_ci+  return lite::RET_OK;
691be168c0dSopenharmony_ci+}
692be168c0dSopenharmony_ci+
693be168c0dSopenharmony_ci+void DynamicQuantCPUKernel::FreeTmpBuffer() {
694be168c0dSopenharmony_ci+  if (need_transpose_) {
695be168c0dSopenharmony_ci+    if (int8_ptr_ != nullptr) {
696be168c0dSopenharmony_ci+      if (ms_context_ != nullptr && ms_context_->allocator != nullptr) {
697be168c0dSopenharmony_ci+        ms_context_->allocator->Free(int8_ptr_);
698be168c0dSopenharmony_ci+      } else {
699be168c0dSopenharmony_ci+        free(int8_ptr_);
700be168c0dSopenharmony_ci+      }
701be168c0dSopenharmony_ci+    }
702be168c0dSopenharmony_ci+  }
703be168c0dSopenharmony_ci+  int8_ptr_ = nullptr;
704be168c0dSopenharmony_ci+  float32_ptr_ = nullptr;
705be168c0dSopenharmony_ci+}
706be168c0dSopenharmony_ci+
707be168c0dSopenharmony_ci int DynamicQuantCPUKernel::Run() {
708be168c0dSopenharmony_ci-  int8_ptr_ = reinterpret_cast<int8_t *>(out_tensors_[0]->data());
709be168c0dSopenharmony_ci-  float32_ptr_ = reinterpret_cast<float *>(in_tensors_[0]->data());
710be168c0dSopenharmony_ci-  CHECK_NULL_RETURN(int8_ptr_);
711be168c0dSopenharmony_ci-  CHECK_NULL_RETURN(float32_ptr_);
712be168c0dSopenharmony_ci-  auto ret = ParallelLaunch(this->ms_context_, CalculateMinMaxRun, this, thread_n_num_);
713be168c0dSopenharmony_ci+  std::vector<int> transpose_shape;
714be168c0dSopenharmony_ci+  if (need_transpose_) {
715be168c0dSopenharmony_ci+    auto shape = in_tensors_.front()->shape();
716be168c0dSopenharmony_ci+    transpose_shape.resize(shape.size());
717be168c0dSopenharmony_ci+    for (size_t i = 0; i < shape.size(); ++i) {
718be168c0dSopenharmony_ci+      transpose_shape[i] = shape[pre_perm_[i]];
719be168c0dSopenharmony_ci+    }
720be168c0dSopenharmony_ci+    if (MallocTmpBuffer() != lite::RET_OK) {
721be168c0dSopenharmony_ci+      MS_LOG(ERROR) << "DynamicQuant MallocTmpBuffer failed.";
722be168c0dSopenharmony_ci+      return lite::RET_NULL_PTR;
723be168c0dSopenharmony_ci+    }
724be168c0dSopenharmony_ci+    std::vector<int> strides(shape.size(), 1);
725be168c0dSopenharmony_ci+    std::vector<int> out_strides(shape.size(), 1);
726be168c0dSopenharmony_ci+    for (int i = static_cast<int>(shape.size()) - C2NUM; i >= 0; i--) {
727be168c0dSopenharmony_ci+      strides[i] = shape[i + 1] * strides[i + 1];
728be168c0dSopenharmony_ci+      out_strides[i] = transpose_shape[i + 1] * out_strides[i + 1];
729be168c0dSopenharmony_ci+    }
730be168c0dSopenharmony_ci+    if (shape.size() <= C6NUM) {
731be168c0dSopenharmony_ci+      (void)DoTransposeFp32(in_tensors_.front()->data(), float32_ptr_, transpose_shape.data(), pre_perm_.data(),
732be168c0dSopenharmony_ci+                            strides.data(), out_strides.data(), in_tensors_.front()->Size(), shape.size());
733be168c0dSopenharmony_ci+    } else {
734be168c0dSopenharmony_ci+      TransposeDimsFp32(in_tensors_.front()->data(), float32_ptr_, transpose_shape.data(), pre_perm_.data(),
735be168c0dSopenharmony_ci+                        strides.data(), out_strides.data(), shape.size(), 0, 1);
736be168c0dSopenharmony_ci+    }
737be168c0dSopenharmony_ci+  } else {
738be168c0dSopenharmony_ci+    int8_ptr_ = reinterpret_cast<int8_t *>(out_tensors_[0]->data());
739be168c0dSopenharmony_ci+    float32_ptr_ = reinterpret_cast<float *>(in_tensors_[0]->data());
740be168c0dSopenharmony_ci+  }
741be168c0dSopenharmony_ci+  if (int8_ptr_ == nullptr || float32_ptr_ == nullptr) {
742be168c0dSopenharmony_ci+    FreeTmpBuffer();
743be168c0dSopenharmony_ci+    MS_LOG(ERROR) << "DynamicQuant's original data exists nullptr.";
744be168c0dSopenharmony_ci+    return lite::RET_NULL_PTR;
745be168c0dSopenharmony_ci+  }
746be168c0dSopenharmony_ci+  auto ret = ParallelLaunch(this->ms_context_, CalculateMinMaxRun, this, thread_num_);
747be168c0dSopenharmony_ci   if (ret != RET_OK) {
748be168c0dSopenharmony_ci+    FreeTmpBuffer();
749be168c0dSopenharmony_ci     MS_LOG(ERROR) << "Run error error_code[" << ret << "]";
750be168c0dSopenharmony_ci     return RET_ERROR;
751be168c0dSopenharmony_ci   }
752be168c0dSopenharmony_ci-  if (activation_perchannel_) {
753be168c0dSopenharmony_ci+  if (param_->axis_num_ != 0) {
754be168c0dSopenharmony_ci     CalculatePerChannelScaleZp();
755be168c0dSopenharmony_ci   } else {
756be168c0dSopenharmony_ci     CalculatePerlayerScaleZp();
757be168c0dSopenharmony_ci   }
758be168c0dSopenharmony_ci-  ret = ParallelLaunch(this->ms_context_, QuantDataRun, this, thread_n_num_);
759be168c0dSopenharmony_ci+  ret = ParallelLaunch(this->ms_context_, QuantDataRun, this, thread_num_);
760be168c0dSopenharmony_ci   if (ret != RET_OK) {
761be168c0dSopenharmony_ci+    FreeTmpBuffer();
762be168c0dSopenharmony_ci     MS_LOG(ERROR) << "Run error error_code[" << ret << "]";
763be168c0dSopenharmony_ci     return RET_ERROR;
764be168c0dSopenharmony_ci   }
765be168c0dSopenharmony_ci+  if (need_transpose_) {
766be168c0dSopenharmony_ci+    auto out_shape = out_tensors_.front()->shape();
767be168c0dSopenharmony_ci+    TransposeParameter trans_parameter;
768be168c0dSopenharmony_ci+    (void)memset(&trans_parameter, 0, sizeof(TransposeParameter));
769be168c0dSopenharmony_ci+    trans_parameter.op_parameter_.thread_num_ = 1;
770be168c0dSopenharmony_ci+    trans_parameter.num_axes_ = static_cast<int>(out_shape.size());
771be168c0dSopenharmony_ci+    trans_parameter.data_num_ = out_tensors_[0]->ElementsNum();
772be168c0dSopenharmony_ci+    trans_parameter.perm_size_ = post_perm_.size();
773be168c0dSopenharmony_ci+    int last_index = static_cast<int>(out_shape.size()) - 1;
774be168c0dSopenharmony_ci+    trans_parameter.perm_[last_index] = post_perm_[last_index];
775be168c0dSopenharmony_ci+    trans_parameter.strides_[last_index] = 1;
776be168c0dSopenharmony_ci+    trans_parameter.out_strides_[last_index] = 1;
777be168c0dSopenharmony_ci+    for (int i = last_index - 1; i >= 0; i--) {
778be168c0dSopenharmony_ci+      trans_parameter.perm_[i] = post_perm_[i];
779be168c0dSopenharmony_ci+      trans_parameter.strides_[i] = transpose_shape[i + 1] * trans_parameter.strides_[i + 1];
780be168c0dSopenharmony_ci+      trans_parameter.out_strides_[i] = out_shape[i + 1] * trans_parameter.out_strides_[i + 1];
781be168c0dSopenharmony_ci+    }
782be168c0dSopenharmony_ci+    if (out_shape.size() <= C6NUM) {
783be168c0dSopenharmony_ci+      (void)DoTransposeInt8(int8_ptr_, reinterpret_cast<int8_t *>(out_tensors_[0]->data()), out_shape.data(),
784be168c0dSopenharmony_ci+                            &trans_parameter);
785be168c0dSopenharmony_ci+    } else {
786be168c0dSopenharmony_ci+      TransposeDimsInt8(int8_ptr_, reinterpret_cast<int8_t *>(out_tensors_[0]->data()), out_shape.data(),
787be168c0dSopenharmony_ci+                        &trans_parameter, 0, 1);
788be168c0dSopenharmony_ci+    }
789be168c0dSopenharmony_ci+  }
790be168c0dSopenharmony_ci+  FreeTmpBuffer();
791be168c0dSopenharmony_ci   return RET_OK;
792be168c0dSopenharmony_ci }
793be168c0dSopenharmony_ci 
794be168c0dSopenharmony_cidiff --git a/mindspore/lite/src/litert/kernel/cpu/int8/dynamic_quant.h b/mindspore/lite/src/litert/kernel/cpu/int8/dynamic_quant.h
795be168c0dSopenharmony_ciindex ca84f088..023f1fab 100644
796be168c0dSopenharmony_ci--- a/mindspore/lite/src/litert/kernel/cpu/int8/dynamic_quant.h
797be168c0dSopenharmony_ci+++ b/mindspore/lite/src/litert/kernel/cpu/int8/dynamic_quant.h
798be168c0dSopenharmony_ci@@ -21,31 +21,15 @@
799be168c0dSopenharmony_ci #include <cfloat>
800be168c0dSopenharmony_ci #include <map>
801be168c0dSopenharmony_ci #include "src/litert/lite_kernel.h"
802be168c0dSopenharmony_ci+#include "nnacl/dynamic_quant_parameter.h"
803be168c0dSopenharmony_ci 
804be168c0dSopenharmony_ci namespace mindspore::kernel {
805be168c0dSopenharmony_ci class DynamicQuantCPUKernel : public LiteKernel {
806be168c0dSopenharmony_ci  public:
807be168c0dSopenharmony_ci   DynamicQuantCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
808be168c0dSopenharmony_ci                         const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
809be168c0dSopenharmony_ci-      : LiteKernel(parameter, inputs, outputs, ctx), thread_num_(ctx->thread_num_) {}
810be168c0dSopenharmony_ci-  ~DynamicQuantCPUKernel() override {
811be168c0dSopenharmony_ci-    if (real_min_ != nullptr) {
812be168c0dSopenharmony_ci-      free(real_min_);
813be168c0dSopenharmony_ci-      real_min_ = nullptr;
814be168c0dSopenharmony_ci-    }
815be168c0dSopenharmony_ci-    if (real_max_ != nullptr) {
816be168c0dSopenharmony_ci-      free(real_max_);
817be168c0dSopenharmony_ci-      real_max_ = nullptr;
818be168c0dSopenharmony_ci-    }
819be168c0dSopenharmony_ci-    if (scale_ != nullptr) {
820be168c0dSopenharmony_ci-      free(scale_);
821be168c0dSopenharmony_ci-      scale_ = nullptr;
822be168c0dSopenharmony_ci-    }
823be168c0dSopenharmony_ci-    if (zero_point_ != nullptr) {
824be168c0dSopenharmony_ci-      free(zero_point_);
825be168c0dSopenharmony_ci-      zero_point_ = nullptr;
826be168c0dSopenharmony_ci-    }
827be168c0dSopenharmony_ci-  };
828be168c0dSopenharmony_ci+      : LiteKernel(parameter, inputs, outputs, ctx) {}
829be168c0dSopenharmony_ci+  ~DynamicQuantCPUKernel() override = default;
830be168c0dSopenharmony_ci 
831be168c0dSopenharmony_ci   int Prepare() override;
832be168c0dSopenharmony_ci   int ReSize() override;
833be168c0dSopenharmony_ci@@ -57,28 +41,21 @@ class DynamicQuantCPUKernel : public LiteKernel {
834be168c0dSopenharmony_ci  private:
835be168c0dSopenharmony_ci   void CalculatePerlayerScaleZp();
836be168c0dSopenharmony_ci   void CalculatePerChannelScaleZp();
837be168c0dSopenharmony_ci-
838be168c0dSopenharmony_ci- private:
839be168c0dSopenharmony_ci-  int thread_num_;
840be168c0dSopenharmony_ci-  int thread_n_num_{0};
841be168c0dSopenharmony_ci-  int thread_n_stride_{0};
842be168c0dSopenharmony_ci-  int num_unit_{0};
843be168c0dSopenharmony_ci-  int8_t *int8_ptr_ = nullptr;
844be168c0dSopenharmony_ci-  float *float32_ptr_ = nullptr;
845be168c0dSopenharmony_ci-  float *real_min_ = nullptr;
846be168c0dSopenharmony_ci-  float *real_max_ = nullptr;
847be168c0dSopenharmony_ci-  float *scale_ = nullptr;
848be168c0dSopenharmony_ci-  int32_t *zero_point_ = nullptr;
849be168c0dSopenharmony_ci-
850be168c0dSopenharmony_ci-  int32_t src_dtype_{0};
851be168c0dSopenharmony_ci-  int32_t dst_dtype_{0};
852be168c0dSopenharmony_ci-  bool symmetric_ = false;
853be168c0dSopenharmony_ci-  bool activation_perchannel_ = false;
854be168c0dSopenharmony_ci-  bool transpose_ = false;
855be168c0dSopenharmony_ci-  int32_t prefer_axis_{-1};
856be168c0dSopenharmony_ci-  int32_t channel_num_{0};
857be168c0dSopenharmony_ci-  int32_t channel_length_{0};
858be168c0dSopenharmony_ci-  int32_t row_length_{0};
859be168c0dSopenharmony_ci+  int MallocTmpBuffer();
860be168c0dSopenharmony_ci+  void FreeTmpBuffer();
861be168c0dSopenharmony_ci+
862be168c0dSopenharmony_ci+  DynamicQuantParameter *param_{nullptr};
863be168c0dSopenharmony_ci+  std::vector<float> real_min_;
864be168c0dSopenharmony_ci+  std::vector<float> real_max_;
865be168c0dSopenharmony_ci+  std::vector<float> scale_;
866be168c0dSopenharmony_ci+  std::vector<float> zero_point_;
867be168c0dSopenharmony_ci+  std::vector<int> pre_perm_;
868be168c0dSopenharmony_ci+  std::vector<int> post_perm_;
869be168c0dSopenharmony_ci+  int8_t *int8_ptr_{nullptr};
870be168c0dSopenharmony_ci+  float *float32_ptr_{nullptr};
871be168c0dSopenharmony_ci+  int unit_num_{0};
872be168c0dSopenharmony_ci+  int unit_segment_num_{0};
873be168c0dSopenharmony_ci+  bool need_transpose_{false};
874be168c0dSopenharmony_ci };
875be168c0dSopenharmony_ci }  // namespace mindspore::kernel
876be168c0dSopenharmony_ci 
877be168c0dSopenharmony_cidiff --git a/mindspore/lite/src/litert/kernel/cpu/int8/matmul_dynamic_base_int8.cc b/mindspore/lite/src/litert/kernel/cpu/int8/matmul_dynamic_base_int8.cc
878be168c0dSopenharmony_ciindex adae37aa..bab1f730 100644
879be168c0dSopenharmony_ci--- a/mindspore/lite/src/litert/kernel/cpu/int8/matmul_dynamic_base_int8.cc
880be168c0dSopenharmony_ci+++ b/mindspore/lite/src/litert/kernel/cpu/int8/matmul_dynamic_base_int8.cc
881be168c0dSopenharmony_ci@@ -54,12 +54,12 @@ void MatmulDynamicBaseInt8CPUKernel::FreeQuantParam() {
882be168c0dSopenharmony_ci }
883be168c0dSopenharmony_ci 
884be168c0dSopenharmony_ci int MatmulDynamicBaseInt8CPUKernel::MallocQuantParam() {
885be168c0dSopenharmony_ci-  quant_param_ = reinterpret_cast<MatmulDynamicQuantParameter *>(malloc(sizeof(MatmulQuantParameter)));
886be168c0dSopenharmony_ci+  quant_param_ = reinterpret_cast<MatmulDynamicQuantParameter *>(malloc(sizeof(MatmulDynamicQuantParameter)));
887be168c0dSopenharmony_ci   if (quant_param_ == nullptr) {
888be168c0dSopenharmony_ci     MS_LOG(ERROR) << "Malloc MatmulDynamicQuantParameter for Matmul int8 op failed!";
889be168c0dSopenharmony_ci     return RET_ERROR;
890be168c0dSopenharmony_ci   }
891be168c0dSopenharmony_ci-  memset(quant_param_, 0, sizeof(MatmulQuantParameter));
892be168c0dSopenharmony_ci+  (void)memset(quant_param_, 0, sizeof(MatmulDynamicQuantParameter));
893be168c0dSopenharmony_ci   return RET_OK;
894be168c0dSopenharmony_ci }
895be168c0dSopenharmony_ci 
896be168c0dSopenharmony_ci@@ -80,9 +80,16 @@ int MatmulDynamicBaseInt8CPUKernel::InitFilterQuantParam() {
897be168c0dSopenharmony_ci     MS_LOG(ERROR) << weight_tensor->tensor_name() << " dims < 2.";
898be168c0dSopenharmony_ci     return RET_ERROR;
899be168c0dSopenharmony_ci   }
900be168c0dSopenharmony_ci-  int col = param_->b_transpose_ ? w_shape[w_shape.size() - kSize2] : w_shape[w_shape.size() - kSize1];
901be168c0dSopenharmony_ci   filter_per_channel_ = (weight_quant_params.size() > 1);
902be168c0dSopenharmony_ci-  auto channel_num = filter_per_channel_ ? col : 1;
903be168c0dSopenharmony_ci+  filter_per_batch_channel_ = false;
904be168c0dSopenharmony_ci+  int channel_num = 1;
905be168c0dSopenharmony_ci+  if (filter_per_channel_) {
906be168c0dSopenharmony_ci+    channel_num = param_->col_;
907be168c0dSopenharmony_ci+    if (weight_quant_params.size() > static_cast<size_t>(channel_num)) {
908be168c0dSopenharmony_ci+      filter_per_batch_channel_ = true;
909be168c0dSopenharmony_ci+      channel_num = in_tensors_.at(kWeightIndex)->ElementsNum() / param_->deep_;
910be168c0dSopenharmony_ci+    }
911be168c0dSopenharmony_ci+  }
912be168c0dSopenharmony_ci   if (static_cast<int>(weight_quant_params.size()) != channel_num) {
913be168c0dSopenharmony_ci     MS_LOG(ERROR) << weight_tensor->tensor_name() << " quant params size:" << weight_quant_params.size()
914be168c0dSopenharmony_ci                   << " != channel_num:" << channel_num;
915be168c0dSopenharmony_ci@@ -90,10 +97,10 @@ int MatmulDynamicBaseInt8CPUKernel::InitFilterQuantParam() {
916be168c0dSopenharmony_ci   }
917be168c0dSopenharmony_ci   quant_param_->filter_scale_ = reinterpret_cast<float *>(malloc(channel_num * sizeof(float)));
918be168c0dSopenharmony_ci   CHECK_NULL_RETURN(quant_param_->filter_scale_);
919be168c0dSopenharmony_ci-  memset(quant_param_->filter_scale_, 0, sizeof(channel_num));
920be168c0dSopenharmony_ci+  (void)memset(quant_param_->filter_scale_, 0, sizeof(channel_num));
921be168c0dSopenharmony_ci   quant_param_->filter_zp_ = reinterpret_cast<int32_t *>(malloc(channel_num * sizeof(int32_t)));
922be168c0dSopenharmony_ci   CHECK_NULL_RETURN(quant_param_->filter_zp_);
923be168c0dSopenharmony_ci-  memset(quant_param_->filter_zp_, 0, sizeof(channel_num));
924be168c0dSopenharmony_ci+  (void)memset(quant_param_->filter_zp_, 0, sizeof(channel_num));
925be168c0dSopenharmony_ci 
926be168c0dSopenharmony_ci   for (int i = 0; i < channel_num; i++) {
927be168c0dSopenharmony_ci     quant_param_->filter_scale_[i] = static_cast<float>(weight_quant_params[i].scale);
928be168c0dSopenharmony_ci@@ -143,7 +150,15 @@ int MatmulDynamicBaseInt8CPUKernel::InitInputQuantParam(std::vector<float> *scal
929be168c0dSopenharmony_ci     return RET_ERROR;
930be168c0dSopenharmony_ci   }
931be168c0dSopenharmony_ci   input_per_channel_ = (in_quant_params.size() > 1);
932be168c0dSopenharmony_ci-  auto channel_num = input_per_channel_ ? param_->row_ : 1;
933be168c0dSopenharmony_ci+  input_per_batch_channel_ = false;
934be168c0dSopenharmony_ci+  int channel_num = 1;
935be168c0dSopenharmony_ci+  if (input_per_channel_) {
936be168c0dSopenharmony_ci+    channel_num = param_->row_;
937be168c0dSopenharmony_ci+    if (in_quant_params.size() > static_cast<size_t>(channel_num)) {
938be168c0dSopenharmony_ci+      input_per_batch_channel_ = true;
939be168c0dSopenharmony_ci+      channel_num = in_tensors_.at(kInputIndex)->ElementsNum() / param_->deep_;
940be168c0dSopenharmony_ci+    }
941be168c0dSopenharmony_ci+  }
942be168c0dSopenharmony_ci   if (static_cast<int>(in_quant_params.size()) != channel_num) {
943be168c0dSopenharmony_ci     MS_LOG(ERROR) << in_tensors_.at(kInputIndex)->tensor_name() << " quant params size:" << in_quant_params.size()
944be168c0dSopenharmony_ci                   << " != channel_num:" << channel_num;
945be168c0dSopenharmony_ci@@ -199,7 +214,7 @@ int MatmulDynamicBaseInt8CPUKernel::InitMatrixABuffer() {
946be168c0dSopenharmony_ci     return lite::RET_NULL_PTR;
947be168c0dSopenharmony_ci   }
948be168c0dSopenharmony_ci   input_sums_ = reinterpret_cast<int *>(pack_a_ptr_ + pack_a_size);
949be168c0dSopenharmony_ci-  memset(pack_a_ptr_, 0, pack_a_size + sum_a_size);
950be168c0dSopenharmony_ci+  (void)memset(pack_a_ptr_, 0, pack_a_size + sum_a_size);
951be168c0dSopenharmony_ci   return RET_OK;
952be168c0dSopenharmony_ci }
953be168c0dSopenharmony_ci 
954be168c0dSopenharmony_ci@@ -240,8 +255,8 @@ int MatmulDynamicBaseInt8CPUKernel::InitMatrixBBuffer() {
955be168c0dSopenharmony_ci     FreeTmpBuffer();
956be168c0dSopenharmony_ci     return RET_ERROR;
957be168c0dSopenharmony_ci   }
958be168c0dSopenharmony_ci-  memset(pack_b_ptr_, 0, b_batch_ * param_->col_align_ * param_->deep_align_ * sizeof(int8_t));
959be168c0dSopenharmony_ci-  memset(weight_sums_, 0, b_batch_ * param_->col_align_ * sizeof(int));
960be168c0dSopenharmony_ci+  (void)memset(pack_b_ptr_, 0, b_batch_ * param_->col_align_ * param_->deep_align_ * sizeof(int8_t));
961be168c0dSopenharmony_ci+  (void)memset(weight_sums_, 0, b_batch_ * param_->col_align_ * sizeof(int));
962be168c0dSopenharmony_ci   return RET_OK;
963be168c0dSopenharmony_ci }
964be168c0dSopenharmony_ci 
965be168c0dSopenharmony_ci@@ -258,7 +273,7 @@ int MatmulDynamicBaseInt8CPUKernel::CopyBias() {
966be168c0dSopenharmony_ci       FreeTmpBuffer();
967be168c0dSopenharmony_ci       return RET_MEMORY_FAILED;
968be168c0dSopenharmony_ci     }
969be168c0dSopenharmony_ci-    memcpy(bias_ptr_, bias_tensor->data(), bias_tensor->Size());
970be168c0dSopenharmony_ci+    (void)memcpy(bias_ptr_, bias_tensor->data(), bias_tensor->Size());
971be168c0dSopenharmony_ci   } else {
972be168c0dSopenharmony_ci     bias_ptr_ = nullptr;
973be168c0dSopenharmony_ci   }
974be168c0dSopenharmony_ci@@ -352,6 +367,8 @@ int MatmulDynamicBaseInt8CPUKernel::ReSize() {
975be168c0dSopenharmony_ci int MatmulDynamicBaseInt8CPUKernel::InitBroadcastParams(const std::vector<int> &a_shape_const,
976be168c0dSopenharmony_ci                                                         const std::vector<int> &b_shape_const, MatMulParameter *params,
977be168c0dSopenharmony_ci                                                         std::vector<int> *a_offsets, std::vector<int> *b_offsets) {
978be168c0dSopenharmony_ci+  CHECK_NULL_RETURN(a_offsets);
979be168c0dSopenharmony_ci+  CHECK_NULL_RETURN(b_offsets);
980be168c0dSopenharmony_ci   std::vector<int> a_shape = a_shape_const;
981be168c0dSopenharmony_ci   if (a_shape.size() < kNCHWDimNumber) {
982be168c0dSopenharmony_ci     size_t add_nums = kNCHWDimNumber - a_shape.size();
983be168c0dSopenharmony_ci@@ -370,8 +387,8 @@ int MatmulDynamicBaseInt8CPUKernel::InitBroadcastParams(const std::vector<int> &
984be168c0dSopenharmony_ci   int batch_sizes[MAX_SHAPE_SIZE] = {0};
985be168c0dSopenharmony_ci   int a_batch_sizes[MAX_SHAPE_SIZE] = {0};
986be168c0dSopenharmony_ci   int b_batch_sizes[MAX_SHAPE_SIZE] = {0};
987be168c0dSopenharmony_ci-  for (int i = a_shape.size() - kCHWDimNumber; i >= 0; --i) {
988be168c0dSopenharmony_ci-    if (static_cast<int>(a_shape.size() - kCHWDimNumber) == i) {
989be168c0dSopenharmony_ci+  for (int i = static_cast<int>(a_shape.size()) - kCHWDimNumber; i >= 0; --i) {
990be168c0dSopenharmony_ci+    if (static_cast<int>(a_shape.size()) - kCHWDimNumber == i) {
991be168c0dSopenharmony_ci       batch_sizes[i] = std::max(a_shape[i], b_shape[i]);
992be168c0dSopenharmony_ci       a_batch_sizes[i] = a_shape[i];
993be168c0dSopenharmony_ci       b_batch_sizes[i] = b_shape[i];
994be168c0dSopenharmony_cidiff --git a/mindspore/lite/src/litert/kernel/cpu/int8/matmul_dynamic_base_int8.h b/mindspore/lite/src/litert/kernel/cpu/int8/matmul_dynamic_base_int8.h
995be168c0dSopenharmony_ciindex 3fc20d80..858affc8 100644
996be168c0dSopenharmony_ci--- a/mindspore/lite/src/litert/kernel/cpu/int8/matmul_dynamic_base_int8.h
997be168c0dSopenharmony_ci+++ b/mindspore/lite/src/litert/kernel/cpu/int8/matmul_dynamic_base_int8.h
998be168c0dSopenharmony_ci@@ -58,6 +58,8 @@ class MatmulDynamicBaseInt8CPUKernel : public LiteKernel {
999be168c0dSopenharmony_ci   int b_batch_ = 1;
1000be168c0dSopenharmony_ci   std::vector<int> a_offset_;
1001be168c0dSopenharmony_ci   std::vector<int> b_offset_;
1002be168c0dSopenharmony_ci+  int a_quant_offset_ = 0;
1003be168c0dSopenharmony_ci+  int b_quant_offset_ = 0;
1004be168c0dSopenharmony_ci   typedef void (*PackFunc)(const int8_t *src, int8_t *dst, int row, int col);
1005be168c0dSopenharmony_ci   virtual void InitParameter() = 0;
1006be168c0dSopenharmony_ci   int TransferA();
1007be168c0dSopenharmony_ci@@ -69,14 +71,15 @@ class MatmulDynamicBaseInt8CPUKernel : public LiteKernel {
1008be168c0dSopenharmony_ci   int InitMatrixABuffer();
1009be168c0dSopenharmony_ci   void FreeMatrixABuffer();
1010be168c0dSopenharmony_ci 
1011be168c0dSopenharmony_ci- protected:
1012be168c0dSopenharmony_ci   MatMulParameter *param_ = nullptr;
1013be168c0dSopenharmony_ci   MatmulDynamicQuantParameter *quant_param_ = nullptr;
1014be168c0dSopenharmony_ci   int8_t *pack_a_ptr_ = nullptr;
1015be168c0dSopenharmony_ci   int8_t *pack_b_ptr_ = nullptr;
1016be168c0dSopenharmony_ci 
1017be168c0dSopenharmony_ci   bool input_per_channel_ = false;
1018be168c0dSopenharmony_ci-  bool filter_per_channel_ = true;
1019be168c0dSopenharmony_ci+  bool input_per_batch_channel_ = false;
1020be168c0dSopenharmony_ci+  bool filter_per_channel_ = false;
1021be168c0dSopenharmony_ci+  bool filter_per_batch_channel_ = false;
1022be168c0dSopenharmony_ci   int8_t *batch_input_ptr_ = nullptr;
1023be168c0dSopenharmony_ci   int8_t *batch_weight_ptr_ = nullptr;
1024be168c0dSopenharmony_ci   int8_t *batch_a_ptr_ = nullptr;
1025be168c0dSopenharmony_cidiff --git a/mindspore/lite/tools/converter/quantizer/insert_quant_node_manager.cc b/mindspore/lite/tools/converter/quantizer/insert_quant_node_manager.cc
1026be168c0dSopenharmony_ciindex 721a1a8c..03113eaa 100644
1027be168c0dSopenharmony_ci--- a/mindspore/lite/tools/converter/quantizer/insert_quant_node_manager.cc
1028be168c0dSopenharmony_ci+++ b/mindspore/lite/tools/converter/quantizer/insert_quant_node_manager.cc
1029be168c0dSopenharmony_ci@@ -102,7 +102,7 @@ int InsertQuantNodeManager::InsertDynamicQuantWithIndex(const FuncGraphPtr &grap
1030be168c0dSopenharmony_ci   bool symmetric = activation_channel ? true : false;
1031be168c0dSopenharmony_ci   primitive->set_symmetric(symmetric);
1032be168c0dSopenharmony_ci   primitive->set_activation_channel(activation_channel);
1033be168c0dSopenharmony_ci-  if (activation_channel && SetPreferAxis(cnode, index, primitive) != RET_OK) {
1034be168c0dSopenharmony_ci+  if (activation_channel && SetPreferAxes(cnode, index, primitive) != RET_OK) {
1035be168c0dSopenharmony_ci     MS_LOG(ERROR) << "Set prefer axis failed, " << cnode->fullname_with_scope();
1036be168c0dSopenharmony_ci     return RET_ERROR;
1037be168c0dSopenharmony_ci   }
1038be168c0dSopenharmony_ci@@ -127,18 +127,25 @@ int InsertQuantNodeManager::InsertDynamicQuantWithIndex(const FuncGraphPtr &grap
1039be168c0dSopenharmony_ci   return RET_OK;
1040be168c0dSopenharmony_ci }
1041be168c0dSopenharmony_ci 
1042be168c0dSopenharmony_ci-int InsertQuantNodeManager::SetPreferAxis(const CNodePtr &cnode, size_t index,
1043be168c0dSopenharmony_ci+int InsertQuantNodeManager::SetPreferAxes(const CNodePtr &cnode, size_t index,
1044be168c0dSopenharmony_ci                                           const std::shared_ptr<ops::DynamicQuant> &dynamic_primitive) {
1045be168c0dSopenharmony_ci   auto primitive = GetValueNode<PrimitivePtr>(cnode->input(0));
1046be168c0dSopenharmony_ci   if (primitive->name() == ops::kNameMatMulFusion || primitive->name() == ops::kNameMatMul) {
1047be168c0dSopenharmony_ci     auto matmul_prim = api::MakeShared<ops::MatMul>(primitive);
1048be168c0dSopenharmony_ci     CHECK_NULL_RETURN(matmul_prim);
1049be168c0dSopenharmony_ci+    auto shape = opt::GetAnfNodeOutputShape(cnode->input(index), 0);
1050be168c0dSopenharmony_ci+    std::vector<int> prefer_axes;
1051be168c0dSopenharmony_ci+    for (int i = 0; i < static_cast<int>(shape.size()) - C2NUM; ++i) {
1052be168c0dSopenharmony_ci+      prefer_axes.push_back(i);
1053be168c0dSopenharmony_ci+    }
1054be168c0dSopenharmony_ci     // For MatMul A
1055be168c0dSopenharmony_ci     if (index == kInputIndex + kPrimOffset) {
1056be168c0dSopenharmony_ci       if (matmul_prim->GetAttr(ops::kTransposeA) != nullptr && matmul_prim->get_transpose_a()) {
1057be168c0dSopenharmony_ci+        prefer_axes.push_back(kLastFisrtIndex);
1058be168c0dSopenharmony_ci         dynamic_primitive->set_prefer_axis(kLastFisrtIndex);
1059be168c0dSopenharmony_ci         dynamic_primitive->set_transpose(true);
1060be168c0dSopenharmony_ci       } else {
1061be168c0dSopenharmony_ci+        prefer_axes.push_back(kLastSecondIndex);
1062be168c0dSopenharmony_ci         dynamic_primitive->set_prefer_axis(kLastSecondIndex);
1063be168c0dSopenharmony_ci         dynamic_primitive->set_transpose(false);
1064be168c0dSopenharmony_ci       }
1065be168c0dSopenharmony_ci@@ -146,13 +153,16 @@ int InsertQuantNodeManager::SetPreferAxis(const CNodePtr &cnode, size_t index,
1066be168c0dSopenharmony_ci     // For MatMul B
1067be168c0dSopenharmony_ci     if (index == kWeightIndex + kPrimOffset) {
1068be168c0dSopenharmony_ci       if (matmul_prim->GetAttr(ops::kTransposeB) != nullptr && matmul_prim->get_transpose_b()) {
1069be168c0dSopenharmony_ci+        prefer_axes.push_back(kLastSecondIndex);
1070be168c0dSopenharmony_ci         dynamic_primitive->set_prefer_axis(kLastSecondIndex);
1071be168c0dSopenharmony_ci         dynamic_primitive->set_transpose(true);
1072be168c0dSopenharmony_ci       } else {
1073be168c0dSopenharmony_ci+        prefer_axes.push_back(kLastFisrtIndex);
1074be168c0dSopenharmony_ci         dynamic_primitive->set_prefer_axis(kLastFisrtIndex);
1075be168c0dSopenharmony_ci         dynamic_primitive->set_transpose(false);
1076be168c0dSopenharmony_ci       }
1077be168c0dSopenharmony_ci     }
1078be168c0dSopenharmony_ci+    dynamic_primitive->set_prefer_axes(prefer_axes);
1079be168c0dSopenharmony_ci   } else {
1080be168c0dSopenharmony_ci     MS_LOG(WARNING) << "cnode don't need prefer axis, cnode name: " << cnode->fullname_with_scope();
1081be168c0dSopenharmony_ci   }
1082be168c0dSopenharmony_ci@@ -167,13 +177,17 @@ int InsertQuantNodeManager::NewDynamicQuantNode(const FuncGraphPtr &graph, const
1083be168c0dSopenharmony_ci     return RET_ERROR;
1084be168c0dSopenharmony_ci   }
1085be168c0dSopenharmony_ci   auto input = cnode->input(kInputIndex + kPrimOffset);
1086be168c0dSopenharmony_ci+  auto weight = cnode->input(kWeightIndex + kPrimOffset);
1087be168c0dSopenharmony_ci+  if (activation_channel && (input->isa<mindspore::CNode>() || IsGraphInput(input)) &&
1088be168c0dSopenharmony_ci+      (weight->isa<mindspore::CNode>() || IsGraphInput(weight))) {
1089be168c0dSopenharmony_ci+    return RET_NOT_SUPPORT;
1090be168c0dSopenharmony_ci+  }
1091be168c0dSopenharmony_ci   if (input->isa<mindspore::CNode>() || IsGraphInput(input)) {
1092be168c0dSopenharmony_ci     auto ret = InsertDynamicQuantWithIndex(graph, cnode, kInputIndex + kPrimOffset, activation_channel);
1093be168c0dSopenharmony_ci     if (ret != RET_OK) {
1094be168c0dSopenharmony_ci       MS_LOG(ERROR) << "Insert dynamic quant with index failed.";
1095be168c0dSopenharmony_ci     }
1096be168c0dSopenharmony_ci   }
1097be168c0dSopenharmony_ci-  auto weight = cnode->input(kWeightIndex + kPrimOffset);
1098be168c0dSopenharmony_ci   if (weight->isa<mindspore::CNode>() || IsGraphInput(weight)) {
1099be168c0dSopenharmony_ci     auto ret = InsertDynamicQuantWithIndex(graph, cnode, kWeightIndex + kPrimOffset, activation_channel);
1100be168c0dSopenharmony_ci     if (ret != RET_OK) {
1101be168c0dSopenharmony_ci@@ -218,6 +232,9 @@ int InsertQuantNodeManager::InsertDynamicQuantNode(const FuncGraphPtr &graph,
1102be168c0dSopenharmony_ci       continue;
1103be168c0dSopenharmony_ci     }
1104be168c0dSopenharmony_ci     ret = NewDynamicQuantNode(graph, cnode, activation_channel);
1105be168c0dSopenharmony_ci+    if (ret == RET_NOT_SUPPORT) {
1106be168c0dSopenharmony_ci+      continue;
1107be168c0dSopenharmony_ci+    }
1108be168c0dSopenharmony_ci     if (ret != RET_OK) {
1109be168c0dSopenharmony_ci       MS_LOG(ERROR) << "node:" << op_name << " new dynamic quant node failed.";
1110be168c0dSopenharmony_ci       return ret;
1111be168c0dSopenharmony_ci@@ -684,7 +701,7 @@ int InsertQuantNodeManager::InsertQuantDtypeCastFlyNode(const FuncGraphPtr &func
1112be168c0dSopenharmony_ci 
1113be168c0dSopenharmony_ci int InsertQuantNodeManager::CalculateScaleZPNode(const FuncGraphPtr &func_graph, const CNodePtr &cnode,
1114be168c0dSopenharmony_ci                                                  size_t input_index, ParameterPtr *scales_node, ParameterPtr *zps_node,
1115be168c0dSopenharmony_ci-                                                 TypeId src_dtype, TypeId dst_dtype, int axis) {
1116be168c0dSopenharmony_ci+                                                 TypeId dst_dtype, int axis) {
1117be168c0dSopenharmony_ci   CHECK_NULL_RETURN(scales_node);
1118be168c0dSopenharmony_ci   CHECK_NULL_RETURN(zps_node);
1119be168c0dSopenharmony_ci   auto input_node = cnode->input(input_index);
1120be168c0dSopenharmony_ci@@ -785,7 +802,7 @@ int InsertQuantNodeManager::InsertAscendAntiQuantNode(const FuncGraphPtr &func_g
1121be168c0dSopenharmony_ci   CHECK_NULL_RETURN(cast_cnode);
1122be168c0dSopenharmony_ci   ParameterPtr scales_node;
1123be168c0dSopenharmony_ci   ParameterPtr zps_node;
1124be168c0dSopenharmony_ci-  auto ret = CalculateScaleZPNode(func_graph, cnode, input_index, &scales_node, &zps_node, src_dtype, dst_dtype, axis);
1125be168c0dSopenharmony_ci+  auto ret = CalculateScaleZPNode(func_graph, cnode, input_index, &scales_node, &zps_node, dst_dtype, axis);
1126be168c0dSopenharmony_ci   if (ret != RET_OK) {
1127be168c0dSopenharmony_ci     MS_LOG(ERROR) << "Fail to Remove node: " << input_node->fullname_with_scope() << " quant param";
1128be168c0dSopenharmony_ci     return RET_ERROR;
1129be168c0dSopenharmony_cidiff --git a/mindspore/lite/tools/converter/quantizer/insert_quant_node_manager.h b/mindspore/lite/tools/converter/quantizer/insert_quant_node_manager.h
1130be168c0dSopenharmony_ciindex a46e8c68..6f328485 100644
1131be168c0dSopenharmony_ci--- a/mindspore/lite/tools/converter/quantizer/insert_quant_node_manager.h
1132be168c0dSopenharmony_ci+++ b/mindspore/lite/tools/converter/quantizer/insert_quant_node_manager.h
1133be168c0dSopenharmony_ci@@ -75,13 +75,12 @@ class InsertQuantNodeManager {
1134be168c0dSopenharmony_ci   int MarkDynamicQuantize(const CNodePtr &cnode);
1135be168c0dSopenharmony_ci 
1136be168c0dSopenharmony_ci   int CalculateScaleZPNode(const FuncGraphPtr &func_graph, const CNodePtr &cnode, size_t input_index,
1137be168c0dSopenharmony_ci-                           ParameterPtr *scales_node, ParameterPtr *zps_node, TypeId src_dtype, TypeId dst_dtype,
1138be168c0dSopenharmony_ci-                           int axis);
1139be168c0dSopenharmony_ci+                           ParameterPtr *scales_node, ParameterPtr *zps_node, TypeId dst_dtype, int axis);
1140be168c0dSopenharmony_ci 
1141be168c0dSopenharmony_ci   int InsertDynamicQuantWithIndex(const FuncGraphPtr &graph, const CNodePtr &cnode, size_t index,
1142be168c0dSopenharmony_ci                                   bool activation_channel = true);
1143be168c0dSopenharmony_ci 
1144be168c0dSopenharmony_ci-  int SetPreferAxis(const CNodePtr &cnode, size_t index, const std::shared_ptr<ops::DynamicQuant> &dynamic_primitive);
1145be168c0dSopenharmony_ci+  int SetPreferAxes(const CNodePtr &cnode, size_t index, const std::shared_ptr<ops::DynamicQuant> &dynamic_primitive);
1146be168c0dSopenharmony_ci 
1147be168c0dSopenharmony_ci   int SetCastNodeAbstract(const CNodePtr &cnode, const AnfNodePtr &input_node, const CNodePtr &cast_cnode);
1148be168c0dSopenharmony_ci 
1149be168c0dSopenharmony_ci-- 
1150be168c0dSopenharmony_ci2.25.1
1151be168c0dSopenharmony_ci
1152