1be168c0dSopenharmony_ciFrom f0daa7ef13e1741f8bcd1dfad7517a4a8ae4a209 Mon Sep 17 00:00:00 2001 2be168c0dSopenharmony_ciFrom: xuanyue <xuanyue@huawei.com> 3be168c0dSopenharmony_ciDate: Thu, 21 Mar 2024 19:38:34 +0800 4be168c0dSopenharmony_ciSubject: [PATCH] DynamicQuant strategy optimization 5be168c0dSopenharmony_ci 6be168c0dSopenharmony_ci--- 7be168c0dSopenharmony_ci .../kernel/nnacl/dynamic_quant_parameter.h | 7 +- 8be168c0dSopenharmony_ci mindspore/core/ops/dynamic_quant.cc | 12 + 9be168c0dSopenharmony_ci mindspore/core/ops/dynamic_quant.h | 10 + 10be168c0dSopenharmony_ci mindspore/core/ops/op_name.h | 1 + 11be168c0dSopenharmony_ci mindspore/lite/schema/inner/ops_generated.h | 53 +++- 12be168c0dSopenharmony_ci mindspore/lite/schema/ops.fbs | 1 + 13be168c0dSopenharmony_ci mindspore/lite/schema/ops_generated.h | 34 +- 14be168c0dSopenharmony_ci mindspore/lite/src/common/ops/ops_def.cc | 1 + 15be168c0dSopenharmony_ci .../ops/populate/dynamic_quant_populate.cc | 24 +- 16be168c0dSopenharmony_ci .../litert/kernel/cpu/int8/dynamic_quant.cc | 299 +++++++++++------- 17be168c0dSopenharmony_ci .../litert/kernel/cpu/int8/dynamic_quant.h | 59 ++-- 18be168c0dSopenharmony_ci .../cpu/int8/matmul_dynamic_base_int8.cc | 43 ++- 19be168c0dSopenharmony_ci .../cpu/int8/matmul_dynamic_base_int8.h | 7 +- 20be168c0dSopenharmony_ci .../quantizer/insert_quant_node_manager.cc | 27 +- 21be168c0dSopenharmony_ci .../quantizer/insert_quant_node_manager.h | 5 +- 22be168c0dSopenharmony_ci 15 files changed, 395 insertions(+), 188 deletions(-) 23be168c0dSopenharmony_ci 24be168c0dSopenharmony_cidiff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/dynamic_quant_parameter.h b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/dynamic_quant_parameter.h 25be168c0dSopenharmony_ciindex aaabe041..1fc166cb 100644 26be168c0dSopenharmony_ci--- a/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/dynamic_quant_parameter.h 27be168c0dSopenharmony_ci+++ 
b/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/dynamic_quant_parameter.h 28be168c0dSopenharmony_ci@@ -21,10 +21,9 @@ 29be168c0dSopenharmony_ci typedef struct DynamicQuantParameter { 30be168c0dSopenharmony_ci OpParameter op_parameter_; 31be168c0dSopenharmony_ci bool symmetric_; 32be168c0dSopenharmony_ci- int64_t dst_type_; 33be168c0dSopenharmony_ci- bool activation_perchannel_; 34be168c0dSopenharmony_ci- int64_t prefer_axis_; 35be168c0dSopenharmony_ci- bool transpose_; 36be168c0dSopenharmony_ci+ int dst_type_; 37be168c0dSopenharmony_ci+ int axis_num_; 38be168c0dSopenharmony_ci+ int prefer_axes_[MAX_SHAPE_SIZE]; 39be168c0dSopenharmony_ci } DynamicQuantParameter; 40be168c0dSopenharmony_ci 41be168c0dSopenharmony_ci #endif // NNACL_DYNAMIC_QUANT_PARAMETER_H_ 42be168c0dSopenharmony_cidiff --git a/mindspore/core/ops/dynamic_quant.cc b/mindspore/core/ops/dynamic_quant.cc 43be168c0dSopenharmony_ciindex 63ea0be5..1949f809 100644 44be168c0dSopenharmony_ci--- a/mindspore/core/ops/dynamic_quant.cc 45be168c0dSopenharmony_ci+++ b/mindspore/core/ops/dynamic_quant.cc 46be168c0dSopenharmony_ci@@ -48,6 +48,18 @@ bool DynamicQuant::get_transpose() const { 47be168c0dSopenharmony_ci auto value_ptr = this->GetAttr(kTrans); 48be168c0dSopenharmony_ci return GetValue<bool>(value_ptr); 49be168c0dSopenharmony_ci } 50be168c0dSopenharmony_ci+ 51be168c0dSopenharmony_ci+void DynamicQuant::set_prefer_axes(const std::vector<int> &prefer_axes) { 52be168c0dSopenharmony_ci+ (void)AddAttr(kPreferAxes, api::MakeValue(prefer_axes)); 53be168c0dSopenharmony_ci+} 54be168c0dSopenharmony_ci+ 55be168c0dSopenharmony_ci+std::vector<int> DynamicQuant::get_prefer_axes() const { 56be168c0dSopenharmony_ci+ auto value_ptr = GetAttr(kPreferAxes); 57be168c0dSopenharmony_ci+ auto tmp = GetValue<std::vector<int64_t>>(value_ptr); 58be168c0dSopenharmony_ci+ std::vector<int> res(tmp.begin(), tmp.end()); 59be168c0dSopenharmony_ci+ return res; 60be168c0dSopenharmony_ci+} 61be168c0dSopenharmony_ci+ 62be168c0dSopenharmony_ci 
void DynamicQuant::Init(const bool symmetric, const int64_t dst_type) { 63be168c0dSopenharmony_ci this->set_symmetric(symmetric); 64be168c0dSopenharmony_ci this->set_dst_type(dst_type); 65be168c0dSopenharmony_cidiff --git a/mindspore/core/ops/dynamic_quant.h b/mindspore/core/ops/dynamic_quant.h 66be168c0dSopenharmony_ciindex 4cb446c3..963dfb37 100644 67be168c0dSopenharmony_ci--- a/mindspore/core/ops/dynamic_quant.h 68be168c0dSopenharmony_ci+++ b/mindspore/core/ops/dynamic_quant.h 69be168c0dSopenharmony_ci@@ -91,6 +91,16 @@ class MIND_API DynamicQuant : public BaseOperator { 70be168c0dSopenharmony_ci /// 71be168c0dSopenharmony_ci /// \return Whether transpose matrix. 72be168c0dSopenharmony_ci bool get_transpose() const; 73be168c0dSopenharmony_ci+ 74be168c0dSopenharmony_ci+ /// \brief Method to set prefer_axis attribute. 75be168c0dSopenharmony_ci+ /// 76be168c0dSopenharmony_ci+ /// \param[in] prefer_axis Define the preferred axis. 77be168c0dSopenharmony_ci+ void set_prefer_axes(const std::vector<int> &prefer_axes); 78be168c0dSopenharmony_ci+ 79be168c0dSopenharmony_ci+ /// \brief Method to get prefer_axis attribute. 80be168c0dSopenharmony_ci+ /// 81be168c0dSopenharmony_ci+ /// \return the preferred axis. 
82be168c0dSopenharmony_ci+ std::vector<int> get_prefer_axes() const; 83be168c0dSopenharmony_ci }; 84be168c0dSopenharmony_ci MIND_API abstract::AbstractBasePtr DynamicQuantInfer(const abstract::AnalysisEnginePtr &, const PrimitivePtr &primitive, 85be168c0dSopenharmony_ci const std::vector<abstract::AbstractBasePtr> &input_args); 86be168c0dSopenharmony_cidiff --git a/mindspore/core/ops/op_name.h b/mindspore/core/ops/op_name.h 87be168c0dSopenharmony_ciindex ad9066e7..1282e6ea 100644 88be168c0dSopenharmony_ci--- a/mindspore/core/ops/op_name.h 89be168c0dSopenharmony_ci+++ b/mindspore/core/ops/op_name.h 90be168c0dSopenharmony_ci@@ -410,6 +410,7 @@ constexpr auto KCurrChunkIndex = "curr_chunk_index"; 91be168c0dSopenharmony_ci constexpr auto KCurrBitCount = "curr_bit_count"; 92be168c0dSopenharmony_ci constexpr auto KTableLog = "table_log"; 93be168c0dSopenharmony_ci constexpr auto kIgnoreIndex = "ignore_index"; 94be168c0dSopenharmony_ci+constexpr auto kPreferAxes = "prefer_axes"; 95be168c0dSopenharmony_ci 96be168c0dSopenharmony_ci constexpr size_t kInputIndex0 = 0; 97be168c0dSopenharmony_ci constexpr size_t kInputIndex1 = 1; 98be168c0dSopenharmony_cidiff --git a/mindspore/lite/schema/inner/ops_generated.h b/mindspore/lite/schema/inner/ops_generated.h 99be168c0dSopenharmony_ciindex 6c861aa5..b595f4b2 100644 100be168c0dSopenharmony_ci--- a/mindspore/lite/schema/inner/ops_generated.h 101be168c0dSopenharmony_ci+++ b/mindspore/lite/schema/inner/ops_generated.h 102be168c0dSopenharmony_ci@@ -19790,6 +19790,7 @@ struct DynamicQuantT : public flatbuffers::NativeTable { 103be168c0dSopenharmony_ci bool activation_channel = false; 104be168c0dSopenharmony_ci int64_t prefer_axis = 0; 105be168c0dSopenharmony_ci bool transpose = false; 106be168c0dSopenharmony_ci+ std::vector<int32_t> prefer_axes{}; 107be168c0dSopenharmony_ci }; 108be168c0dSopenharmony_ci 109be168c0dSopenharmony_ci struct DynamicQuant FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { 110be168c0dSopenharmony_ci@@ 
-19803,7 +19804,8 @@ struct DynamicQuant FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { 111be168c0dSopenharmony_ci VT_DST_TYPE = 6, 112be168c0dSopenharmony_ci VT_ACTIVATION_CHANNEL = 8, 113be168c0dSopenharmony_ci VT_PREFER_AXIS = 10, 114be168c0dSopenharmony_ci- VT_TRANSPOSE = 12 115be168c0dSopenharmony_ci+ VT_TRANSPOSE = 12, 116be168c0dSopenharmony_ci+ VT_PREFER_AXES = 14 117be168c0dSopenharmony_ci }; 118be168c0dSopenharmony_ci bool symmetric() const { 119be168c0dSopenharmony_ci return GetField<uint8_t>(VT_SYMMETRIC, 0) != 0; 120be168c0dSopenharmony_ci@@ -19835,6 +19837,12 @@ struct DynamicQuant FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { 121be168c0dSopenharmony_ci bool mutate_transpose(bool _transpose) { 122be168c0dSopenharmony_ci return SetField<uint8_t>(VT_TRANSPOSE, static_cast<uint8_t>(_transpose), 0); 123be168c0dSopenharmony_ci } 124be168c0dSopenharmony_ci+ const flatbuffers::Vector<int32_t> *prefer_axes() const { 125be168c0dSopenharmony_ci+ return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_PREFER_AXES); 126be168c0dSopenharmony_ci+ } 127be168c0dSopenharmony_ci+ flatbuffers::Vector<int32_t> *mutable_prefer_axes() { 128be168c0dSopenharmony_ci+ return GetPointer<flatbuffers::Vector<int32_t> *>(VT_PREFER_AXES); 129be168c0dSopenharmony_ci+ } 130be168c0dSopenharmony_ci bool Verify(flatbuffers::Verifier &verifier) const { 131be168c0dSopenharmony_ci return VerifyTableStart(verifier) && 132be168c0dSopenharmony_ci VerifyField<uint8_t>(verifier, VT_SYMMETRIC) && 133be168c0dSopenharmony_ci@@ -19842,6 +19850,8 @@ struct DynamicQuant FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { 134be168c0dSopenharmony_ci VerifyField<uint8_t>(verifier, VT_ACTIVATION_CHANNEL) && 135be168c0dSopenharmony_ci VerifyField<int64_t>(verifier, VT_PREFER_AXIS) && 136be168c0dSopenharmony_ci VerifyField<uint8_t>(verifier, VT_TRANSPOSE) && 137be168c0dSopenharmony_ci+ VerifyOffset(verifier, VT_PREFER_AXES) && 138be168c0dSopenharmony_ci+ 
verifier.VerifyVector(prefer_axes()) && 139be168c0dSopenharmony_ci verifier.EndTable(); 140be168c0dSopenharmony_ci } 141be168c0dSopenharmony_ci DynamicQuantT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; 142be168c0dSopenharmony_ci@@ -19868,6 +19878,9 @@ struct DynamicQuantBuilder { 143be168c0dSopenharmony_ci void add_transpose(bool transpose) { 144be168c0dSopenharmony_ci fbb_.AddElement<uint8_t>(DynamicQuant::VT_TRANSPOSE, static_cast<uint8_t>(transpose), 0); 145be168c0dSopenharmony_ci } 146be168c0dSopenharmony_ci+ void add_prefer_axes(flatbuffers::Offset<flatbuffers::Vector<int32_t>> prefer_axes) { 147be168c0dSopenharmony_ci+ fbb_.AddOffset(DynamicQuant::VT_PREFER_AXES, prefer_axes); 148be168c0dSopenharmony_ci+ } 149be168c0dSopenharmony_ci explicit DynamicQuantBuilder(flatbuffers::FlatBufferBuilder &_fbb) 150be168c0dSopenharmony_ci : fbb_(_fbb) { 151be168c0dSopenharmony_ci start_ = fbb_.StartTable(); 152be168c0dSopenharmony_ci@@ -19885,16 +19898,37 @@ inline flatbuffers::Offset<DynamicQuant> CreateDynamicQuant( 153be168c0dSopenharmony_ci int64_t dst_type = 32LL, 154be168c0dSopenharmony_ci bool activation_channel = false, 155be168c0dSopenharmony_ci int64_t prefer_axis = 0, 156be168c0dSopenharmony_ci- bool transpose = false) { 157be168c0dSopenharmony_ci+ bool transpose = false, 158be168c0dSopenharmony_ci+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> prefer_axes = 0) { 159be168c0dSopenharmony_ci DynamicQuantBuilder builder_(_fbb); 160be168c0dSopenharmony_ci builder_.add_prefer_axis(prefer_axis); 161be168c0dSopenharmony_ci builder_.add_dst_type(dst_type); 162be168c0dSopenharmony_ci+ builder_.add_prefer_axes(prefer_axes); 163be168c0dSopenharmony_ci builder_.add_transpose(transpose); 164be168c0dSopenharmony_ci builder_.add_activation_channel(activation_channel); 165be168c0dSopenharmony_ci builder_.add_symmetric(symmetric); 166be168c0dSopenharmony_ci return builder_.Finish(); 167be168c0dSopenharmony_ci } 168be168c0dSopenharmony_ci 
169be168c0dSopenharmony_ci+inline flatbuffers::Offset<DynamicQuant> CreateDynamicQuantDirect( 170be168c0dSopenharmony_ci+ flatbuffers::FlatBufferBuilder &_fbb, 171be168c0dSopenharmony_ci+ bool symmetric = false, 172be168c0dSopenharmony_ci+ int64_t dst_type = 32LL, 173be168c0dSopenharmony_ci+ bool activation_channel = false, 174be168c0dSopenharmony_ci+ int64_t prefer_axis = 0, 175be168c0dSopenharmony_ci+ bool transpose = false, 176be168c0dSopenharmony_ci+ const std::vector<int32_t> *prefer_axes = nullptr) { 177be168c0dSopenharmony_ci+ auto prefer_axes__ = prefer_axes ? _fbb.CreateVector<int32_t>(*prefer_axes) : 0; 178be168c0dSopenharmony_ci+ return mindspore::schema::CreateDynamicQuant( 179be168c0dSopenharmony_ci+ _fbb, 180be168c0dSopenharmony_ci+ symmetric, 181be168c0dSopenharmony_ci+ dst_type, 182be168c0dSopenharmony_ci+ activation_channel, 183be168c0dSopenharmony_ci+ prefer_axis, 184be168c0dSopenharmony_ci+ transpose, 185be168c0dSopenharmony_ci+ prefer_axes__); 186be168c0dSopenharmony_ci+} 187be168c0dSopenharmony_ci+ 188be168c0dSopenharmony_ci flatbuffers::Offset<DynamicQuant> CreateDynamicQuant(flatbuffers::FlatBufferBuilder &_fbb, const DynamicQuantT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); 189be168c0dSopenharmony_ci 190be168c0dSopenharmony_ci struct LSTMGradDataT : public flatbuffers::NativeTable { 191be168c0dSopenharmony_ci@@ -26903,6 +26937,7 @@ inline void DynamicQuant::UnPackTo(DynamicQuantT *_o, const flatbuffers::resolve 192be168c0dSopenharmony_ci { auto _e = activation_channel(); _o->activation_channel = _e; } 193be168c0dSopenharmony_ci { auto _e = prefer_axis(); _o->prefer_axis = _e; } 194be168c0dSopenharmony_ci { auto _e = transpose(); _o->transpose = _e; } 195be168c0dSopenharmony_ci+ { auto _e = prefer_axes(); if (_e) { _o->prefer_axes.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->prefer_axes[_i] = _e->Get(_i); } } } 196be168c0dSopenharmony_ci } 197be168c0dSopenharmony_ci 
198be168c0dSopenharmony_ci inline flatbuffers::Offset<DynamicQuant> DynamicQuant::Pack(flatbuffers::FlatBufferBuilder &_fbb, const DynamicQuantT* _o, const flatbuffers::rehasher_function_t *_rehasher) { 199be168c0dSopenharmony_ci@@ -26918,13 +26953,15 @@ inline flatbuffers::Offset<DynamicQuant> CreateDynamicQuant(flatbuffers::FlatBuf 200be168c0dSopenharmony_ci auto _activation_channel = _o->activation_channel; 201be168c0dSopenharmony_ci auto _prefer_axis = _o->prefer_axis; 202be168c0dSopenharmony_ci auto _transpose = _o->transpose; 203be168c0dSopenharmony_ci+ auto _prefer_axes = _o->prefer_axes.size() ? _fbb.CreateVector(_o->prefer_axes) : 0; 204be168c0dSopenharmony_ci return mindspore::schema::CreateDynamicQuant( 205be168c0dSopenharmony_ci _fbb, 206be168c0dSopenharmony_ci _symmetric, 207be168c0dSopenharmony_ci _dst_type, 208be168c0dSopenharmony_ci _activation_channel, 209be168c0dSopenharmony_ci _prefer_axis, 210be168c0dSopenharmony_ci- _transpose); 211be168c0dSopenharmony_ci+ _transpose, 212be168c0dSopenharmony_ci+ _prefer_axes); 213be168c0dSopenharmony_ci } 214be168c0dSopenharmony_ci 215be168c0dSopenharmony_ci inline LSTMGradDataT *LSTMGradData::UnPack(const flatbuffers::resolver_function_t *_resolver) const { 216be168c0dSopenharmony_ci@@ -33509,10 +33546,11 @@ inline const flatbuffers::TypeTable *LSTMTypeTable() { 217be168c0dSopenharmony_ci { flatbuffers::ET_LONG, 0, -1 }, 218be168c0dSopenharmony_ci { flatbuffers::ET_FLOAT, 0, -1 }, 219be168c0dSopenharmony_ci { flatbuffers::ET_FLOAT, 0, -1 }, 220be168c0dSopenharmony_ci- { flatbuffers::ET_FLOAT, 0, -1 } 221be168c0dSopenharmony_ci+ { flatbuffers::ET_FLOAT, 0, -1 }, 222be168c0dSopenharmony_ci+ { flatbuffers::ET_LONG, 0, -1 } 223be168c0dSopenharmony_ci }; 224be168c0dSopenharmony_ci static const flatbuffers::TypeTable tt = { 225be168c0dSopenharmony_ci- flatbuffers::ST_TABLE, 9, type_codes, nullptr, nullptr, nullptr, nullptr 226be168c0dSopenharmony_ci+ flatbuffers::ST_TABLE, 10, type_codes, nullptr, nullptr, nullptr, 
nullptr 227be168c0dSopenharmony_ci }; 228be168c0dSopenharmony_ci return &tt; 229be168c0dSopenharmony_ci } 230be168c0dSopenharmony_ci@@ -34744,10 +34782,11 @@ inline const flatbuffers::TypeTable *DynamicQuantTypeTable() { 231be168c0dSopenharmony_ci { flatbuffers::ET_LONG, 0, -1 }, 232be168c0dSopenharmony_ci { flatbuffers::ET_BOOL, 0, -1 }, 233be168c0dSopenharmony_ci { flatbuffers::ET_LONG, 0, -1 }, 234be168c0dSopenharmony_ci- { flatbuffers::ET_BOOL, 0, -1 } 235be168c0dSopenharmony_ci+ { flatbuffers::ET_BOOL, 0, -1 }, 236be168c0dSopenharmony_ci+ { flatbuffers::ET_INT, 1, -1 } 237be168c0dSopenharmony_ci }; 238be168c0dSopenharmony_ci static const flatbuffers::TypeTable tt = { 239be168c0dSopenharmony_ci- flatbuffers::ST_TABLE, 5, type_codes, nullptr, nullptr, nullptr, nullptr 240be168c0dSopenharmony_ci+ flatbuffers::ST_TABLE, 6, type_codes, nullptr, nullptr, nullptr, nullptr 241be168c0dSopenharmony_ci }; 242be168c0dSopenharmony_ci return &tt; 243be168c0dSopenharmony_ci } 244be168c0dSopenharmony_cidiff --git a/mindspore/lite/schema/ops.fbs b/mindspore/lite/schema/ops.fbs 245be168c0dSopenharmony_ciindex 920c0d31..153a21d0 100644 246be168c0dSopenharmony_ci--- a/mindspore/lite/schema/ops.fbs 247be168c0dSopenharmony_ci+++ b/mindspore/lite/schema/ops.fbs 248be168c0dSopenharmony_ci@@ -1250,6 +1250,7 @@ table DynamicQuant { 249be168c0dSopenharmony_ci activation_channel: bool = false; 250be168c0dSopenharmony_ci prefer_axis: long = 0; 251be168c0dSopenharmony_ci transpose: bool = false; 252be168c0dSopenharmony_ci+ prefer_axes: [int]; 253be168c0dSopenharmony_ci } 254be168c0dSopenharmony_ci 255be168c0dSopenharmony_ci table LSTMGradData { 256be168c0dSopenharmony_cidiff --git a/mindspore/lite/schema/ops_generated.h b/mindspore/lite/schema/ops_generated.h 257be168c0dSopenharmony_ciindex 8d387e9d..d2d89bff 100644 258be168c0dSopenharmony_ci--- a/mindspore/lite/schema/ops_generated.h 259be168c0dSopenharmony_ci+++ b/mindspore/lite/schema/ops_generated.h 260be168c0dSopenharmony_ci@@ 
-13118,7 +13118,8 @@ struct DynamicQuant FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { 261be168c0dSopenharmony_ci VT_DST_TYPE = 6, 262be168c0dSopenharmony_ci VT_ACTIVATION_CHANNEL = 8, 263be168c0dSopenharmony_ci VT_PREFER_AXIS = 10, 264be168c0dSopenharmony_ci- VT_TRANSPOSE = 12 265be168c0dSopenharmony_ci+ VT_TRANSPOSE = 12, 266be168c0dSopenharmony_ci+ VT_PREFER_AXES = 14 267be168c0dSopenharmony_ci }; 268be168c0dSopenharmony_ci bool symmetric() const { 269be168c0dSopenharmony_ci return GetField<uint8_t>(VT_SYMMETRIC, 0) != 0; 270be168c0dSopenharmony_ci@@ -13135,6 +13136,9 @@ struct DynamicQuant FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { 271be168c0dSopenharmony_ci bool transpose() const { 272be168c0dSopenharmony_ci return GetField<uint8_t>(VT_TRANSPOSE, 0) != 0; 273be168c0dSopenharmony_ci } 274be168c0dSopenharmony_ci+ const flatbuffers::Vector<int32_t> *prefer_axes() const { 275be168c0dSopenharmony_ci+ return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_PREFER_AXES); 276be168c0dSopenharmony_ci+ } 277be168c0dSopenharmony_ci bool Verify(flatbuffers::Verifier &verifier) const { 278be168c0dSopenharmony_ci return VerifyTableStart(verifier) && 279be168c0dSopenharmony_ci VerifyField<uint8_t>(verifier, VT_SYMMETRIC) && 280be168c0dSopenharmony_ci@@ -13142,6 +13146,8 @@ struct DynamicQuant FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { 281be168c0dSopenharmony_ci VerifyField<uint8_t>(verifier, VT_ACTIVATION_CHANNEL) && 282be168c0dSopenharmony_ci VerifyField<int64_t>(verifier, VT_PREFER_AXIS) && 283be168c0dSopenharmony_ci VerifyField<uint8_t>(verifier, VT_TRANSPOSE) && 284be168c0dSopenharmony_ci+ VerifyOffset(verifier, VT_PREFER_AXES) && 285be168c0dSopenharmony_ci+ verifier.VerifyVector(prefer_axes()) && 286be168c0dSopenharmony_ci verifier.EndTable(); 287be168c0dSopenharmony_ci } 288be168c0dSopenharmony_ci }; 289be168c0dSopenharmony_ci@@ -13165,6 +13171,9 @@ struct DynamicQuantBuilder { 290be168c0dSopenharmony_ci void add_transpose(bool 
transpose) { 291be168c0dSopenharmony_ci fbb_.AddElement<uint8_t>(DynamicQuant::VT_TRANSPOSE, static_cast<uint8_t>(transpose), 0); 292be168c0dSopenharmony_ci } 293be168c0dSopenharmony_ci+ void add_prefer_axes(flatbuffers::Offset<flatbuffers::Vector<int32_t>> prefer_axes) { 294be168c0dSopenharmony_ci+ fbb_.AddOffset(DynamicQuant::VT_PREFER_AXES, prefer_axes); 295be168c0dSopenharmony_ci+ } 296be168c0dSopenharmony_ci explicit DynamicQuantBuilder(flatbuffers::FlatBufferBuilder &_fbb) 297be168c0dSopenharmony_ci : fbb_(_fbb) { 298be168c0dSopenharmony_ci start_ = fbb_.StartTable(); 299be168c0dSopenharmony_ci@@ -13182,16 +13191,37 @@ inline flatbuffers::Offset<DynamicQuant> CreateDynamicQuant( 300be168c0dSopenharmony_ci int64_t dst_type = 32LL, 301be168c0dSopenharmony_ci bool activation_channel = false, 302be168c0dSopenharmony_ci int64_t prefer_axis = 0, 303be168c0dSopenharmony_ci- bool transpose = false) { 304be168c0dSopenharmony_ci+ bool transpose = false, 305be168c0dSopenharmony_ci+ flatbuffers::Offset<flatbuffers::Vector<int32_t>> prefer_axes = 0) { 306be168c0dSopenharmony_ci DynamicQuantBuilder builder_(_fbb); 307be168c0dSopenharmony_ci builder_.add_prefer_axis(prefer_axis); 308be168c0dSopenharmony_ci builder_.add_dst_type(dst_type); 309be168c0dSopenharmony_ci+ builder_.add_prefer_axes(prefer_axes); 310be168c0dSopenharmony_ci builder_.add_transpose(transpose); 311be168c0dSopenharmony_ci builder_.add_activation_channel(activation_channel); 312be168c0dSopenharmony_ci builder_.add_symmetric(symmetric); 313be168c0dSopenharmony_ci return builder_.Finish(); 314be168c0dSopenharmony_ci } 315be168c0dSopenharmony_ci 316be168c0dSopenharmony_ci+inline flatbuffers::Offset<DynamicQuant> CreateDynamicQuantDirect( 317be168c0dSopenharmony_ci+ flatbuffers::FlatBufferBuilder &_fbb, 318be168c0dSopenharmony_ci+ bool symmetric = false, 319be168c0dSopenharmony_ci+ int64_t dst_type = 32LL, 320be168c0dSopenharmony_ci+ bool activation_channel = false, 321be168c0dSopenharmony_ci+ int64_t 
prefer_axis = 0, 322be168c0dSopenharmony_ci+ bool transpose = false, 323be168c0dSopenharmony_ci+ const std::vector<int32_t> *prefer_axes = nullptr) { 324be168c0dSopenharmony_ci+ auto prefer_axes__ = prefer_axes ? _fbb.CreateVector<int32_t>(*prefer_axes) : 0; 325be168c0dSopenharmony_ci+ return mindspore::schema::CreateDynamicQuant( 326be168c0dSopenharmony_ci+ _fbb, 327be168c0dSopenharmony_ci+ symmetric, 328be168c0dSopenharmony_ci+ dst_type, 329be168c0dSopenharmony_ci+ activation_channel, 330be168c0dSopenharmony_ci+ prefer_axis, 331be168c0dSopenharmony_ci+ transpose, 332be168c0dSopenharmony_ci+ prefer_axes__); 333be168c0dSopenharmony_ci+} 334be168c0dSopenharmony_ci+ 335be168c0dSopenharmony_ci struct LSTMGradData FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { 336be168c0dSopenharmony_ci typedef LSTMGradDataBuilder Builder; 337be168c0dSopenharmony_ci enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { 338be168c0dSopenharmony_cidiff --git a/mindspore/lite/src/common/ops/ops_def.cc b/mindspore/lite/src/common/ops/ops_def.cc 339be168c0dSopenharmony_ciindex baa2497a..1e973362 100644 340be168c0dSopenharmony_ci--- a/mindspore/lite/src/common/ops/ops_def.cc 341be168c0dSopenharmony_ci+++ b/mindspore/lite/src/common/ops/ops_def.cc 342be168c0dSopenharmony_ci@@ -1254,6 +1254,7 @@ OP_ATTR_WITH_VALUE(dst_type, long, 32) 343be168c0dSopenharmony_ci OP_ATTR_WITH_VALUE(activation_channel, bool, false) 344be168c0dSopenharmony_ci OP_ATTR_WITH_VALUE(prefer_axis, long, 0) 345be168c0dSopenharmony_ci OP_ATTR_WITH_VALUE(transpose, bool, false) 346be168c0dSopenharmony_ci+OP_ATTR(prefer_axes, [int]) 347be168c0dSopenharmony_ci OP_SCHEMA_DEF_END(DynamicQuant) 348be168c0dSopenharmony_ci 349be168c0dSopenharmony_ci OP_SCHEMA_DEF(LSTMGradData) 350be168c0dSopenharmony_cidiff --git a/mindspore/lite/src/common/ops/populate/dynamic_quant_populate.cc b/mindspore/lite/src/common/ops/populate/dynamic_quant_populate.cc 351be168c0dSopenharmony_ciindex 3566f082..8e393320 100644 
352be168c0dSopenharmony_ci--- a/mindspore/lite/src/common/ops/populate/dynamic_quant_populate.cc 353be168c0dSopenharmony_ci+++ b/mindspore/lite/src/common/ops/populate/dynamic_quant_populate.cc 354be168c0dSopenharmony_ci@@ -36,11 +36,27 @@ OpParameter *PopulateDynamicQuantParameter(const void *prim) { 355be168c0dSopenharmony_ci memset(param, 0, sizeof(DynamicQuantParameter)); 356be168c0dSopenharmony_ci 357be168c0dSopenharmony_ci param->op_parameter_.type_ = primitive->value_type(); 358be168c0dSopenharmony_ci- param->dst_type_ = value->dst_type(); 359be168c0dSopenharmony_ci+ param->dst_type_ = static_cast<int>(value->dst_type()); 360be168c0dSopenharmony_ci param->symmetric_ = value->symmetric(); 361be168c0dSopenharmony_ci- param->activation_perchannel_ = value->activation_channel(); 362be168c0dSopenharmony_ci- param->prefer_axis_ = value->prefer_axis(); 363be168c0dSopenharmony_ci- param->transpose_ = value->transpose(); 364be168c0dSopenharmony_ci+ auto prefer_axes = value->prefer_axes(); 365be168c0dSopenharmony_ci+ if (prefer_axes != nullptr) { 366be168c0dSopenharmony_ci+ param->axis_num_ = static_cast<int>(prefer_axes->size()); 367be168c0dSopenharmony_ci+ if (param->axis_num_ > MAX_SHAPE_SIZE) { 368be168c0dSopenharmony_ci+ MS_LOG(ERROR) << "Dynamic quant's prefer_axes's number is more than 8."; 369be168c0dSopenharmony_ci+ free(param); 370be168c0dSopenharmony_ci+ return nullptr; 371be168c0dSopenharmony_ci+ } 372be168c0dSopenharmony_ci+ for (int i = 0; i < param->axis_num_; ++i) { 373be168c0dSopenharmony_ci+ param->prefer_axes_[i] = prefer_axes->Get(i); 374be168c0dSopenharmony_ci+ } 375be168c0dSopenharmony_ci+ return reinterpret_cast<OpParameter *>(param); 376be168c0dSopenharmony_ci+ } 377be168c0dSopenharmony_ci+ auto activation_channel = value->activation_channel(); 378be168c0dSopenharmony_ci+ if (!activation_channel) { 379be168c0dSopenharmony_ci+ return reinterpret_cast<OpParameter *>(param); 380be168c0dSopenharmony_ci+ } 381be168c0dSopenharmony_ci+ 
param->axis_num_ = 1; 382be168c0dSopenharmony_ci+ param->prefer_axes_[0] = static_cast<int>(value->prefer_axis()); 383be168c0dSopenharmony_ci return reinterpret_cast<OpParameter *>(param); 384be168c0dSopenharmony_ci } 385be168c0dSopenharmony_ci REG_POPULATE(PrimitiveType_DynamicQuant, PopulateDynamicQuantParameter, SCHEMA_CUR); 386be168c0dSopenharmony_cidiff --git a/mindspore/lite/src/litert/kernel/cpu/int8/dynamic_quant.cc b/mindspore/lite/src/litert/kernel/cpu/int8/dynamic_quant.cc 387be168c0dSopenharmony_ciindex e9404ef2..acc43c97 100644 388be168c0dSopenharmony_ci--- a/mindspore/lite/src/litert/kernel/cpu/int8/dynamic_quant.cc 389be168c0dSopenharmony_ci+++ b/mindspore/lite/src/litert/kernel/cpu/int8/dynamic_quant.cc 390be168c0dSopenharmony_ci@@ -14,14 +14,16 @@ 391be168c0dSopenharmony_ci * limitations under the License. 392be168c0dSopenharmony_ci */ 393be168c0dSopenharmony_ci #include "src/litert/kernel/cpu/int8/dynamic_quant.h" 394be168c0dSopenharmony_ci+#include <set> 395be168c0dSopenharmony_ci #include <vector> 396be168c0dSopenharmony_ci #include <algorithm> 397be168c0dSopenharmony_ci #include "src/litert/kernel_registry.h" 398be168c0dSopenharmony_ci #include "schema/model_generated.h" 399be168c0dSopenharmony_ci #include "include/errorcode.h" 400be168c0dSopenharmony_ci-#include "nnacl/dynamic_quant_parameter.h" 401be168c0dSopenharmony_ci #include "nnacl/int8/dynamic_quant_int8.h" 402be168c0dSopenharmony_ci #include "nnacl/int8/quant_dtype_cast_int8.h" 403be168c0dSopenharmony_ci+#include "nnacl/fp32/transpose_fp32.h" 404be168c0dSopenharmony_ci+#include "nnacl/int8/transpose_int8.h" 405be168c0dSopenharmony_ci 406be168c0dSopenharmony_ci using mindspore::kernel::KERNEL_ARCH; 407be168c0dSopenharmony_ci using mindspore::lite::KernelRegistrar; 408be168c0dSopenharmony_ci@@ -44,19 +46,10 @@ int DynamicQuantCPUKernel::Prepare() { 409be168c0dSopenharmony_ci CHECK_NULL_RETURN(in_tensor); 410be168c0dSopenharmony_ci auto out_tensor = out_tensors_.front(); 
411be168c0dSopenharmony_ci CHECK_NULL_RETURN(out_tensor); 412be168c0dSopenharmony_ci- auto param = reinterpret_cast<DynamicQuantParameter *>(op_parameter_); 413be168c0dSopenharmony_ci- CHECK_NULL_RETURN(param); 414be168c0dSopenharmony_ci- src_dtype_ = in_tensor->data_type(); 415be168c0dSopenharmony_ci- dst_dtype_ = param->dst_type_; 416be168c0dSopenharmony_ci- symmetric_ = param->symmetric_; 417be168c0dSopenharmony_ci- activation_perchannel_ = param->activation_perchannel_; 418be168c0dSopenharmony_ci- prefer_axis_ = param->prefer_axis_; 419be168c0dSopenharmony_ci- transpose_ = param->transpose_; 420be168c0dSopenharmony_ci- if (out_tensor->data_type() != dst_dtype_) { 421be168c0dSopenharmony_ci- MS_LOG(ERROR) << "param data type and tensor data type do not match."; 422be168c0dSopenharmony_ci- return RET_ERROR; 423be168c0dSopenharmony_ci- } 424be168c0dSopenharmony_ci- 425be168c0dSopenharmony_ci+ param_ = reinterpret_cast<DynamicQuantParameter *>(op_parameter_); 426be168c0dSopenharmony_ci+ CHECK_NULL_RETURN(param_); 427be168c0dSopenharmony_ci+ MS_CHECK_TRUE_MSG(param_->dst_type_ == out_tensor->data_type(), lite::RET_ERROR, 428be168c0dSopenharmony_ci+ "param data type and tensor data type do not match."); 429be168c0dSopenharmony_ci if (!InferShapeDone()) { 430be168c0dSopenharmony_ci return RET_OK; 431be168c0dSopenharmony_ci } 432be168c0dSopenharmony_ci@@ -65,71 +58,86 @@ int DynamicQuantCPUKernel::Prepare() { 433be168c0dSopenharmony_ci 434be168c0dSopenharmony_ci int DynamicQuantCPUKernel::ReSize() { 435be168c0dSopenharmony_ci auto in_tensor = in_tensors_.front(); 436be168c0dSopenharmony_ci- num_unit_ = static_cast<int>(in_tensor->ElementsNum()); 437be168c0dSopenharmony_ci- if (num_unit_ < kMinNums) { 438be168c0dSopenharmony_ci- thread_n_num_ = 1; 439be168c0dSopenharmony_ci+ auto ele_num = static_cast<int>(in_tensor->ElementsNum()); 440be168c0dSopenharmony_ci+ auto shape = in_tensor->shape(); 441be168c0dSopenharmony_ci+ int segment_num = 1; 442be168c0dSopenharmony_ci+ 
if (param_->axis_num_ == 0) { 443be168c0dSopenharmony_ci+ segment_num = MSMIN(kBucketNums, ele_num / kMinNums); 444be168c0dSopenharmony_ci } else { 445be168c0dSopenharmony_ci- thread_n_num_ = MSMIN(thread_num_, num_unit_); 446be168c0dSopenharmony_ci- // Limit for 8 thread 447be168c0dSopenharmony_ci- thread_n_num_ = MSMIN(thread_n_num_, kBucketNums); 448be168c0dSopenharmony_ci+ std::set<int> prefer_axes; 449be168c0dSopenharmony_ci+ for (int i = 0; i < param_->axis_num_; ++i) { 450be168c0dSopenharmony_ci+ int axis = param_->prefer_axes_[i] < 0 ? param_->prefer_axes_[i] + static_cast<int>(shape.size()) 451be168c0dSopenharmony_ci+ : param_->prefer_axes_[i]; 452be168c0dSopenharmony_ci+ MS_CHECK_TRUE_MSG(axis >= 0 && axis < static_cast<int>(shape.size()), lite::RET_ERROR, 453be168c0dSopenharmony_ci+ "The prefer axis is out of range."); 454be168c0dSopenharmony_ci+ if (prefer_axes.find(axis) != prefer_axes.end()) { 455be168c0dSopenharmony_ci+ continue; 456be168c0dSopenharmony_ci+ } 457be168c0dSopenharmony_ci+ segment_num *= shape[axis]; 458be168c0dSopenharmony_ci+ (void)prefer_axes.insert(axis); 459be168c0dSopenharmony_ci+ } 460be168c0dSopenharmony_ci+ pre_perm_.resize(shape.size()); 461be168c0dSopenharmony_ci+ post_perm_.resize(shape.size()); 462be168c0dSopenharmony_ci+ int pre_point0 = 0; 463be168c0dSopenharmony_ci+ int pre_point1 = param_->axis_num_; 464be168c0dSopenharmony_ci+ for (int i = 0; i < static_cast<int>(shape.size()); ++i) { 465be168c0dSopenharmony_ci+ if (prefer_axes.find(i) != prefer_axes.end()) { 466be168c0dSopenharmony_ci+ pre_perm_[pre_point0] = i; 467be168c0dSopenharmony_ci+ post_perm_[i] = pre_point0; 468be168c0dSopenharmony_ci+ ++pre_point0; 469be168c0dSopenharmony_ci+ } else { 470be168c0dSopenharmony_ci+ pre_perm_[pre_point1] = i; 471be168c0dSopenharmony_ci+ post_perm_[i] = pre_point1; 472be168c0dSopenharmony_ci+ ++pre_point1; 473be168c0dSopenharmony_ci+ } 474be168c0dSopenharmony_ci+ } 475be168c0dSopenharmony_ci } 476be168c0dSopenharmony_ci- 
477be168c0dSopenharmony_ci- int min_max_array_size = 0; 478be168c0dSopenharmony_ci- if (activation_perchannel_) { 479be168c0dSopenharmony_ci- auto dims = in_tensor->shape(); 480be168c0dSopenharmony_ci- prefer_axis_ = (prefer_axis_ < 0) ? prefer_axis_ + dims.size() : prefer_axis_; 481be168c0dSopenharmony_ci- channel_num_ = dims[prefer_axis_]; 482be168c0dSopenharmony_ci- MS_CHECK_GT(channel_num_, 0, RET_ERROR); 483be168c0dSopenharmony_ci- scale_ = reinterpret_cast<float *>(malloc(channel_num_ * sizeof(float))); 484be168c0dSopenharmony_ci- MS_CHECK_TRUE_MSG(scale_ != nullptr, RET_ERROR, "Malloc scale_ failed."); 485be168c0dSopenharmony_ci- zero_point_ = reinterpret_cast<int32_t *>(malloc(channel_num_ * sizeof(int32_t))); 486be168c0dSopenharmony_ci- MS_CHECK_TRUE_MSG(zero_point_ != nullptr, RET_ERROR, "Malloc zero_point_ failed."); 487be168c0dSopenharmony_ci- size_t last_axis = dims.size() - 1; 488be168c0dSopenharmony_ci- row_length_ = dims[last_axis]; 489be168c0dSopenharmony_ci- channel_length_ = num_unit_ / channel_num_; 490be168c0dSopenharmony_ci- thread_n_stride_ = UP_DIV(num_unit_, thread_n_num_); 491be168c0dSopenharmony_ci- if (!transpose_ && channel_length_ > thread_n_stride_) { 492be168c0dSopenharmony_ci- thread_n_num_ = 1; 493be168c0dSopenharmony_ci+ need_transpose_ = false; 494be168c0dSopenharmony_ci+ for (size_t i = 0; i < pre_perm_.size(); ++i) { 495be168c0dSopenharmony_ci+ if (pre_perm_[i] != static_cast<int>(i)) { 496be168c0dSopenharmony_ci+ need_transpose_ = true; 497be168c0dSopenharmony_ci } 498be168c0dSopenharmony_ci- min_max_array_size = channel_num_; 499be168c0dSopenharmony_ci- } else { 500be168c0dSopenharmony_ci- min_max_array_size = kBucketNums; 501be168c0dSopenharmony_ci } 502be168c0dSopenharmony_ci- real_min_ = reinterpret_cast<float *>(malloc(min_max_array_size * sizeof(float))); 503be168c0dSopenharmony_ci- real_max_ = reinterpret_cast<float *>(malloc(min_max_array_size * sizeof(float))); 504be168c0dSopenharmony_ci- if (real_min_ == nullptr || 
real_max_ == nullptr) { 505be168c0dSopenharmony_ci- return RET_NULL_PTR; 506be168c0dSopenharmony_ci+ if (segment_num <= 0) { 507be168c0dSopenharmony_ci+ segment_num = 1; 508be168c0dSopenharmony_ci } 509be168c0dSopenharmony_ci- for (int i = 0; i < min_max_array_size; ++i) { 510be168c0dSopenharmony_ci+ real_min_.resize(segment_num); 511be168c0dSopenharmony_ci+ real_max_.resize(segment_num); 512be168c0dSopenharmony_ci+ scale_.resize(segment_num); 513be168c0dSopenharmony_ci+ zero_point_.resize(segment_num); 514be168c0dSopenharmony_ci+ for (int i = 0; i < segment_num; ++i) { 515be168c0dSopenharmony_ci real_min_[i] = FLT_MAX; 516be168c0dSopenharmony_ci real_max_[i] = -FLT_MAX; 517be168c0dSopenharmony_ci } 518be168c0dSopenharmony_ci- MS_CHECK_GT(thread_n_num_, 0, RET_ERROR); 519be168c0dSopenharmony_ci- thread_n_stride_ = UP_DIV(num_unit_, thread_n_num_); 520be168c0dSopenharmony_ci+ thread_num_ = MSMIN(segment_num, op_parameter_->thread_num_); 521be168c0dSopenharmony_ci+ unit_num_ = UP_DIV(ele_num, segment_num); 522be168c0dSopenharmony_ci+ unit_segment_num_ = UP_DIV(segment_num, thread_num_); 523be168c0dSopenharmony_ci return RET_OK; 524be168c0dSopenharmony_ci } 525be168c0dSopenharmony_ci 526be168c0dSopenharmony_ci int DynamicQuantCPUKernel::CalculateMinMax(int task_id) { 527be168c0dSopenharmony_ci- int num_unit_thread = MSMIN(thread_n_stride_, num_unit_ - task_id * thread_n_stride_); 528be168c0dSopenharmony_ci- if (num_unit_thread <= 0) { 529be168c0dSopenharmony_ci- return RET_OK; 530be168c0dSopenharmony_ci- } 531be168c0dSopenharmony_ci- int thread_offset = task_id * thread_n_stride_; 532be168c0dSopenharmony_ci- float *data = float32_ptr_ + thread_offset; 533be168c0dSopenharmony_ci- if (activation_perchannel_) { 534be168c0dSopenharmony_ci- if (transpose_) { 535be168c0dSopenharmony_ci- MS_LOG(INFO) << "attribute transpose is true."; 536be168c0dSopenharmony_ci- CalculateChannelColMinMax(data, num_unit_thread, real_min_, real_max_, row_length_); 537be168c0dSopenharmony_ci- } 
else { 538be168c0dSopenharmony_ci- int channel_offset = task_id * thread_n_stride_ / channel_length_; 539be168c0dSopenharmony_ci- float *real_min = real_min_ + channel_offset; 540be168c0dSopenharmony_ci- float *real_max = real_max_ + channel_offset; 541be168c0dSopenharmony_ci- CalculateChannelRowMinMax(data, num_unit_thread, real_min, real_max, row_length_); 542be168c0dSopenharmony_ci+ int task_unit = unit_segment_num_ * unit_num_; 543be168c0dSopenharmony_ci+ int offset = task_id * task_unit; 544be168c0dSopenharmony_ci+ int ele_num = static_cast<int>(in_tensors_.front()->ElementsNum()); 545be168c0dSopenharmony_ci+ int remain = ele_num - offset; 546be168c0dSopenharmony_ci+ if (task_unit <= remain) { 547be168c0dSopenharmony_ci+ for (int i = 0; i < unit_segment_num_; ++i) { 548be168c0dSopenharmony_ci+ CalculateMinMaxFp32(float32_ptr_ + offset + i * unit_num_, unit_num_, &real_min_[task_id * unit_segment_num_ + i], 549be168c0dSopenharmony_ci+ &real_max_[task_id * unit_segment_num_ + i]); 550be168c0dSopenharmony_ci } 551be168c0dSopenharmony_ci } else { 552be168c0dSopenharmony_ci- float *real_min = real_min_ + task_id; 553be168c0dSopenharmony_ci- float *real_max = real_max_ + task_id; 554be168c0dSopenharmony_ci- CalculateMinMaxFp32(data, num_unit_thread, real_min, real_max); 555be168c0dSopenharmony_ci+ int segment_num = remain / unit_num_; 556be168c0dSopenharmony_ci+ int remain_ele_num = remain - segment_num * unit_num_; 557be168c0dSopenharmony_ci+ for (int i = 0; i < segment_num; ++i) { 558be168c0dSopenharmony_ci+ CalculateMinMaxFp32(float32_ptr_ + offset + i * unit_num_, unit_num_, &real_min_[task_id * unit_segment_num_ + i], 559be168c0dSopenharmony_ci+ &real_max_[task_id * unit_segment_num_ + i]); 560be168c0dSopenharmony_ci+ } 561be168c0dSopenharmony_ci+ if (remain_ele_num == 0) { 562be168c0dSopenharmony_ci+ return RET_OK; 563be168c0dSopenharmony_ci+ } 564be168c0dSopenharmony_ci+ CalculateMinMaxFp32(float32_ptr_ + offset + segment_num * unit_num_, remain_ele_num, 
565be168c0dSopenharmony_ci+ &real_min_[task_id * unit_segment_num_ + segment_num], 566be168c0dSopenharmony_ci+ &real_max_[task_id * unit_segment_num_ + segment_num]); 567be168c0dSopenharmony_ci } 568be168c0dSopenharmony_ci return RET_OK; 569be168c0dSopenharmony_ci } 570be168c0dSopenharmony_ci@@ -148,7 +156,7 @@ int CalculateMinMaxRun(void *cdata, int task_id, float, float) { 571be168c0dSopenharmony_ci void DynamicQuantCPUKernel::CalculatePerlayerScaleZp() { 572be168c0dSopenharmony_ci float real_min = FLT_MAX; 573be168c0dSopenharmony_ci float real_max = -FLT_MAX; 574be168c0dSopenharmony_ci- for (int i = 0; i < kBucketNums; i++) { 575be168c0dSopenharmony_ci+ for (size_t i = 0; i < real_min_.size(); ++i) { 576be168c0dSopenharmony_ci real_min = (real_min_[i] < real_min) ? real_min_[i] : real_min; 577be168c0dSopenharmony_ci real_max = (real_max_[i] > real_max) ? real_max_[i] : real_max; 578be168c0dSopenharmony_ci } 579be168c0dSopenharmony_ci@@ -158,7 +166,7 @@ void DynamicQuantCPUKernel::CalculatePerlayerScaleZp() { 580be168c0dSopenharmony_ci int zp = 0; 581be168c0dSopenharmony_ci constexpr int kQSymmetricRange = 255; 582be168c0dSopenharmony_ci constexpr int kQAsymmetricRange = 254; 583be168c0dSopenharmony_ci- if (!symmetric_) { 584be168c0dSopenharmony_ci+ if (!param_->symmetric_) { 585be168c0dSopenharmony_ci auto range = real_max - real_min; 586be168c0dSopenharmony_ci if (range <= 0) { 587be168c0dSopenharmony_ci range = kDefaultRange; 588be168c0dSopenharmony_ci@@ -175,12 +183,11 @@ void DynamicQuantCPUKernel::CalculatePerlayerScaleZp() { 589be168c0dSopenharmony_ci quant_parm.bitNum = k8Bit; 590be168c0dSopenharmony_ci quant_parm.inited = true; 591be168c0dSopenharmony_ci this->out_tensors_.front()->set_quant_params({quant_parm}); 592be168c0dSopenharmony_ci- return; 593be168c0dSopenharmony_ci } 594be168c0dSopenharmony_ci 595be168c0dSopenharmony_ci void DynamicQuantCPUKernel::CalculatePerChannelScaleZp() { 596be168c0dSopenharmony_ci std::vector<lite::LiteQuantParam> 
quant_params; 597be168c0dSopenharmony_ci- for (int i = 0; i < channel_num_; ++i) { 598be168c0dSopenharmony_ci+ for (size_t i = 0; i < real_min_.size(); ++i) { 599be168c0dSopenharmony_ci float real_min = real_min_[i]; 600be168c0dSopenharmony_ci float real_max = real_max_[i]; 601be168c0dSopenharmony_ci 602be168c0dSopenharmony_ci@@ -189,7 +196,7 @@ void DynamicQuantCPUKernel::CalculatePerChannelScaleZp() { 603be168c0dSopenharmony_ci int zp = 0; 604be168c0dSopenharmony_ci constexpr int kQSymmetricRange = 255; 605be168c0dSopenharmony_ci constexpr int kQAsymmetricRange = 254; 606be168c0dSopenharmony_ci- if (!symmetric_) { 607be168c0dSopenharmony_ci+ if (!param_->symmetric_) { 608be168c0dSopenharmony_ci auto range = real_max - real_min; 609be168c0dSopenharmony_ci if (range <= 0) { 610be168c0dSopenharmony_ci range = kDefaultRange; 611be168c0dSopenharmony_ci@@ -208,40 +215,34 @@ void DynamicQuantCPUKernel::CalculatePerChannelScaleZp() { 612be168c0dSopenharmony_ci quant_params.push_back(quant_parm); 613be168c0dSopenharmony_ci } 614be168c0dSopenharmony_ci this->out_tensors_.front()->set_quant_params(quant_params); 615be168c0dSopenharmony_ci- return; 616be168c0dSopenharmony_ci } 617be168c0dSopenharmony_ci+ 618be168c0dSopenharmony_ci int DynamicQuantCPUKernel::QuantData(int task_id) { 619be168c0dSopenharmony_ci- int num_unit_thread = MSMIN(thread_n_stride_, num_unit_ - task_id * thread_n_stride_); 620be168c0dSopenharmony_ci- MS_CHECK_GT(num_unit_thread, 0, RET_ERROR); 621be168c0dSopenharmony_ci- TypeId data_type = out_tensors_.front()->data_type(); 622be168c0dSopenharmony_ci- if (data_type != TypeId::kNumberTypeInt8) { 623be168c0dSopenharmony_ci- MS_LOG(ERROR) << "Data type not supported:" << data_type; 624be168c0dSopenharmony_ci- return RET_PARAM_INVALID; 625be168c0dSopenharmony_ci- } 626be168c0dSopenharmony_ci- int thread_offset = task_id * thread_n_stride_; 627be168c0dSopenharmony_ci- int ret; 628be168c0dSopenharmony_ci- if (activation_perchannel_) { 
629be168c0dSopenharmony_ci- MS_CHECK_EQ(out_tensors_.front()->quant_params().size(), static_cast<size_t>(channel_num_), RET_ERROR); 630be168c0dSopenharmony_ci- for (int i = 0; i < channel_num_; i++) { 631be168c0dSopenharmony_ci- auto quant_arg = out_tensors_.front()->quant_params().at(i); 632be168c0dSopenharmony_ci- scale_[i] = quant_arg.scale; 633be168c0dSopenharmony_ci- zero_point_[i] = quant_arg.zeroPoint; 634be168c0dSopenharmony_ci- } 635be168c0dSopenharmony_ci- if (transpose_) { 636be168c0dSopenharmony_ci- ret = DoChannelColFp32ToInt8(float32_ptr_ + thread_offset, int8_ptr_ + thread_offset, scale_, zero_point_, 637be168c0dSopenharmony_ci- num_unit_thread, row_length_, (int32_t)INT8_MIN, (int32_t)INT8_MAX); 638be168c0dSopenharmony_ci- } else { 639be168c0dSopenharmony_ci- ret = DoChannelRowFp32ToInt8(float32_ptr_ + thread_offset, int8_ptr_ + thread_offset, scale_, zero_point_, 640be168c0dSopenharmony_ci- num_unit_thread, row_length_, (int32_t)INT8_MIN, (int32_t)INT8_MAX); 641be168c0dSopenharmony_ci- } 642be168c0dSopenharmony_ci- } else { 643be168c0dSopenharmony_ci+ int task_unit = unit_segment_num_ * unit_num_; 644be168c0dSopenharmony_ci+ int offset = task_id * task_unit; 645be168c0dSopenharmony_ci+ int ele_num = static_cast<int>(in_tensors_.front()->ElementsNum()); 646be168c0dSopenharmony_ci+ int remain = ele_num - offset; 647be168c0dSopenharmony_ci+ task_unit = MSMIN(task_unit, remain); 648be168c0dSopenharmony_ci+ if (param_->axis_num_ == 0) { // per-tensor 649be168c0dSopenharmony_ci auto quant_arg = out_tensors_.front()->quant_params().front(); 650be168c0dSopenharmony_ci- ret = DoQuantizeFp32ToInt8(float32_ptr_ + thread_offset, int8_ptr_ + thread_offset, quant_arg.scale, 651be168c0dSopenharmony_ci- quant_arg.zeroPoint, num_unit_thread, (int32_t)INT8_MIN, (int32_t)INT8_MAX); 652be168c0dSopenharmony_ci+ auto ret = DoQuantizeFp32ToInt8(float32_ptr_ + offset, int8_ptr_ + offset, quant_arg.scale, quant_arg.zeroPoint, 653be168c0dSopenharmony_ci+ task_unit, 
(int32_t)INT8_MIN, (int32_t)INT8_MAX); 654be168c0dSopenharmony_ci+ if (ret != RET_OK) { 655be168c0dSopenharmony_ci+ MS_LOG(ERROR) << "QuantDTypeCast error task_id[" << task_id << "] error_code[" << ret << "]"; 656be168c0dSopenharmony_ci+ return RET_ERROR; 657be168c0dSopenharmony_ci+ } 658be168c0dSopenharmony_ci+ return RET_OK; 659be168c0dSopenharmony_ci } 660be168c0dSopenharmony_ci- if (ret != RET_OK) { 661be168c0dSopenharmony_ci- MS_LOG(ERROR) << "QuantDTypeCast error task_id[" << task_id << "] error_code[" << ret << "]"; 662be168c0dSopenharmony_ci- return RET_ERROR; 663be168c0dSopenharmony_ci+ int segment_num = task_unit / unit_num_; 664be168c0dSopenharmony_ci+ for (int i = 0; i < segment_num; ++i) { 665be168c0dSopenharmony_ci+ auto quant_arg = out_tensors_.front()->quant_params()[task_id * unit_segment_num_ + i]; 666be168c0dSopenharmony_ci+ auto ret = 667be168c0dSopenharmony_ci+ DoQuantizeFp32ToInt8(float32_ptr_ + offset + i * unit_num_, int8_ptr_ + offset + i * unit_num_, quant_arg.scale, 668be168c0dSopenharmony_ci+ quant_arg.zeroPoint, unit_num_, (int32_t)INT8_MIN, (int32_t)INT8_MAX); 669be168c0dSopenharmony_ci+ if (ret != RET_OK) { 670be168c0dSopenharmony_ci+ MS_LOG(ERROR) << "QuantDTypeCast error task_id[" << task_id << "] error_code[" << ret << "]"; 671be168c0dSopenharmony_ci+ return RET_ERROR; 672be168c0dSopenharmony_ci+ } 673be168c0dSopenharmony_ci } 674be168c0dSopenharmony_ci return RET_OK; 675be168c0dSopenharmony_ci } 676be168c0dSopenharmony_ci@@ -257,26 +258,110 @@ int QuantDataRun(void *cdata, int task_id, float, float) { 677be168c0dSopenharmony_ci return RET_OK; 678be168c0dSopenharmony_ci } 679be168c0dSopenharmony_ci 680be168c0dSopenharmony_ci+int DynamicQuantCPUKernel::MallocTmpBuffer() { 681be168c0dSopenharmony_ci+ auto in_size = in_tensors_.front()->Size(); 682be168c0dSopenharmony_ci+ auto out_size = out_tensors_.front()->Size(); 683be168c0dSopenharmony_ci+ if (ms_context_ != nullptr && ms_context_->allocator != nullptr) { 
684be168c0dSopenharmony_ci+ int8_ptr_ = static_cast<int8_t *>(ms_context_->allocator->Malloc(in_size + out_size)); 685be168c0dSopenharmony_ci+ } else { 686be168c0dSopenharmony_ci+ int8_ptr_ = static_cast<int8_t *>(malloc(in_size + out_size)); 687be168c0dSopenharmony_ci+ } 688be168c0dSopenharmony_ci+ MS_CHECK_TRUE_MSG(int8_ptr_ != nullptr, lite::RET_NULL_PTR, "DynamicQuant malloc tmp buffer failed."); 689be168c0dSopenharmony_ci+ float32_ptr_ = reinterpret_cast<float *>(int8_ptr_ + out_size); 690be168c0dSopenharmony_ci+ return lite::RET_OK; 691be168c0dSopenharmony_ci+} 692be168c0dSopenharmony_ci+ 693be168c0dSopenharmony_ci+void DynamicQuantCPUKernel::FreeTmpBuffer() { 694be168c0dSopenharmony_ci+ if (need_transpose_) { 695be168c0dSopenharmony_ci+ if (int8_ptr_ != nullptr) { 696be168c0dSopenharmony_ci+ if (ms_context_ != nullptr && ms_context_->allocator != nullptr) { 697be168c0dSopenharmony_ci+ ms_context_->allocator->Free(int8_ptr_); 698be168c0dSopenharmony_ci+ } else { 699be168c0dSopenharmony_ci+ free(int8_ptr_); 700be168c0dSopenharmony_ci+ } 701be168c0dSopenharmony_ci+ } 702be168c0dSopenharmony_ci+ } 703be168c0dSopenharmony_ci+ int8_ptr_ = nullptr; 704be168c0dSopenharmony_ci+ float32_ptr_ = nullptr; 705be168c0dSopenharmony_ci+} 706be168c0dSopenharmony_ci+ 707be168c0dSopenharmony_ci int DynamicQuantCPUKernel::Run() { 708be168c0dSopenharmony_ci- int8_ptr_ = reinterpret_cast<int8_t *>(out_tensors_[0]->data()); 709be168c0dSopenharmony_ci- float32_ptr_ = reinterpret_cast<float *>(in_tensors_[0]->data()); 710be168c0dSopenharmony_ci- CHECK_NULL_RETURN(int8_ptr_); 711be168c0dSopenharmony_ci- CHECK_NULL_RETURN(float32_ptr_); 712be168c0dSopenharmony_ci- auto ret = ParallelLaunch(this->ms_context_, CalculateMinMaxRun, this, thread_n_num_); 713be168c0dSopenharmony_ci+ std::vector<int> transpose_shape; 714be168c0dSopenharmony_ci+ if (need_transpose_) { 715be168c0dSopenharmony_ci+ auto shape = in_tensors_.front()->shape(); 716be168c0dSopenharmony_ci+ 
transpose_shape.resize(shape.size()); 717be168c0dSopenharmony_ci+ for (size_t i = 0; i < shape.size(); ++i) { 718be168c0dSopenharmony_ci+ transpose_shape[i] = shape[pre_perm_[i]]; 719be168c0dSopenharmony_ci+ } 720be168c0dSopenharmony_ci+ if (MallocTmpBuffer() != lite::RET_OK) { 721be168c0dSopenharmony_ci+ MS_LOG(ERROR) << "DynamicQuant MallocTmpBuffer failed."; 722be168c0dSopenharmony_ci+ return lite::RET_NULL_PTR; 723be168c0dSopenharmony_ci+ } 724be168c0dSopenharmony_ci+ std::vector<int> strides(shape.size(), 1); 725be168c0dSopenharmony_ci+ std::vector<int> out_strides(shape.size(), 1); 726be168c0dSopenharmony_ci+ for (int i = static_cast<int>(shape.size()) - C2NUM; i >= 0; i--) { 727be168c0dSopenharmony_ci+ strides[i] = shape[i + 1] * strides[i + 1]; 728be168c0dSopenharmony_ci+ out_strides[i] = transpose_shape[i + 1] * out_strides[i + 1]; 729be168c0dSopenharmony_ci+ } 730be168c0dSopenharmony_ci+ if (shape.size() <= C6NUM) { 731be168c0dSopenharmony_ci+ (void)DoTransposeFp32(in_tensors_.front()->data(), float32_ptr_, transpose_shape.data(), pre_perm_.data(), 732be168c0dSopenharmony_ci+ strides.data(), out_strides.data(), in_tensors_.front()->Size(), shape.size()); 733be168c0dSopenharmony_ci+ } else { 734be168c0dSopenharmony_ci+ TransposeDimsFp32(in_tensors_.front()->data(), float32_ptr_, transpose_shape.data(), pre_perm_.data(), 735be168c0dSopenharmony_ci+ strides.data(), out_strides.data(), shape.size(), 0, 1); 736be168c0dSopenharmony_ci+ } 737be168c0dSopenharmony_ci+ } else { 738be168c0dSopenharmony_ci+ int8_ptr_ = reinterpret_cast<int8_t *>(out_tensors_[0]->data()); 739be168c0dSopenharmony_ci+ float32_ptr_ = reinterpret_cast<float *>(in_tensors_[0]->data()); 740be168c0dSopenharmony_ci+ } 741be168c0dSopenharmony_ci+ if (int8_ptr_ == nullptr || float32_ptr_ == nullptr) { 742be168c0dSopenharmony_ci+ FreeTmpBuffer(); 743be168c0dSopenharmony_ci+ MS_LOG(ERROR) << "DynamicQuant's original data exists nullptr."; 744be168c0dSopenharmony_ci+ return lite::RET_NULL_PTR; 
745be168c0dSopenharmony_ci+ } 746be168c0dSopenharmony_ci+ auto ret = ParallelLaunch(this->ms_context_, CalculateMinMaxRun, this, thread_num_); 747be168c0dSopenharmony_ci if (ret != RET_OK) { 748be168c0dSopenharmony_ci+ FreeTmpBuffer(); 749be168c0dSopenharmony_ci MS_LOG(ERROR) << "Run error error_code[" << ret << "]"; 750be168c0dSopenharmony_ci return RET_ERROR; 751be168c0dSopenharmony_ci } 752be168c0dSopenharmony_ci- if (activation_perchannel_) { 753be168c0dSopenharmony_ci+ if (param_->axis_num_ != 0) { 754be168c0dSopenharmony_ci CalculatePerChannelScaleZp(); 755be168c0dSopenharmony_ci } else { 756be168c0dSopenharmony_ci CalculatePerlayerScaleZp(); 757be168c0dSopenharmony_ci } 758be168c0dSopenharmony_ci- ret = ParallelLaunch(this->ms_context_, QuantDataRun, this, thread_n_num_); 759be168c0dSopenharmony_ci+ ret = ParallelLaunch(this->ms_context_, QuantDataRun, this, thread_num_); 760be168c0dSopenharmony_ci if (ret != RET_OK) { 761be168c0dSopenharmony_ci+ FreeTmpBuffer(); 762be168c0dSopenharmony_ci MS_LOG(ERROR) << "Run error error_code[" << ret << "]"; 763be168c0dSopenharmony_ci return RET_ERROR; 764be168c0dSopenharmony_ci } 765be168c0dSopenharmony_ci+ if (need_transpose_) { 766be168c0dSopenharmony_ci+ auto out_shape = out_tensors_.front()->shape(); 767be168c0dSopenharmony_ci+ TransposeParameter trans_parameter; 768be168c0dSopenharmony_ci+ (void)memset(&trans_parameter, 0, sizeof(TransposeParameter)); 769be168c0dSopenharmony_ci+ trans_parameter.op_parameter_.thread_num_ = 1; 770be168c0dSopenharmony_ci+ trans_parameter.num_axes_ = static_cast<int>(out_shape.size()); 771be168c0dSopenharmony_ci+ trans_parameter.data_num_ = out_tensors_[0]->ElementsNum(); 772be168c0dSopenharmony_ci+ trans_parameter.perm_size_ = post_perm_.size(); 773be168c0dSopenharmony_ci+ int last_index = static_cast<int>(out_shape.size()) - 1; 774be168c0dSopenharmony_ci+ trans_parameter.perm_[last_index] = post_perm_[last_index]; 775be168c0dSopenharmony_ci+ trans_parameter.strides_[last_index] = 1; 
776be168c0dSopenharmony_ci+ trans_parameter.out_strides_[last_index] = 1; 777be168c0dSopenharmony_ci+ for (int i = last_index - 1; i >= 0; i--) { 778be168c0dSopenharmony_ci+ trans_parameter.perm_[i] = post_perm_[i]; 779be168c0dSopenharmony_ci+ trans_parameter.strides_[i] = transpose_shape[i + 1] * trans_parameter.strides_[i + 1]; 780be168c0dSopenharmony_ci+ trans_parameter.out_strides_[i] = out_shape[i + 1] * trans_parameter.out_strides_[i + 1]; 781be168c0dSopenharmony_ci+ } 782be168c0dSopenharmony_ci+ if (out_shape.size() <= C6NUM) { 783be168c0dSopenharmony_ci+ (void)DoTransposeInt8(int8_ptr_, reinterpret_cast<int8_t *>(out_tensors_[0]->data()), out_shape.data(), 784be168c0dSopenharmony_ci+ &trans_parameter); 785be168c0dSopenharmony_ci+ } else { 786be168c0dSopenharmony_ci+ TransposeDimsInt8(int8_ptr_, reinterpret_cast<int8_t *>(out_tensors_[0]->data()), out_shape.data(), 787be168c0dSopenharmony_ci+ &trans_parameter, 0, 1); 788be168c0dSopenharmony_ci+ } 789be168c0dSopenharmony_ci+ } 790be168c0dSopenharmony_ci+ FreeTmpBuffer(); 791be168c0dSopenharmony_ci return RET_OK; 792be168c0dSopenharmony_ci } 793be168c0dSopenharmony_ci 794be168c0dSopenharmony_cidiff --git a/mindspore/lite/src/litert/kernel/cpu/int8/dynamic_quant.h b/mindspore/lite/src/litert/kernel/cpu/int8/dynamic_quant.h 795be168c0dSopenharmony_ciindex ca84f088..023f1fab 100644 796be168c0dSopenharmony_ci--- a/mindspore/lite/src/litert/kernel/cpu/int8/dynamic_quant.h 797be168c0dSopenharmony_ci+++ b/mindspore/lite/src/litert/kernel/cpu/int8/dynamic_quant.h 798be168c0dSopenharmony_ci@@ -21,31 +21,15 @@ 799be168c0dSopenharmony_ci #include <cfloat> 800be168c0dSopenharmony_ci #include <map> 801be168c0dSopenharmony_ci #include "src/litert/lite_kernel.h" 802be168c0dSopenharmony_ci+#include "nnacl/dynamic_quant_parameter.h" 803be168c0dSopenharmony_ci 804be168c0dSopenharmony_ci namespace mindspore::kernel { 805be168c0dSopenharmony_ci class DynamicQuantCPUKernel : public LiteKernel { 806be168c0dSopenharmony_ci public: 
807be168c0dSopenharmony_ci DynamicQuantCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, 808be168c0dSopenharmony_ci const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx) 809be168c0dSopenharmony_ci- : LiteKernel(parameter, inputs, outputs, ctx), thread_num_(ctx->thread_num_) {} 810be168c0dSopenharmony_ci- ~DynamicQuantCPUKernel() override { 811be168c0dSopenharmony_ci- if (real_min_ != nullptr) { 812be168c0dSopenharmony_ci- free(real_min_); 813be168c0dSopenharmony_ci- real_min_ = nullptr; 814be168c0dSopenharmony_ci- } 815be168c0dSopenharmony_ci- if (real_max_ != nullptr) { 816be168c0dSopenharmony_ci- free(real_max_); 817be168c0dSopenharmony_ci- real_max_ = nullptr; 818be168c0dSopenharmony_ci- } 819be168c0dSopenharmony_ci- if (scale_ != nullptr) { 820be168c0dSopenharmony_ci- free(scale_); 821be168c0dSopenharmony_ci- scale_ = nullptr; 822be168c0dSopenharmony_ci- } 823be168c0dSopenharmony_ci- if (zero_point_ != nullptr) { 824be168c0dSopenharmony_ci- free(zero_point_); 825be168c0dSopenharmony_ci- zero_point_ = nullptr; 826be168c0dSopenharmony_ci- } 827be168c0dSopenharmony_ci- }; 828be168c0dSopenharmony_ci+ : LiteKernel(parameter, inputs, outputs, ctx) {} 829be168c0dSopenharmony_ci+ ~DynamicQuantCPUKernel() override = default; 830be168c0dSopenharmony_ci 831be168c0dSopenharmony_ci int Prepare() override; 832be168c0dSopenharmony_ci int ReSize() override; 833be168c0dSopenharmony_ci@@ -57,28 +41,21 @@ class DynamicQuantCPUKernel : public LiteKernel { 834be168c0dSopenharmony_ci private: 835be168c0dSopenharmony_ci void CalculatePerlayerScaleZp(); 836be168c0dSopenharmony_ci void CalculatePerChannelScaleZp(); 837be168c0dSopenharmony_ci- 838be168c0dSopenharmony_ci- private: 839be168c0dSopenharmony_ci- int thread_num_; 840be168c0dSopenharmony_ci- int thread_n_num_{0}; 841be168c0dSopenharmony_ci- int thread_n_stride_{0}; 842be168c0dSopenharmony_ci- int num_unit_{0}; 843be168c0dSopenharmony_ci- int8_t *int8_ptr_ = nullptr; 
844be168c0dSopenharmony_ci- float *float32_ptr_ = nullptr; 845be168c0dSopenharmony_ci- float *real_min_ = nullptr; 846be168c0dSopenharmony_ci- float *real_max_ = nullptr; 847be168c0dSopenharmony_ci- float *scale_ = nullptr; 848be168c0dSopenharmony_ci- int32_t *zero_point_ = nullptr; 849be168c0dSopenharmony_ci- 850be168c0dSopenharmony_ci- int32_t src_dtype_{0}; 851be168c0dSopenharmony_ci- int32_t dst_dtype_{0}; 852be168c0dSopenharmony_ci- bool symmetric_ = false; 853be168c0dSopenharmony_ci- bool activation_perchannel_ = false; 854be168c0dSopenharmony_ci- bool transpose_ = false; 855be168c0dSopenharmony_ci- int32_t prefer_axis_{-1}; 856be168c0dSopenharmony_ci- int32_t channel_num_{0}; 857be168c0dSopenharmony_ci- int32_t channel_length_{0}; 858be168c0dSopenharmony_ci- int32_t row_length_{0}; 859be168c0dSopenharmony_ci+ int MallocTmpBuffer(); 860be168c0dSopenharmony_ci+ void FreeTmpBuffer(); 861be168c0dSopenharmony_ci+ 862be168c0dSopenharmony_ci+ DynamicQuantParameter *param_{nullptr}; 863be168c0dSopenharmony_ci+ std::vector<float> real_min_; 864be168c0dSopenharmony_ci+ std::vector<float> real_max_; 865be168c0dSopenharmony_ci+ std::vector<float> scale_; 866be168c0dSopenharmony_ci+ std::vector<float> zero_point_; 867be168c0dSopenharmony_ci+ std::vector<int> pre_perm_; 868be168c0dSopenharmony_ci+ std::vector<int> post_perm_; 869be168c0dSopenharmony_ci+ int8_t *int8_ptr_{nullptr}; 870be168c0dSopenharmony_ci+ float *float32_ptr_{nullptr}; 871be168c0dSopenharmony_ci+ int unit_num_{0}; 872be168c0dSopenharmony_ci+ int unit_segment_num_{0}; 873be168c0dSopenharmony_ci+ bool need_transpose_{false}; 874be168c0dSopenharmony_ci }; 875be168c0dSopenharmony_ci } // namespace mindspore::kernel 876be168c0dSopenharmony_ci 877be168c0dSopenharmony_cidiff --git a/mindspore/lite/src/litert/kernel/cpu/int8/matmul_dynamic_base_int8.cc b/mindspore/lite/src/litert/kernel/cpu/int8/matmul_dynamic_base_int8.cc 878be168c0dSopenharmony_ciindex adae37aa..bab1f730 100644 879be168c0dSopenharmony_ci--- 
a/mindspore/lite/src/litert/kernel/cpu/int8/matmul_dynamic_base_int8.cc 880be168c0dSopenharmony_ci+++ b/mindspore/lite/src/litert/kernel/cpu/int8/matmul_dynamic_base_int8.cc 881be168c0dSopenharmony_ci@@ -54,12 +54,12 @@ void MatmulDynamicBaseInt8CPUKernel::FreeQuantParam() { 882be168c0dSopenharmony_ci } 883be168c0dSopenharmony_ci 884be168c0dSopenharmony_ci int MatmulDynamicBaseInt8CPUKernel::MallocQuantParam() { 885be168c0dSopenharmony_ci- quant_param_ = reinterpret_cast<MatmulDynamicQuantParameter *>(malloc(sizeof(MatmulQuantParameter))); 886be168c0dSopenharmony_ci+ quant_param_ = reinterpret_cast<MatmulDynamicQuantParameter *>(malloc(sizeof(MatmulDynamicQuantParameter))); 887be168c0dSopenharmony_ci if (quant_param_ == nullptr) { 888be168c0dSopenharmony_ci MS_LOG(ERROR) << "Malloc MatmulDynamicQuantParameter for Matmul int8 op failed!"; 889be168c0dSopenharmony_ci return RET_ERROR; 890be168c0dSopenharmony_ci } 891be168c0dSopenharmony_ci- memset(quant_param_, 0, sizeof(MatmulQuantParameter)); 892be168c0dSopenharmony_ci+ (void)memset(quant_param_, 0, sizeof(MatmulDynamicQuantParameter)); 893be168c0dSopenharmony_ci return RET_OK; 894be168c0dSopenharmony_ci } 895be168c0dSopenharmony_ci 896be168c0dSopenharmony_ci@@ -80,9 +80,16 @@ int MatmulDynamicBaseInt8CPUKernel::InitFilterQuantParam() { 897be168c0dSopenharmony_ci MS_LOG(ERROR) << weight_tensor->tensor_name() << " dims < 2."; 898be168c0dSopenharmony_ci return RET_ERROR; 899be168c0dSopenharmony_ci } 900be168c0dSopenharmony_ci- int col = param_->b_transpose_ ? w_shape[w_shape.size() - kSize2] : w_shape[w_shape.size() - kSize1]; 901be168c0dSopenharmony_ci filter_per_channel_ = (weight_quant_params.size() > 1); 902be168c0dSopenharmony_ci- auto channel_num = filter_per_channel_ ? 
col : 1; 903be168c0dSopenharmony_ci+ filter_per_batch_channel_ = false; 904be168c0dSopenharmony_ci+ int channel_num = 1; 905be168c0dSopenharmony_ci+ if (filter_per_channel_) { 906be168c0dSopenharmony_ci+ channel_num = param_->col_; 907be168c0dSopenharmony_ci+ if (weight_quant_params.size() > static_cast<size_t>(channel_num)) { 908be168c0dSopenharmony_ci+ filter_per_batch_channel_ = true; 909be168c0dSopenharmony_ci+ channel_num = in_tensors_.at(kWeightIndex)->ElementsNum() / param_->deep_; 910be168c0dSopenharmony_ci+ } 911be168c0dSopenharmony_ci+ } 912be168c0dSopenharmony_ci if (static_cast<int>(weight_quant_params.size()) != channel_num) { 913be168c0dSopenharmony_ci MS_LOG(ERROR) << weight_tensor->tensor_name() << " quant params size:" << weight_quant_params.size() 914be168c0dSopenharmony_ci << " != channel_num:" << channel_num; 915be168c0dSopenharmony_ci@@ -90,10 +97,10 @@ int MatmulDynamicBaseInt8CPUKernel::InitFilterQuantParam() { 916be168c0dSopenharmony_ci } 917be168c0dSopenharmony_ci quant_param_->filter_scale_ = reinterpret_cast<float *>(malloc(channel_num * sizeof(float))); 918be168c0dSopenharmony_ci CHECK_NULL_RETURN(quant_param_->filter_scale_); 919be168c0dSopenharmony_ci- memset(quant_param_->filter_scale_, 0, sizeof(channel_num)); 920be168c0dSopenharmony_ci+ (void)memset(quant_param_->filter_scale_, 0, sizeof(channel_num)); 921be168c0dSopenharmony_ci quant_param_->filter_zp_ = reinterpret_cast<int32_t *>(malloc(channel_num * sizeof(int32_t))); 922be168c0dSopenharmony_ci CHECK_NULL_RETURN(quant_param_->filter_zp_); 923be168c0dSopenharmony_ci- memset(quant_param_->filter_zp_, 0, sizeof(channel_num)); 924be168c0dSopenharmony_ci+ (void)memset(quant_param_->filter_zp_, 0, sizeof(channel_num)); 925be168c0dSopenharmony_ci 926be168c0dSopenharmony_ci for (int i = 0; i < channel_num; i++) { 927be168c0dSopenharmony_ci quant_param_->filter_scale_[i] = static_cast<float>(weight_quant_params[i].scale); 928be168c0dSopenharmony_ci@@ -143,7 +150,15 @@ int 
MatmulDynamicBaseInt8CPUKernel::InitInputQuantParam(std::vector<float> *scal 929be168c0dSopenharmony_ci return RET_ERROR; 930be168c0dSopenharmony_ci } 931be168c0dSopenharmony_ci input_per_channel_ = (in_quant_params.size() > 1); 932be168c0dSopenharmony_ci- auto channel_num = input_per_channel_ ? param_->row_ : 1; 933be168c0dSopenharmony_ci+ input_per_batch_channel_ = false; 934be168c0dSopenharmony_ci+ int channel_num = 1; 935be168c0dSopenharmony_ci+ if (input_per_channel_) { 936be168c0dSopenharmony_ci+ channel_num = param_->row_; 937be168c0dSopenharmony_ci+ if (in_quant_params.size() > static_cast<size_t>(channel_num)) { 938be168c0dSopenharmony_ci+ input_per_batch_channel_ = true; 939be168c0dSopenharmony_ci+ channel_num = in_tensors_.at(kInputIndex)->ElementsNum() / param_->deep_; 940be168c0dSopenharmony_ci+ } 941be168c0dSopenharmony_ci+ } 942be168c0dSopenharmony_ci if (static_cast<int>(in_quant_params.size()) != channel_num) { 943be168c0dSopenharmony_ci MS_LOG(ERROR) << in_tensors_.at(kInputIndex)->tensor_name() << " quant params size:" << in_quant_params.size() 944be168c0dSopenharmony_ci << " != channel_num:" << channel_num; 945be168c0dSopenharmony_ci@@ -199,7 +214,7 @@ int MatmulDynamicBaseInt8CPUKernel::InitMatrixABuffer() { 946be168c0dSopenharmony_ci return lite::RET_NULL_PTR; 947be168c0dSopenharmony_ci } 948be168c0dSopenharmony_ci input_sums_ = reinterpret_cast<int *>(pack_a_ptr_ + pack_a_size); 949be168c0dSopenharmony_ci- memset(pack_a_ptr_, 0, pack_a_size + sum_a_size); 950be168c0dSopenharmony_ci+ (void)memset(pack_a_ptr_, 0, pack_a_size + sum_a_size); 951be168c0dSopenharmony_ci return RET_OK; 952be168c0dSopenharmony_ci } 953be168c0dSopenharmony_ci 954be168c0dSopenharmony_ci@@ -240,8 +255,8 @@ int MatmulDynamicBaseInt8CPUKernel::InitMatrixBBuffer() { 955be168c0dSopenharmony_ci FreeTmpBuffer(); 956be168c0dSopenharmony_ci return RET_ERROR; 957be168c0dSopenharmony_ci } 958be168c0dSopenharmony_ci- memset(pack_b_ptr_, 0, b_batch_ * param_->col_align_ * 
param_->deep_align_ * sizeof(int8_t)); 959be168c0dSopenharmony_ci- memset(weight_sums_, 0, b_batch_ * param_->col_align_ * sizeof(int)); 960be168c0dSopenharmony_ci+ (void)memset(pack_b_ptr_, 0, b_batch_ * param_->col_align_ * param_->deep_align_ * sizeof(int8_t)); 961be168c0dSopenharmony_ci+ (void)memset(weight_sums_, 0, b_batch_ * param_->col_align_ * sizeof(int)); 962be168c0dSopenharmony_ci return RET_OK; 963be168c0dSopenharmony_ci } 964be168c0dSopenharmony_ci 965be168c0dSopenharmony_ci@@ -258,7 +273,7 @@ int MatmulDynamicBaseInt8CPUKernel::CopyBias() { 966be168c0dSopenharmony_ci FreeTmpBuffer(); 967be168c0dSopenharmony_ci return RET_MEMORY_FAILED; 968be168c0dSopenharmony_ci } 969be168c0dSopenharmony_ci- memcpy(bias_ptr_, bias_tensor->data(), bias_tensor->Size()); 970be168c0dSopenharmony_ci+ (void)memcpy(bias_ptr_, bias_tensor->data(), bias_tensor->Size()); 971be168c0dSopenharmony_ci } else { 972be168c0dSopenharmony_ci bias_ptr_ = nullptr; 973be168c0dSopenharmony_ci } 974be168c0dSopenharmony_ci@@ -352,6 +367,8 @@ int MatmulDynamicBaseInt8CPUKernel::ReSize() { 975be168c0dSopenharmony_ci int MatmulDynamicBaseInt8CPUKernel::InitBroadcastParams(const std::vector<int> &a_shape_const, 976be168c0dSopenharmony_ci const std::vector<int> &b_shape_const, MatMulParameter *params, 977be168c0dSopenharmony_ci std::vector<int> *a_offsets, std::vector<int> *b_offsets) { 978be168c0dSopenharmony_ci+ CHECK_NULL_RETURN(a_offsets); 979be168c0dSopenharmony_ci+ CHECK_NULL_RETURN(b_offsets); 980be168c0dSopenharmony_ci std::vector<int> a_shape = a_shape_const; 981be168c0dSopenharmony_ci if (a_shape.size() < kNCHWDimNumber) { 982be168c0dSopenharmony_ci size_t add_nums = kNCHWDimNumber - a_shape.size(); 983be168c0dSopenharmony_ci@@ -370,8 +387,8 @@ int MatmulDynamicBaseInt8CPUKernel::InitBroadcastParams(const std::vector<int> & 984be168c0dSopenharmony_ci int batch_sizes[MAX_SHAPE_SIZE] = {0}; 985be168c0dSopenharmony_ci int a_batch_sizes[MAX_SHAPE_SIZE] = {0}; 986be168c0dSopenharmony_ci int 
b_batch_sizes[MAX_SHAPE_SIZE] = {0}; 987be168c0dSopenharmony_ci- for (int i = a_shape.size() - kCHWDimNumber; i >= 0; --i) { 988be168c0dSopenharmony_ci- if (static_cast<int>(a_shape.size() - kCHWDimNumber) == i) { 989be168c0dSopenharmony_ci+ for (int i = static_cast<int>(a_shape.size()) - kCHWDimNumber; i >= 0; --i) { 990be168c0dSopenharmony_ci+ if (static_cast<int>(a_shape.size()) - kCHWDimNumber == i) { 991be168c0dSopenharmony_ci batch_sizes[i] = std::max(a_shape[i], b_shape[i]); 992be168c0dSopenharmony_ci a_batch_sizes[i] = a_shape[i]; 993be168c0dSopenharmony_ci b_batch_sizes[i] = b_shape[i]; 994be168c0dSopenharmony_cidiff --git a/mindspore/lite/src/litert/kernel/cpu/int8/matmul_dynamic_base_int8.h b/mindspore/lite/src/litert/kernel/cpu/int8/matmul_dynamic_base_int8.h 995be168c0dSopenharmony_ciindex 3fc20d80..858affc8 100644 996be168c0dSopenharmony_ci--- a/mindspore/lite/src/litert/kernel/cpu/int8/matmul_dynamic_base_int8.h 997be168c0dSopenharmony_ci+++ b/mindspore/lite/src/litert/kernel/cpu/int8/matmul_dynamic_base_int8.h 998be168c0dSopenharmony_ci@@ -58,6 +58,8 @@ class MatmulDynamicBaseInt8CPUKernel : public LiteKernel { 999be168c0dSopenharmony_ci int b_batch_ = 1; 1000be168c0dSopenharmony_ci std::vector<int> a_offset_; 1001be168c0dSopenharmony_ci std::vector<int> b_offset_; 1002be168c0dSopenharmony_ci+ int a_quant_offset_ = 0; 1003be168c0dSopenharmony_ci+ int b_quant_offset_ = 0; 1004be168c0dSopenharmony_ci typedef void (*PackFunc)(const int8_t *src, int8_t *dst, int row, int col); 1005be168c0dSopenharmony_ci virtual void InitParameter() = 0; 1006be168c0dSopenharmony_ci int TransferA(); 1007be168c0dSopenharmony_ci@@ -69,14 +71,15 @@ class MatmulDynamicBaseInt8CPUKernel : public LiteKernel { 1008be168c0dSopenharmony_ci int InitMatrixABuffer(); 1009be168c0dSopenharmony_ci void FreeMatrixABuffer(); 1010be168c0dSopenharmony_ci 1011be168c0dSopenharmony_ci- protected: 1012be168c0dSopenharmony_ci MatMulParameter *param_ = nullptr; 1013be168c0dSopenharmony_ci 
MatmulDynamicQuantParameter *quant_param_ = nullptr; 1014be168c0dSopenharmony_ci int8_t *pack_a_ptr_ = nullptr; 1015be168c0dSopenharmony_ci int8_t *pack_b_ptr_ = nullptr; 1016be168c0dSopenharmony_ci 1017be168c0dSopenharmony_ci bool input_per_channel_ = false; 1018be168c0dSopenharmony_ci- bool filter_per_channel_ = true; 1019be168c0dSopenharmony_ci+ bool input_per_batch_channel_ = false; 1020be168c0dSopenharmony_ci+ bool filter_per_channel_ = false; 1021be168c0dSopenharmony_ci+ bool filter_per_batch_channel_ = false; 1022be168c0dSopenharmony_ci int8_t *batch_input_ptr_ = nullptr; 1023be168c0dSopenharmony_ci int8_t *batch_weight_ptr_ = nullptr; 1024be168c0dSopenharmony_ci int8_t *batch_a_ptr_ = nullptr; 1025be168c0dSopenharmony_cidiff --git a/mindspore/lite/tools/converter/quantizer/insert_quant_node_manager.cc b/mindspore/lite/tools/converter/quantizer/insert_quant_node_manager.cc 1026be168c0dSopenharmony_ciindex 721a1a8c..03113eaa 100644 1027be168c0dSopenharmony_ci--- a/mindspore/lite/tools/converter/quantizer/insert_quant_node_manager.cc 1028be168c0dSopenharmony_ci+++ b/mindspore/lite/tools/converter/quantizer/insert_quant_node_manager.cc 1029be168c0dSopenharmony_ci@@ -102,7 +102,7 @@ int InsertQuantNodeManager::InsertDynamicQuantWithIndex(const FuncGraphPtr &grap 1030be168c0dSopenharmony_ci bool symmetric = activation_channel ? 
true : false; 1031be168c0dSopenharmony_ci primitive->set_symmetric(symmetric); 1032be168c0dSopenharmony_ci primitive->set_activation_channel(activation_channel); 1033be168c0dSopenharmony_ci- if (activation_channel && SetPreferAxis(cnode, index, primitive) != RET_OK) { 1034be168c0dSopenharmony_ci+ if (activation_channel && SetPreferAxes(cnode, index, primitive) != RET_OK) { 1035be168c0dSopenharmony_ci MS_LOG(ERROR) << "Set prefer axis failed, " << cnode->fullname_with_scope(); 1036be168c0dSopenharmony_ci return RET_ERROR; 1037be168c0dSopenharmony_ci } 1038be168c0dSopenharmony_ci@@ -127,18 +127,25 @@ int InsertQuantNodeManager::InsertDynamicQuantWithIndex(const FuncGraphPtr &grap 1039be168c0dSopenharmony_ci return RET_OK; 1040be168c0dSopenharmony_ci } 1041be168c0dSopenharmony_ci 1042be168c0dSopenharmony_ci-int InsertQuantNodeManager::SetPreferAxis(const CNodePtr &cnode, size_t index, 1043be168c0dSopenharmony_ci+int InsertQuantNodeManager::SetPreferAxes(const CNodePtr &cnode, size_t index, 1044be168c0dSopenharmony_ci const std::shared_ptr<ops::DynamicQuant> &dynamic_primitive) { 1045be168c0dSopenharmony_ci auto primitive = GetValueNode<PrimitivePtr>(cnode->input(0)); 1046be168c0dSopenharmony_ci if (primitive->name() == ops::kNameMatMulFusion || primitive->name() == ops::kNameMatMul) { 1047be168c0dSopenharmony_ci auto matmul_prim = api::MakeShared<ops::MatMul>(primitive); 1048be168c0dSopenharmony_ci CHECK_NULL_RETURN(matmul_prim); 1049be168c0dSopenharmony_ci+ auto shape = opt::GetAnfNodeOutputShape(cnode->input(index), 0); 1050be168c0dSopenharmony_ci+ std::vector<int> prefer_axes; 1051be168c0dSopenharmony_ci+ for (int i = 0; i < static_cast<int>(shape.size()) - C2NUM; ++i) { 1052be168c0dSopenharmony_ci+ prefer_axes.push_back(i); 1053be168c0dSopenharmony_ci+ } 1054be168c0dSopenharmony_ci // For MatMul A 1055be168c0dSopenharmony_ci if (index == kInputIndex + kPrimOffset) { 1056be168c0dSopenharmony_ci if (matmul_prim->GetAttr(ops::kTransposeA) != nullptr && 
matmul_prim->get_transpose_a()) { 1057be168c0dSopenharmony_ci+ prefer_axes.push_back(kLastFisrtIndex); 1058be168c0dSopenharmony_ci dynamic_primitive->set_prefer_axis(kLastFisrtIndex); 1059be168c0dSopenharmony_ci dynamic_primitive->set_transpose(true); 1060be168c0dSopenharmony_ci } else { 1061be168c0dSopenharmony_ci+ prefer_axes.push_back(kLastSecondIndex); 1062be168c0dSopenharmony_ci dynamic_primitive->set_prefer_axis(kLastSecondIndex); 1063be168c0dSopenharmony_ci dynamic_primitive->set_transpose(false); 1064be168c0dSopenharmony_ci } 1065be168c0dSopenharmony_ci@@ -146,13 +153,16 @@ int InsertQuantNodeManager::SetPreferAxis(const CNodePtr &cnode, size_t index, 1066be168c0dSopenharmony_ci // For MatMul B 1067be168c0dSopenharmony_ci if (index == kWeightIndex + kPrimOffset) { 1068be168c0dSopenharmony_ci if (matmul_prim->GetAttr(ops::kTransposeB) != nullptr && matmul_prim->get_transpose_b()) { 1069be168c0dSopenharmony_ci+ prefer_axes.push_back(kLastSecondIndex); 1070be168c0dSopenharmony_ci dynamic_primitive->set_prefer_axis(kLastSecondIndex); 1071be168c0dSopenharmony_ci dynamic_primitive->set_transpose(true); 1072be168c0dSopenharmony_ci } else { 1073be168c0dSopenharmony_ci+ prefer_axes.push_back(kLastFisrtIndex); 1074be168c0dSopenharmony_ci dynamic_primitive->set_prefer_axis(kLastFisrtIndex); 1075be168c0dSopenharmony_ci dynamic_primitive->set_transpose(false); 1076be168c0dSopenharmony_ci } 1077be168c0dSopenharmony_ci } 1078be168c0dSopenharmony_ci+ dynamic_primitive->set_prefer_axes(prefer_axes); 1079be168c0dSopenharmony_ci } else { 1080be168c0dSopenharmony_ci MS_LOG(WARNING) << "cnode don't need prefer axis, cnode name: " << cnode->fullname_with_scope(); 1081be168c0dSopenharmony_ci } 1082be168c0dSopenharmony_ci@@ -167,13 +177,17 @@ int InsertQuantNodeManager::NewDynamicQuantNode(const FuncGraphPtr &graph, const 1083be168c0dSopenharmony_ci return RET_ERROR; 1084be168c0dSopenharmony_ci } 1085be168c0dSopenharmony_ci auto input = cnode->input(kInputIndex + kPrimOffset); 
1086be168c0dSopenharmony_ci+ auto weight = cnode->input(kWeightIndex + kPrimOffset); 1087be168c0dSopenharmony_ci+ if (activation_channel && (input->isa<mindspore::CNode>() || IsGraphInput(input)) && 1088be168c0dSopenharmony_ci+ (weight->isa<mindspore::CNode>() || IsGraphInput(weight))) { 1089be168c0dSopenharmony_ci+ return RET_NOT_SUPPORT; 1090be168c0dSopenharmony_ci+ } 1091be168c0dSopenharmony_ci if (input->isa<mindspore::CNode>() || IsGraphInput(input)) { 1092be168c0dSopenharmony_ci auto ret = InsertDynamicQuantWithIndex(graph, cnode, kInputIndex + kPrimOffset, activation_channel); 1093be168c0dSopenharmony_ci if (ret != RET_OK) { 1094be168c0dSopenharmony_ci MS_LOG(ERROR) << "Insert dynamic quant with index failed."; 1095be168c0dSopenharmony_ci } 1096be168c0dSopenharmony_ci } 1097be168c0dSopenharmony_ci- auto weight = cnode->input(kWeightIndex + kPrimOffset); 1098be168c0dSopenharmony_ci if (weight->isa<mindspore::CNode>() || IsGraphInput(weight)) { 1099be168c0dSopenharmony_ci auto ret = InsertDynamicQuantWithIndex(graph, cnode, kWeightIndex + kPrimOffset, activation_channel); 1100be168c0dSopenharmony_ci if (ret != RET_OK) { 1101be168c0dSopenharmony_ci@@ -218,6 +232,9 @@ int InsertQuantNodeManager::InsertDynamicQuantNode(const FuncGraphPtr &graph, 1102be168c0dSopenharmony_ci continue; 1103be168c0dSopenharmony_ci } 1104be168c0dSopenharmony_ci ret = NewDynamicQuantNode(graph, cnode, activation_channel); 1105be168c0dSopenharmony_ci+ if (ret == RET_NOT_SUPPORT) { 1106be168c0dSopenharmony_ci+ continue; 1107be168c0dSopenharmony_ci+ } 1108be168c0dSopenharmony_ci if (ret != RET_OK) { 1109be168c0dSopenharmony_ci MS_LOG(ERROR) << "node:" << op_name << " new dynamic quant node failed."; 1110be168c0dSopenharmony_ci return ret; 1111be168c0dSopenharmony_ci@@ -684,7 +701,7 @@ int InsertQuantNodeManager::InsertQuantDtypeCastFlyNode(const FuncGraphPtr &func 1112be168c0dSopenharmony_ci 1113be168c0dSopenharmony_ci int InsertQuantNodeManager::CalculateScaleZPNode(const FuncGraphPtr 
&func_graph, const CNodePtr &cnode, 1114be168c0dSopenharmony_ci size_t input_index, ParameterPtr *scales_node, ParameterPtr *zps_node, 1115be168c0dSopenharmony_ci- TypeId src_dtype, TypeId dst_dtype, int axis) { 1116be168c0dSopenharmony_ci+ TypeId dst_dtype, int axis) { 1117be168c0dSopenharmony_ci CHECK_NULL_RETURN(scales_node); 1118be168c0dSopenharmony_ci CHECK_NULL_RETURN(zps_node); 1119be168c0dSopenharmony_ci auto input_node = cnode->input(input_index); 1120be168c0dSopenharmony_ci@@ -785,7 +802,7 @@ int InsertQuantNodeManager::InsertAscendAntiQuantNode(const FuncGraphPtr &func_g 1121be168c0dSopenharmony_ci CHECK_NULL_RETURN(cast_cnode); 1122be168c0dSopenharmony_ci ParameterPtr scales_node; 1123be168c0dSopenharmony_ci ParameterPtr zps_node; 1124be168c0dSopenharmony_ci- auto ret = CalculateScaleZPNode(func_graph, cnode, input_index, &scales_node, &zps_node, src_dtype, dst_dtype, axis); 1125be168c0dSopenharmony_ci+ auto ret = CalculateScaleZPNode(func_graph, cnode, input_index, &scales_node, &zps_node, dst_dtype, axis); 1126be168c0dSopenharmony_ci if (ret != RET_OK) { 1127be168c0dSopenharmony_ci MS_LOG(ERROR) << "Fail to Remove node: " << input_node->fullname_with_scope() << " quant param"; 1128be168c0dSopenharmony_ci return RET_ERROR; 1129be168c0dSopenharmony_cidiff --git a/mindspore/lite/tools/converter/quantizer/insert_quant_node_manager.h b/mindspore/lite/tools/converter/quantizer/insert_quant_node_manager.h 1130be168c0dSopenharmony_ciindex a46e8c68..6f328485 100644 1131be168c0dSopenharmony_ci--- a/mindspore/lite/tools/converter/quantizer/insert_quant_node_manager.h 1132be168c0dSopenharmony_ci+++ b/mindspore/lite/tools/converter/quantizer/insert_quant_node_manager.h 1133be168c0dSopenharmony_ci@@ -75,13 +75,12 @@ class InsertQuantNodeManager { 1134be168c0dSopenharmony_ci int MarkDynamicQuantize(const CNodePtr &cnode); 1135be168c0dSopenharmony_ci 1136be168c0dSopenharmony_ci int CalculateScaleZPNode(const FuncGraphPtr &func_graph, const CNodePtr &cnode, size_t 
input_index, 1137be168c0dSopenharmony_ci- ParameterPtr *scales_node, ParameterPtr *zps_node, TypeId src_dtype, TypeId dst_dtype, 1138be168c0dSopenharmony_ci- int axis); 1139be168c0dSopenharmony_ci+ ParameterPtr *scales_node, ParameterPtr *zps_node, TypeId dst_dtype, int axis); 1140be168c0dSopenharmony_ci 1141be168c0dSopenharmony_ci int InsertDynamicQuantWithIndex(const FuncGraphPtr &graph, const CNodePtr &cnode, size_t index, 1142be168c0dSopenharmony_ci bool activation_channel = true); 1143be168c0dSopenharmony_ci 1144be168c0dSopenharmony_ci- int SetPreferAxis(const CNodePtr &cnode, size_t index, const std::shared_ptr<ops::DynamicQuant> &dynamic_primitive); 1145be168c0dSopenharmony_ci+ int SetPreferAxes(const CNodePtr &cnode, size_t index, const std::shared_ptr<ops::DynamicQuant> &dynamic_primitive); 1146be168c0dSopenharmony_ci 1147be168c0dSopenharmony_ci int SetCastNodeAbstract(const CNodePtr &cnode, const AnfNodePtr &input_node, const CNodePtr &cast_cnode); 1148be168c0dSopenharmony_ci 1149be168c0dSopenharmony_ci-- 1150be168c0dSopenharmony_ci2.25.1 1151be168c0dSopenharmony_ci 1152