18bf80f4bSopenharmony_ci/* 28bf80f4bSopenharmony_ci * Copyright (c) 2024 Huawei Device Co., Ltd. 38bf80f4bSopenharmony_ci * Licensed under the Apache License, Version 2.0 (the "License"); 48bf80f4bSopenharmony_ci * you may not use this file except in compliance with the License. 58bf80f4bSopenharmony_ci * You may obtain a copy of the License at 68bf80f4bSopenharmony_ci * 78bf80f4bSopenharmony_ci * http://www.apache.org/licenses/LICENSE-2.0 88bf80f4bSopenharmony_ci * 98bf80f4bSopenharmony_ci * Unless required by applicable law or agreed to in writing, software 108bf80f4bSopenharmony_ci * distributed under the License is distributed on an "AS IS" BASIS, 118bf80f4bSopenharmony_ci * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 128bf80f4bSopenharmony_ci * See the License for the specific language governing permissions and 138bf80f4bSopenharmony_ci * limitations under the License. 148bf80f4bSopenharmony_ci */ 158bf80f4bSopenharmony_ci#include "csv_parser.h" 168bf80f4bSopenharmony_ci 178bf80f4bSopenharmony_ci#include <algorithm> 188bf80f4bSopenharmony_ci#include <cctype> 198bf80f4bSopenharmony_ci 208bf80f4bSopenharmony_ciMETA_BEGIN_NAMESPACE() 218bf80f4bSopenharmony_ci 228bf80f4bSopenharmony_ciCsvParser::CsvParser(BASE_NS::string_view csv, const char delimiter) : delimiter_(delimiter), csv_(csv) {} 238bf80f4bSopenharmony_ci 248bf80f4bSopenharmony_cibool CsvParser::GetRow(CsvRow& row) 258bf80f4bSopenharmony_ci{ 268bf80f4bSopenharmony_ci auto nextRow = ParseRow(); 278bf80f4bSopenharmony_ci row.swap(nextRow); 288bf80f4bSopenharmony_ci return !row.empty(); 298bf80f4bSopenharmony_ci} 308bf80f4bSopenharmony_ci 318bf80f4bSopenharmony_civoid CsvParser::Reset() 328bf80f4bSopenharmony_ci{ 338bf80f4bSopenharmony_ci pos_ = 0; 348bf80f4bSopenharmony_ci} 358bf80f4bSopenharmony_ci 368bf80f4bSopenharmony_ci/** 378bf80f4bSopenharmony_ci * @brief Returns a trimmed string based on state. 388bf80f4bSopenharmony_ci * @param sv The string to trim. 398bf80f4bSopenharmony_ci * @param state State of the parser. 408bf80f4bSopenharmony_ci * @return If state is QUOTED, returns the string itself. Otherwise returns the string 418bf80f4bSopenharmony_ci * trimmed from trailing and leading whitespace. 428bf80f4bSopenharmony_ci */ 438bf80f4bSopenharmony_ciBASE_NS::string_view CsvParser::Trimmed(BASE_NS::string_view sv, State state) 448bf80f4bSopenharmony_ci{ 458bf80f4bSopenharmony_ci if (state == QUOTED) { 468bf80f4bSopenharmony_ci return sv; 478bf80f4bSopenharmony_ci } 488bf80f4bSopenharmony_ci constexpr auto nspace = [](unsigned char ch) { return !std::isspace(static_cast<int>(ch)); }; 498bf80f4bSopenharmony_ci sv.remove_suffix(std::distance(std::find_if(sv.rbegin(), sv.rend(), nspace).base(), sv.end())); 508bf80f4bSopenharmony_ci sv.remove_prefix(std::find_if(sv.begin(), sv.end(), nspace) - sv.begin()); 518bf80f4bSopenharmony_ci return sv; 528bf80f4bSopenharmony_ci} 538bf80f4bSopenharmony_ci 548bf80f4bSopenharmony_cistd::pair<bool, char> HandleEscaped(char next) 558bf80f4bSopenharmony_ci{ 568bf80f4bSopenharmony_ci std::pair<bool, char> result { true, next }; 578bf80f4bSopenharmony_ci switch (next) { 588bf80f4bSopenharmony_ci case 'n': 598bf80f4bSopenharmony_ci result.second = '\n'; 608bf80f4bSopenharmony_ci break; 618bf80f4bSopenharmony_ci case '\\': 628bf80f4bSopenharmony_ci result.second = '\\'; 638bf80f4bSopenharmony_ci break; 648bf80f4bSopenharmony_ci case 't': 658bf80f4bSopenharmony_ci result.second = '\t'; 668bf80f4bSopenharmony_ci break; 678bf80f4bSopenharmony_ci case '"': 688bf80f4bSopenharmony_ci result.second = '"'; 698bf80f4bSopenharmony_ci break; 708bf80f4bSopenharmony_ci default: 718bf80f4bSopenharmony_ci result.first = false; 728bf80f4bSopenharmony_ci break; 738bf80f4bSopenharmony_ci } 748bf80f4bSopenharmony_ci return result; 758bf80f4bSopenharmony_ci} 768bf80f4bSopenharmony_ci 778bf80f4bSopenharmony_ciCsvParser::CsvRow CsvParser::ParseRow() 788bf80f4bSopenharmony_ci{ 798bf80f4bSopenharmony_ci BASE_NS::vector<BASE_NS::string> items; 808bf80f4bSopenharmony_ci BASE_NS::string item; 818bf80f4bSopenharmony_ci State state { NO_QUOTE }; 828bf80f4bSopenharmony_ci 838bf80f4bSopenharmony_ci while (pos_ < csv_.size()) { 848bf80f4bSopenharmony_ci auto c = csv_[pos_++]; 858bf80f4bSopenharmony_ci if (c == '\r') { // Ignore carriage returns 868bf80f4bSopenharmony_ci continue; 878bf80f4bSopenharmony_ci } 888bf80f4bSopenharmony_ci if (c == '"') { 898bf80f4bSopenharmony_ci if (state == IN_QUOTE && pos_ < csv_.size() - 1 && csv_[pos_] == '"') { 908bf80f4bSopenharmony_ci // Double quotes interpreted as a single quote 918bf80f4bSopenharmony_ci item += c; 928bf80f4bSopenharmony_ci pos_++; 938bf80f4bSopenharmony_ci } else { // Begin/end quote 948bf80f4bSopenharmony_ci state = (state == NO_QUOTE) ? IN_QUOTE : QUOTED; 958bf80f4bSopenharmony_ci if (state == IN_QUOTE) { 968bf80f4bSopenharmony_ci // Quoted part starts, ignore anything before it 978bf80f4bSopenharmony_ci item.clear(); 988bf80f4bSopenharmony_ci } 998bf80f4bSopenharmony_ci } 1008bf80f4bSopenharmony_ci } else if (c == delimiter_ && state != IN_QUOTE) { 1018bf80f4bSopenharmony_ci // Delimiter found while not within quotes, move to next item 1028bf80f4bSopenharmony_ci items.emplace_back(Trimmed(item, state)); 1038bf80f4bSopenharmony_ci item.clear(); 1048bf80f4bSopenharmony_ci state = NO_QUOTE; 1058bf80f4bSopenharmony_ci } else if (c == '\n' && state != IN_QUOTE) { 1068bf80f4bSopenharmony_ci // End of line while not within quotes, the row is complete 1078bf80f4bSopenharmony_ci break; 1088bf80f4bSopenharmony_ci } else if (state != QUOTED) { 1098bf80f4bSopenharmony_ci // By default include character in result, unless we already had 1108bf80f4bSopenharmony_ci // quoted content, then anything outside of quotes is ignored until 1118bf80f4bSopenharmony_ci // next delimiter 1128bf80f4bSopenharmony_ci if (c == '\\' && pos_ < csv_.size() - 1) { 1138bf80f4bSopenharmony_ci if (auto esc = HandleEscaped(csv_[pos_]); esc.first) { 1148bf80f4bSopenharmony_ci item += esc.second; 1158bf80f4bSopenharmony_ci pos_++; 1168bf80f4bSopenharmony_ci continue; 1178bf80f4bSopenharmony_ci } 1188bf80f4bSopenharmony_ci } 1198bf80f4bSopenharmony_ci item += c; 1208bf80f4bSopenharmony_ci } 1218bf80f4bSopenharmony_ci } 1228bf80f4bSopenharmony_ci 1238bf80f4bSopenharmony_ci // Any leftover since the last delimiter is the last item on the row 1248bf80f4bSopenharmony_ci if (auto trimmed = Trimmed(item, state); !trimmed.empty()) { 1258bf80f4bSopenharmony_ci items.emplace_back(trimmed); 1268bf80f4bSopenharmony_ci } 1278bf80f4bSopenharmony_ci return items; 1288bf80f4bSopenharmony_ci} 1298bf80f4bSopenharmony_ci 1308bf80f4bSopenharmony_ciMETA_END_NAMESPACE() 131