1/*
2 * Copyright (c) 2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 *     http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16#include "js_uri.h"
17#include "tools/log.h"
18namespace OHOS::Uri {
19    std::bitset<MAX_BIT_SIZE> g_ruleAlpha;
20    std::bitset<MAX_BIT_SIZE> g_ruleScheme;
21    std::bitset<MAX_BIT_SIZE> g_ruleUrlc;
22    std::bitset<MAX_BIT_SIZE> g_rulePath;
23    std::bitset<MAX_BIT_SIZE> g_ruleUserInfo;
24    std::bitset<MAX_BIT_SIZE> g_ruleDigit;
25    std::bitset<MAX_BIT_SIZE> g_rulePort;
26    void Uri::PreliminaryWork() const
27    {
28        std::string digitAggregate = "0123456789";
29        for (size_t i = 0; i < digitAggregate.size(); ++i) {
30            g_ruleDigit.set(digitAggregate[i]);
31        }
32
33        std::string alphasAggregate = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
34        for (size_t i = 0; i < alphasAggregate.size(); ++i) {
35            g_ruleAlpha.set(alphasAggregate[i]);
36        }
37
38        std::string schemeAggregate = digitAggregate + alphasAggregate + "+-.| _-~!$&=,;'(){}*";
39        for (size_t i = 0; i < schemeAggregate.size(); ++i) {
40            g_ruleScheme.set(schemeAggregate[i]);
41        }
42
43        std::string uricAggregate = schemeAggregate + ";/?:@&=$,[]_!~*'()%\"";
44        for (size_t i = 0; i < uricAggregate.size(); ++i) {
45            g_ruleUrlc.set(uricAggregate[i]);
46        }
47
48        std::string pathAggregate = schemeAggregate + ";/:@&=$,_!~*'()%";
49        for (size_t i = 0; i < pathAggregate.size(); ++i) {
50            g_rulePath.set(pathAggregate[i]);
51        }
52
53        std::string userInfoAggregate = schemeAggregate + ";:&=$,_!~*'()%";
54        for (size_t i = 0; i < userInfoAggregate.size(); ++i) {
55            g_ruleUserInfo.set(userInfoAggregate[i]);
56        }
57
58        std::string portAggregate = digitAggregate + alphasAggregate + ".:@-;&=+$,-_!~*'()";
59        for (size_t i = 0; i < portAggregate.size(); ++i) {
60            g_rulePort.set(portAggregate[i]);
61        }
62    }
63
64    Uri::Uri(const std::string input)
65    {
66        PreliminaryWork();
67        errStr_ = "";
68        if (input.empty()) {
69            errStr_ = "uri is empty";
70            return;
71        }
72        inputUri_ = input;
73        AnalysisUri();
74    }
75
76    void Uri::AssignSchemeSpecificPart()
77    {
78        uriData_.SchemeSpecificPart.reserve(data_.length() + uriData_.query.length() + 1);
79        uriData_.SchemeSpecificPart.append(data_);
80        if (!uriData_.query.empty()) {
81            uriData_.SchemeSpecificPart.append("?");
82            uriData_.SchemeSpecificPart.append(uriData_.query);
83        }
84    }
85
86    void Uri::AnalysisUri()
87    {
88        data_ = inputUri_;
89        size_t pos = data_.find('#'); // Fragment
90        if (pos != std::string::npos) {
91            AnalysisFragment(pos);
92            if (!errStr_.empty()) {
93                return;
94            }
95        }
96        pos = data_.find('?'); // Query
97        if (pos != std::string::npos) {
98            AnalysisQuery(pos);
99            if (!errStr_.empty()) {
100                return;
101            }
102        }
103        pos = data_.find(':'); // Scheme
104        if (pos != std::string::npos) {
105            AnalysisScheme(pos);
106            if (!errStr_.empty()) {
107                return;
108            }
109        } else {
110            SpecialPath();
111            if (!errStr_.empty()) {
112                return;
113            }
114            AssignSchemeSpecificPart();
115            return;
116        }
117        pos = data_.find("//"); // userInfo path host port ipv4 or ipv6
118        if (pos != std::string::npos && pos == 0) {
119            AssignSchemeSpecificPart();
120            data_ = data_.substr(2); // 2:Intercept the string from the second subscript
121            AnalysisHostAndPath();
122            if (!errStr_.empty()) {
123                return;
124            }
125        } else if (data_[0] == '/') {
126            uriData_.path = data_;
127            AssignSchemeSpecificPart();
128            data_ = "";
129        } else {
130            AssignSchemeSpecificPart();
131            uriData_.query = "";
132            data_ = "";
133        }
134    }
135
136    bool Uri::CheckCharacter(std::string data, std::bitset<MAX_BIT_SIZE> rule, bool flag) const
137    {
138        size_t dataLen = data.size();
139        for (size_t i = 0; i < dataLen; ++i) {
140            if (static_cast<int>(data[i]) >= 0 && static_cast<int>(data[i]) < 128) { // 128:ASCII Max Number
141                bool isLegal = rule.test(data[i]);
142                if (!isLegal) {
143                    return false;
144                }
145            } else if (!flag) {
146                return false;
147            }
148        }
149        return true;
150    }
151
152    void Uri::SpecialPath()
153    {
154        if (!CheckCharacter(data_, g_rulePath, true)) {
155            errStr_ = "SpecialPath does not conform to the rule";
156            return;
157        }
158        uriData_.path = data_;
159        data_ = "";
160    }
161
162    void Uri::AnalysisFragment(size_t pos)
163    {
164        if (pos == 0) {
165            errStr_ = "#It can't be the first";
166            return;
167        }
168        std::string fragment = data_.substr(pos + 1);
169        if (!CheckCharacter(fragment, g_ruleUrlc, true)) {
170            errStr_ = "Fragment does not conform to the rule";
171            return;
172        }
173        uriData_.fragment = fragment;
174        data_ = data_.substr(0, pos);
175    }
176
177    void Uri::AnalysisQuery(size_t pos)
178    {
179        std::string query = data_.substr(pos + 1);
180        if (!CheckCharacter(query, g_ruleUrlc, true)) {
181            errStr_ = "Query does not conform to the rule";
182            return;
183        }
184        uriData_.query = query;
185        data_ = data_.substr(0, pos);
186    }
187
188    void Uri::AnalysisScheme(size_t pos)
189    {
190        size_t slashPos = data_.find('/');
191        if (slashPos != std::string::npos && slashPos < pos) {
192            SpecialPath();
193            uriData_.SchemeSpecificPart.reserve(uriData_.path.length() + uriData_.query.length() + 1);
194            uriData_.SchemeSpecificPart.append(uriData_.path);
195            uriData_.SchemeSpecificPart.append("?");
196            uriData_.SchemeSpecificPart.append(uriData_.query);
197            data_ = "";
198        } else {
199            if ((static_cast<int>(data_[0]) >= 0 && static_cast<int>(data_[0]) < MAX_BIT_SIZE) &&
200                !g_ruleAlpha.test(data_[0])) {
201                errStr_ = "Scheme the first character must be a letter";
202                return;
203            }
204            std::string scheme = data_.substr(0, pos);
205            if (!CheckCharacter(scheme, g_ruleScheme, false)) {
206                errStr_ = "scheme does not conform to the rule";
207                return;
208            }
209            uriData_.scheme = scheme;
210            data_ = data_.substr(pos + 1);
211        }
212    }
213
214    void Uri::AnalysisHost(bool isLawfulProt)
215    {
216        // find ipv4 or ipv6 or host
217        if (data_[0] == '[') {
218            if (data_[data_.size() - 1] == ']') {
219                // IPV6
220                if (!isLawfulProt) {
221                    errStr_ = "Prot does not conform to the rule";
222                    return;
223                }
224                AnalysisIPV6();
225            } else {
226                errStr_ = "IPv6 is missing a closing bracket";
227                return;
228            }
229        } else {
230            if (data_.find('[') != std::string::npos || data_.find(']') != std::string::npos) {
231                errStr_ = "host does not conform to the rule";
232                return;
233            }
234            // ipv4
235            if (!isLawfulProt || !AnalysisIPV4()) {
236                uriData_.port = -1;
237                uriData_.host = "";
238                uriData_.userInfo = "";
239            }
240        }
241    }
242
243    void Uri::AnalysisHostAndPath()
244    {
245        if (data_.empty()) {
246            return;
247        }
248        // find path
249        size_t pos = data_.find('/');
250        if (pos != std::string::npos) {
251            AnalysisPath(pos);
252            if (!errStr_.empty()) {
253                return;
254            }
255        }
256        uriData_.authority = data_;
257        // find UserInfo
258        pos = data_.find('@');
259        if (pos != std::string::npos) {
260            AnalysisUserInfo(pos);
261            if (!errStr_.empty()) {
262                return;
263            }
264        }
265        bool isLawfulProt = true;
266        // find port
267        pos = data_.rfind(':');
268        if (pos != std::string::npos) {
269            size_t pos1 = data_.rfind(']');
270            if (pos1 == std::string::npos || pos > pos1) {
271                isLawfulProt = AnalysisPort(pos);
272            }
273            if (!errStr_.empty()) {
274            return;
275            }
276        }
277        AnalysisHost(isLawfulProt);
278    }
279
280    void Uri::AnalysisPath(size_t pos)
281    {
282        std::string path = data_.substr(pos);
283        if (!CheckCharacter(path, g_rulePath, true)) {
284            errStr_ = "path does not conform to the rule";
285            return;
286        }
287        uriData_.path = path;
288        data_ = data_.substr(0, pos);
289    }
290
291    void Uri::AnalysisUserInfo(size_t pos)
292    {
293        std::string userInfo = data_.substr(0, pos);
294        if (!CheckCharacter(userInfo, g_ruleUserInfo, true)) {
295            errStr_ = "userInfo does not conform to the rule";
296            return;
297        }
298        uriData_.userInfo = userInfo;
299        data_ = data_.substr(pos + 1);
300    }
301
302    bool Uri::AnalysisPort(size_t pos)
303    {
304        std::string port = data_.substr(pos + 1);
305        if (!CheckCharacter(port, g_rulePort, true)) {
306            errStr_ = "port does not conform to the rule";
307            return false;
308        } else if (CheckCharacter(port, g_ruleDigit, false)) {
309            if (port.size() == 0 || port.size() > 10) { // 10:The maximum number of bits for int value
310                return false;
311            }
312            double tempPort = std::strtod(port.c_str(), nullptr);
313            if (tempPort < 0 || tempPort > INT32_MAX) {
314                return false;
315            }
316            uriData_.port = static_cast<int>(tempPort);
317            data_ = data_.substr(0, pos);
318            return true;
319        } else {
320            data_ = data_.substr(0, pos);
321            return false;
322        }
323        return false;
324    }
325
326    bool Uri::AnalysisIPV4()
327    {
328        std::regex ipv4("((25[0-5]|2[0-4]\\d|1\\d{2}|[1-9]\\d|\\d)\\.){3}(25[0-5]|2[0-4]\\d|1\\d{2}|[1-9]\\d|\\d)");
329        std::regex hostname("(([a-zA-Z0-9]([a-zA-Z0-9\\-~_|\\+{}!$&=,;:'()\\*\\s]*[a-zA-Z0-9])?\\.)+"
330                           "([a-zA-Z]([a-zA-Z0-9\\-~_|\\+{}!$&=,;:'()\\*\\s]*"
331                           "[a-zA-Z0-9\\-~_|\\+{}!$&=,;:'()\\*\\s])?))|"
332                           "([a-zA-Z0-9]([a-zA-Z0-9\\-~_|\\+{}!$&=,;:'()\\*\\s]*[a-zA-Z0-9])?)");
333        bool isIpv4 = std::regex_match(data_, ipv4);
334        bool isHosName = std::regex_match(data_, hostname);
335        if (!isIpv4 && !isHosName) {
336            return false;
337        } else {
338            uriData_.host = data_;
339            data_ = "";
340            return true;
341        }
342    }
343
344    void Uri::AnalysisIPV6()
345    {
346        std::string str = data_.substr(1, data_.size() - 2); // 2:Intercept the string from the second subscript
347        std::regex ipv6("(::|(:((:[0-9A-Fa-f]{1,4}){1,7}))|(([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|"
348                        "(([0-9A-Fa-f]{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1,6})|:))|(([0-9A-Fa-f]{1,4}:){2}"
349                        "(((:[0-9A-Fa-f]{1,4}){1,5})|:))|(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1,4})"
350                        "|:))|(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1,3})|:))|(([0-9A-Fa-f]{1,4}:){5}"
351                        "(((:[0-9A-Fa-f]{1,4}){1,2})|:))|(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|:))|"
352                        "(((:(:[0-9A-Fa-f]{1,4}){0,5}:)|(([0-9A-Fa-f]{1,4}:){1}(:[0-9A-Fa-f]{1,4}){0,4}:)"
353                        "|(([0-9A-Fa-f]{1,4}:){2}(:[0-9A-Fa-f]{1,4}){0,3}:)|(([0-9A-Fa-f]{1,4}:){3}"
354                        "(:[0-9A-Fa-f]{1,4}){0,2}:)|(([0-9A-Fa-f]{1,4}:){4}(:[0-9A-Fa-f]{1,4})?:)|"
355                        "(([0-9A-Fa-f]{1,4}:){5}:)|(([0-9A-Fa-f]{1,4}:){6}))((25[0-5]|2[0-4]\\d|1\\d{2}|"
356                        "[1-9]\\d|\\d)\\.){3}(25[0-5]|2[0-4]\\d|1\\d{2}|[1-9]\\d|\\d)))(%[a-zA-Z0-9._]+)?");
357        if (!std::regex_match(str, ipv6)) {
358            errStr_ = "ipv6 does not conform to the rule";
359            return;
360        }
361        uriData_.host = data_;
362        data_ = "";
363    }
364
365    bool Uri::Equals(const Uri other) const
366    {
367        if (uriData_.port != other.uriData_.port) {
368            return false;
369        }
370        if (uriData_.scheme != other.uriData_.scheme) {
371            return false;
372        }
373        if (uriData_.userInfo != other.uriData_.userInfo) {
374            return false;
375        }
376        if (uriData_.host != other.uriData_.host) {
377            return false;
378        }
379        if (uriData_.query != other.uriData_.query) {
380            return false;
381        }
382        if (uriData_.fragment != other.uriData_.fragment) {
383            return false;
384        }
385        if (uriData_.path != other.uriData_.path) {
386            return false;
387        }
388        if (uriData_.authority != other.uriData_.authority) {
389            return false;
390        }
391        if (uriData_.SchemeSpecificPart != other.uriData_.SchemeSpecificPart) {
392            return false;
393        }
394        return true;
395    }
396
397    std::string Uri::ToString() const
398    {
399        return inputUri_;
400    }
401
402    bool Uri::IsAbsolute() const
403    {
404        return !uriData_.scheme.empty();
405    }
406
407    bool Uri::IsRelative() const
408    {
409        return uriData_.scheme.empty();
410    }
411
412    bool Uri::IsOpaque() const
413    {
414        return !IsHierarchical();
415    }
416
417    bool Uri::IsHierarchical() const
418    {
419        size_t index = inputUri_.find(':');
420        if (index == std::string::npos) {
421            return true;
422        }
423        if (inputUri_.length() == index + 1) {
424            return false;
425        }
426        return inputUri_[index + 1] == '/';
427    }
428
429    std::string Uri::AddQueryValue(const std::string key, const std::string value) const
430    {
431        return BuildUriString("query", key + "=" + value);
432    }
433
434    std::string Uri::AddSegment(const std::string pathSegment) const
435    {
436        return BuildUriString("segment", pathSegment);
437    }
438
439    std::string Uri::BuildUriString(const std::string str, const std::string param) const
440    {
441        std::string result = "";
442        if (!uriData_.scheme.empty()) {
443            result += uriData_.scheme + ":";
444        }
445        if (!uriData_.authority.empty()) {
446            result += "//" + uriData_.authority;
447        }
448        if (!uriData_.path.empty()) {
449            result += uriData_.path ;
450        }
451        if (str == "segment") {
452            if (result.back() == '/') {
453                result += param;
454            } else {
455                result += "/" + param;
456            }
457        }
458        if (str != "clearquery") {
459            if (uriData_.query.empty()) {
460                if (str == "query") {
461                    result +=  "?" + param;
462                }
463            } else {
464                result +=  "?" + uriData_.query;
465                if (str == "query") {
466                    result +=  "&" + param;
467                }
468            }
469        }
470        if (!uriData_.fragment.empty()) {
471            result +=  "#" + uriData_.fragment;
472        }
473        return result;
474    }
475
476    std::vector<std::string> Uri::GetSegment() const
477    {
478        std::vector<std::string> segments;
479        if (uriData_.path.empty()) {
480            return segments;
481        }
482        size_t previous = 0;
483        size_t current = 0;
484        for (current = uriData_.path.find('/', previous); current != std::string::npos;
485            current = uriData_.path.find('/', previous)) {
486            if (previous < current) {
487                std::string segment = uriData_.path.substr(previous, current - previous);
488                segments.push_back(segment);
489            }
490            previous = current + 1;
491        }
492        if (previous < uriData_.path.length()) {
493            segments.push_back(uriData_.path.substr(previous));
494        }
495        return segments;
496    }
497
498    std::string Uri::IsFailed() const
499    {
500        return errStr_;
501    }
502
503    std::string Uri::Normalize() const
504    {
505        std::vector<std::string> temp;
506        size_t pathLen = uriData_.path.size();
507        if (pathLen == 0) {
508            return this->inputUri_;
509        }
510        size_t pos = 0;
511        size_t left = 0;
512        while ((pos = uriData_.path.find('/', left)) != std::string::npos) {
513            temp.push_back(uriData_.path.substr(left, pos - left));
514            left = pos + 1;
515        }
516        if (left != pathLen) {
517            temp.push_back(uriData_.path.substr(left));
518        }
519        size_t tempLen = temp.size();
520        std::vector<std::string> normalizeTemp;
521        for (size_t i = 0; i < tempLen; ++i) {
522            if (!temp[i].empty() && !(temp[i] == ".") && !(temp[i] == "..")) {
523                normalizeTemp.push_back(temp[i]);
524            }
525            if (temp[i] == "..") {
526                if (!normalizeTemp.empty() && normalizeTemp.back() != "..") {
527                    normalizeTemp.pop_back();
528                } else {
529                    normalizeTemp.push_back(temp[i]);
530                }
531            }
532        }
533        std::string normalizePath = "";
534        tempLen = normalizeTemp.size();
535        if (tempLen == 0) {
536            normalizePath = "/";
537        } else {
538            for (size_t i = 0; i < tempLen; ++i) {
539                normalizePath += "/" + normalizeTemp[i];
540            }
541        }
542        return Split(normalizePath);
543    }
544
545
546    std::string Uri::Split(const std::string &path) const
547    {
548        std::string normalizeUri = "";
549        if (!uriData_.scheme.empty()) {
550            normalizeUri += uriData_.scheme + ":";
551        }
552        if (uriData_.path.empty()) {
553            normalizeUri += uriData_.SchemeSpecificPart;
554        } else {
555            if (!uriData_.host.empty()) {
556                normalizeUri += "//";
557                if (!uriData_.userInfo.empty()) {
558                    normalizeUri += uriData_.userInfo + "@";
559                }
560                normalizeUri += uriData_.host;
561                if (uriData_.port != -1) {
562                    normalizeUri += ":" + std::to_string(uriData_.port);
563                }
564            } else if (!uriData_.authority.empty()) {
565                normalizeUri += "//" + uriData_.authority;
566            }
567            normalizeUri += path;
568        }
569        if (!uriData_.query.empty()) {
570            normalizeUri += "?" + uriData_.query;
571        }
572        if (!uriData_.fragment.empty()) {
573            normalizeUri += "#" + uriData_.fragment;
574        }
575        return normalizeUri;
576    }
577
578    std::string Uri::GetScheme() const
579    {
580        return uriData_.scheme;
581    }
582
583    std::string Uri::GetAuthority() const
584    {
585        return uriData_.authority;
586    }
587
588    std::string Uri::GetSsp() const
589    {
590        return uriData_.SchemeSpecificPart;
591    }
592
593    std::string Uri::GetUserinfo() const
594    {
595        return uriData_.userInfo;
596    }
597
598    std::string Uri::GetHost() const
599    {
600        return uriData_.host;
601    }
602
603    std::string Uri::GetPort() const
604    {
605        return std::to_string(uriData_.port);
606    }
607
608    std::string Uri::GetPath() const
609    {
610        return uriData_.path;
611    }
612
613    std::string Uri::GetQuery() const
614    {
615        return uriData_.query;
616    }
617
618    std::string Uri::GetFragment() const
619    {
620        return uriData_.fragment;
621    }
622
623    std::string Uri::ClearQuery() const
624    {
625        return BuildUriString("clearquery", "");
626    }
627} // namespace OHOS::Uri
628