1// Copyright 2014 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef V8_STRINGS_UNICODE_DECODER_H_
6#define V8_STRINGS_UNICODE_DECODER_H_
7
8#include "src/base/vector.h"
9#include "src/strings/unicode.h"
10
11namespace v8 {
12namespace internal {
13
14// The return value may point to the first aligned word containing the first
15// non-one-byte character, rather than directly to the non-one-byte character.
16// If the return value is >= the passed length, the entire string was
17// one-byte.
18inline int NonAsciiStart(const uint8_t* chars, int length) {
19  const uint8_t* start = chars;
20  const uint8_t* limit = chars + length;
21
22  if (static_cast<size_t>(length) >= kIntptrSize) {
23    // Check unaligned bytes.
24    while (!IsAligned(reinterpret_cast<intptr_t>(chars), kIntptrSize)) {
25      if (*chars > unibrow::Utf8::kMaxOneByteChar) {
26        return static_cast<int>(chars - start);
27      }
28      ++chars;
29    }
30    // Check aligned words.
31    DCHECK_EQ(unibrow::Utf8::kMaxOneByteChar, 0x7F);
32    const uintptr_t non_one_byte_mask = kUintptrAllBitsSet / 0xFF * 0x80;
33    while (chars + sizeof(uintptr_t) <= limit) {
34      if (*reinterpret_cast<const uintptr_t*>(chars) & non_one_byte_mask) {
35        return static_cast<int>(chars - start);
36      }
37      chars += sizeof(uintptr_t);
38    }
39  }
40  // Check remaining unaligned bytes.
41  while (chars < limit) {
42    if (*chars > unibrow::Utf8::kMaxOneByteChar) {
43      return static_cast<int>(chars - start);
44    }
45    ++chars;
46  }
47
48  return static_cast<int>(chars - start);
49}
50
// Holds the result of analysing a UTF-8 byte sequence (its encoding class, the
// offset reported for the first non-ASCII byte and a UTF-16 length) and
// declares Decode() for writing the decoded characters to a caller-supplied
// buffer. The constructor and Decode() are defined outside this header.
class V8_EXPORT_PRIVATE Utf8Decoder final {
 public:
  // Classification of the analysed input. The enumerator order is
  // significant: is_one_byte() relies on kAscii < kLatin1 < kUtf16.
  enum class Encoding : uint8_t { kAscii, kLatin1, kUtf16 };

  // Constructs a decoder for |chars|. The definition is not in this header;
  // presumably it scans |chars| and populates the fields below.
  explicit Utf8Decoder(const base::Vector<const uint8_t>& chars);

  // True iff the input was classified as Encoding::kAscii.
  bool is_ascii() const { return encoding_ == Encoding::kAscii; }
  // True iff the input was classified as kAscii or kLatin1 (everything ordered
  // at or before kLatin1 in Encoding).
  bool is_one_byte() const { return encoding_ <= Encoding::kLatin1; }
  // Returns the stored UTF-16 length; presumably the number of UTF-16 code
  // units the decoded input occupies -- confirm against the constructor.
  int utf16_length() const { return utf16_length_; }
  // Returns the stored non-ASCII start offset; presumably the value produced
  // by NonAsciiStart() for the input -- confirm against the constructor.
  int non_ascii_start() const { return non_ascii_start_; }

  // Writes the decoded form of |data| to |out|, where Char is the output
  // character type. Defined out of line.
  // NOTE(review): presumably |data| must be the same bytes passed to the
  // constructor and |out| must have room for utf16_length() elements --
  // confirm against the definition.
  template <typename Char>
  V8_EXPORT_PRIVATE void Decode(Char* out,
                                const base::Vector<const uint8_t>& data);

 private:
  Encoding encoding_;
  int non_ascii_start_;
  int utf16_length_;
};
71
72}  // namespace internal
73}  // namespace v8
74
75#endif  // V8_STRINGS_UNICODE_DECODER_H_
76