// Copyright 2014 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

5#ifndef V8_STRINGS_STRING_BUILDER_INL_H_
6#define V8_STRINGS_STRING_BUILDER_INL_H_
7
8#include "src/common/assert-scope.h"
9#include "src/execution/isolate.h"
10#include "src/handles/handles-inl.h"
11#include "src/heap/factory.h"
12#include "src/objects/fixed-array.h"
13#include "src/objects/objects.h"
14#include "src/objects/string-inl.h"
15#include "src/utils/utils.h"
16
17namespace v8 {
18namespace internal {
19
20const int kStringBuilderConcatHelperLengthBits = 11;
21const int kStringBuilderConcatHelperPositionBits = 19;
22
23using StringBuilderSubstringLength =
24    base::BitField<int, 0, kStringBuilderConcatHelperLengthBits>;
25using StringBuilderSubstringPosition =
26    base::BitField<int, kStringBuilderConcatHelperLengthBits,
27                   kStringBuilderConcatHelperPositionBits>;
28
29template <typename sinkchar>
30void StringBuilderConcatHelper(String special, sinkchar* sink,
31                               FixedArray fixed_array, int array_length);
32
33// Returns the result length of the concatenation.
34// On illegal argument, -1 is returned.
35int StringBuilderConcatLength(int special_length, FixedArray fixed_array,
36                              int array_length, bool* one_byte);
37
38class FixedArrayBuilder {
39 public:
40  explicit FixedArrayBuilder(Isolate* isolate, int initial_capacity);
41  explicit FixedArrayBuilder(Handle<FixedArray> backing_store);
42
43  bool HasCapacity(int elements);
44  void EnsureCapacity(Isolate* isolate, int elements);
45
46  void Add(Object value);
47  void Add(Smi value);
48
49  Handle<FixedArray> array() { return array_; }
50
51  int length() { return length_; }
52
53  int capacity();
54
55  Handle<JSArray> ToJSArray(Handle<JSArray> target_array);
56
57 private:
58  Handle<FixedArray> array_;
59  int length_;
60  bool has_non_smi_elements_;
61};
62
63class ReplacementStringBuilder {
64 public:
65  ReplacementStringBuilder(Heap* heap, Handle<String> subject,
66                           int estimated_part_count);
67
68  // Caution: Callers must ensure the builder has enough capacity.
69  static inline void AddSubjectSlice(FixedArrayBuilder* builder, int from,
70                                     int to) {
71    DCHECK_GE(from, 0);
72    int length = to - from;
73    DCHECK_GT(length, 0);
74    if (StringBuilderSubstringLength::is_valid(length) &&
75        StringBuilderSubstringPosition::is_valid(from)) {
76      int encoded_slice = StringBuilderSubstringLength::encode(length) |
77                          StringBuilderSubstringPosition::encode(from);
78      builder->Add(Smi::FromInt(encoded_slice));
79    } else {
80      // Otherwise encode as two smis.
81      builder->Add(Smi::FromInt(-length));
82      builder->Add(Smi::FromInt(from));
83    }
84  }
85
86  void AddSubjectSlice(int from, int to) {
87    EnsureCapacity(2);  // Subject slices are encoded with up to two smis.
88    AddSubjectSlice(&array_builder_, from, to);
89    IncrementCharacterCount(to - from);
90  }
91
92  void AddString(Handle<String> string);
93
94  MaybeHandle<String> ToString();
95
96  void IncrementCharacterCount(int by) {
97    if (character_count_ > String::kMaxLength - by) {
98      STATIC_ASSERT(String::kMaxLength < kMaxInt);
99      character_count_ = kMaxInt;
100    } else {
101      character_count_ += by;
102    }
103  }
104
105 private:
106  void AddElement(Handle<Object> element);
107  void EnsureCapacity(int elements);
108
109  Heap* heap_;
110  FixedArrayBuilder array_builder_;
111  Handle<String> subject_;
112  int character_count_;
113  bool is_one_byte_;
114};
115
116class IncrementalStringBuilder {
117 public:
118  explicit IncrementalStringBuilder(Isolate* isolate);
119
120  V8_INLINE String::Encoding CurrentEncoding() { return encoding_; }
121
122  template <typename SrcChar, typename DestChar>
123  V8_INLINE void Append(SrcChar c);
124
125  V8_INLINE void AppendCharacter(uint8_t c) {
126    if (encoding_ == String::ONE_BYTE_ENCODING) {
127      Append<uint8_t, uint8_t>(c);
128    } else {
129      Append<uint8_t, base::uc16>(c);
130    }
131  }
132
133  template <int N>
134  V8_INLINE void AppendCStringLiteral(const char (&literal)[N]) {
135    // Note that the literal contains the zero char.
136    const int length = N - 1;
137    STATIC_ASSERT(length > 0);
138    if (length == 1) return AppendCharacter(literal[0]);
139    if (encoding_ == String::ONE_BYTE_ENCODING && CurrentPartCanFit(N)) {
140      const uint8_t* chars = reinterpret_cast<const uint8_t*>(literal);
141      SeqOneByteString::cast(*current_part_)
142          .SeqOneByteStringSetChars(current_index_, chars, length);
143      current_index_ += length;
144      if (current_index_ == part_length_) Extend();
145      DCHECK(HasValidCurrentIndex());
146      return;
147    }
148    return AppendCString(literal);
149  }
150
151  V8_INLINE void AppendCString(const char* s) {
152    const uint8_t* u = reinterpret_cast<const uint8_t*>(s);
153    if (encoding_ == String::ONE_BYTE_ENCODING) {
154      while (*u != '\0') Append<uint8_t, uint8_t>(*(u++));
155    } else {
156      while (*u != '\0') Append<uint8_t, base::uc16>(*(u++));
157    }
158  }
159
160  V8_INLINE void AppendCString(const base::uc16* s) {
161    if (encoding_ == String::ONE_BYTE_ENCODING) {
162      while (*s != '\0') Append<base::uc16, uint8_t>(*(s++));
163    } else {
164      while (*s != '\0') Append<base::uc16, base::uc16>(*(s++));
165    }
166  }
167
168  V8_INLINE void AppendInt(int i) {
169    char buffer[kIntToCStringBufferSize];
170    const char* str =
171        IntToCString(i, base::Vector<char>(buffer, kIntToCStringBufferSize));
172    AppendCString(str);
173  }
174
175  V8_INLINE bool CurrentPartCanFit(int length) {
176    return part_length_ - current_index_ > length;
177  }
178
179  // We make a rough estimate to find out if the current string can be
180  // serialized without allocating a new string part. The worst case length of
181  // an escaped character is 6. Shifting the remaining string length right by 3
182  // is a more pessimistic estimate, but faster to calculate.
183  V8_INLINE int EscapedLengthIfCurrentPartFits(int length) {
184    if (length > kMaxPartLength) return 0;
185    STATIC_ASSERT((kMaxPartLength << 3) <= String::kMaxLength);
186    // This shift will not overflow because length is already less than the
187    // maximum part length.
188    int worst_case_length = length << 3;
189    return CurrentPartCanFit(worst_case_length) ? worst_case_length : 0;
190  }
191
192  void AppendString(Handle<String> string);
193
194  MaybeHandle<String> Finish();
195
196  V8_INLINE bool HasOverflowed() const { return overflowed_; }
197
198  int Length() const;
199
200  // Change encoding to two-byte.
201  void ChangeEncoding() {
202    DCHECK_EQ(String::ONE_BYTE_ENCODING, encoding_);
203    ShrinkCurrentPart();
204    encoding_ = String::TWO_BYTE_ENCODING;
205    Extend();
206  }
207
208  template <typename DestChar>
209  class NoExtend {
210   public:
211    NoExtend(String string, int offset,
212             const DisallowGarbageCollection& no_gc) {
213      DCHECK(string.IsSeqOneByteString() || string.IsSeqTwoByteString());
214      if (sizeof(DestChar) == 1) {
215        start_ = reinterpret_cast<DestChar*>(
216            SeqOneByteString::cast(string).GetChars(no_gc) + offset);
217      } else {
218        start_ = reinterpret_cast<DestChar*>(
219            SeqTwoByteString::cast(string).GetChars(no_gc) + offset);
220      }
221      cursor_ = start_;
222#ifdef DEBUG
223      string_ = string;
224#endif
225    }
226
227#ifdef DEBUG
228    ~NoExtend() {
229      DestChar* end;
230      if (sizeof(DestChar) == 1) {
231        auto one_byte_string = SeqOneByteString::cast(string_);
232        end = reinterpret_cast<DestChar*>(one_byte_string.GetChars(no_gc_) +
233                                          one_byte_string.length());
234      } else {
235        auto two_byte_string = SeqTwoByteString::cast(string_);
236        end = reinterpret_cast<DestChar*>(two_byte_string.GetChars(no_gc_) +
237                                          two_byte_string.length());
238      }
239      DCHECK_LE(cursor_, end + 1);
240    }
241#endif
242
243    V8_INLINE void Append(DestChar c) { *(cursor_++) = c; }
244    V8_INLINE void AppendCString(const char* s) {
245      const uint8_t* u = reinterpret_cast<const uint8_t*>(s);
246      while (*u != '\0') Append(*(u++));
247    }
248
249    int written() { return static_cast<int>(cursor_ - start_); }
250
251   private:
252    DestChar* start_;
253    DestChar* cursor_;
254#ifdef DEBUG
255    String string_;
256#endif
257    DISALLOW_GARBAGE_COLLECTION(no_gc_)
258  };
259
260  template <typename DestChar>
261  class NoExtendString : public NoExtend<DestChar> {
262   public:
263    NoExtendString(Handle<String> string, int required_length)
264        : NoExtend<DestChar>(string, 0), string_(string) {
265      DCHECK(string->length() >= required_length);
266    }
267
268    Handle<String> Finalize() {
269      Handle<SeqString> string = Handle<SeqString>::cast(string_);
270      int length = NoExtend<DestChar>::written();
271      Handle<String> result = SeqString::Truncate(string, length);
272      string_ = Handle<String>();
273      return result;
274    }
275
276   private:
277    Handle<String> string_;
278  };
279
280  template <typename DestChar>
281  class NoExtendBuilder : public NoExtend<DestChar> {
282   public:
283    NoExtendBuilder(IncrementalStringBuilder* builder, int required_length,
284                    const DisallowGarbageCollection& no_gc)
285        : NoExtend<DestChar>(*(builder->current_part()),
286                             builder->current_index_, no_gc),
287          builder_(builder) {
288      DCHECK(builder->CurrentPartCanFit(required_length));
289    }
290
291    ~NoExtendBuilder() {
292      builder_->current_index_ += NoExtend<DestChar>::written();
293      DCHECK(builder_->HasValidCurrentIndex());
294    }
295
296   private:
297    IncrementalStringBuilder* builder_;
298  };
299
300  Isolate* isolate() { return isolate_; }
301
302 private:
303  Factory* factory() { return isolate_->factory(); }
304
305  V8_INLINE Handle<String> accumulator() { return accumulator_; }
306
307  V8_INLINE void set_accumulator(Handle<String> string) {
308    accumulator_.PatchValue(*string);
309  }
310
311  V8_INLINE Handle<String> current_part() { return current_part_; }
312
313  V8_INLINE void set_current_part(Handle<String> string) {
314    current_part_.PatchValue(*string);
315  }
316
317  // Add the current part to the accumulator.
318  void Accumulate(Handle<String> new_part);
319
320  // Finish the current part and allocate a new part.
321  void Extend();
322
323  bool HasValidCurrentIndex() const;
324
325  // Shrink current part to the right size.
326  void ShrinkCurrentPart() {
327    DCHECK(current_index_ < part_length_);
328    set_current_part(SeqString::Truncate(
329        Handle<SeqString>::cast(current_part()), current_index_));
330  }
331
332  void AppendStringByCopy(Handle<String> string);
333  bool CanAppendByCopy(Handle<String> string);
334
335  static const int kInitialPartLength = 32;
336  static const int kMaxPartLength = 16 * 1024;
337  static const int kPartLengthGrowthFactor = 2;
338  static const int kIntToCStringBufferSize = 100;
339
340  Isolate* isolate_;
341  String::Encoding encoding_;
342  bool overflowed_;
343  int part_length_;
344  int current_index_;
345  Handle<String> accumulator_;
346  Handle<String> current_part_;
347};
348
349template <typename SrcChar, typename DestChar>
350void IncrementalStringBuilder::Append(SrcChar c) {
351  DCHECK_EQ(encoding_ == String::ONE_BYTE_ENCODING, sizeof(DestChar) == 1);
352  if (sizeof(DestChar) == 1) {
353    DCHECK_EQ(String::ONE_BYTE_ENCODING, encoding_);
354    SeqOneByteString::cast(*current_part_)
355        .SeqOneByteStringSet(current_index_++, c);
356  } else {
357    DCHECK_EQ(String::TWO_BYTE_ENCODING, encoding_);
358    SeqTwoByteString::cast(*current_part_)
359        .SeqTwoByteStringSet(current_index_++, c);
360  }
361  if (current_index_ == part_length_) Extend();
362  DCHECK(HasValidCurrentIndex());
363}
364}  // namespace internal
365}  // namespace v8
366
367#endif  // V8_STRINGS_STRING_BUILDER_INL_H_
368