xref: /third_party/node/deps/v8/src/date/dateparser.h (revision 1cb0ef41)
// Copyright 2011 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#ifndef V8_DATE_DATEPARSER_H_
6#define V8_DATE_DATEPARSER_H_
7
8#include "src/base/vector.h"
9#include "src/strings/char-predicates.h"
10#include "src/utils/allocation.h"
11
12namespace v8 {
13namespace internal {
14
15class DateParser : public AllStatic {
16 public:
17  enum {
18    YEAR,
19    MONTH,
20    DAY,
21    HOUR,
22    MINUTE,
23    SECOND,
24    MILLISECOND,
25    UTC_OFFSET,
26    OUTPUT_SIZE
27  };
28
29  // Parse the string as a date. If parsing succeeds, return true after
30  // filling out the output array as follows (all integers are Smis):
31  // [0]: year
32  // [1]: month (0 = Jan, 1 = Feb, ...)
33  // [2]: day
34  // [3]: hour
35  // [4]: minute
36  // [5]: second
37  // [6]: millisecond
38  // [7]: UTC offset in seconds, or null value if no timezone specified
39  // If parsing fails, return false (content of output array is not defined).
40  template <typename Char>
41  static bool Parse(Isolate* isolate, base::Vector<Char> str, double* output);
42
43 private:
44  // Range testing
45  static inline bool Between(int x, int lo, int hi) {
46    return static_cast<unsigned>(x - lo) <= static_cast<unsigned>(hi - lo);
47  }
48
49  // Indicates a missing value.
50  static const int kNone = kMaxInt;
51
52  // Maximal number of digits used to build the value of a numeral.
53  // Remaining digits are ignored.
54  static const int kMaxSignificantDigits = 9;
55
56  // InputReader provides basic string parsing and character classification.
57  template <typename Char>
58  class InputReader {
59   public:
60    explicit InputReader(base::Vector<Char> s) : index_(0), buffer_(s) {
61      Next();
62    }
63
64    int position() { return index_; }
65
66    // Advance to the next character of the string.
67    void Next() {
68      ch_ = (index_ < buffer_.length()) ? buffer_[index_] : 0;
69      index_++;
70    }
71
72    // Read a string of digits as an unsigned number. Cap value at
73    // kMaxSignificantDigits, but skip remaining digits if the numeral
74    // is longer.
75    int ReadUnsignedNumeral() {
76      int n = 0;
77      int i = 0;
78      // First, skip leading zeros
79      while (ch_ == '0') Next();
80      // And then, do the conversion
81      while (IsAsciiDigit()) {
82        if (i < kMaxSignificantDigits) n = n * 10 + ch_ - '0';
83        i++;
84        Next();
85      }
86      return n;
87    }
88
89    // Read a word (sequence of chars. >= 'A'), fill the given buffer with a
90    // lower-case prefix, and pad any remainder of the buffer with zeroes.
91    // Return word length.
92    int ReadWord(uint32_t* prefix, int prefix_size) {
93      int len;
94      for (len = 0; IsAsciiAlphaOrAbove() && !IsWhiteSpaceChar();
95           Next(), len++) {
96        if (len < prefix_size) prefix[len] = AsciiAlphaToLower(ch_);
97      }
98      for (int i = len; i < prefix_size; i++) prefix[i] = 0;
99      return len;
100    }
101
102    // The skip methods return whether they actually skipped something.
103    bool Skip(uint32_t c) {
104      if (ch_ == c) {
105        Next();
106        return true;
107      }
108      return false;
109    }
110
111    inline bool SkipWhiteSpace();
112    inline bool SkipParentheses();
113
114    // Character testing/classification. Non-ASCII digits are not supported.
115    bool Is(uint32_t c) const { return ch_ == c; }
116    bool IsEnd() const { return ch_ == 0; }
117    bool IsAsciiDigit() const { return IsDecimalDigit(ch_); }
118    bool IsAsciiAlphaOrAbove() const { return ch_ >= 'A'; }
119    bool IsWhiteSpaceChar() const { return IsWhiteSpace(ch_); }
120    bool IsAsciiSign() const { return ch_ == '+' || ch_ == '-'; }
121
122    // Return 1 for '+' and -1 for '-'.
123    int GetAsciiSignValue() const { return 44 - static_cast<int>(ch_); }
124
125   private:
126    int index_;
127    base::Vector<Char> buffer_;
128    uint32_t ch_;
129  };
130
131  enum KeywordType {
132    INVALID,
133    MONTH_NAME,
134    TIME_ZONE_NAME,
135    TIME_SEPARATOR,
136    AM_PM
137  };
138
139  struct DateToken {
140   public:
141    bool IsInvalid() { return tag_ == kInvalidTokenTag; }
142    bool IsUnknown() { return tag_ == kUnknownTokenTag; }
143    bool IsNumber() { return tag_ == kNumberTag; }
144    bool IsSymbol() { return tag_ == kSymbolTag; }
145    bool IsWhiteSpace() { return tag_ == kWhiteSpaceTag; }
146    bool IsEndOfInput() { return tag_ == kEndOfInputTag; }
147    bool IsKeyword() { return tag_ >= kKeywordTagStart; }
148
149    int length() { return length_; }
150
151    int number() {
152      DCHECK(IsNumber());
153      return value_;
154    }
155    KeywordType keyword_type() {
156      DCHECK(IsKeyword());
157      return static_cast<KeywordType>(tag_);
158    }
159    int keyword_value() {
160      DCHECK(IsKeyword());
161      return value_;
162    }
163    char symbol() {
164      DCHECK(IsSymbol());
165      return static_cast<char>(value_);
166    }
167    bool IsSymbol(char symbol) {
168      return IsSymbol() && this->symbol() == symbol;
169    }
170    bool IsKeywordType(KeywordType tag) { return tag_ == tag; }
171    bool IsFixedLengthNumber(int length) {
172      return IsNumber() && length_ == length;
173    }
174    bool IsAsciiSign() {
175      return tag_ == kSymbolTag && (value_ == '-' || value_ == '+');
176    }
177    int ascii_sign() {
178      DCHECK(IsAsciiSign());
179      return 44 - value_;
180    }
181    bool IsKeywordZ() {
182      return IsKeywordType(TIME_ZONE_NAME) && length_ == 1 && value_ == 0;
183    }
184    bool IsUnknown(int character) { return IsUnknown() && value_ == character; }
185    // Factory functions.
186    static DateToken Keyword(KeywordType tag, int value, int length) {
187      return DateToken(tag, length, value);
188    }
189    static DateToken Number(int value, int length) {
190      return DateToken(kNumberTag, length, value);
191    }
192    static DateToken Symbol(char symbol) {
193      return DateToken(kSymbolTag, 1, symbol);
194    }
195    static DateToken EndOfInput() { return DateToken(kEndOfInputTag, 0, -1); }
196    static DateToken WhiteSpace(int length) {
197      return DateToken(kWhiteSpaceTag, length, -1);
198    }
199    static DateToken Unknown() { return DateToken(kUnknownTokenTag, 1, -1); }
200    static DateToken Invalid() { return DateToken(kInvalidTokenTag, 0, -1); }
201
202   private:
203    enum TagType {
204      kInvalidTokenTag = -6,
205      kUnknownTokenTag = -5,
206      kWhiteSpaceTag = -4,
207      kNumberTag = -3,
208      kSymbolTag = -2,
209      kEndOfInputTag = -1,
210      kKeywordTagStart = 0
211    };
212    DateToken(int tag, int length, int value)
213        : tag_(tag), length_(length), value_(value) {}
214
215    int tag_;
216    int length_;  // Number of characters.
217    int value_;
218  };
219
220  template <typename Char>
221  class DateStringTokenizer {
222   public:
223    explicit DateStringTokenizer(InputReader<Char>* in)
224        : in_(in), next_(Scan()) {}
225    DateToken Next() {
226      DateToken result = next_;
227      next_ = Scan();
228      return result;
229    }
230
231    DateToken Peek() { return next_; }
232    bool SkipSymbol(char symbol) {
233      if (next_.IsSymbol(symbol)) {
234        next_ = Scan();
235        return true;
236      }
237      return false;
238    }
239
240   private:
241    DateToken Scan();
242
243    InputReader<Char>* in_;
244    DateToken next_;
245  };
246
247  static int ReadMilliseconds(DateToken number);
248
249  // KeywordTable maps names of months, time zones, am/pm to numbers.
250  class KeywordTable : public AllStatic {
251   public:
252    // Look up a word in the keyword table and return an index.
253    // 'pre' contains a prefix of the word, zero-padded to size kPrefixLength
254    // and 'len' is the word length.
255    static int Lookup(const uint32_t* pre, int len);
256    // Get the type of the keyword at index i.
257    static KeywordType GetType(int i) {
258      return static_cast<KeywordType>(array[i][kTypeOffset]);
259    }
260    // Get the value of the keyword at index i.
261    static int GetValue(int i) { return array[i][kValueOffset]; }
262
263    static const int kPrefixLength = 3;
264    static const int kTypeOffset = kPrefixLength;
265    static const int kValueOffset = kTypeOffset + 1;
266    static const int kEntrySize = kValueOffset + 1;
267    static const int8_t array[][kEntrySize];
268  };
269
270  class TimeZoneComposer {
271   public:
272    TimeZoneComposer() : sign_(kNone), hour_(kNone), minute_(kNone) {}
273    void Set(int offset_in_hours) {
274      sign_ = offset_in_hours < 0 ? -1 : 1;
275      hour_ = offset_in_hours * sign_;
276      minute_ = 0;
277    }
278    void SetSign(int sign) { sign_ = sign < 0 ? -1 : 1; }
279    void SetAbsoluteHour(int hour) { hour_ = hour; }
280    void SetAbsoluteMinute(int minute) { minute_ = minute; }
281    bool IsExpecting(int n) const {
282      return hour_ != kNone && minute_ == kNone && TimeComposer::IsMinute(n);
283    }
284    bool IsUTC() const { return hour_ == 0 && minute_ == 0; }
285    bool Write(double* output);
286    bool IsEmpty() { return hour_ == kNone; }
287
288   private:
289    int sign_;
290    int hour_;
291    int minute_;
292  };
293
294  class TimeComposer {
295   public:
296    TimeComposer() : index_(0), hour_offset_(kNone) {}
297    bool IsEmpty() const { return index_ == 0; }
298    bool IsExpecting(int n) const {
299      return (index_ == 1 && IsMinute(n)) || (index_ == 2 && IsSecond(n)) ||
300             (index_ == 3 && IsMillisecond(n));
301    }
302    bool Add(int n) {
303      return index_ < kSize ? (comp_[index_++] = n, true) : false;
304    }
305    bool AddFinal(int n) {
306      if (!Add(n)) return false;
307      while (index_ < kSize) comp_[index_++] = 0;
308      return true;
309    }
310    void SetHourOffset(int n) { hour_offset_ = n; }
311    bool Write(double* output);
312
313    static bool IsMinute(int x) { return Between(x, 0, 59); }
314    static bool IsHour(int x) { return Between(x, 0, 23); }
315    static bool IsSecond(int x) { return Between(x, 0, 59); }
316
317   private:
318    static bool IsHour12(int x) { return Between(x, 0, 12); }
319    static bool IsMillisecond(int x) { return Between(x, 0, 999); }
320
321    static const int kSize = 4;
322    int comp_[kSize];
323    int index_;
324    int hour_offset_;
325  };
326
327  class DayComposer {
328   public:
329    DayComposer() : index_(0), named_month_(kNone), is_iso_date_(false) {}
330    bool IsEmpty() const { return index_ == 0; }
331    bool Add(int n) {
332      if (index_ < kSize) {
333        comp_[index_] = n;
334        index_++;
335        return true;
336      }
337      return false;
338    }
339    void SetNamedMonth(int n) { named_month_ = n; }
340    bool Write(double* output);
341    void set_iso_date() { is_iso_date_ = true; }
342    static bool IsMonth(int x) { return Between(x, 1, 12); }
343    static bool IsDay(int x) { return Between(x, 1, 31); }
344
345   private:
346    static const int kSize = 3;
347    int comp_[kSize];
348    int index_;
349    int named_month_;
350    // If set, ensures that data is always parsed in year-month-date order.
351    bool is_iso_date_;
352  };
353
354  // Tries to parse an ES5 Date Time String. Returns the next token
355  // to continue with in the legacy date string parser. If parsing is
356  // complete, returns DateToken::EndOfInput(). If terminally unsuccessful,
357  // returns DateToken::Invalid(). Otherwise parsing continues in the
358  // legacy parser.
359  template <typename Char>
360  static DateParser::DateToken ParseES5DateTime(
361      DateStringTokenizer<Char>* scanner, DayComposer* day, TimeComposer* time,
362      TimeZoneComposer* tz);
363};
364
365}  // namespace internal
366}  // namespace v8
367
368#endif  // V8_DATE_DATEPARSER_H_
369