1// Copyright 2011 the V8 project authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#ifndef V8_DATE_DATEPARSER_H_ 6#define V8_DATE_DATEPARSER_H_ 7 8#include "src/base/vector.h" 9#include "src/strings/char-predicates.h" 10#include "src/utils/allocation.h" 11 12namespace v8 { 13namespace internal { 14 15class DateParser : public AllStatic { 16 public: 17 enum { 18 YEAR, 19 MONTH, 20 DAY, 21 HOUR, 22 MINUTE, 23 SECOND, 24 MILLISECOND, 25 UTC_OFFSET, 26 OUTPUT_SIZE 27 }; 28 29 // Parse the string as a date. If parsing succeeds, return true after 30 // filling out the output array as follows (all integers are Smis): 31 // [0]: year 32 // [1]: month (0 = Jan, 1 = Feb, ...) 33 // [2]: day 34 // [3]: hour 35 // [4]: minute 36 // [5]: second 37 // [6]: millisecond 38 // [7]: UTC offset in seconds, or null value if no timezone specified 39 // If parsing fails, return false (content of output array is not defined). 40 template <typename Char> 41 static bool Parse(Isolate* isolate, base::Vector<Char> str, double* output); 42 43 private: 44 // Range testing 45 static inline bool Between(int x, int lo, int hi) { 46 return static_cast<unsigned>(x - lo) <= static_cast<unsigned>(hi - lo); 47 } 48 49 // Indicates a missing value. 50 static const int kNone = kMaxInt; 51 52 // Maximal number of digits used to build the value of a numeral. 53 // Remaining digits are ignored. 54 static const int kMaxSignificantDigits = 9; 55 56 // InputReader provides basic string parsing and character classification. 57 template <typename Char> 58 class InputReader { 59 public: 60 explicit InputReader(base::Vector<Char> s) : index_(0), buffer_(s) { 61 Next(); 62 } 63 64 int position() { return index_; } 65 66 // Advance to the next character of the string. 67 void Next() { 68 ch_ = (index_ < buffer_.length()) ? buffer_[index_] : 0; 69 index_++; 70 } 71 72 // Read a string of digits as an unsigned number. Cap value at 73 // kMaxSignificantDigits, but skip remaining digits if the numeral 74 // is longer. 75 int ReadUnsignedNumeral() { 76 int n = 0; 77 int i = 0; 78 // First, skip leading zeros 79 while (ch_ == '0') Next(); 80 // And then, do the conversion 81 while (IsAsciiDigit()) { 82 if (i < kMaxSignificantDigits) n = n * 10 + ch_ - '0'; 83 i++; 84 Next(); 85 } 86 return n; 87 } 88 89 // Read a word (sequence of chars. >= 'A'), fill the given buffer with a 90 // lower-case prefix, and pad any remainder of the buffer with zeroes. 91 // Return word length. 92 int ReadWord(uint32_t* prefix, int prefix_size) { 93 int len; 94 for (len = 0; IsAsciiAlphaOrAbove() && !IsWhiteSpaceChar(); 95 Next(), len++) { 96 if (len < prefix_size) prefix[len] = AsciiAlphaToLower(ch_); 97 } 98 for (int i = len; i < prefix_size; i++) prefix[i] = 0; 99 return len; 100 } 101 102 // The skip methods return whether they actually skipped something. 103 bool Skip(uint32_t c) { 104 if (ch_ == c) { 105 Next(); 106 return true; 107 } 108 return false; 109 } 110 111 inline bool SkipWhiteSpace(); 112 inline bool SkipParentheses(); 113 114 // Character testing/classification. Non-ASCII digits are not supported. 115 bool Is(uint32_t c) const { return ch_ == c; } 116 bool IsEnd() const { return ch_ == 0; } 117 bool IsAsciiDigit() const { return IsDecimalDigit(ch_); } 118 bool IsAsciiAlphaOrAbove() const { return ch_ >= 'A'; } 119 bool IsWhiteSpaceChar() const { return IsWhiteSpace(ch_); } 120 bool IsAsciiSign() const { return ch_ == '+' || ch_ == '-'; } 121 122 // Return 1 for '+' and -1 for '-'. 123 int GetAsciiSignValue() const { return 44 - static_cast<int>(ch_); } 124 125 private: 126 int index_; 127 base::Vector<Char> buffer_; 128 uint32_t ch_; 129 }; 130 131 enum KeywordType { 132 INVALID, 133 MONTH_NAME, 134 TIME_ZONE_NAME, 135 TIME_SEPARATOR, 136 AM_PM 137 }; 138 139 struct DateToken { 140 public: 141 bool IsInvalid() { return tag_ == kInvalidTokenTag; } 142 bool IsUnknown() { return tag_ == kUnknownTokenTag; } 143 bool IsNumber() { return tag_ == kNumberTag; } 144 bool IsSymbol() { return tag_ == kSymbolTag; } 145 bool IsWhiteSpace() { return tag_ == kWhiteSpaceTag; } 146 bool IsEndOfInput() { return tag_ == kEndOfInputTag; } 147 bool IsKeyword() { return tag_ >= kKeywordTagStart; } 148 149 int length() { return length_; } 150 151 int number() { 152 DCHECK(IsNumber()); 153 return value_; 154 } 155 KeywordType keyword_type() { 156 DCHECK(IsKeyword()); 157 return static_cast<KeywordType>(tag_); 158 } 159 int keyword_value() { 160 DCHECK(IsKeyword()); 161 return value_; 162 } 163 char symbol() { 164 DCHECK(IsSymbol()); 165 return static_cast<char>(value_); 166 } 167 bool IsSymbol(char symbol) { 168 return IsSymbol() && this->symbol() == symbol; 169 } 170 bool IsKeywordType(KeywordType tag) { return tag_ == tag; } 171 bool IsFixedLengthNumber(int length) { 172 return IsNumber() && length_ == length; 173 } 174 bool IsAsciiSign() { 175 return tag_ == kSymbolTag && (value_ == '-' || value_ == '+'); 176 } 177 int ascii_sign() { 178 DCHECK(IsAsciiSign()); 179 return 44 - value_; 180 } 181 bool IsKeywordZ() { 182 return IsKeywordType(TIME_ZONE_NAME) && length_ == 1 && value_ == 0; 183 } 184 bool IsUnknown(int character) { return IsUnknown() && value_ == character; } 185 // Factory functions. 186 static DateToken Keyword(KeywordType tag, int value, int length) { 187 return DateToken(tag, length, value); 188 } 189 static DateToken Number(int value, int length) { 190 return DateToken(kNumberTag, length, value); 191 } 192 static DateToken Symbol(char symbol) { 193 return DateToken(kSymbolTag, 1, symbol); 194 } 195 static DateToken EndOfInput() { return DateToken(kEndOfInputTag, 0, -1); } 196 static DateToken WhiteSpace(int length) { 197 return DateToken(kWhiteSpaceTag, length, -1); 198 } 199 static DateToken Unknown() { return DateToken(kUnknownTokenTag, 1, -1); } 200 static DateToken Invalid() { return DateToken(kInvalidTokenTag, 0, -1); } 201 202 private: 203 enum TagType { 204 kInvalidTokenTag = -6, 205 kUnknownTokenTag = -5, 206 kWhiteSpaceTag = -4, 207 kNumberTag = -3, 208 kSymbolTag = -2, 209 kEndOfInputTag = -1, 210 kKeywordTagStart = 0 211 }; 212 DateToken(int tag, int length, int value) 213 : tag_(tag), length_(length), value_(value) {} 214 215 int tag_; 216 int length_; // Number of characters. 217 int value_; 218 }; 219 220 template <typename Char> 221 class DateStringTokenizer { 222 public: 223 explicit DateStringTokenizer(InputReader<Char>* in) 224 : in_(in), next_(Scan()) {} 225 DateToken Next() { 226 DateToken result = next_; 227 next_ = Scan(); 228 return result; 229 } 230 231 DateToken Peek() { return next_; } 232 bool SkipSymbol(char symbol) { 233 if (next_.IsSymbol(symbol)) { 234 next_ = Scan(); 235 return true; 236 } 237 return false; 238 } 239 240 private: 241 DateToken Scan(); 242 243 InputReader<Char>* in_; 244 DateToken next_; 245 }; 246 247 static int ReadMilliseconds(DateToken number); 248 249 // KeywordTable maps names of months, time zones, am/pm to numbers. 250 class KeywordTable : public AllStatic { 251 public: 252 // Look up a word in the keyword table and return an index. 253 // 'pre' contains a prefix of the word, zero-padded to size kPrefixLength 254 // and 'len' is the word length. 255 static int Lookup(const uint32_t* pre, int len); 256 // Get the type of the keyword at index i. 257 static KeywordType GetType(int i) { 258 return static_cast<KeywordType>(array[i][kTypeOffset]); 259 } 260 // Get the value of the keyword at index i. 261 static int GetValue(int i) { return array[i][kValueOffset]; } 262 263 static const int kPrefixLength = 3; 264 static const int kTypeOffset = kPrefixLength; 265 static const int kValueOffset = kTypeOffset + 1; 266 static const int kEntrySize = kValueOffset + 1; 267 static const int8_t array[][kEntrySize]; 268 }; 269 270 class TimeZoneComposer { 271 public: 272 TimeZoneComposer() : sign_(kNone), hour_(kNone), minute_(kNone) {} 273 void Set(int offset_in_hours) { 274 sign_ = offset_in_hours < 0 ? -1 : 1; 275 hour_ = offset_in_hours * sign_; 276 minute_ = 0; 277 } 278 void SetSign(int sign) { sign_ = sign < 0 ? -1 : 1; } 279 void SetAbsoluteHour(int hour) { hour_ = hour; } 280 void SetAbsoluteMinute(int minute) { minute_ = minute; } 281 bool IsExpecting(int n) const { 282 return hour_ != kNone && minute_ == kNone && TimeComposer::IsMinute(n); 283 } 284 bool IsUTC() const { return hour_ == 0 && minute_ == 0; } 285 bool Write(double* output); 286 bool IsEmpty() { return hour_ == kNone; } 287 288 private: 289 int sign_; 290 int hour_; 291 int minute_; 292 }; 293 294 class TimeComposer { 295 public: 296 TimeComposer() : index_(0), hour_offset_(kNone) {} 297 bool IsEmpty() const { return index_ == 0; } 298 bool IsExpecting(int n) const { 299 return (index_ == 1 && IsMinute(n)) || (index_ == 2 && IsSecond(n)) || 300 (index_ == 3 && IsMillisecond(n)); 301 } 302 bool Add(int n) { 303 return index_ < kSize ? (comp_[index_++] = n, true) : false; 304 } 305 bool AddFinal(int n) { 306 if (!Add(n)) return false; 307 while (index_ < kSize) comp_[index_++] = 0; 308 return true; 309 } 310 void SetHourOffset(int n) { hour_offset_ = n; } 311 bool Write(double* output); 312 313 static bool IsMinute(int x) { return Between(x, 0, 59); } 314 static bool IsHour(int x) { return Between(x, 0, 23); } 315 static bool IsSecond(int x) { return Between(x, 0, 59); } 316 317 private: 318 static bool IsHour12(int x) { return Between(x, 0, 12); } 319 static bool IsMillisecond(int x) { return Between(x, 0, 999); } 320 321 static const int kSize = 4; 322 int comp_[kSize]; 323 int index_; 324 int hour_offset_; 325 }; 326 327 class DayComposer { 328 public: 329 DayComposer() : index_(0), named_month_(kNone), is_iso_date_(false) {} 330 bool IsEmpty() const { return index_ == 0; } 331 bool Add(int n) { 332 if (index_ < kSize) { 333 comp_[index_] = n; 334 index_++; 335 return true; 336 } 337 return false; 338 } 339 void SetNamedMonth(int n) { named_month_ = n; } 340 bool Write(double* output); 341 void set_iso_date() { is_iso_date_ = true; } 342 static bool IsMonth(int x) { return Between(x, 1, 12); } 343 static bool IsDay(int x) { return Between(x, 1, 31); } 344 345 private: 346 static const int kSize = 3; 347 int comp_[kSize]; 348 int index_; 349 int named_month_; 350 // If set, ensures that data is always parsed in year-month-date order. 351 bool is_iso_date_; 352 }; 353 354 // Tries to parse an ES5 Date Time String. Returns the next token 355 // to continue with in the legacy date string parser. If parsing is 356 // complete, returns DateToken::EndOfInput(). If terminally unsuccessful, 357 // returns DateToken::Invalid(). Otherwise parsing continues in the 358 // legacy parser. 359 template <typename Char> 360 static DateParser::DateToken ParseES5DateTime( 361 DateStringTokenizer<Char>* scanner, DayComposer* day, TimeComposer* time, 362 TimeZoneComposer* tz); 363}; 364 365} // namespace internal 366} // namespace v8 367 368#endif // V8_DATE_DATEPARSER_H_ 369