1#ifndef Py_BUILD_CORE_BUILTIN 2# define Py_BUILD_CORE_MODULE 1 3#endif 4 5#include "Python.h" 6#include "pycore_long.h" // _PyLong_GetOne() 7#include "structmember.h" 8 9#include <ctype.h> 10#include <stddef.h> 11#include <stdint.h> 12 13#include "datetime.h" 14 15// Imports 16static PyObject *io_open = NULL; 17static PyObject *_tzpath_find_tzfile = NULL; 18static PyObject *_common_mod = NULL; 19 20typedef struct TransitionRuleType TransitionRuleType; 21typedef struct StrongCacheNode StrongCacheNode; 22 23typedef struct { 24 PyObject *utcoff; 25 PyObject *dstoff; 26 PyObject *tzname; 27 long utcoff_seconds; 28} _ttinfo; 29 30typedef struct { 31 _ttinfo std; 32 _ttinfo dst; 33 int dst_diff; 34 TransitionRuleType *start; 35 TransitionRuleType *end; 36 unsigned char std_only; 37} _tzrule; 38 39typedef struct { 40 PyDateTime_TZInfo base; 41 PyObject *key; 42 PyObject *file_repr; 43 PyObject *weakreflist; 44 size_t num_transitions; 45 size_t num_ttinfos; 46 int64_t *trans_list_utc; 47 int64_t *trans_list_wall[2]; 48 _ttinfo **trans_ttinfos; // References to the ttinfo for each transition 49 _ttinfo *ttinfo_before; 50 _tzrule tzrule_after; 51 _ttinfo *_ttinfos; // Unique array of ttinfos for ease of deallocation 52 unsigned char fixed_offset; 53 unsigned char source; 54} PyZoneInfo_ZoneInfo; 55 56struct TransitionRuleType { 57 int64_t (*year_to_timestamp)(TransitionRuleType *, int); 58}; 59 60typedef struct { 61 TransitionRuleType base; 62 uint8_t month; 63 uint8_t week; 64 uint8_t day; 65 int8_t hour; 66 int8_t minute; 67 int8_t second; 68} CalendarRule; 69 70typedef struct { 71 TransitionRuleType base; 72 uint8_t julian; 73 unsigned int day; 74 int8_t hour; 75 int8_t minute; 76 int8_t second; 77} DayRule; 78 79struct StrongCacheNode { 80 StrongCacheNode *next; 81 StrongCacheNode *prev; 82 PyObject *key; 83 PyObject *zone; 84}; 85 86static PyTypeObject PyZoneInfo_ZoneInfoType; 87 88// Globals 89static PyObject *TIMEDELTA_CACHE = NULL; 90static PyObject *ZONEINFO_WEAK_CACHE = NULL; 91static StrongCacheNode *ZONEINFO_STRONG_CACHE = NULL; 92static size_t ZONEINFO_STRONG_CACHE_MAX_SIZE = 8; 93 94static _ttinfo NO_TTINFO = {NULL, NULL, NULL, 0}; 95 96// Constants 97static const int EPOCHORDINAL = 719163; 98static int DAYS_IN_MONTH[] = { 99 -1, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31, 100}; 101 102static int DAYS_BEFORE_MONTH[] = { 103 -1, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 104}; 105 106static const int SOURCE_NOCACHE = 0; 107static const int SOURCE_CACHE = 1; 108static const int SOURCE_FILE = 2; 109 110// Forward declarations 111static int 112load_data(PyZoneInfo_ZoneInfo *self, PyObject *file_obj); 113static void 114utcoff_to_dstoff(size_t *trans_idx, long *utcoffs, long *dstoffs, 115 unsigned char *isdsts, size_t num_transitions, 116 size_t num_ttinfos); 117static int 118ts_to_local(size_t *trans_idx, int64_t *trans_utc, long *utcoff, 119 int64_t *trans_local[2], size_t num_ttinfos, 120 size_t num_transitions); 121 122static int 123parse_tz_str(PyObject *tz_str_obj, _tzrule *out); 124 125static Py_ssize_t 126parse_abbr(const char *const p, PyObject **abbr); 127static Py_ssize_t 128parse_tz_delta(const char *const p, long *total_seconds); 129static Py_ssize_t 130parse_transition_time(const char *const p, int8_t *hour, int8_t *minute, 131 int8_t *second); 132static Py_ssize_t 133parse_transition_rule(const char *const p, TransitionRuleType **out); 134 135static _ttinfo * 136find_tzrule_ttinfo(_tzrule *rule, int64_t ts, unsigned char fold, int year); 137static _ttinfo * 138find_tzrule_ttinfo_fromutc(_tzrule *rule, int64_t ts, int year, 139 unsigned char *fold); 140 141static int 142build_ttinfo(long utcoffset, long dstoffset, PyObject *tzname, _ttinfo *out); 143static void 144xdecref_ttinfo(_ttinfo *ttinfo); 145static int 146ttinfo_eq(const _ttinfo *const tti0, const _ttinfo *const tti1); 147 148static int 149build_tzrule(PyObject *std_abbr, PyObject *dst_abbr, long std_offset, 150 long dst_offset, TransitionRuleType *start, 151 TransitionRuleType *end, _tzrule *out); 152static void 153free_tzrule(_tzrule *tzrule); 154 155static PyObject * 156load_timedelta(long seconds); 157 158static int 159get_local_timestamp(PyObject *dt, int64_t *local_ts); 160static _ttinfo * 161find_ttinfo(PyZoneInfo_ZoneInfo *self, PyObject *dt); 162 163static int 164ymd_to_ord(int y, int m, int d); 165static int 166is_leap_year(int year); 167 168static size_t 169_bisect(const int64_t value, const int64_t *arr, size_t size); 170 171static int 172eject_from_strong_cache(const PyTypeObject *const type, PyObject *key); 173static void 174clear_strong_cache(const PyTypeObject *const type); 175static void 176update_strong_cache(const PyTypeObject *const type, PyObject *key, 177 PyObject *zone); 178static PyObject * 179zone_from_strong_cache(const PyTypeObject *const type, PyObject *const key); 180 181static PyObject * 182zoneinfo_new_instance(PyTypeObject *type, PyObject *key) 183{ 184 PyObject *file_obj = NULL; 185 PyObject *file_path = NULL; 186 187 file_path = PyObject_CallFunctionObjArgs(_tzpath_find_tzfile, key, NULL); 188 if (file_path == NULL) { 189 return NULL; 190 } 191 else if (file_path == Py_None) { 192 file_obj = PyObject_CallMethod(_common_mod, "load_tzdata", "O", key); 193 if (file_obj == NULL) { 194 Py_DECREF(file_path); 195 return NULL; 196 } 197 } 198 199 PyObject *self = (PyObject *)(type->tp_alloc(type, 0)); 200 if (self == NULL) { 201 goto error; 202 } 203 204 if (file_obj == NULL) { 205 file_obj = PyObject_CallFunction(io_open, "Os", file_path, "rb"); 206 if (file_obj == NULL) { 207 goto error; 208 } 209 } 210 211 if (load_data((PyZoneInfo_ZoneInfo *)self, file_obj)) { 212 goto error; 213 } 214 215 PyObject *rv = PyObject_CallMethod(file_obj, "close", NULL); 216 Py_DECREF(file_obj); 217 file_obj = NULL; 218 if (rv == NULL) { 219 goto error; 220 } 221 Py_DECREF(rv); 222 223 ((PyZoneInfo_ZoneInfo *)self)->key = key; 224 Py_INCREF(key); 225 226 goto cleanup; 227error: 228 Py_XDECREF(self); 229 self = NULL; 230cleanup: 231 if (file_obj != NULL) { 232 PyObject *exc, *val, *tb; 233 PyErr_Fetch(&exc, &val, &tb); 234 PyObject *tmp = PyObject_CallMethod(file_obj, "close", NULL); 235 _PyErr_ChainExceptions(exc, val, tb); 236 if (tmp == NULL) { 237 Py_CLEAR(self); 238 } 239 Py_XDECREF(tmp); 240 Py_DECREF(file_obj); 241 } 242 Py_DECREF(file_path); 243 return self; 244} 245 246static PyObject * 247get_weak_cache(PyTypeObject *type) 248{ 249 if (type == &PyZoneInfo_ZoneInfoType) { 250 return ZONEINFO_WEAK_CACHE; 251 } 252 else { 253 PyObject *cache = 254 PyObject_GetAttrString((PyObject *)type, "_weak_cache"); 255 // We are assuming that the type lives at least as long as the function 256 // that calls get_weak_cache, and that it holds a reference to the 257 // cache, so we'll return a "borrowed reference". 258 Py_XDECREF(cache); 259 return cache; 260 } 261} 262 263static PyObject * 264zoneinfo_new(PyTypeObject *type, PyObject *args, PyObject *kw) 265{ 266 PyObject *key = NULL; 267 static char *kwlist[] = {"key", NULL}; 268 if (PyArg_ParseTupleAndKeywords(args, kw, "O", kwlist, &key) == 0) { 269 return NULL; 270 } 271 272 PyObject *instance = zone_from_strong_cache(type, key); 273 if (instance != NULL || PyErr_Occurred()) { 274 return instance; 275 } 276 277 PyObject *weak_cache = get_weak_cache(type); 278 instance = PyObject_CallMethod(weak_cache, "get", "O", key, Py_None); 279 if (instance == NULL) { 280 return NULL; 281 } 282 283 if (instance == Py_None) { 284 Py_DECREF(instance); 285 PyObject *tmp = zoneinfo_new_instance(type, key); 286 if (tmp == NULL) { 287 return NULL; 288 } 289 290 instance = 291 PyObject_CallMethod(weak_cache, "setdefault", "OO", key, tmp); 292 Py_DECREF(tmp); 293 if (instance == NULL) { 294 return NULL; 295 } 296 ((PyZoneInfo_ZoneInfo *)instance)->source = SOURCE_CACHE; 297 } 298 299 update_strong_cache(type, key, instance); 300 return instance; 301} 302 303static void 304zoneinfo_dealloc(PyObject *obj_self) 305{ 306 PyZoneInfo_ZoneInfo *self = (PyZoneInfo_ZoneInfo *)obj_self; 307 308 if (self->weakreflist != NULL) { 309 PyObject_ClearWeakRefs(obj_self); 310 } 311 312 if (self->trans_list_utc != NULL) { 313 PyMem_Free(self->trans_list_utc); 314 } 315 316 for (size_t i = 0; i < 2; i++) { 317 if (self->trans_list_wall[i] != NULL) { 318 PyMem_Free(self->trans_list_wall[i]); 319 } 320 } 321 322 if (self->_ttinfos != NULL) { 323 for (size_t i = 0; i < self->num_ttinfos; ++i) { 324 xdecref_ttinfo(&(self->_ttinfos[i])); 325 } 326 PyMem_Free(self->_ttinfos); 327 } 328 329 if (self->trans_ttinfos != NULL) { 330 PyMem_Free(self->trans_ttinfos); 331 } 332 333 free_tzrule(&(self->tzrule_after)); 334 335 Py_XDECREF(self->key); 336 Py_XDECREF(self->file_repr); 337 338 Py_TYPE(self)->tp_free((PyObject *)self); 339} 340 341static PyObject * 342zoneinfo_from_file(PyTypeObject *type, PyObject *args, PyObject *kwargs) 343{ 344 PyObject *file_obj = NULL; 345 PyObject *file_repr = NULL; 346 PyObject *key = Py_None; 347 PyZoneInfo_ZoneInfo *self = NULL; 348 349 static char *kwlist[] = {"", "key", NULL}; 350 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|O", kwlist, &file_obj, 351 &key)) { 352 return NULL; 353 } 354 355 PyObject *obj_self = (PyObject *)(type->tp_alloc(type, 0)); 356 self = (PyZoneInfo_ZoneInfo *)obj_self; 357 if (self == NULL) { 358 return NULL; 359 } 360 361 file_repr = PyUnicode_FromFormat("%R", file_obj); 362 if (file_repr == NULL) { 363 goto error; 364 } 365 366 if (load_data(self, file_obj)) { 367 goto error; 368 } 369 370 self->source = SOURCE_FILE; 371 self->file_repr = file_repr; 372 self->key = key; 373 Py_INCREF(key); 374 375 return obj_self; 376error: 377 Py_XDECREF(file_repr); 378 Py_XDECREF(self); 379 return NULL; 380} 381 382static PyObject * 383zoneinfo_no_cache(PyTypeObject *cls, PyObject *args, PyObject *kwargs) 384{ 385 static char *kwlist[] = {"key", NULL}; 386 PyObject *key = NULL; 387 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O", kwlist, &key)) { 388 return NULL; 389 } 390 391 PyObject *out = zoneinfo_new_instance(cls, key); 392 if (out != NULL) { 393 ((PyZoneInfo_ZoneInfo *)out)->source = SOURCE_NOCACHE; 394 } 395 396 return out; 397} 398 399static PyObject * 400zoneinfo_clear_cache(PyObject *cls, PyObject *args, PyObject *kwargs) 401{ 402 PyObject *only_keys = NULL; 403 static char *kwlist[] = {"only_keys", NULL}; 404 405 if (!(PyArg_ParseTupleAndKeywords(args, kwargs, "|$O", kwlist, 406 &only_keys))) { 407 return NULL; 408 } 409 410 PyTypeObject *type = (PyTypeObject *)cls; 411 PyObject *weak_cache = get_weak_cache(type); 412 413 if (only_keys == NULL || only_keys == Py_None) { 414 PyObject *rv = PyObject_CallMethod(weak_cache, "clear", NULL); 415 if (rv != NULL) { 416 Py_DECREF(rv); 417 } 418 419 clear_strong_cache(type); 420 } 421 else { 422 PyObject *item = NULL; 423 PyObject *pop = PyUnicode_FromString("pop"); 424 if (pop == NULL) { 425 return NULL; 426 } 427 428 PyObject *iter = PyObject_GetIter(only_keys); 429 if (iter == NULL) { 430 Py_DECREF(pop); 431 return NULL; 432 } 433 434 while ((item = PyIter_Next(iter))) { 435 // Remove from strong cache 436 if (eject_from_strong_cache(type, item) < 0) { 437 Py_DECREF(item); 438 break; 439 } 440 441 // Remove from weak cache 442 PyObject *tmp = PyObject_CallMethodObjArgs(weak_cache, pop, item, 443 Py_None, NULL); 444 445 Py_DECREF(item); 446 if (tmp == NULL) { 447 break; 448 } 449 Py_DECREF(tmp); 450 } 451 Py_DECREF(iter); 452 Py_DECREF(pop); 453 } 454 455 if (PyErr_Occurred()) { 456 return NULL; 457 } 458 459 Py_RETURN_NONE; 460} 461 462static PyObject * 463zoneinfo_utcoffset(PyObject *self, PyObject *dt) 464{ 465 _ttinfo *tti = find_ttinfo((PyZoneInfo_ZoneInfo *)self, dt); 466 if (tti == NULL) { 467 return NULL; 468 } 469 Py_INCREF(tti->utcoff); 470 return tti->utcoff; 471} 472 473static PyObject * 474zoneinfo_dst(PyObject *self, PyObject *dt) 475{ 476 _ttinfo *tti = find_ttinfo((PyZoneInfo_ZoneInfo *)self, dt); 477 if (tti == NULL) { 478 return NULL; 479 } 480 Py_INCREF(tti->dstoff); 481 return tti->dstoff; 482} 483 484static PyObject * 485zoneinfo_tzname(PyObject *self, PyObject *dt) 486{ 487 _ttinfo *tti = find_ttinfo((PyZoneInfo_ZoneInfo *)self, dt); 488 if (tti == NULL) { 489 return NULL; 490 } 491 Py_INCREF(tti->tzname); 492 return tti->tzname; 493} 494 495#define GET_DT_TZINFO PyDateTime_DATE_GET_TZINFO 496 497static PyObject * 498zoneinfo_fromutc(PyObject *obj_self, PyObject *dt) 499{ 500 if (!PyDateTime_Check(dt)) { 501 PyErr_SetString(PyExc_TypeError, 502 "fromutc: argument must be a datetime"); 503 return NULL; 504 } 505 if (GET_DT_TZINFO(dt) != obj_self) { 506 PyErr_SetString(PyExc_ValueError, 507 "fromutc: dt.tzinfo " 508 "is not self"); 509 return NULL; 510 } 511 512 PyZoneInfo_ZoneInfo *self = (PyZoneInfo_ZoneInfo *)obj_self; 513 514 int64_t timestamp; 515 if (get_local_timestamp(dt, ×tamp)) { 516 return NULL; 517 } 518 size_t num_trans = self->num_transitions; 519 520 _ttinfo *tti = NULL; 521 unsigned char fold = 0; 522 523 if (num_trans >= 1 && timestamp < self->trans_list_utc[0]) { 524 tti = self->ttinfo_before; 525 } 526 else if (num_trans == 0 || 527 timestamp > self->trans_list_utc[num_trans - 1]) { 528 tti = find_tzrule_ttinfo_fromutc(&(self->tzrule_after), timestamp, 529 PyDateTime_GET_YEAR(dt), &fold); 530 531 // Immediately after the last manual transition, the fold/gap is 532 // between self->trans_ttinfos[num_transitions - 1] and whatever 533 // ttinfo applies immediately after the last transition, not between 534 // the STD and DST rules in the tzrule_after, so we may need to 535 // adjust the fold value. 536 if (num_trans) { 537 _ttinfo *tti_prev = NULL; 538 if (num_trans == 1) { 539 tti_prev = self->ttinfo_before; 540 } 541 else { 542 tti_prev = self->trans_ttinfos[num_trans - 2]; 543 } 544 int64_t diff = tti_prev->utcoff_seconds - tti->utcoff_seconds; 545 if (diff > 0 && 546 timestamp < (self->trans_list_utc[num_trans - 1] + diff)) { 547 fold = 1; 548 } 549 } 550 } 551 else { 552 size_t idx = _bisect(timestamp, self->trans_list_utc, num_trans); 553 _ttinfo *tti_prev = NULL; 554 555 if (idx >= 2) { 556 tti_prev = self->trans_ttinfos[idx - 2]; 557 tti = self->trans_ttinfos[idx - 1]; 558 } 559 else { 560 tti_prev = self->ttinfo_before; 561 tti = self->trans_ttinfos[0]; 562 } 563 564 // Detect fold 565 int64_t shift = 566 (int64_t)(tti_prev->utcoff_seconds - tti->utcoff_seconds); 567 if (shift > (timestamp - self->trans_list_utc[idx - 1])) { 568 fold = 1; 569 } 570 } 571 572 PyObject *tmp = PyNumber_Add(dt, tti->utcoff); 573 if (tmp == NULL) { 574 return NULL; 575 } 576 577 if (fold) { 578 if (PyDateTime_CheckExact(tmp)) { 579 ((PyDateTime_DateTime *)tmp)->fold = 1; 580 dt = tmp; 581 } 582 else { 583 PyObject *replace = PyObject_GetAttrString(tmp, "replace"); 584 PyObject *args = PyTuple_New(0); 585 PyObject *kwargs = PyDict_New(); 586 587 Py_DECREF(tmp); 588 if (args == NULL || kwargs == NULL || replace == NULL) { 589 Py_XDECREF(args); 590 Py_XDECREF(kwargs); 591 Py_XDECREF(replace); 592 return NULL; 593 } 594 595 dt = NULL; 596 if (!PyDict_SetItemString(kwargs, "fold", _PyLong_GetOne())) { 597 dt = PyObject_Call(replace, args, kwargs); 598 } 599 600 Py_DECREF(args); 601 Py_DECREF(kwargs); 602 Py_DECREF(replace); 603 604 if (dt == NULL) { 605 return NULL; 606 } 607 } 608 } 609 else { 610 dt = tmp; 611 } 612 return dt; 613} 614 615static PyObject * 616zoneinfo_repr(PyZoneInfo_ZoneInfo *self) 617{ 618 PyObject *rv = NULL; 619 const char *type_name = Py_TYPE((PyObject *)self)->tp_name; 620 if (!(self->key == Py_None)) { 621 rv = PyUnicode_FromFormat("%s(key=%R)", type_name, self->key); 622 } 623 else { 624 assert(PyUnicode_Check(self->file_repr)); 625 rv = PyUnicode_FromFormat("%s.from_file(%U)", type_name, 626 self->file_repr); 627 } 628 629 return rv; 630} 631 632static PyObject * 633zoneinfo_str(PyZoneInfo_ZoneInfo *self) 634{ 635 if (!(self->key == Py_None)) { 636 Py_INCREF(self->key); 637 return self->key; 638 } 639 else { 640 return zoneinfo_repr(self); 641 } 642} 643 644/* Pickles the ZoneInfo object by key and source. 645 * 646 * ZoneInfo objects are pickled by reference to the TZif file that they came 647 * from, which means that the exact transitions may be different or the file 648 * may not un-pickle if the data has changed on disk in the interim. 649 * 650 * It is necessary to include a bit indicating whether or not the object 651 * was constructed from the cache, because from-cache objects will hit the 652 * unpickling process's cache, whereas no-cache objects will bypass it. 653 * 654 * Objects constructed from ZoneInfo.from_file cannot be pickled. 655 */ 656static PyObject * 657zoneinfo_reduce(PyObject *obj_self, PyObject *unused) 658{ 659 PyZoneInfo_ZoneInfo *self = (PyZoneInfo_ZoneInfo *)obj_self; 660 if (self->source == SOURCE_FILE) { 661 // Objects constructed from files cannot be pickled. 662 PyObject *pickle = PyImport_ImportModule("pickle"); 663 if (pickle == NULL) { 664 return NULL; 665 } 666 667 PyObject *pickle_error = 668 PyObject_GetAttrString(pickle, "PicklingError"); 669 Py_DECREF(pickle); 670 if (pickle_error == NULL) { 671 return NULL; 672 } 673 674 PyErr_Format(pickle_error, 675 "Cannot pickle a ZoneInfo file from a file stream."); 676 Py_DECREF(pickle_error); 677 return NULL; 678 } 679 680 unsigned char from_cache = self->source == SOURCE_CACHE ? 1 : 0; 681 PyObject *constructor = PyObject_GetAttrString(obj_self, "_unpickle"); 682 683 if (constructor == NULL) { 684 return NULL; 685 } 686 687 PyObject *rv = Py_BuildValue("O(OB)", constructor, self->key, from_cache); 688 Py_DECREF(constructor); 689 return rv; 690} 691 692static PyObject * 693zoneinfo__unpickle(PyTypeObject *cls, PyObject *args) 694{ 695 PyObject *key; 696 unsigned char from_cache; 697 if (!PyArg_ParseTuple(args, "OB", &key, &from_cache)) { 698 return NULL; 699 } 700 701 if (from_cache) { 702 PyObject *val_args = Py_BuildValue("(O)", key); 703 if (val_args == NULL) { 704 return NULL; 705 } 706 707 PyObject *rv = zoneinfo_new(cls, val_args, NULL); 708 709 Py_DECREF(val_args); 710 return rv; 711 } 712 else { 713 return zoneinfo_new_instance(cls, key); 714 } 715} 716 717/* It is relatively expensive to construct new timedelta objects, and in most 718 * cases we're looking at a relatively small number of timedeltas, such as 719 * integer number of hours, etc. We will keep a cache so that we construct 720 * a minimal number of these. 721 * 722 * Possibly this should be replaced with an LRU cache so that it's not possible 723 * for the memory usage to explode from this, but in order for this to be a 724 * serious problem, one would need to deliberately craft a malicious time zone 725 * file with many distinct offsets. As of tzdb 2019c, loading every single zone 726 * fills the cache with ~450 timedeltas for a total size of ~12kB. 727 * 728 * This returns a new reference to the timedelta. 729 */ 730static PyObject * 731load_timedelta(long seconds) 732{ 733 PyObject *rv; 734 PyObject *pyoffset = PyLong_FromLong(seconds); 735 if (pyoffset == NULL) { 736 return NULL; 737 } 738 rv = PyDict_GetItemWithError(TIMEDELTA_CACHE, pyoffset); 739 if (rv == NULL) { 740 if (PyErr_Occurred()) { 741 goto error; 742 } 743 PyObject *tmp = PyDateTimeAPI->Delta_FromDelta( 744 0, seconds, 0, 1, PyDateTimeAPI->DeltaType); 745 746 if (tmp == NULL) { 747 goto error; 748 } 749 750 rv = PyDict_SetDefault(TIMEDELTA_CACHE, pyoffset, tmp); 751 Py_DECREF(tmp); 752 } 753 754 Py_XINCREF(rv); 755 Py_DECREF(pyoffset); 756 return rv; 757error: 758 Py_DECREF(pyoffset); 759 return NULL; 760} 761 762/* Constructor for _ttinfo object - this starts by initializing the _ttinfo 763 * to { NULL, NULL, NULL }, so that Py_XDECREF will work on partially 764 * initialized _ttinfo objects. 765 */ 766static int 767build_ttinfo(long utcoffset, long dstoffset, PyObject *tzname, _ttinfo *out) 768{ 769 out->utcoff = NULL; 770 out->dstoff = NULL; 771 out->tzname = NULL; 772 773 out->utcoff_seconds = utcoffset; 774 out->utcoff = load_timedelta(utcoffset); 775 if (out->utcoff == NULL) { 776 return -1; 777 } 778 779 out->dstoff = load_timedelta(dstoffset); 780 if (out->dstoff == NULL) { 781 return -1; 782 } 783 784 out->tzname = tzname; 785 Py_INCREF(tzname); 786 787 return 0; 788} 789 790/* Decrease reference count on any non-NULL members of a _ttinfo */ 791static void 792xdecref_ttinfo(_ttinfo *ttinfo) 793{ 794 if (ttinfo != NULL) { 795 Py_XDECREF(ttinfo->utcoff); 796 Py_XDECREF(ttinfo->dstoff); 797 Py_XDECREF(ttinfo->tzname); 798 } 799} 800 801/* Equality function for _ttinfo. */ 802static int 803ttinfo_eq(const _ttinfo *const tti0, const _ttinfo *const tti1) 804{ 805 int rv; 806 if ((rv = PyObject_RichCompareBool(tti0->utcoff, tti1->utcoff, Py_EQ)) < 807 1) { 808 goto end; 809 } 810 811 if ((rv = PyObject_RichCompareBool(tti0->dstoff, tti1->dstoff, Py_EQ)) < 812 1) { 813 goto end; 814 } 815 816 if ((rv = PyObject_RichCompareBool(tti0->tzname, tti1->tzname, Py_EQ)) < 817 1) { 818 goto end; 819 } 820end: 821 return rv; 822} 823 824/* Given a file-like object, this populates a ZoneInfo object 825 * 826 * The current version calls into a Python function to read the data from 827 * file into Python objects, and this translates those Python objects into 828 * C values and calculates derived values (e.g. dstoff) in C. 829 * 830 * This returns 0 on success and -1 on failure. 831 * 832 * The function will never return while `self` is partially initialized — 833 * the object only needs to be freed / deallocated if this succeeds. 834 */ 835static int 836load_data(PyZoneInfo_ZoneInfo *self, PyObject *file_obj) 837{ 838 PyObject *data_tuple = NULL; 839 840 long *utcoff = NULL; 841 long *dstoff = NULL; 842 size_t *trans_idx = NULL; 843 unsigned char *isdst = NULL; 844 845 self->trans_list_utc = NULL; 846 self->trans_list_wall[0] = NULL; 847 self->trans_list_wall[1] = NULL; 848 self->trans_ttinfos = NULL; 849 self->_ttinfos = NULL; 850 self->file_repr = NULL; 851 852 size_t ttinfos_allocated = 0; 853 854 data_tuple = PyObject_CallMethod(_common_mod, "load_data", "O", file_obj); 855 856 if (data_tuple == NULL) { 857 goto error; 858 } 859 860 if (!PyTuple_CheckExact(data_tuple)) { 861 PyErr_Format(PyExc_TypeError, "Invalid data result type: %r", 862 data_tuple); 863 goto error; 864 } 865 866 // Unpack the data tuple 867 PyObject *trans_idx_list = PyTuple_GetItem(data_tuple, 0); 868 if (trans_idx_list == NULL) { 869 goto error; 870 } 871 872 PyObject *trans_utc = PyTuple_GetItem(data_tuple, 1); 873 if (trans_utc == NULL) { 874 goto error; 875 } 876 877 PyObject *utcoff_list = PyTuple_GetItem(data_tuple, 2); 878 if (utcoff_list == NULL) { 879 goto error; 880 } 881 882 PyObject *isdst_list = PyTuple_GetItem(data_tuple, 3); 883 if (isdst_list == NULL) { 884 goto error; 885 } 886 887 PyObject *abbr = PyTuple_GetItem(data_tuple, 4); 888 if (abbr == NULL) { 889 goto error; 890 } 891 892 PyObject *tz_str = PyTuple_GetItem(data_tuple, 5); 893 if (tz_str == NULL) { 894 goto error; 895 } 896 897 // Load the relevant sizes 898 Py_ssize_t num_transitions = PyTuple_Size(trans_utc); 899 if (num_transitions < 0) { 900 goto error; 901 } 902 903 Py_ssize_t num_ttinfos = PyTuple_Size(utcoff_list); 904 if (num_ttinfos < 0) { 905 goto error; 906 } 907 908 self->num_transitions = (size_t)num_transitions; 909 self->num_ttinfos = (size_t)num_ttinfos; 910 911 // Load the transition indices and list 912 self->trans_list_utc = 913 PyMem_Malloc(self->num_transitions * sizeof(int64_t)); 914 if (self->trans_list_utc == NULL) { 915 goto error; 916 } 917 trans_idx = PyMem_Malloc(self->num_transitions * sizeof(Py_ssize_t)); 918 if (trans_idx == NULL) { 919 goto error; 920 } 921 922 for (size_t i = 0; i < self->num_transitions; ++i) { 923 PyObject *num = PyTuple_GetItem(trans_utc, i); 924 if (num == NULL) { 925 goto error; 926 } 927 self->trans_list_utc[i] = PyLong_AsLongLong(num); 928 if (self->trans_list_utc[i] == -1 && PyErr_Occurred()) { 929 goto error; 930 } 931 932 num = PyTuple_GetItem(trans_idx_list, i); 933 if (num == NULL) { 934 goto error; 935 } 936 937 Py_ssize_t cur_trans_idx = PyLong_AsSsize_t(num); 938 if (cur_trans_idx == -1) { 939 goto error; 940 } 941 942 trans_idx[i] = (size_t)cur_trans_idx; 943 if (trans_idx[i] > self->num_ttinfos) { 944 PyErr_Format( 945 PyExc_ValueError, 946 "Invalid transition index found while reading TZif: %zd", 947 cur_trans_idx); 948 949 goto error; 950 } 951 } 952 953 // Load UTC offsets and isdst (size num_ttinfos) 954 utcoff = PyMem_Malloc(self->num_ttinfos * sizeof(long)); 955 isdst = PyMem_Malloc(self->num_ttinfos * sizeof(unsigned char)); 956 957 if (utcoff == NULL || isdst == NULL) { 958 goto error; 959 } 960 for (size_t i = 0; i < self->num_ttinfos; ++i) { 961 PyObject *num = PyTuple_GetItem(utcoff_list, i); 962 if (num == NULL) { 963 goto error; 964 } 965 966 utcoff[i] = PyLong_AsLong(num); 967 if (utcoff[i] == -1 && PyErr_Occurred()) { 968 goto error; 969 } 970 971 num = PyTuple_GetItem(isdst_list, i); 972 if (num == NULL) { 973 goto error; 974 } 975 976 int isdst_with_error = PyObject_IsTrue(num); 977 if (isdst_with_error == -1) { 978 goto error; 979 } 980 else { 981 isdst[i] = (unsigned char)isdst_with_error; 982 } 983 } 984 985 dstoff = PyMem_Calloc(self->num_ttinfos, sizeof(long)); 986 if (dstoff == NULL) { 987 goto error; 988 } 989 990 // Derive dstoff and trans_list_wall from the information we've loaded 991 utcoff_to_dstoff(trans_idx, utcoff, dstoff, isdst, self->num_transitions, 992 self->num_ttinfos); 993 994 if (ts_to_local(trans_idx, self->trans_list_utc, utcoff, 995 self->trans_list_wall, self->num_ttinfos, 996 self->num_transitions)) { 997 goto error; 998 } 999 1000 // Build _ttinfo objects from utcoff, dstoff and abbr 1001 self->_ttinfos = PyMem_Malloc(self->num_ttinfos * sizeof(_ttinfo)); 1002 if (self->_ttinfos == NULL) { 1003 goto error; 1004 } 1005 for (size_t i = 0; i < self->num_ttinfos; ++i) { 1006 PyObject *tzname = PyTuple_GetItem(abbr, i); 1007 if (tzname == NULL) { 1008 goto error; 1009 } 1010 1011 ttinfos_allocated++; 1012 if (build_ttinfo(utcoff[i], dstoff[i], tzname, &(self->_ttinfos[i]))) { 1013 goto error; 1014 } 1015 } 1016 1017 // Build our mapping from transition to the ttinfo that applies 1018 self->trans_ttinfos = 1019 PyMem_Calloc(self->num_transitions, sizeof(_ttinfo *)); 1020 if (self->trans_ttinfos == NULL) { 1021 goto error; 1022 } 1023 for (size_t i = 0; i < self->num_transitions; ++i) { 1024 size_t ttinfo_idx = trans_idx[i]; 1025 assert(ttinfo_idx < self->num_ttinfos); 1026 self->trans_ttinfos[i] = &(self->_ttinfos[ttinfo_idx]); 1027 } 1028 1029 // Set ttinfo_before to the first non-DST transition 1030 for (size_t i = 0; i < self->num_ttinfos; ++i) { 1031 if (!isdst[i]) { 1032 self->ttinfo_before = &(self->_ttinfos[i]); 1033 break; 1034 } 1035 } 1036 1037 // If there are only DST ttinfos, pick the first one, if there are no 1038 // ttinfos at all, set ttinfo_before to NULL 1039 if (self->ttinfo_before == NULL && self->num_ttinfos > 0) { 1040 self->ttinfo_before = &(self->_ttinfos[0]); 1041 } 1042 1043 if (tz_str != Py_None && PyObject_IsTrue(tz_str)) { 1044 if (parse_tz_str(tz_str, &(self->tzrule_after))) { 1045 goto error; 1046 } 1047 } 1048 else { 1049 if (!self->num_ttinfos) { 1050 PyErr_Format(PyExc_ValueError, "No time zone information found."); 1051 goto error; 1052 } 1053 1054 size_t idx; 1055 if (!self->num_transitions) { 1056 idx = self->num_ttinfos - 1; 1057 } 1058 else { 1059 idx = trans_idx[self->num_transitions - 1]; 1060 } 1061 1062 _ttinfo *tti = &(self->_ttinfos[idx]); 1063 build_tzrule(tti->tzname, NULL, tti->utcoff_seconds, 0, NULL, NULL, 1064 &(self->tzrule_after)); 1065 1066 // We've abused the build_tzrule constructor to construct an STD-only 1067 // rule mimicking whatever ttinfo we've picked up, but it's possible 1068 // that the one we've picked up is a DST zone, so we need to make sure 1069 // that the dstoff is set correctly in that case. 1070 if (PyObject_IsTrue(tti->dstoff)) { 1071 _ttinfo *tti_after = &(self->tzrule_after.std); 1072 Py_DECREF(tti_after->dstoff); 1073 tti_after->dstoff = tti->dstoff; 1074 Py_INCREF(tti_after->dstoff); 1075 } 1076 } 1077 1078 // Determine if this is a "fixed offset" zone, meaning that the output of 1079 // the utcoffset, dst and tzname functions does not depend on the specific 1080 // datetime passed. 1081 // 1082 // We make three simplifying assumptions here: 1083 // 1084 // 1. If tzrule_after is not std_only, it has transitions that might occur 1085 // (it is possible to construct TZ strings that specify STD and DST but 1086 // no transitions ever occur, such as AAA0BBB,0/0,J365/25). 1087 // 2. If self->_ttinfos contains more than one _ttinfo object, the objects 1088 // represent different offsets. 1089 // 3. self->ttinfos contains no unused _ttinfos (in which case an otherwise 1090 // fixed-offset zone with extra _ttinfos defined may appear to *not* be 1091 // a fixed offset zone). 1092 // 1093 // Violations to these assumptions would be fairly exotic, and exotic 1094 // zones should almost certainly not be used with datetime.time (the 1095 // only thing that would be affected by this). 1096 if (self->num_ttinfos > 1 || !self->tzrule_after.std_only) { 1097 self->fixed_offset = 0; 1098 } 1099 else if (self->num_ttinfos == 0) { 1100 self->fixed_offset = 1; 1101 } 1102 else { 1103 int constant_offset = 1104 ttinfo_eq(&(self->_ttinfos[0]), &self->tzrule_after.std); 1105 if (constant_offset < 0) { 1106 goto error; 1107 } 1108 else { 1109 self->fixed_offset = constant_offset; 1110 } 1111 } 1112 1113 int rv = 0; 1114 goto cleanup; 1115error: 1116 // These resources only need to be freed if we have failed, if we succeed 1117 // in initializing a PyZoneInfo_ZoneInfo object, we can rely on its dealloc 1118 // method to free the relevant resources. 1119 if (self->trans_list_utc != NULL) { 1120 PyMem_Free(self->trans_list_utc); 1121 self->trans_list_utc = NULL; 1122 } 1123 1124 for (size_t i = 0; i < 2; ++i) { 1125 if (self->trans_list_wall[i] != NULL) { 1126 PyMem_Free(self->trans_list_wall[i]); 1127 self->trans_list_wall[i] = NULL; 1128 } 1129 } 1130 1131 if (self->_ttinfos != NULL) { 1132 for (size_t i = 0; i < ttinfos_allocated; ++i) { 1133 xdecref_ttinfo(&(self->_ttinfos[i])); 1134 } 1135 PyMem_Free(self->_ttinfos); 1136 self->_ttinfos = NULL; 1137 } 1138 1139 if (self->trans_ttinfos != NULL) { 1140 PyMem_Free(self->trans_ttinfos); 1141 self->trans_ttinfos = NULL; 1142 } 1143 1144 rv = -1; 1145cleanup: 1146 Py_XDECREF(data_tuple); 1147 1148 if (utcoff != NULL) { 1149 PyMem_Free(utcoff); 1150 } 1151 1152 if (dstoff != NULL) { 1153 PyMem_Free(dstoff); 1154 } 1155 1156 if (isdst != NULL) { 1157 PyMem_Free(isdst); 1158 } 1159 1160 if (trans_idx != NULL) { 1161 PyMem_Free(trans_idx); 1162 } 1163 1164 return rv; 1165} 1166 1167/* Function to calculate the local timestamp of a transition from the year. */ 1168int64_t 1169calendarrule_year_to_timestamp(TransitionRuleType *base_self, int year) 1170{ 1171 CalendarRule *self = (CalendarRule *)base_self; 1172 1173 // We want (year, month, day of month); we have year and month, but we 1174 // need to turn (week, day-of-week) into day-of-month 1175 // 1176 // Week 1 is the first week in which day `day` (where 0 = Sunday) appears. 1177 // Week 5 represents the last occurrence of day `day`, so we need to know 1178 // the first weekday of the month and the number of days in the month. 1179 int8_t first_day = (ymd_to_ord(year, self->month, 1) + 6) % 7; 1180 uint8_t days_in_month = DAYS_IN_MONTH[self->month]; 1181 if (self->month == 2 && is_leap_year(year)) { 1182 days_in_month += 1; 1183 } 1184 1185 // This equation seems magical, so I'll break it down: 1186 // 1. calendar says 0 = Monday, POSIX says 0 = Sunday so we need first_day 1187 // + 1 to get 1 = Monday -> 7 = Sunday, which is still equivalent 1188 // because this math is mod 7 1189 // 2. Get first day - desired day mod 7 (adjusting by 7 for negative 1190 // numbers so that -1 % 7 = 6). 1191 // 3. Add 1 because month days are a 1-based index. 1192 int8_t month_day = ((int8_t)(self->day) - (first_day + 1)) % 7; 1193 if (month_day < 0) { 1194 month_day += 7; 1195 } 1196 month_day += 1; 1197 1198 // Now use a 0-based index version of `week` to calculate the w-th 1199 // occurrence of `day` 1200 month_day += ((int8_t)(self->week) - 1) * 7; 1201 1202 // month_day will only be > days_in_month if w was 5, and `w` means "last 1203 // occurrence of `d`", so now we just check if we over-shot the end of the 1204 // month and if so knock off 1 week. 1205 if (month_day > days_in_month) { 1206 month_day -= 7; 1207 } 1208 1209 int64_t ordinal = ymd_to_ord(year, self->month, month_day) - EPOCHORDINAL; 1210 return ((ordinal * 86400) + (int64_t)(self->hour * 3600) + 1211 (int64_t)(self->minute * 60) + (int64_t)(self->second)); 1212} 1213 1214/* Constructor for CalendarRule. */ 1215int 1216calendarrule_new(uint8_t month, uint8_t week, uint8_t day, int8_t hour, 1217 int8_t minute, int8_t second, CalendarRule *out) 1218{ 1219 // These bounds come from the POSIX standard, which describes an Mm.n.d 1220 // rule as: 1221 // 1222 // The d'th day (0 <= d <= 6) of week n of month m of the year (1 <= n <= 1223 // 5, 1 <= m <= 12, where week 5 means "the last d day in month m" which 1224 // may occur in either the fourth or the fifth week). Week 1 is the first 1225 // week in which the d'th day occurs. Day zero is Sunday. 1226 if (month <= 0 || month > 12) { 1227 PyErr_Format(PyExc_ValueError, "Month must be in (0, 12]"); 1228 return -1; 1229 } 1230 1231 if (week <= 0 || week > 5) { 1232 PyErr_Format(PyExc_ValueError, "Week must be in (0, 5]"); 1233 return -1; 1234 } 1235 1236 // If the 'day' parameter type is changed to a signed type, 1237 // "day < 0" check must be added. 1238 if (/* day < 0 || */ day > 6) { 1239 PyErr_Format(PyExc_ValueError, "Day must be in [0, 6]"); 1240 return -1; 1241 } 1242 1243 TransitionRuleType base = {&calendarrule_year_to_timestamp}; 1244 1245 CalendarRule new_offset = { 1246 .base = base, 1247 .month = month, 1248 .week = week, 1249 .day = day, 1250 .hour = hour, 1251 .minute = minute, 1252 .second = second, 1253 }; 1254 1255 *out = new_offset; 1256 return 0; 1257} 1258 1259/* Function to calculate the local timestamp of a transition from the year. 1260 * 1261 * This translates the day of the year into a local timestamp — either a 1262 * 1-based Julian day, not including leap days, or the 0-based year-day, 1263 * including leap days. 1264 * */ 1265int64_t 1266dayrule_year_to_timestamp(TransitionRuleType *base_self, int year) 1267{ 1268 // The function signature requires a TransitionRuleType pointer, but this 1269 // function is only applicable to DayRule* objects. 1270 DayRule *self = (DayRule *)base_self; 1271 1272 // ymd_to_ord calculates the number of days since 0001-01-01, but we want 1273 // to know the number of days since 1970-01-01, so we must subtract off 1274 // the equivalent of ymd_to_ord(1970, 1, 1). 1275 // 1276 // We subtract off an additional 1 day to account for January 1st (we want 1277 // the number of full days *before* the date of the transition - partial 1278 // days are accounted for in the hour, minute and second portions. 1279 int64_t days_before_year = ymd_to_ord(year, 1, 1) - EPOCHORDINAL - 1; 1280 1281 // The Julian day specification skips over February 29th in leap years, 1282 // from the POSIX standard: 1283 // 1284 // Leap days shall not be counted. That is, in all years-including leap 1285 // years-February 28 is day 59 and March 1 is day 60. It is impossible to 1286 // refer explicitly to the occasional February 29. 1287 // 1288 // This is actually more useful than you'd think — if you want a rule that 1289 // always transitions on a given calendar day (other than February 29th), 1290 // you would use a Julian day, e.g. J91 always refers to April 1st and J365 1291 // always refers to December 31st. 1292 unsigned int day = self->day; 1293 if (self->julian && day >= 59 && is_leap_year(year)) { 1294 day += 1; 1295 } 1296 1297 return ((days_before_year + day) * 86400) + (self->hour * 3600) + 1298 (self->minute * 60) + self->second; 1299} 1300 1301/* Constructor for DayRule. */ 1302static int 1303dayrule_new(uint8_t julian, unsigned int day, int8_t hour, int8_t minute, 1304 int8_t second, DayRule *out) 1305{ 1306 // The POSIX standard specifies that Julian days must be in the range (1 <= 1307 // n <= 365) and that non-Julian (they call it "0-based Julian") days must 1308 // be in the range (0 <= n <= 365). 1309 if (day < julian || day > 365) { 1310 PyErr_Format(PyExc_ValueError, "day must be in [%u, 365], not: %u", 1311 julian, day); 1312 return -1; 1313 } 1314 1315 TransitionRuleType base = { 1316 &dayrule_year_to_timestamp, 1317 }; 1318 1319 DayRule tmp = { 1320 .base = base, 1321 .julian = julian, 1322 .day = day, 1323 .hour = hour, 1324 .minute = minute, 1325 .second = second, 1326 }; 1327 1328 *out = tmp; 1329 1330 return 0; 1331} 1332 1333/* Calculate the start and end rules for a _tzrule in the given year. */ 1334static void 1335tzrule_transitions(_tzrule *rule, int year, int64_t *start, int64_t *end) 1336{ 1337 assert(rule->start != NULL); 1338 assert(rule->end != NULL); 1339 *start = rule->start->year_to_timestamp(rule->start, year); 1340 *end = rule->end->year_to_timestamp(rule->end, year); 1341} 1342 1343/* Calculate the _ttinfo that applies at a given local time from a _tzrule. 1344 * 1345 * This takes a local timestamp and fold for disambiguation purposes; the year 1346 * could technically be calculated from the timestamp, but given that the 1347 * callers of this function already have the year information accessible from 1348 * the datetime struct, it is taken as an additional parameter to reduce 1349 * unnecessary calculation. 1350 * */ 1351static _ttinfo * 1352find_tzrule_ttinfo(_tzrule *rule, int64_t ts, unsigned char fold, int year) 1353{ 1354 if (rule->std_only) { 1355 return &(rule->std); 1356 } 1357 1358 int64_t start, end; 1359 uint8_t isdst; 1360 1361 tzrule_transitions(rule, year, &start, &end); 1362 1363 // With fold = 0, the period (denominated in local time) with the smaller 1364 // offset starts at the end of the gap and ends at the end of the fold; 1365 // with fold = 1, it runs from the start of the gap to the beginning of the 1366 // fold. 1367 // 1368 // So in order to determine the DST boundaries we need to know both the 1369 // fold and whether DST is positive or negative (rare), and it turns out 1370 // that this boils down to fold XOR is_positive. 1371 if (fold == (rule->dst_diff >= 0)) { 1372 end -= rule->dst_diff; 1373 } 1374 else { 1375 start += rule->dst_diff; 1376 } 1377 1378 if (start < end) { 1379 isdst = (ts >= start) && (ts < end); 1380 } 1381 else { 1382 isdst = (ts < end) || (ts >= start); 1383 } 1384 1385 if (isdst) { 1386 return &(rule->dst); 1387 } 1388 else { 1389 return &(rule->std); 1390 } 1391} 1392 1393/* Calculate the ttinfo and fold that applies for a _tzrule at an epoch time. 1394 * 1395 * This function can determine the _ttinfo that applies at a given epoch time, 1396 * (analogous to trans_list_utc), and whether or not the datetime is in a fold. 1397 * This is to be used in the .fromutc() function. 1398 * 1399 * The year is technically a redundant parameter, because it can be calculated 1400 * from the timestamp, but all callers of this function should have the year 1401 * in the datetime struct anyway, so taking it as a parameter saves unnecessary 1402 * calculation. 1403 **/ 1404static _ttinfo * 1405find_tzrule_ttinfo_fromutc(_tzrule *rule, int64_t ts, int year, 1406 unsigned char *fold) 1407{ 1408 if (rule->std_only) { 1409 *fold = 0; 1410 return &(rule->std); 1411 } 1412 1413 int64_t start, end; 1414 uint8_t isdst; 1415 tzrule_transitions(rule, year, &start, &end); 1416 start -= rule->std.utcoff_seconds; 1417 end -= rule->dst.utcoff_seconds; 1418 1419 if (start < end) { 1420 isdst = (ts >= start) && (ts < end); 1421 } 1422 else { 1423 isdst = (ts < end) || (ts >= start); 1424 } 1425 1426 // For positive DST, the ambiguous period is one dst_diff after the end of 1427 // DST; for negative DST, the ambiguous period is one dst_diff before the 1428 // start of DST. 1429 int64_t ambig_start, ambig_end; 1430 if (rule->dst_diff > 0) { 1431 ambig_start = end; 1432 ambig_end = end + rule->dst_diff; 1433 } 1434 else { 1435 ambig_start = start; 1436 ambig_end = start - rule->dst_diff; 1437 } 1438 1439 *fold = (ts >= ambig_start) && (ts < ambig_end); 1440 1441 if (isdst) { 1442 return &(rule->dst); 1443 } 1444 else { 1445 return &(rule->std); 1446 } 1447} 1448 1449/* Parse a TZ string in the format specified by the POSIX standard: 1450 * 1451 * std offset[dst[offset],start[/time],end[/time]] 1452 * 1453 * std and dst must be 3 or more characters long and must not contain a 1454 * leading colon, embedded digits, commas, nor a plus or minus signs; The 1455 * spaces between "std" and "offset" are only for display and are not actually 1456 * present in the string. 1457 * 1458 * The format of the offset is ``[+|-]hh[:mm[:ss]]`` 1459 * 1460 * See the POSIX.1 spec: IEE Std 1003.1-2018 §8.3: 1461 * 1462 * https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap08.html 1463 */ 1464static int 1465parse_tz_str(PyObject *tz_str_obj, _tzrule *out) 1466{ 1467 PyObject *std_abbr = NULL; 1468 PyObject *dst_abbr = NULL; 1469 TransitionRuleType *start = NULL; 1470 TransitionRuleType *end = NULL; 1471 // Initialize offsets to invalid value (> 24 hours) 1472 long std_offset = 1 << 20; 1473 long dst_offset = 1 << 20; 1474 1475 const char *tz_str = PyBytes_AsString(tz_str_obj); 1476 if (tz_str == NULL) { 1477 return -1; 1478 } 1479 const char *p = tz_str; 1480 1481 // Read the `std` abbreviation, which must be at least 3 characters long. 1482 Py_ssize_t num_chars = parse_abbr(p, &std_abbr); 1483 if (num_chars < 1) { 1484 PyErr_Format(PyExc_ValueError, "Invalid STD format in %R", tz_str_obj); 1485 goto error; 1486 } 1487 1488 p += num_chars; 1489 1490 // Now read the STD offset, which is required 1491 num_chars = parse_tz_delta(p, &std_offset); 1492 if (num_chars < 0) { 1493 PyErr_Format(PyExc_ValueError, "Invalid STD offset in %R", tz_str_obj); 1494 goto error; 1495 } 1496 p += num_chars; 1497 1498 // If the string ends here, there is no DST, otherwise we must parse the 1499 // DST abbreviation and start and end dates and times. 1500 if (*p == '\0') { 1501 goto complete; 1502 } 1503 1504 num_chars = parse_abbr(p, &dst_abbr); 1505 if (num_chars < 1) { 1506 PyErr_Format(PyExc_ValueError, "Invalid DST format in %R", tz_str_obj); 1507 goto error; 1508 } 1509 p += num_chars; 1510 1511 if (*p == ',') { 1512 // From the POSIX standard: 1513 // 1514 // If no offset follows dst, the alternative time is assumed to be one 1515 // hour ahead of standard time. 1516 dst_offset = std_offset + 3600; 1517 } 1518 else { 1519 num_chars = parse_tz_delta(p, &dst_offset); 1520 if (num_chars < 0) { 1521 PyErr_Format(PyExc_ValueError, "Invalid DST offset in %R", 1522 tz_str_obj); 1523 goto error; 1524 } 1525 1526 p += num_chars; 1527 } 1528 1529 TransitionRuleType **transitions[2] = {&start, &end}; 1530 for (size_t i = 0; i < 2; ++i) { 1531 if (*p != ',') { 1532 PyErr_Format(PyExc_ValueError, 1533 "Missing transition rules in TZ string: %R", 1534 tz_str_obj); 1535 goto error; 1536 } 1537 p++; 1538 1539 num_chars = parse_transition_rule(p, transitions[i]); 1540 if (num_chars < 0) { 1541 PyErr_Format(PyExc_ValueError, 1542 "Malformed transition rule in TZ string: %R", 1543 tz_str_obj); 1544 goto error; 1545 } 1546 p += num_chars; 1547 } 1548 1549 if (*p != '\0') { 1550 PyErr_Format(PyExc_ValueError, 1551 "Extraneous characters at end of TZ string: %R", 1552 tz_str_obj); 1553 goto error; 1554 } 1555 1556complete: 1557 build_tzrule(std_abbr, dst_abbr, std_offset, dst_offset, start, end, out); 1558 Py_DECREF(std_abbr); 1559 Py_XDECREF(dst_abbr); 1560 1561 return 0; 1562error: 1563 Py_XDECREF(std_abbr); 1564 if (dst_abbr != NULL && dst_abbr != Py_None) { 1565 Py_DECREF(dst_abbr); 1566 } 1567 1568 if (start != NULL) { 1569 PyMem_Free(start); 1570 } 1571 1572 if (end != NULL) { 1573 PyMem_Free(end); 1574 } 1575 1576 return -1; 1577} 1578 1579static int 1580parse_uint(const char *const p, uint8_t *value) 1581{ 1582 if (!isdigit(*p)) { 1583 return -1; 1584 } 1585 1586 *value = (*p) - '0'; 1587 return 0; 1588} 1589 1590/* Parse the STD and DST abbreviations from a TZ string. */ 1591static Py_ssize_t 1592parse_abbr(const char *const p, PyObject **abbr) 1593{ 1594 const char *ptr = p; 1595 char buff = *ptr; 1596 const char *str_start; 1597 const char *str_end; 1598 1599 if (*ptr == '<') { 1600 ptr++; 1601 str_start = ptr; 1602 while ((buff = *ptr) != '>') { 1603 // From the POSIX standard: 1604 // 1605 // In the quoted form, the first character shall be the less-than 1606 // ( '<' ) character and the last character shall be the 1607 // greater-than ( '>' ) character. All characters between these 1608 // quoting characters shall be alphanumeric characters from the 1609 // portable character set in the current locale, the plus-sign ( 1610 // '+' ) character, or the minus-sign ( '-' ) character. The std 1611 // and dst fields in this case shall not include the quoting 1612 // characters. 1613 if (!isalpha(buff) && !isdigit(buff) && buff != '+' && 1614 buff != '-') { 1615 return -1; 1616 } 1617 ptr++; 1618 } 1619 str_end = ptr; 1620 ptr++; 1621 } 1622 else { 1623 str_start = p; 1624 // From the POSIX standard: 1625 // 1626 // In the unquoted form, all characters in these fields shall be 1627 // alphabetic characters from the portable character set in the 1628 // current locale. 1629 while (isalpha(*ptr)) { 1630 ptr++; 1631 } 1632 str_end = ptr; 1633 } 1634 1635 *abbr = PyUnicode_FromStringAndSize(str_start, str_end - str_start); 1636 if (*abbr == NULL) { 1637 return -1; 1638 } 1639 1640 return ptr - p; 1641} 1642 1643/* Parse a UTC offset from a TZ str. */ 1644static Py_ssize_t 1645parse_tz_delta(const char *const p, long *total_seconds) 1646{ 1647 // From the POSIX spec: 1648 // 1649 // Indicates the value added to the local time to arrive at Coordinated 1650 // Universal Time. The offset has the form: 1651 // 1652 // hh[:mm[:ss]] 1653 // 1654 // One or more digits may be used; the value is always interpreted as a 1655 // decimal number. 1656 // 1657 // The POSIX spec says that the values for `hour` must be between 0 and 24 1658 // hours, but RFC 8536 §3.3.1 specifies that the hours part of the 1659 // transition times may be signed and range from -167 to 167. 1660 long sign = -1; 1661 long hours = 0; 1662 long minutes = 0; 1663 long seconds = 0; 1664 1665 const char *ptr = p; 1666 char buff = *ptr; 1667 if (buff == '-' || buff == '+') { 1668 // Negative numbers correspond to *positive* offsets, from the spec: 1669 // 1670 // If preceded by a '-', the timezone shall be east of the Prime 1671 // Meridian; otherwise, it shall be west (which may be indicated by 1672 // an optional preceding '+' ). 1673 if (buff == '-') { 1674 sign = 1; 1675 } 1676 1677 ptr++; 1678 } 1679 1680 // The hour can be 1 or 2 numeric characters 1681 for (size_t i = 0; i < 2; ++i) { 1682 buff = *ptr; 1683 if (!isdigit(buff)) { 1684 if (i == 0) { 1685 return -1; 1686 } 1687 else { 1688 break; 1689 } 1690 } 1691 1692 hours *= 10; 1693 hours += buff - '0'; 1694 ptr++; 1695 } 1696 1697 if (hours > 24 || hours < 0) { 1698 return -1; 1699 } 1700 1701 // Minutes and seconds always of the format ":dd" 1702 long *outputs[2] = {&minutes, &seconds}; 1703 for (size_t i = 0; i < 2; ++i) { 1704 if (*ptr != ':') { 1705 goto complete; 1706 } 1707 ptr++; 1708 1709 for (size_t j = 0; j < 2; ++j) { 1710 buff = *ptr; 1711 if (!isdigit(buff)) { 1712 return -1; 1713 } 1714 *(outputs[i]) *= 10; 1715 *(outputs[i]) += buff - '0'; 1716 ptr++; 1717 } 1718 } 1719 1720complete: 1721 *total_seconds = sign * ((hours * 3600) + (minutes * 60) + seconds); 1722 1723 return ptr - p; 1724} 1725 1726/* Parse the date portion of a transition rule. */ 1727static Py_ssize_t 1728parse_transition_rule(const char *const p, TransitionRuleType **out) 1729{ 1730 // The full transition rule indicates when to change back and forth between 1731 // STD and DST, and has the form: 1732 // 1733 // date[/time],date[/time] 1734 // 1735 // This function parses an individual date[/time] section, and returns 1736 // the number of characters that contributed to the transition rule. This 1737 // does not include the ',' at the end of the first rule. 1738 // 1739 // The POSIX spec states that if *time* is not given, the default is 02:00. 1740 const char *ptr = p; 1741 int8_t hour = 2; 1742 int8_t minute = 0; 1743 int8_t second = 0; 1744 1745 // Rules come in one of three flavors: 1746 // 1747 // 1. Jn: Julian day n, with no leap days. 1748 // 2. n: Day of year (0-based, with leap days) 1749 // 3. Mm.n.d: Specifying by month, week and day-of-week. 1750 1751 if (*ptr == 'M') { 1752 uint8_t month, week, day; 1753 ptr++; 1754 if (parse_uint(ptr, &month)) { 1755 return -1; 1756 } 1757 ptr++; 1758 if (*ptr != '.') { 1759 uint8_t tmp; 1760 if (parse_uint(ptr, &tmp)) { 1761 return -1; 1762 } 1763 1764 month *= 10; 1765 month += tmp; 1766 ptr++; 1767 } 1768 1769 uint8_t *values[2] = {&week, &day}; 1770 for (size_t i = 0; i < 2; ++i) { 1771 if (*ptr != '.') { 1772 return -1; 1773 } 1774 ptr++; 1775 1776 if (parse_uint(ptr, values[i])) { 1777 return -1; 1778 } 1779 ptr++; 1780 } 1781 1782 if (*ptr == '/') { 1783 ptr++; 1784 Py_ssize_t num_chars = 1785 parse_transition_time(ptr, &hour, &minute, &second); 1786 if (num_chars < 0) { 1787 return -1; 1788 } 1789 ptr += num_chars; 1790 } 1791 1792 CalendarRule *rv = PyMem_Calloc(1, sizeof(CalendarRule)); 1793 if (rv == NULL) { 1794 return -1; 1795 } 1796 1797 if (calendarrule_new(month, week, day, hour, minute, second, rv)) { 1798 PyMem_Free(rv); 1799 return -1; 1800 } 1801 1802 *out = (TransitionRuleType *)rv; 1803 } 1804 else { 1805 uint8_t julian = 0; 1806 unsigned int day = 0; 1807 if (*ptr == 'J') { 1808 julian = 1; 1809 ptr++; 1810 } 1811 1812 for (size_t i = 0; i < 3; ++i) { 1813 if (!isdigit(*ptr)) { 1814 if (i == 0) { 1815 return -1; 1816 } 1817 break; 1818 } 1819 day *= 10; 1820 day += (*ptr) - '0'; 1821 ptr++; 1822 } 1823 1824 if (*ptr == '/') { 1825 ptr++; 1826 Py_ssize_t num_chars = 1827 parse_transition_time(ptr, &hour, &minute, &second); 1828 if (num_chars < 0) { 1829 return -1; 1830 } 1831 ptr += num_chars; 1832 } 1833 1834 DayRule *rv = PyMem_Calloc(1, sizeof(DayRule)); 1835 if (rv == NULL) { 1836 return -1; 1837 } 1838 1839 if (dayrule_new(julian, day, hour, minute, second, rv)) { 1840 PyMem_Free(rv); 1841 return -1; 1842 } 1843 *out = (TransitionRuleType *)rv; 1844 } 1845 1846 return ptr - p; 1847} 1848 1849/* Parse the time portion of a transition rule (e.g. following an /) */ 1850static Py_ssize_t 1851parse_transition_time(const char *const p, int8_t *hour, int8_t *minute, 1852 int8_t *second) 1853{ 1854 // From the spec: 1855 // 1856 // The time has the same format as offset except that no leading sign 1857 // ( '-' or '+' ) is allowed. 1858 // 1859 // The format for the offset is: 1860 // 1861 // h[h][:mm[:ss]] 1862 // 1863 // RFC 8536 also allows transition times to be signed and to range from 1864 // -167 to +167, but the current version only supports [0, 99]. 1865 // 1866 // TODO: Support the full range of transition hours. 1867 int8_t *components[3] = {hour, minute, second}; 1868 const char *ptr = p; 1869 int8_t sign = 1; 1870 1871 if (*ptr == '-' || *ptr == '+') { 1872 if (*ptr == '-') { 1873 sign = -1; 1874 } 1875 ptr++; 1876 } 1877 1878 for (size_t i = 0; i < 3; ++i) { 1879 if (i > 0) { 1880 if (*ptr != ':') { 1881 break; 1882 } 1883 ptr++; 1884 } 1885 1886 uint8_t buff = 0; 1887 for (size_t j = 0; j < 2; j++) { 1888 if (!isdigit(*ptr)) { 1889 if (i == 0 && j > 0) { 1890 break; 1891 } 1892 return -1; 1893 } 1894 1895 buff *= 10; 1896 buff += (*ptr) - '0'; 1897 ptr++; 1898 } 1899 1900 *(components[i]) = sign * buff; 1901 } 1902 1903 return ptr - p; 1904} 1905 1906/* Constructor for a _tzrule. 1907 * 1908 * If `dst_abbr` is NULL, this will construct an "STD-only" _tzrule, in which 1909 * case `dst_offset` will be ignored and `start` and `end` are expected to be 1910 * NULL as well. 1911 * 1912 * Returns 0 on success. 1913 */ 1914static int 1915build_tzrule(PyObject *std_abbr, PyObject *dst_abbr, long std_offset, 1916 long dst_offset, TransitionRuleType *start, 1917 TransitionRuleType *end, _tzrule *out) 1918{ 1919 _tzrule rv = {{0}}; 1920 1921 rv.start = start; 1922 rv.end = end; 1923 1924 if (build_ttinfo(std_offset, 0, std_abbr, &rv.std)) { 1925 goto error; 1926 } 1927 1928 if (dst_abbr != NULL) { 1929 rv.dst_diff = dst_offset - std_offset; 1930 if (build_ttinfo(dst_offset, rv.dst_diff, dst_abbr, &rv.dst)) { 1931 goto error; 1932 } 1933 } 1934 else { 1935 rv.std_only = 1; 1936 } 1937 1938 *out = rv; 1939 1940 return 0; 1941error: 1942 xdecref_ttinfo(&rv.std); 1943 xdecref_ttinfo(&rv.dst); 1944 return -1; 1945} 1946 1947/* Destructor for _tzrule. */ 1948static void 1949free_tzrule(_tzrule *tzrule) 1950{ 1951 xdecref_ttinfo(&(tzrule->std)); 1952 if (!tzrule->std_only) { 1953 xdecref_ttinfo(&(tzrule->dst)); 1954 } 1955 1956 if (tzrule->start != NULL) { 1957 PyMem_Free(tzrule->start); 1958 } 1959 1960 if (tzrule->end != NULL) { 1961 PyMem_Free(tzrule->end); 1962 } 1963} 1964 1965/* Calculate DST offsets from transitions and UTC offsets 1966 * 1967 * This is necessary because each C `ttinfo` only contains the UTC offset, 1968 * time zone abbreviation and an isdst boolean - it does not include the 1969 * amount of the DST offset, but we need the amount for the dst() function. 1970 * 1971 * Thus function uses heuristics to infer what the offset should be, so it 1972 * is not guaranteed that this will work for all zones. If we cannot assign 1973 * a value for a given DST offset, we'll assume it's 1H rather than 0H, so 1974 * bool(dt.dst()) will always match ttinfo.isdst. 1975 */ 1976static void 1977utcoff_to_dstoff(size_t *trans_idx, long *utcoffs, long *dstoffs, 1978 unsigned char *isdsts, size_t num_transitions, 1979 size_t num_ttinfos) 1980{ 1981 size_t dst_count = 0; 1982 size_t dst_found = 0; 1983 for (size_t i = 0; i < num_ttinfos; ++i) { 1984 dst_count++; 1985 } 1986 1987 for (size_t i = 1; i < num_transitions; ++i) { 1988 if (dst_count == dst_found) { 1989 break; 1990 } 1991 1992 size_t idx = trans_idx[i]; 1993 size_t comp_idx = trans_idx[i - 1]; 1994 1995 // Only look at DST offsets that have nto been assigned already 1996 if (!isdsts[idx] || dstoffs[idx] != 0) { 1997 continue; 1998 } 1999 2000 long dstoff = 0; 2001 long utcoff = utcoffs[idx]; 2002 2003 if (!isdsts[comp_idx]) { 2004 dstoff = utcoff - utcoffs[comp_idx]; 2005 } 2006 2007 if (!dstoff && idx < (num_ttinfos - 1)) { 2008 comp_idx = trans_idx[i + 1]; 2009 2010 // If the following transition is also DST and we couldn't find 2011 // the DST offset by this point, we're going to have to skip it 2012 // and hope this transition gets assigned later 2013 if (isdsts[comp_idx]) { 2014 continue; 2015 } 2016 2017 dstoff = utcoff - utcoffs[comp_idx]; 2018 } 2019 2020 if (dstoff) { 2021 dst_found++; 2022 dstoffs[idx] = dstoff; 2023 } 2024 } 2025 2026 if (dst_found < dst_count) { 2027 // If there are time zones we didn't find a value for, we'll end up 2028 // with dstoff = 0 for something where isdst=1. This is obviously 2029 // wrong — one hour will be a much better guess than 0. 2030 for (size_t idx = 0; idx < num_ttinfos; ++idx) { 2031 if (isdsts[idx] && !dstoffs[idx]) { 2032 dstoffs[idx] = 3600; 2033 } 2034 } 2035 } 2036} 2037 2038#define _swap(x, y, buffer) \ 2039 buffer = x; \ 2040 x = y; \ 2041 y = buffer; 2042 2043/* Calculate transitions in local time from UTC time and offsets. 2044 * 2045 * We want to know when each transition occurs, denominated in the number of 2046 * nominal wall-time seconds between 1970-01-01T00:00:00 and the transition in 2047 * *local time* (note: this is *not* equivalent to the output of 2048 * datetime.timestamp, which is the total number of seconds actual elapsed 2049 * since 1970-01-01T00:00:00Z in UTC). 2050 * 2051 * This is an ambiguous question because "local time" can be ambiguous — but it 2052 * is disambiguated by the `fold` parameter, so we allocate two arrays: 2053 * 2054 * trans_local[0]: The wall-time transitions for fold=0 2055 * trans_local[1]: The wall-time transitions for fold=1 2056 * 2057 * This returns 0 on success and a negative number of failure. The trans_local 2058 * arrays must be freed if they are not NULL. 2059 */ 2060static int 2061ts_to_local(size_t *trans_idx, int64_t *trans_utc, long *utcoff, 2062 int64_t *trans_local[2], size_t num_ttinfos, 2063 size_t num_transitions) 2064{ 2065 if (num_transitions == 0) { 2066 return 0; 2067 } 2068 2069 // Copy the UTC transitions into each array to be modified in place later 2070 for (size_t i = 0; i < 2; ++i) { 2071 trans_local[i] = PyMem_Malloc(num_transitions * sizeof(int64_t)); 2072 if (trans_local[i] == NULL) { 2073 return -1; 2074 } 2075 2076 memcpy(trans_local[i], trans_utc, num_transitions * sizeof(int64_t)); 2077 } 2078 2079 int64_t offset_0, offset_1, buff; 2080 if (num_ttinfos > 1) { 2081 offset_0 = utcoff[0]; 2082 offset_1 = utcoff[trans_idx[0]]; 2083 2084 if (offset_1 > offset_0) { 2085 _swap(offset_0, offset_1, buff); 2086 } 2087 } 2088 else { 2089 offset_0 = utcoff[0]; 2090 offset_1 = utcoff[0]; 2091 } 2092 2093 trans_local[0][0] += offset_0; 2094 trans_local[1][0] += offset_1; 2095 2096 for (size_t i = 1; i < num_transitions; ++i) { 2097 offset_0 = utcoff[trans_idx[i - 1]]; 2098 offset_1 = utcoff[trans_idx[i]]; 2099 2100 if (offset_1 > offset_0) { 2101 _swap(offset_1, offset_0, buff); 2102 } 2103 2104 trans_local[0][i] += offset_0; 2105 trans_local[1][i] += offset_1; 2106 } 2107 2108 return 0; 2109} 2110 2111/* Simple bisect_right binary search implementation */ 2112static size_t 2113_bisect(const int64_t value, const int64_t *arr, size_t size) 2114{ 2115 size_t lo = 0; 2116 size_t hi = size; 2117 size_t m; 2118 2119 while (lo < hi) { 2120 m = (lo + hi) / 2; 2121 if (arr[m] > value) { 2122 hi = m; 2123 } 2124 else { 2125 lo = m + 1; 2126 } 2127 } 2128 2129 return hi; 2130} 2131 2132/* Find the ttinfo rules that apply at a given local datetime. */ 2133static _ttinfo * 2134find_ttinfo(PyZoneInfo_ZoneInfo *self, PyObject *dt) 2135{ 2136 // datetime.time has a .tzinfo attribute that passes None as the dt 2137 // argument; it only really has meaning for fixed-offset zones. 2138 if (dt == Py_None) { 2139 if (self->fixed_offset) { 2140 return &(self->tzrule_after.std); 2141 } 2142 else { 2143 return &NO_TTINFO; 2144 } 2145 } 2146 2147 int64_t ts; 2148 if (get_local_timestamp(dt, &ts)) { 2149 return NULL; 2150 } 2151 2152 unsigned char fold = PyDateTime_DATE_GET_FOLD(dt); 2153 assert(fold < 2); 2154 int64_t *local_transitions = self->trans_list_wall[fold]; 2155 size_t num_trans = self->num_transitions; 2156 2157 if (num_trans && ts < local_transitions[0]) { 2158 return self->ttinfo_before; 2159 } 2160 else if (!num_trans || ts > local_transitions[self->num_transitions - 1]) { 2161 return find_tzrule_ttinfo(&(self->tzrule_after), ts, fold, 2162 PyDateTime_GET_YEAR(dt)); 2163 } 2164 else { 2165 size_t idx = _bisect(ts, local_transitions, self->num_transitions) - 1; 2166 assert(idx < self->num_transitions); 2167 return self->trans_ttinfos[idx]; 2168 } 2169} 2170 2171static int 2172is_leap_year(int year) 2173{ 2174 const unsigned int ayear = (unsigned int)year; 2175 return ayear % 4 == 0 && (ayear % 100 != 0 || ayear % 400 == 0); 2176} 2177 2178/* Calculates ordinal datetime from year, month and day. */ 2179static int 2180ymd_to_ord(int y, int m, int d) 2181{ 2182 y -= 1; 2183 int days_before_year = (y * 365) + (y / 4) - (y / 100) + (y / 400); 2184 int yearday = DAYS_BEFORE_MONTH[m]; 2185 if (m > 2 && is_leap_year(y + 1)) { 2186 yearday += 1; 2187 } 2188 2189 return days_before_year + yearday + d; 2190} 2191 2192/* Calculate the number of seconds since 1970-01-01 in local time. 2193 * 2194 * This gets a datetime in the same "units" as self->trans_list_wall so that we 2195 * can easily determine which transitions a datetime falls between. See the 2196 * comment above ts_to_local for more information. 2197 * */ 2198static int 2199get_local_timestamp(PyObject *dt, int64_t *local_ts) 2200{ 2201 assert(local_ts != NULL); 2202 2203 int hour, minute, second; 2204 int ord; 2205 if (PyDateTime_CheckExact(dt)) { 2206 int y = PyDateTime_GET_YEAR(dt); 2207 int m = PyDateTime_GET_MONTH(dt); 2208 int d = PyDateTime_GET_DAY(dt); 2209 hour = PyDateTime_DATE_GET_HOUR(dt); 2210 minute = PyDateTime_DATE_GET_MINUTE(dt); 2211 second = PyDateTime_DATE_GET_SECOND(dt); 2212 2213 ord = ymd_to_ord(y, m, d); 2214 } 2215 else { 2216 PyObject *num = PyObject_CallMethod(dt, "toordinal", NULL); 2217 if (num == NULL) { 2218 return -1; 2219 } 2220 2221 ord = PyLong_AsLong(num); 2222 Py_DECREF(num); 2223 if (ord == -1 && PyErr_Occurred()) { 2224 return -1; 2225 } 2226 2227 num = PyObject_GetAttrString(dt, "hour"); 2228 if (num == NULL) { 2229 return -1; 2230 } 2231 hour = PyLong_AsLong(num); 2232 Py_DECREF(num); 2233 if (hour == -1) { 2234 return -1; 2235 } 2236 2237 num = PyObject_GetAttrString(dt, "minute"); 2238 if (num == NULL) { 2239 return -1; 2240 } 2241 minute = PyLong_AsLong(num); 2242 Py_DECREF(num); 2243 if (minute == -1) { 2244 return -1; 2245 } 2246 2247 num = PyObject_GetAttrString(dt, "second"); 2248 if (num == NULL) { 2249 return -1; 2250 } 2251 second = PyLong_AsLong(num); 2252 Py_DECREF(num); 2253 if (second == -1) { 2254 return -1; 2255 } 2256 } 2257 2258 *local_ts = (int64_t)(ord - EPOCHORDINAL) * 86400 + 2259 (int64_t)(hour * 3600 + minute * 60 + second); 2260 2261 return 0; 2262} 2263 2264///// 2265// Functions for cache handling 2266 2267/* Constructor for StrongCacheNode */ 2268static StrongCacheNode * 2269strong_cache_node_new(PyObject *key, PyObject *zone) 2270{ 2271 StrongCacheNode *node = PyMem_Malloc(sizeof(StrongCacheNode)); 2272 if (node == NULL) { 2273 return NULL; 2274 } 2275 2276 Py_INCREF(key); 2277 Py_INCREF(zone); 2278 2279 node->next = NULL; 2280 node->prev = NULL; 2281 node->key = key; 2282 node->zone = zone; 2283 2284 return node; 2285} 2286 2287/* Destructor for StrongCacheNode */ 2288void 2289strong_cache_node_free(StrongCacheNode *node) 2290{ 2291 Py_XDECREF(node->key); 2292 Py_XDECREF(node->zone); 2293 2294 PyMem_Free(node); 2295} 2296 2297/* Frees all nodes at or after a specified root in the strong cache. 2298 * 2299 * This can be used on the root node to free the entire cache or it can be used 2300 * to clear all nodes that have been expired (which, if everything is going 2301 * right, will actually only be 1 node at a time). 2302 */ 2303void 2304strong_cache_free(StrongCacheNode *root) 2305{ 2306 StrongCacheNode *node = root; 2307 StrongCacheNode *next_node; 2308 while (node != NULL) { 2309 next_node = node->next; 2310 strong_cache_node_free(node); 2311 2312 node = next_node; 2313 } 2314} 2315 2316/* Removes a node from the cache and update its neighbors. 2317 * 2318 * This is used both when ejecting a node from the cache and when moving it to 2319 * the front of the cache. 2320 */ 2321static void 2322remove_from_strong_cache(StrongCacheNode *node) 2323{ 2324 if (ZONEINFO_STRONG_CACHE == node) { 2325 ZONEINFO_STRONG_CACHE = node->next; 2326 } 2327 2328 if (node->prev != NULL) { 2329 node->prev->next = node->next; 2330 } 2331 2332 if (node->next != NULL) { 2333 node->next->prev = node->prev; 2334 } 2335 2336 node->next = NULL; 2337 node->prev = NULL; 2338} 2339 2340/* Retrieves the node associated with a key, if it exists. 2341 * 2342 * This traverses the strong cache until it finds a matching key and returns a 2343 * pointer to the relevant node if found. Returns NULL if no node is found. 2344 * 2345 * root may be NULL, indicating an empty cache. 2346 */ 2347static StrongCacheNode * 2348find_in_strong_cache(const StrongCacheNode *const root, PyObject *const key) 2349{ 2350 const StrongCacheNode *node = root; 2351 while (node != NULL) { 2352 int rv = PyObject_RichCompareBool(key, node->key, Py_EQ); 2353 if (rv < 0) { 2354 return NULL; 2355 } 2356 if (rv) { 2357 return (StrongCacheNode *)node; 2358 } 2359 2360 node = node->next; 2361 } 2362 2363 return NULL; 2364} 2365 2366/* Ejects a given key from the class's strong cache, if applicable. 2367 * 2368 * This function is used to enable the per-key functionality in clear_cache. 2369 */ 2370static int 2371eject_from_strong_cache(const PyTypeObject *const type, PyObject *key) 2372{ 2373 if (type != &PyZoneInfo_ZoneInfoType) { 2374 return 0; 2375 } 2376 2377 StrongCacheNode *node = find_in_strong_cache(ZONEINFO_STRONG_CACHE, key); 2378 if (node != NULL) { 2379 remove_from_strong_cache(node); 2380 2381 strong_cache_node_free(node); 2382 } 2383 else if (PyErr_Occurred()) { 2384 return -1; 2385 } 2386 return 0; 2387} 2388 2389/* Moves a node to the front of the LRU cache. 2390 * 2391 * The strong cache is an LRU cache, so whenever a given node is accessed, if 2392 * it is not at the front of the cache, it needs to be moved there. 2393 */ 2394static void 2395move_strong_cache_node_to_front(StrongCacheNode **root, StrongCacheNode *node) 2396{ 2397 StrongCacheNode *root_p = *root; 2398 if (root_p == node) { 2399 return; 2400 } 2401 2402 remove_from_strong_cache(node); 2403 2404 node->prev = NULL; 2405 node->next = root_p; 2406 2407 if (root_p != NULL) { 2408 root_p->prev = node; 2409 } 2410 2411 *root = node; 2412} 2413 2414/* Retrieves a ZoneInfo from the strong cache if it's present. 2415 * 2416 * This function finds the ZoneInfo by key and if found will move the node to 2417 * the front of the LRU cache and return a new reference to it. It returns NULL 2418 * if the key is not in the cache. 2419 * 2420 * The strong cache is currently only implemented for the base class, so this 2421 * always returns a cache miss for subclasses. 2422 */ 2423static PyObject * 2424zone_from_strong_cache(const PyTypeObject *const type, PyObject *const key) 2425{ 2426 if (type != &PyZoneInfo_ZoneInfoType) { 2427 return NULL; // Strong cache currently only implemented for base class 2428 } 2429 2430 StrongCacheNode *node = find_in_strong_cache(ZONEINFO_STRONG_CACHE, key); 2431 2432 if (node != NULL) { 2433 move_strong_cache_node_to_front(&ZONEINFO_STRONG_CACHE, node); 2434 Py_INCREF(node->zone); 2435 return node->zone; 2436 } 2437 2438 return NULL; // Cache miss 2439} 2440 2441/* Inserts a new key into the strong LRU cache. 2442 * 2443 * This function is only to be used after a cache miss — it creates a new node 2444 * at the front of the cache and ejects any stale entries (keeping the size of 2445 * the cache to at most ZONEINFO_STRONG_CACHE_MAX_SIZE). 2446 */ 2447static void 2448update_strong_cache(const PyTypeObject *const type, PyObject *key, 2449 PyObject *zone) 2450{ 2451 if (type != &PyZoneInfo_ZoneInfoType) { 2452 return; 2453 } 2454 2455 StrongCacheNode *new_node = strong_cache_node_new(key, zone); 2456 2457 move_strong_cache_node_to_front(&ZONEINFO_STRONG_CACHE, new_node); 2458 2459 StrongCacheNode *node = new_node->next; 2460 for (size_t i = 1; i < ZONEINFO_STRONG_CACHE_MAX_SIZE; ++i) { 2461 if (node == NULL) { 2462 return; 2463 } 2464 node = node->next; 2465 } 2466 2467 // Everything beyond this point needs to be freed 2468 if (node != NULL) { 2469 if (node->prev != NULL) { 2470 node->prev->next = NULL; 2471 } 2472 strong_cache_free(node); 2473 } 2474} 2475 2476/* Clears all entries into a type's strong cache. 2477 * 2478 * Because the strong cache is not implemented for subclasses, this is a no-op 2479 * for everything except the base class. 2480 */ 2481void 2482clear_strong_cache(const PyTypeObject *const type) 2483{ 2484 if (type != &PyZoneInfo_ZoneInfoType) { 2485 return; 2486 } 2487 2488 strong_cache_free(ZONEINFO_STRONG_CACHE); 2489 ZONEINFO_STRONG_CACHE = NULL; 2490} 2491 2492static PyObject * 2493new_weak_cache(void) 2494{ 2495 PyObject *weakref_module = PyImport_ImportModule("weakref"); 2496 if (weakref_module == NULL) { 2497 return NULL; 2498 } 2499 2500 PyObject *weak_cache = 2501 PyObject_CallMethod(weakref_module, "WeakValueDictionary", ""); 2502 Py_DECREF(weakref_module); 2503 return weak_cache; 2504} 2505 2506static int 2507initialize_caches(void) 2508{ 2509 // TODO: Move to a PyModule_GetState / PEP 573 based caching system. 2510 if (TIMEDELTA_CACHE == NULL) { 2511 TIMEDELTA_CACHE = PyDict_New(); 2512 } 2513 else { 2514 Py_INCREF(TIMEDELTA_CACHE); 2515 } 2516 2517 if (TIMEDELTA_CACHE == NULL) { 2518 return -1; 2519 } 2520 2521 if (ZONEINFO_WEAK_CACHE == NULL) { 2522 ZONEINFO_WEAK_CACHE = new_weak_cache(); 2523 } 2524 else { 2525 Py_INCREF(ZONEINFO_WEAK_CACHE); 2526 } 2527 2528 if (ZONEINFO_WEAK_CACHE == NULL) { 2529 return -1; 2530 } 2531 2532 return 0; 2533} 2534 2535static PyObject * 2536zoneinfo_init_subclass(PyTypeObject *cls, PyObject *args, PyObject **kwargs) 2537{ 2538 PyObject *weak_cache = new_weak_cache(); 2539 if (weak_cache == NULL) { 2540 return NULL; 2541 } 2542 2543 if (PyObject_SetAttrString((PyObject *)cls, "_weak_cache", 2544 weak_cache) < 0) { 2545 Py_DECREF(weak_cache); 2546 return NULL; 2547 } 2548 Py_DECREF(weak_cache); 2549 Py_RETURN_NONE; 2550} 2551 2552///// 2553// Specify the ZoneInfo type 2554static PyMethodDef zoneinfo_methods[] = { 2555 {"clear_cache", (PyCFunction)(void (*)(void))zoneinfo_clear_cache, 2556 METH_VARARGS | METH_KEYWORDS | METH_CLASS, 2557 PyDoc_STR("Clear the ZoneInfo cache.")}, 2558 {"no_cache", (PyCFunction)(void (*)(void))zoneinfo_no_cache, 2559 METH_VARARGS | METH_KEYWORDS | METH_CLASS, 2560 PyDoc_STR("Get a new instance of ZoneInfo, bypassing the cache.")}, 2561 {"from_file", (PyCFunction)(void (*)(void))zoneinfo_from_file, 2562 METH_VARARGS | METH_KEYWORDS | METH_CLASS, 2563 PyDoc_STR("Create a ZoneInfo file from a file object.")}, 2564 {"utcoffset", (PyCFunction)zoneinfo_utcoffset, METH_O, 2565 PyDoc_STR("Retrieve a timedelta representing the UTC offset in a zone at " 2566 "the given datetime.")}, 2567 {"dst", (PyCFunction)zoneinfo_dst, METH_O, 2568 PyDoc_STR("Retrieve a timedelta representing the amount of DST applied " 2569 "in a zone at the given datetime.")}, 2570 {"tzname", (PyCFunction)zoneinfo_tzname, METH_O, 2571 PyDoc_STR("Retrieve a string containing the abbreviation for the time " 2572 "zone that applies in a zone at a given datetime.")}, 2573 {"fromutc", (PyCFunction)zoneinfo_fromutc, METH_O, 2574 PyDoc_STR("Given a datetime with local time in UTC, retrieve an adjusted " 2575 "datetime in local time.")}, 2576 {"__reduce__", (PyCFunction)zoneinfo_reduce, METH_NOARGS, 2577 PyDoc_STR("Function for serialization with the pickle protocol.")}, 2578 {"_unpickle", (PyCFunction)zoneinfo__unpickle, METH_VARARGS | METH_CLASS, 2579 PyDoc_STR("Private method used in unpickling.")}, 2580 {"__init_subclass__", (PyCFunction)(void (*)(void))zoneinfo_init_subclass, 2581 METH_VARARGS | METH_KEYWORDS | METH_CLASS, 2582 PyDoc_STR("Function to initialize subclasses.")}, 2583 {NULL} /* Sentinel */ 2584}; 2585 2586static PyMemberDef zoneinfo_members[] = { 2587 {.name = "key", 2588 .offset = offsetof(PyZoneInfo_ZoneInfo, key), 2589 .type = T_OBJECT_EX, 2590 .flags = READONLY, 2591 .doc = NULL}, 2592 {NULL}, /* Sentinel */ 2593}; 2594 2595static PyTypeObject PyZoneInfo_ZoneInfoType = { 2596 PyVarObject_HEAD_INIT(NULL, 0) // 2597 .tp_name = "zoneinfo.ZoneInfo", 2598 .tp_basicsize = sizeof(PyZoneInfo_ZoneInfo), 2599 .tp_weaklistoffset = offsetof(PyZoneInfo_ZoneInfo, weakreflist), 2600 .tp_repr = (reprfunc)zoneinfo_repr, 2601 .tp_str = (reprfunc)zoneinfo_str, 2602 .tp_getattro = PyObject_GenericGetAttr, 2603 .tp_flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE), 2604 /* .tp_doc = zoneinfo_doc, */ 2605 .tp_methods = zoneinfo_methods, 2606 .tp_members = zoneinfo_members, 2607 .tp_new = zoneinfo_new, 2608 .tp_dealloc = zoneinfo_dealloc, 2609}; 2610 2611///// 2612// Specify the _zoneinfo module 2613static PyMethodDef module_methods[] = {{NULL, NULL}}; 2614static void 2615module_free(void *m) 2616{ 2617 Py_XDECREF(_tzpath_find_tzfile); 2618 _tzpath_find_tzfile = NULL; 2619 2620 Py_XDECREF(_common_mod); 2621 _common_mod = NULL; 2622 2623 Py_XDECREF(io_open); 2624 io_open = NULL; 2625 2626 xdecref_ttinfo(&NO_TTINFO); 2627 2628 if (TIMEDELTA_CACHE != NULL && Py_REFCNT(TIMEDELTA_CACHE) > 1) { 2629 Py_DECREF(TIMEDELTA_CACHE); 2630 } else { 2631 Py_CLEAR(TIMEDELTA_CACHE); 2632 } 2633 2634 if (ZONEINFO_WEAK_CACHE != NULL && Py_REFCNT(ZONEINFO_WEAK_CACHE) > 1) { 2635 Py_DECREF(ZONEINFO_WEAK_CACHE); 2636 } else { 2637 Py_CLEAR(ZONEINFO_WEAK_CACHE); 2638 } 2639 2640 clear_strong_cache(&PyZoneInfo_ZoneInfoType); 2641} 2642 2643static int 2644zoneinfomodule_exec(PyObject *m) 2645{ 2646 PyDateTime_IMPORT; 2647 if (PyDateTimeAPI == NULL) { 2648 goto error; 2649 } 2650 PyZoneInfo_ZoneInfoType.tp_base = PyDateTimeAPI->TZInfoType; 2651 if (PyType_Ready(&PyZoneInfo_ZoneInfoType) < 0) { 2652 goto error; 2653 } 2654 2655 if (PyModule_AddObjectRef(m, "ZoneInfo", (PyObject *)&PyZoneInfo_ZoneInfoType) < 0) { 2656 goto error; 2657 } 2658 2659 /* Populate imports */ 2660 PyObject *_tzpath_module = PyImport_ImportModule("zoneinfo._tzpath"); 2661 if (_tzpath_module == NULL) { 2662 goto error; 2663 } 2664 2665 _tzpath_find_tzfile = 2666 PyObject_GetAttrString(_tzpath_module, "find_tzfile"); 2667 Py_DECREF(_tzpath_module); 2668 if (_tzpath_find_tzfile == NULL) { 2669 goto error; 2670 } 2671 2672 PyObject *io_module = PyImport_ImportModule("io"); 2673 if (io_module == NULL) { 2674 goto error; 2675 } 2676 2677 io_open = PyObject_GetAttrString(io_module, "open"); 2678 Py_DECREF(io_module); 2679 if (io_open == NULL) { 2680 goto error; 2681 } 2682 2683 _common_mod = PyImport_ImportModule("zoneinfo._common"); 2684 if (_common_mod == NULL) { 2685 goto error; 2686 } 2687 2688 if (NO_TTINFO.utcoff == NULL) { 2689 NO_TTINFO.utcoff = Py_None; 2690 NO_TTINFO.dstoff = Py_None; 2691 NO_TTINFO.tzname = Py_None; 2692 2693 for (size_t i = 0; i < 3; ++i) { 2694 Py_INCREF(Py_None); 2695 } 2696 } 2697 2698 if (initialize_caches()) { 2699 goto error; 2700 } 2701 2702 return 0; 2703 2704error: 2705 return -1; 2706} 2707 2708static PyModuleDef_Slot zoneinfomodule_slots[] = { 2709 {Py_mod_exec, zoneinfomodule_exec}, {0, NULL}}; 2710 2711static struct PyModuleDef zoneinfomodule = { 2712 PyModuleDef_HEAD_INIT, 2713 .m_name = "_zoneinfo", 2714 .m_doc = "C implementation of the zoneinfo module", 2715 .m_size = 0, 2716 .m_methods = module_methods, 2717 .m_slots = zoneinfomodule_slots, 2718 .m_free = (freefunc)module_free}; 2719 2720PyMODINIT_FUNC 2721PyInit__zoneinfo(void) 2722{ 2723 return PyModuleDef_Init(&zoneinfomodule); 2724} 2725