xref: /third_party/python/Modules/_zoneinfo.c (revision 7db96d56)
1#ifndef Py_BUILD_CORE_BUILTIN
2#  define Py_BUILD_CORE_MODULE 1
3#endif
4
5#include "Python.h"
6#include "pycore_long.h"          // _PyLong_GetOne()
7#include "structmember.h"
8
9#include <ctype.h>
10#include <stddef.h>
11#include <stdint.h>
12
13#include "datetime.h"
14
15// Imports
16static PyObject *io_open = NULL;
17static PyObject *_tzpath_find_tzfile = NULL;
18static PyObject *_common_mod = NULL;
19
20typedef struct TransitionRuleType TransitionRuleType;
21typedef struct StrongCacheNode StrongCacheNode;
22
23typedef struct {
24    PyObject *utcoff;
25    PyObject *dstoff;
26    PyObject *tzname;
27    long utcoff_seconds;
28} _ttinfo;
29
30typedef struct {
31    _ttinfo std;
32    _ttinfo dst;
33    int dst_diff;
34    TransitionRuleType *start;
35    TransitionRuleType *end;
36    unsigned char std_only;
37} _tzrule;
38
39typedef struct {
40    PyDateTime_TZInfo base;
41    PyObject *key;
42    PyObject *file_repr;
43    PyObject *weakreflist;
44    size_t num_transitions;
45    size_t num_ttinfos;
46    int64_t *trans_list_utc;
47    int64_t *trans_list_wall[2];
48    _ttinfo **trans_ttinfos;  // References to the ttinfo for each transition
49    _ttinfo *ttinfo_before;
50    _tzrule tzrule_after;
51    _ttinfo *_ttinfos;  // Unique array of ttinfos for ease of deallocation
52    unsigned char fixed_offset;
53    unsigned char source;
54} PyZoneInfo_ZoneInfo;
55
56struct TransitionRuleType {
57    int64_t (*year_to_timestamp)(TransitionRuleType *, int);
58};
59
60typedef struct {
61    TransitionRuleType base;
62    uint8_t month;
63    uint8_t week;
64    uint8_t day;
65    int8_t hour;
66    int8_t minute;
67    int8_t second;
68} CalendarRule;
69
70typedef struct {
71    TransitionRuleType base;
72    uint8_t julian;
73    unsigned int day;
74    int8_t hour;
75    int8_t minute;
76    int8_t second;
77} DayRule;
78
79struct StrongCacheNode {
80    StrongCacheNode *next;
81    StrongCacheNode *prev;
82    PyObject *key;
83    PyObject *zone;
84};
85
86static PyTypeObject PyZoneInfo_ZoneInfoType;
87
88// Globals
89static PyObject *TIMEDELTA_CACHE = NULL;
90static PyObject *ZONEINFO_WEAK_CACHE = NULL;
91static StrongCacheNode *ZONEINFO_STRONG_CACHE = NULL;
92static size_t ZONEINFO_STRONG_CACHE_MAX_SIZE = 8;
93
94static _ttinfo NO_TTINFO = {NULL, NULL, NULL, 0};
95
96// Constants
97static const int EPOCHORDINAL = 719163;
98static int DAYS_IN_MONTH[] = {
99    -1, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31,
100};
101
102static int DAYS_BEFORE_MONTH[] = {
103    -1, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334,
104};
105
106static const int SOURCE_NOCACHE = 0;
107static const int SOURCE_CACHE = 1;
108static const int SOURCE_FILE = 2;
109
110// Forward declarations
111static int
112load_data(PyZoneInfo_ZoneInfo *self, PyObject *file_obj);
113static void
114utcoff_to_dstoff(size_t *trans_idx, long *utcoffs, long *dstoffs,
115                 unsigned char *isdsts, size_t num_transitions,
116                 size_t num_ttinfos);
117static int
118ts_to_local(size_t *trans_idx, int64_t *trans_utc, long *utcoff,
119            int64_t *trans_local[2], size_t num_ttinfos,
120            size_t num_transitions);
121
122static int
123parse_tz_str(PyObject *tz_str_obj, _tzrule *out);
124
125static Py_ssize_t
126parse_abbr(const char *const p, PyObject **abbr);
127static Py_ssize_t
128parse_tz_delta(const char *const p, long *total_seconds);
129static Py_ssize_t
130parse_transition_time(const char *const p, int8_t *hour, int8_t *minute,
131                      int8_t *second);
132static Py_ssize_t
133parse_transition_rule(const char *const p, TransitionRuleType **out);
134
135static _ttinfo *
136find_tzrule_ttinfo(_tzrule *rule, int64_t ts, unsigned char fold, int year);
137static _ttinfo *
138find_tzrule_ttinfo_fromutc(_tzrule *rule, int64_t ts, int year,
139                           unsigned char *fold);
140
141static int
142build_ttinfo(long utcoffset, long dstoffset, PyObject *tzname, _ttinfo *out);
143static void
144xdecref_ttinfo(_ttinfo *ttinfo);
145static int
146ttinfo_eq(const _ttinfo *const tti0, const _ttinfo *const tti1);
147
148static int
149build_tzrule(PyObject *std_abbr, PyObject *dst_abbr, long std_offset,
150             long dst_offset, TransitionRuleType *start,
151             TransitionRuleType *end, _tzrule *out);
152static void
153free_tzrule(_tzrule *tzrule);
154
155static PyObject *
156load_timedelta(long seconds);
157
158static int
159get_local_timestamp(PyObject *dt, int64_t *local_ts);
160static _ttinfo *
161find_ttinfo(PyZoneInfo_ZoneInfo *self, PyObject *dt);
162
163static int
164ymd_to_ord(int y, int m, int d);
165static int
166is_leap_year(int year);
167
168static size_t
169_bisect(const int64_t value, const int64_t *arr, size_t size);
170
171static int
172eject_from_strong_cache(const PyTypeObject *const type, PyObject *key);
173static void
174clear_strong_cache(const PyTypeObject *const type);
175static void
176update_strong_cache(const PyTypeObject *const type, PyObject *key,
177                    PyObject *zone);
178static PyObject *
179zone_from_strong_cache(const PyTypeObject *const type, PyObject *const key);
180
181static PyObject *
182zoneinfo_new_instance(PyTypeObject *type, PyObject *key)
183{
184    PyObject *file_obj = NULL;
185    PyObject *file_path = NULL;
186
187    file_path = PyObject_CallFunctionObjArgs(_tzpath_find_tzfile, key, NULL);
188    if (file_path == NULL) {
189        return NULL;
190    }
191    else if (file_path == Py_None) {
192        file_obj = PyObject_CallMethod(_common_mod, "load_tzdata", "O", key);
193        if (file_obj == NULL) {
194            Py_DECREF(file_path);
195            return NULL;
196        }
197    }
198
199    PyObject *self = (PyObject *)(type->tp_alloc(type, 0));
200    if (self == NULL) {
201        goto error;
202    }
203
204    if (file_obj == NULL) {
205        file_obj = PyObject_CallFunction(io_open, "Os", file_path, "rb");
206        if (file_obj == NULL) {
207            goto error;
208        }
209    }
210
211    if (load_data((PyZoneInfo_ZoneInfo *)self, file_obj)) {
212        goto error;
213    }
214
215    PyObject *rv = PyObject_CallMethod(file_obj, "close", NULL);
216    Py_DECREF(file_obj);
217    file_obj = NULL;
218    if (rv == NULL) {
219        goto error;
220    }
221    Py_DECREF(rv);
222
223    ((PyZoneInfo_ZoneInfo *)self)->key = key;
224    Py_INCREF(key);
225
226    goto cleanup;
227error:
228    Py_XDECREF(self);
229    self = NULL;
230cleanup:
231    if (file_obj != NULL) {
232        PyObject *exc, *val, *tb;
233        PyErr_Fetch(&exc, &val, &tb);
234        PyObject *tmp = PyObject_CallMethod(file_obj, "close", NULL);
235        _PyErr_ChainExceptions(exc, val, tb);
236        if (tmp == NULL) {
237            Py_CLEAR(self);
238        }
239        Py_XDECREF(tmp);
240        Py_DECREF(file_obj);
241    }
242    Py_DECREF(file_path);
243    return self;
244}
245
246static PyObject *
247get_weak_cache(PyTypeObject *type)
248{
249    if (type == &PyZoneInfo_ZoneInfoType) {
250        return ZONEINFO_WEAK_CACHE;
251    }
252    else {
253        PyObject *cache =
254            PyObject_GetAttrString((PyObject *)type, "_weak_cache");
255        // We are assuming that the type lives at least as long as the function
256        // that calls get_weak_cache, and that it holds a reference to the
257        // cache, so we'll return a "borrowed reference".
258        Py_XDECREF(cache);
259        return cache;
260    }
261}
262
263static PyObject *
264zoneinfo_new(PyTypeObject *type, PyObject *args, PyObject *kw)
265{
266    PyObject *key = NULL;
267    static char *kwlist[] = {"key", NULL};
268    if (PyArg_ParseTupleAndKeywords(args, kw, "O", kwlist, &key) == 0) {
269        return NULL;
270    }
271
272    PyObject *instance = zone_from_strong_cache(type, key);
273    if (instance != NULL || PyErr_Occurred()) {
274        return instance;
275    }
276
277    PyObject *weak_cache = get_weak_cache(type);
278    instance = PyObject_CallMethod(weak_cache, "get", "O", key, Py_None);
279    if (instance == NULL) {
280        return NULL;
281    }
282
283    if (instance == Py_None) {
284        Py_DECREF(instance);
285        PyObject *tmp = zoneinfo_new_instance(type, key);
286        if (tmp == NULL) {
287            return NULL;
288        }
289
290        instance =
291            PyObject_CallMethod(weak_cache, "setdefault", "OO", key, tmp);
292        Py_DECREF(tmp);
293        if (instance == NULL) {
294            return NULL;
295        }
296        ((PyZoneInfo_ZoneInfo *)instance)->source = SOURCE_CACHE;
297    }
298
299    update_strong_cache(type, key, instance);
300    return instance;
301}
302
303static void
304zoneinfo_dealloc(PyObject *obj_self)
305{
306    PyZoneInfo_ZoneInfo *self = (PyZoneInfo_ZoneInfo *)obj_self;
307
308    if (self->weakreflist != NULL) {
309        PyObject_ClearWeakRefs(obj_self);
310    }
311
312    if (self->trans_list_utc != NULL) {
313        PyMem_Free(self->trans_list_utc);
314    }
315
316    for (size_t i = 0; i < 2; i++) {
317        if (self->trans_list_wall[i] != NULL) {
318            PyMem_Free(self->trans_list_wall[i]);
319        }
320    }
321
322    if (self->_ttinfos != NULL) {
323        for (size_t i = 0; i < self->num_ttinfos; ++i) {
324            xdecref_ttinfo(&(self->_ttinfos[i]));
325        }
326        PyMem_Free(self->_ttinfos);
327    }
328
329    if (self->trans_ttinfos != NULL) {
330        PyMem_Free(self->trans_ttinfos);
331    }
332
333    free_tzrule(&(self->tzrule_after));
334
335    Py_XDECREF(self->key);
336    Py_XDECREF(self->file_repr);
337
338    Py_TYPE(self)->tp_free((PyObject *)self);
339}
340
341static PyObject *
342zoneinfo_from_file(PyTypeObject *type, PyObject *args, PyObject *kwargs)
343{
344    PyObject *file_obj = NULL;
345    PyObject *file_repr = NULL;
346    PyObject *key = Py_None;
347    PyZoneInfo_ZoneInfo *self = NULL;
348
349    static char *kwlist[] = {"", "key", NULL};
350    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|O", kwlist, &file_obj,
351                                     &key)) {
352        return NULL;
353    }
354
355    PyObject *obj_self = (PyObject *)(type->tp_alloc(type, 0));
356    self = (PyZoneInfo_ZoneInfo *)obj_self;
357    if (self == NULL) {
358        return NULL;
359    }
360
361    file_repr = PyUnicode_FromFormat("%R", file_obj);
362    if (file_repr == NULL) {
363        goto error;
364    }
365
366    if (load_data(self, file_obj)) {
367        goto error;
368    }
369
370    self->source = SOURCE_FILE;
371    self->file_repr = file_repr;
372    self->key = key;
373    Py_INCREF(key);
374
375    return obj_self;
376error:
377    Py_XDECREF(file_repr);
378    Py_XDECREF(self);
379    return NULL;
380}
381
382static PyObject *
383zoneinfo_no_cache(PyTypeObject *cls, PyObject *args, PyObject *kwargs)
384{
385    static char *kwlist[] = {"key", NULL};
386    PyObject *key = NULL;
387    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O", kwlist, &key)) {
388        return NULL;
389    }
390
391    PyObject *out = zoneinfo_new_instance(cls, key);
392    if (out != NULL) {
393        ((PyZoneInfo_ZoneInfo *)out)->source = SOURCE_NOCACHE;
394    }
395
396    return out;
397}
398
399static PyObject *
400zoneinfo_clear_cache(PyObject *cls, PyObject *args, PyObject *kwargs)
401{
402    PyObject *only_keys = NULL;
403    static char *kwlist[] = {"only_keys", NULL};
404
405    if (!(PyArg_ParseTupleAndKeywords(args, kwargs, "|$O", kwlist,
406                                      &only_keys))) {
407        return NULL;
408    }
409
410    PyTypeObject *type = (PyTypeObject *)cls;
411    PyObject *weak_cache = get_weak_cache(type);
412
413    if (only_keys == NULL || only_keys == Py_None) {
414        PyObject *rv = PyObject_CallMethod(weak_cache, "clear", NULL);
415        if (rv != NULL) {
416            Py_DECREF(rv);
417        }
418
419        clear_strong_cache(type);
420    }
421    else {
422        PyObject *item = NULL;
423        PyObject *pop = PyUnicode_FromString("pop");
424        if (pop == NULL) {
425            return NULL;
426        }
427
428        PyObject *iter = PyObject_GetIter(only_keys);
429        if (iter == NULL) {
430            Py_DECREF(pop);
431            return NULL;
432        }
433
434        while ((item = PyIter_Next(iter))) {
435            // Remove from strong cache
436            if (eject_from_strong_cache(type, item) < 0) {
437                Py_DECREF(item);
438                break;
439            }
440
441            // Remove from weak cache
442            PyObject *tmp = PyObject_CallMethodObjArgs(weak_cache, pop, item,
443                                                       Py_None, NULL);
444
445            Py_DECREF(item);
446            if (tmp == NULL) {
447                break;
448            }
449            Py_DECREF(tmp);
450        }
451        Py_DECREF(iter);
452        Py_DECREF(pop);
453    }
454
455    if (PyErr_Occurred()) {
456        return NULL;
457    }
458
459    Py_RETURN_NONE;
460}
461
462static PyObject *
463zoneinfo_utcoffset(PyObject *self, PyObject *dt)
464{
465    _ttinfo *tti = find_ttinfo((PyZoneInfo_ZoneInfo *)self, dt);
466    if (tti == NULL) {
467        return NULL;
468    }
469    Py_INCREF(tti->utcoff);
470    return tti->utcoff;
471}
472
473static PyObject *
474zoneinfo_dst(PyObject *self, PyObject *dt)
475{
476    _ttinfo *tti = find_ttinfo((PyZoneInfo_ZoneInfo *)self, dt);
477    if (tti == NULL) {
478        return NULL;
479    }
480    Py_INCREF(tti->dstoff);
481    return tti->dstoff;
482}
483
484static PyObject *
485zoneinfo_tzname(PyObject *self, PyObject *dt)
486{
487    _ttinfo *tti = find_ttinfo((PyZoneInfo_ZoneInfo *)self, dt);
488    if (tti == NULL) {
489        return NULL;
490    }
491    Py_INCREF(tti->tzname);
492    return tti->tzname;
493}
494
495#define GET_DT_TZINFO PyDateTime_DATE_GET_TZINFO
496
497static PyObject *
498zoneinfo_fromutc(PyObject *obj_self, PyObject *dt)
499{
500    if (!PyDateTime_Check(dt)) {
501        PyErr_SetString(PyExc_TypeError,
502                        "fromutc: argument must be a datetime");
503        return NULL;
504    }
505    if (GET_DT_TZINFO(dt) != obj_self) {
506        PyErr_SetString(PyExc_ValueError,
507                        "fromutc: dt.tzinfo "
508                        "is not self");
509        return NULL;
510    }
511
512    PyZoneInfo_ZoneInfo *self = (PyZoneInfo_ZoneInfo *)obj_self;
513
514    int64_t timestamp;
515    if (get_local_timestamp(dt, &timestamp)) {
516        return NULL;
517    }
518    size_t num_trans = self->num_transitions;
519
520    _ttinfo *tti = NULL;
521    unsigned char fold = 0;
522
523    if (num_trans >= 1 && timestamp < self->trans_list_utc[0]) {
524        tti = self->ttinfo_before;
525    }
526    else if (num_trans == 0 ||
527             timestamp > self->trans_list_utc[num_trans - 1]) {
528        tti = find_tzrule_ttinfo_fromutc(&(self->tzrule_after), timestamp,
529                                         PyDateTime_GET_YEAR(dt), &fold);
530
531        // Immediately after the last manual transition, the fold/gap is
532        // between self->trans_ttinfos[num_transitions - 1] and whatever
533        // ttinfo applies immediately after the last transition, not between
534        // the STD and DST rules in the tzrule_after, so we may need to
535        // adjust the fold value.
536        if (num_trans) {
537            _ttinfo *tti_prev = NULL;
538            if (num_trans == 1) {
539                tti_prev = self->ttinfo_before;
540            }
541            else {
542                tti_prev = self->trans_ttinfos[num_trans - 2];
543            }
544            int64_t diff = tti_prev->utcoff_seconds - tti->utcoff_seconds;
545            if (diff > 0 &&
546                timestamp < (self->trans_list_utc[num_trans - 1] + diff)) {
547                fold = 1;
548            }
549        }
550    }
551    else {
552        size_t idx = _bisect(timestamp, self->trans_list_utc, num_trans);
553        _ttinfo *tti_prev = NULL;
554
555        if (idx >= 2) {
556            tti_prev = self->trans_ttinfos[idx - 2];
557            tti = self->trans_ttinfos[idx - 1];
558        }
559        else {
560            tti_prev = self->ttinfo_before;
561            tti = self->trans_ttinfos[0];
562        }
563
564        // Detect fold
565        int64_t shift =
566            (int64_t)(tti_prev->utcoff_seconds - tti->utcoff_seconds);
567        if (shift > (timestamp - self->trans_list_utc[idx - 1])) {
568            fold = 1;
569        }
570    }
571
572    PyObject *tmp = PyNumber_Add(dt, tti->utcoff);
573    if (tmp == NULL) {
574        return NULL;
575    }
576
577    if (fold) {
578        if (PyDateTime_CheckExact(tmp)) {
579            ((PyDateTime_DateTime *)tmp)->fold = 1;
580            dt = tmp;
581        }
582        else {
583            PyObject *replace = PyObject_GetAttrString(tmp, "replace");
584            PyObject *args = PyTuple_New(0);
585            PyObject *kwargs = PyDict_New();
586
587            Py_DECREF(tmp);
588            if (args == NULL || kwargs == NULL || replace == NULL) {
589                Py_XDECREF(args);
590                Py_XDECREF(kwargs);
591                Py_XDECREF(replace);
592                return NULL;
593            }
594
595            dt = NULL;
596            if (!PyDict_SetItemString(kwargs, "fold", _PyLong_GetOne())) {
597                dt = PyObject_Call(replace, args, kwargs);
598            }
599
600            Py_DECREF(args);
601            Py_DECREF(kwargs);
602            Py_DECREF(replace);
603
604            if (dt == NULL) {
605                return NULL;
606            }
607        }
608    }
609    else {
610        dt = tmp;
611    }
612    return dt;
613}
614
615static PyObject *
616zoneinfo_repr(PyZoneInfo_ZoneInfo *self)
617{
618    PyObject *rv = NULL;
619    const char *type_name = Py_TYPE((PyObject *)self)->tp_name;
620    if (!(self->key == Py_None)) {
621        rv = PyUnicode_FromFormat("%s(key=%R)", type_name, self->key);
622    }
623    else {
624        assert(PyUnicode_Check(self->file_repr));
625        rv = PyUnicode_FromFormat("%s.from_file(%U)", type_name,
626                                  self->file_repr);
627    }
628
629    return rv;
630}
631
632static PyObject *
633zoneinfo_str(PyZoneInfo_ZoneInfo *self)
634{
635    if (!(self->key == Py_None)) {
636        Py_INCREF(self->key);
637        return self->key;
638    }
639    else {
640        return zoneinfo_repr(self);
641    }
642}
643
644/* Pickles the ZoneInfo object by key and source.
645 *
646 * ZoneInfo objects are pickled by reference to the TZif file that they came
647 * from, which means that the exact transitions may be different or the file
648 * may not un-pickle if the data has changed on disk in the interim.
649 *
650 * It is necessary to include a bit indicating whether or not the object
651 * was constructed from the cache, because from-cache objects will hit the
652 * unpickling process's cache, whereas no-cache objects will bypass it.
653 *
654 * Objects constructed from ZoneInfo.from_file cannot be pickled.
655 */
656static PyObject *
657zoneinfo_reduce(PyObject *obj_self, PyObject *unused)
658{
659    PyZoneInfo_ZoneInfo *self = (PyZoneInfo_ZoneInfo *)obj_self;
660    if (self->source == SOURCE_FILE) {
661        // Objects constructed from files cannot be pickled.
662        PyObject *pickle = PyImport_ImportModule("pickle");
663        if (pickle == NULL) {
664            return NULL;
665        }
666
667        PyObject *pickle_error =
668            PyObject_GetAttrString(pickle, "PicklingError");
669        Py_DECREF(pickle);
670        if (pickle_error == NULL) {
671            return NULL;
672        }
673
674        PyErr_Format(pickle_error,
675                     "Cannot pickle a ZoneInfo file from a file stream.");
676        Py_DECREF(pickle_error);
677        return NULL;
678    }
679
680    unsigned char from_cache = self->source == SOURCE_CACHE ? 1 : 0;
681    PyObject *constructor = PyObject_GetAttrString(obj_self, "_unpickle");
682
683    if (constructor == NULL) {
684        return NULL;
685    }
686
687    PyObject *rv = Py_BuildValue("O(OB)", constructor, self->key, from_cache);
688    Py_DECREF(constructor);
689    return rv;
690}
691
692static PyObject *
693zoneinfo__unpickle(PyTypeObject *cls, PyObject *args)
694{
695    PyObject *key;
696    unsigned char from_cache;
697    if (!PyArg_ParseTuple(args, "OB", &key, &from_cache)) {
698        return NULL;
699    }
700
701    if (from_cache) {
702        PyObject *val_args = Py_BuildValue("(O)", key);
703        if (val_args == NULL) {
704            return NULL;
705        }
706
707        PyObject *rv = zoneinfo_new(cls, val_args, NULL);
708
709        Py_DECREF(val_args);
710        return rv;
711    }
712    else {
713        return zoneinfo_new_instance(cls, key);
714    }
715}
716
717/* It is relatively expensive to construct new timedelta objects, and in most
718 * cases we're looking at a relatively small number of timedeltas, such as
719 * integer number of hours, etc. We will keep a cache so that we construct
720 * a minimal number of these.
721 *
722 * Possibly this should be replaced with an LRU cache so that it's not possible
723 * for the memory usage to explode from this, but in order for this to be a
724 * serious problem, one would need to deliberately craft a malicious time zone
725 * file with many distinct offsets. As of tzdb 2019c, loading every single zone
726 * fills the cache with ~450 timedeltas for a total size of ~12kB.
727 *
728 * This returns a new reference to the timedelta.
729 */
730static PyObject *
731load_timedelta(long seconds)
732{
733    PyObject *rv;
734    PyObject *pyoffset = PyLong_FromLong(seconds);
735    if (pyoffset == NULL) {
736        return NULL;
737    }
738    rv = PyDict_GetItemWithError(TIMEDELTA_CACHE, pyoffset);
739    if (rv == NULL) {
740        if (PyErr_Occurred()) {
741            goto error;
742        }
743        PyObject *tmp = PyDateTimeAPI->Delta_FromDelta(
744            0, seconds, 0, 1, PyDateTimeAPI->DeltaType);
745
746        if (tmp == NULL) {
747            goto error;
748        }
749
750        rv = PyDict_SetDefault(TIMEDELTA_CACHE, pyoffset, tmp);
751        Py_DECREF(tmp);
752    }
753
754    Py_XINCREF(rv);
755    Py_DECREF(pyoffset);
756    return rv;
757error:
758    Py_DECREF(pyoffset);
759    return NULL;
760}
761
762/* Constructor for _ttinfo object - this starts by initializing the _ttinfo
763 * to { NULL, NULL, NULL }, so that Py_XDECREF will work on partially
764 * initialized _ttinfo objects.
765 */
766static int
767build_ttinfo(long utcoffset, long dstoffset, PyObject *tzname, _ttinfo *out)
768{
769    out->utcoff = NULL;
770    out->dstoff = NULL;
771    out->tzname = NULL;
772
773    out->utcoff_seconds = utcoffset;
774    out->utcoff = load_timedelta(utcoffset);
775    if (out->utcoff == NULL) {
776        return -1;
777    }
778
779    out->dstoff = load_timedelta(dstoffset);
780    if (out->dstoff == NULL) {
781        return -1;
782    }
783
784    out->tzname = tzname;
785    Py_INCREF(tzname);
786
787    return 0;
788}
789
790/* Decrease reference count on any non-NULL members of a _ttinfo  */
791static void
792xdecref_ttinfo(_ttinfo *ttinfo)
793{
794    if (ttinfo != NULL) {
795        Py_XDECREF(ttinfo->utcoff);
796        Py_XDECREF(ttinfo->dstoff);
797        Py_XDECREF(ttinfo->tzname);
798    }
799}
800
801/* Equality function for _ttinfo. */
802static int
803ttinfo_eq(const _ttinfo *const tti0, const _ttinfo *const tti1)
804{
805    int rv;
806    if ((rv = PyObject_RichCompareBool(tti0->utcoff, tti1->utcoff, Py_EQ)) <
807        1) {
808        goto end;
809    }
810
811    if ((rv = PyObject_RichCompareBool(tti0->dstoff, tti1->dstoff, Py_EQ)) <
812        1) {
813        goto end;
814    }
815
816    if ((rv = PyObject_RichCompareBool(tti0->tzname, tti1->tzname, Py_EQ)) <
817        1) {
818        goto end;
819    }
820end:
821    return rv;
822}
823
824/* Given a file-like object, this populates a ZoneInfo object
825 *
826 * The current version calls into a Python function to read the data from
827 * file into Python objects, and this translates those Python objects into
828 * C values and calculates derived values (e.g. dstoff) in C.
829 *
830 * This returns 0 on success and -1 on failure.
831 *
832 * The function will never return while `self` is partially initialized —
833 * the object only needs to be freed / deallocated if this succeeds.
834 */
835static int
836load_data(PyZoneInfo_ZoneInfo *self, PyObject *file_obj)
837{
838    PyObject *data_tuple = NULL;
839
840    long *utcoff = NULL;
841    long *dstoff = NULL;
842    size_t *trans_idx = NULL;
843    unsigned char *isdst = NULL;
844
845    self->trans_list_utc = NULL;
846    self->trans_list_wall[0] = NULL;
847    self->trans_list_wall[1] = NULL;
848    self->trans_ttinfos = NULL;
849    self->_ttinfos = NULL;
850    self->file_repr = NULL;
851
852    size_t ttinfos_allocated = 0;
853
854    data_tuple = PyObject_CallMethod(_common_mod, "load_data", "O", file_obj);
855
856    if (data_tuple == NULL) {
857        goto error;
858    }
859
860    if (!PyTuple_CheckExact(data_tuple)) {
861        PyErr_Format(PyExc_TypeError, "Invalid data result type: %r",
862                     data_tuple);
863        goto error;
864    }
865
866    // Unpack the data tuple
867    PyObject *trans_idx_list = PyTuple_GetItem(data_tuple, 0);
868    if (trans_idx_list == NULL) {
869        goto error;
870    }
871
872    PyObject *trans_utc = PyTuple_GetItem(data_tuple, 1);
873    if (trans_utc == NULL) {
874        goto error;
875    }
876
877    PyObject *utcoff_list = PyTuple_GetItem(data_tuple, 2);
878    if (utcoff_list == NULL) {
879        goto error;
880    }
881
882    PyObject *isdst_list = PyTuple_GetItem(data_tuple, 3);
883    if (isdst_list == NULL) {
884        goto error;
885    }
886
887    PyObject *abbr = PyTuple_GetItem(data_tuple, 4);
888    if (abbr == NULL) {
889        goto error;
890    }
891
892    PyObject *tz_str = PyTuple_GetItem(data_tuple, 5);
893    if (tz_str == NULL) {
894        goto error;
895    }
896
897    // Load the relevant sizes
898    Py_ssize_t num_transitions = PyTuple_Size(trans_utc);
899    if (num_transitions < 0) {
900        goto error;
901    }
902
903    Py_ssize_t num_ttinfos = PyTuple_Size(utcoff_list);
904    if (num_ttinfos < 0) {
905        goto error;
906    }
907
908    self->num_transitions = (size_t)num_transitions;
909    self->num_ttinfos = (size_t)num_ttinfos;
910
911    // Load the transition indices and list
912    self->trans_list_utc =
913        PyMem_Malloc(self->num_transitions * sizeof(int64_t));
914    if (self->trans_list_utc == NULL) {
915        goto error;
916    }
917    trans_idx = PyMem_Malloc(self->num_transitions * sizeof(Py_ssize_t));
918    if (trans_idx == NULL) {
919        goto error;
920    }
921
922    for (size_t i = 0; i < self->num_transitions; ++i) {
923        PyObject *num = PyTuple_GetItem(trans_utc, i);
924        if (num == NULL) {
925            goto error;
926        }
927        self->trans_list_utc[i] = PyLong_AsLongLong(num);
928        if (self->trans_list_utc[i] == -1 && PyErr_Occurred()) {
929            goto error;
930        }
931
932        num = PyTuple_GetItem(trans_idx_list, i);
933        if (num == NULL) {
934            goto error;
935        }
936
937        Py_ssize_t cur_trans_idx = PyLong_AsSsize_t(num);
938        if (cur_trans_idx == -1) {
939            goto error;
940        }
941
942        trans_idx[i] = (size_t)cur_trans_idx;
943        if (trans_idx[i] > self->num_ttinfos) {
944            PyErr_Format(
945                PyExc_ValueError,
946                "Invalid transition index found while reading TZif: %zd",
947                cur_trans_idx);
948
949            goto error;
950        }
951    }
952
953    // Load UTC offsets and isdst (size num_ttinfos)
954    utcoff = PyMem_Malloc(self->num_ttinfos * sizeof(long));
955    isdst = PyMem_Malloc(self->num_ttinfos * sizeof(unsigned char));
956
957    if (utcoff == NULL || isdst == NULL) {
958        goto error;
959    }
960    for (size_t i = 0; i < self->num_ttinfos; ++i) {
961        PyObject *num = PyTuple_GetItem(utcoff_list, i);
962        if (num == NULL) {
963            goto error;
964        }
965
966        utcoff[i] = PyLong_AsLong(num);
967        if (utcoff[i] == -1 && PyErr_Occurred()) {
968            goto error;
969        }
970
971        num = PyTuple_GetItem(isdst_list, i);
972        if (num == NULL) {
973            goto error;
974        }
975
976        int isdst_with_error = PyObject_IsTrue(num);
977        if (isdst_with_error == -1) {
978            goto error;
979        }
980        else {
981            isdst[i] = (unsigned char)isdst_with_error;
982        }
983    }
984
985    dstoff = PyMem_Calloc(self->num_ttinfos, sizeof(long));
986    if (dstoff == NULL) {
987        goto error;
988    }
989
990    // Derive dstoff and trans_list_wall from the information we've loaded
991    utcoff_to_dstoff(trans_idx, utcoff, dstoff, isdst, self->num_transitions,
992                     self->num_ttinfos);
993
994    if (ts_to_local(trans_idx, self->trans_list_utc, utcoff,
995                    self->trans_list_wall, self->num_ttinfos,
996                    self->num_transitions)) {
997        goto error;
998    }
999
1000    // Build _ttinfo objects from utcoff, dstoff and abbr
1001    self->_ttinfos = PyMem_Malloc(self->num_ttinfos * sizeof(_ttinfo));
1002    if (self->_ttinfos == NULL) {
1003        goto error;
1004    }
1005    for (size_t i = 0; i < self->num_ttinfos; ++i) {
1006        PyObject *tzname = PyTuple_GetItem(abbr, i);
1007        if (tzname == NULL) {
1008            goto error;
1009        }
1010
1011        ttinfos_allocated++;
1012        if (build_ttinfo(utcoff[i], dstoff[i], tzname, &(self->_ttinfos[i]))) {
1013            goto error;
1014        }
1015    }
1016
1017    // Build our mapping from transition to the ttinfo that applies
1018    self->trans_ttinfos =
1019        PyMem_Calloc(self->num_transitions, sizeof(_ttinfo *));
1020    if (self->trans_ttinfos == NULL) {
1021        goto error;
1022    }
1023    for (size_t i = 0; i < self->num_transitions; ++i) {
1024        size_t ttinfo_idx = trans_idx[i];
1025        assert(ttinfo_idx < self->num_ttinfos);
1026        self->trans_ttinfos[i] = &(self->_ttinfos[ttinfo_idx]);
1027    }
1028
1029    // Set ttinfo_before to the first non-DST transition
1030    for (size_t i = 0; i < self->num_ttinfos; ++i) {
1031        if (!isdst[i]) {
1032            self->ttinfo_before = &(self->_ttinfos[i]);
1033            break;
1034        }
1035    }
1036
1037    // If there are only DST ttinfos, pick the first one, if there are no
1038    // ttinfos at all, set ttinfo_before to NULL
1039    if (self->ttinfo_before == NULL && self->num_ttinfos > 0) {
1040        self->ttinfo_before = &(self->_ttinfos[0]);
1041    }
1042
1043    if (tz_str != Py_None && PyObject_IsTrue(tz_str)) {
1044        if (parse_tz_str(tz_str, &(self->tzrule_after))) {
1045            goto error;
1046        }
1047    }
1048    else {
1049        if (!self->num_ttinfos) {
1050            PyErr_Format(PyExc_ValueError, "No time zone information found.");
1051            goto error;
1052        }
1053
1054        size_t idx;
1055        if (!self->num_transitions) {
1056            idx = self->num_ttinfos - 1;
1057        }
1058        else {
1059            idx = trans_idx[self->num_transitions - 1];
1060        }
1061
1062        _ttinfo *tti = &(self->_ttinfos[idx]);
1063        build_tzrule(tti->tzname, NULL, tti->utcoff_seconds, 0, NULL, NULL,
1064                     &(self->tzrule_after));
1065
1066        // We've abused the build_tzrule constructor to construct an STD-only
1067        // rule mimicking whatever ttinfo we've picked up, but it's possible
1068        // that the one we've picked up is a DST zone, so we need to make sure
1069        // that the dstoff is set correctly in that case.
1070        if (PyObject_IsTrue(tti->dstoff)) {
1071            _ttinfo *tti_after = &(self->tzrule_after.std);
1072            Py_DECREF(tti_after->dstoff);
1073            tti_after->dstoff = tti->dstoff;
1074            Py_INCREF(tti_after->dstoff);
1075        }
1076    }
1077
1078    // Determine if this is a "fixed offset" zone, meaning that the output of
1079    // the utcoffset, dst and tzname functions does not depend on the specific
1080    // datetime passed.
1081    //
1082    // We make three simplifying assumptions here:
1083    //
1084    // 1. If tzrule_after is not std_only, it has transitions that might occur
1085    //    (it is possible to construct TZ strings that specify STD and DST but
1086    //    no transitions ever occur, such as AAA0BBB,0/0,J365/25).
1087    // 2. If self->_ttinfos contains more than one _ttinfo object, the objects
1088    //    represent different offsets.
1089    // 3. self->ttinfos contains no unused _ttinfos (in which case an otherwise
1090    //    fixed-offset zone with extra _ttinfos defined may appear to *not* be
1091    //    a fixed offset zone).
1092    //
1093    // Violations to these assumptions would be fairly exotic, and exotic
1094    // zones should almost certainly not be used with datetime.time (the
1095    // only thing that would be affected by this).
1096    if (self->num_ttinfos > 1 || !self->tzrule_after.std_only) {
1097        self->fixed_offset = 0;
1098    }
1099    else if (self->num_ttinfos == 0) {
1100        self->fixed_offset = 1;
1101    }
1102    else {
1103        int constant_offset =
1104            ttinfo_eq(&(self->_ttinfos[0]), &self->tzrule_after.std);
1105        if (constant_offset < 0) {
1106            goto error;
1107        }
1108        else {
1109            self->fixed_offset = constant_offset;
1110        }
1111    }
1112
1113    int rv = 0;
1114    goto cleanup;
1115error:
1116    // These resources only need to be freed if we have failed, if we succeed
1117    // in initializing a PyZoneInfo_ZoneInfo object, we can rely on its dealloc
1118    // method to free the relevant resources.
1119    if (self->trans_list_utc != NULL) {
1120        PyMem_Free(self->trans_list_utc);
1121        self->trans_list_utc = NULL;
1122    }
1123
1124    for (size_t i = 0; i < 2; ++i) {
1125        if (self->trans_list_wall[i] != NULL) {
1126            PyMem_Free(self->trans_list_wall[i]);
1127            self->trans_list_wall[i] = NULL;
1128        }
1129    }
1130
1131    if (self->_ttinfos != NULL) {
1132        for (size_t i = 0; i < ttinfos_allocated; ++i) {
1133            xdecref_ttinfo(&(self->_ttinfos[i]));
1134        }
1135        PyMem_Free(self->_ttinfos);
1136        self->_ttinfos = NULL;
1137    }
1138
1139    if (self->trans_ttinfos != NULL) {
1140        PyMem_Free(self->trans_ttinfos);
1141        self->trans_ttinfos = NULL;
1142    }
1143
1144    rv = -1;
1145cleanup:
1146    Py_XDECREF(data_tuple);
1147
1148    if (utcoff != NULL) {
1149        PyMem_Free(utcoff);
1150    }
1151
1152    if (dstoff != NULL) {
1153        PyMem_Free(dstoff);
1154    }
1155
1156    if (isdst != NULL) {
1157        PyMem_Free(isdst);
1158    }
1159
1160    if (trans_idx != NULL) {
1161        PyMem_Free(trans_idx);
1162    }
1163
1164    return rv;
1165}
1166
1167/* Function to calculate the local timestamp of a transition from the year. */
1168int64_t
1169calendarrule_year_to_timestamp(TransitionRuleType *base_self, int year)
1170{
1171    CalendarRule *self = (CalendarRule *)base_self;
1172
1173    // We want (year, month, day of month); we have year and month, but we
1174    // need to turn (week, day-of-week) into day-of-month
1175    //
1176    // Week 1 is the first week in which day `day` (where 0 = Sunday) appears.
1177    // Week 5 represents the last occurrence of day `day`, so we need to know
1178    // the first weekday of the month and the number of days in the month.
1179    int8_t first_day = (ymd_to_ord(year, self->month, 1) + 6) % 7;
1180    uint8_t days_in_month = DAYS_IN_MONTH[self->month];
1181    if (self->month == 2 && is_leap_year(year)) {
1182        days_in_month += 1;
1183    }
1184
1185    // This equation seems magical, so I'll break it down:
1186    // 1. calendar says 0 = Monday, POSIX says 0 = Sunday so we need first_day
1187    //    + 1 to get 1 = Monday -> 7 = Sunday, which is still equivalent
1188    //    because this math is mod 7
1189    // 2. Get first day - desired day mod 7 (adjusting by 7 for negative
1190    //    numbers so that -1 % 7 = 6).
1191    // 3. Add 1 because month days are a 1-based index.
1192    int8_t month_day = ((int8_t)(self->day) - (first_day + 1)) % 7;
1193    if (month_day < 0) {
1194        month_day += 7;
1195    }
1196    month_day += 1;
1197
1198    // Now use a 0-based index version of `week` to calculate the w-th
1199    // occurrence of `day`
1200    month_day += ((int8_t)(self->week) - 1) * 7;
1201
1202    // month_day will only be > days_in_month if w was 5, and `w` means "last
1203    // occurrence of `d`", so now we just check if we over-shot the end of the
1204    // month and if so knock off 1 week.
1205    if (month_day > days_in_month) {
1206        month_day -= 7;
1207    }
1208
1209    int64_t ordinal = ymd_to_ord(year, self->month, month_day) - EPOCHORDINAL;
1210    return ((ordinal * 86400) + (int64_t)(self->hour * 3600) +
1211            (int64_t)(self->minute * 60) + (int64_t)(self->second));
1212}
1213
1214/* Constructor for CalendarRule. */
1215int
1216calendarrule_new(uint8_t month, uint8_t week, uint8_t day, int8_t hour,
1217                 int8_t minute, int8_t second, CalendarRule *out)
1218{
1219    // These bounds come from the POSIX standard, which describes an Mm.n.d
1220    // rule as:
1221    //
1222    //   The d'th day (0 <= d <= 6) of week n of month m of the year (1 <= n <=
1223    //   5, 1 <= m <= 12, where week 5 means "the last d day in month m" which
1224    //   may occur in either the fourth or the fifth week). Week 1 is the first
1225    //   week in which the d'th day occurs. Day zero is Sunday.
1226    if (month <= 0 || month > 12) {
1227        PyErr_Format(PyExc_ValueError, "Month must be in (0, 12]");
1228        return -1;
1229    }
1230
1231    if (week <= 0 || week > 5) {
1232        PyErr_Format(PyExc_ValueError, "Week must be in (0, 5]");
1233        return -1;
1234    }
1235
1236    // If the 'day' parameter type is changed to a signed type,
1237    // "day < 0" check must be added.
1238    if (/* day < 0 || */ day > 6) {
1239        PyErr_Format(PyExc_ValueError, "Day must be in [0, 6]");
1240        return -1;
1241    }
1242
1243    TransitionRuleType base = {&calendarrule_year_to_timestamp};
1244
1245    CalendarRule new_offset = {
1246        .base = base,
1247        .month = month,
1248        .week = week,
1249        .day = day,
1250        .hour = hour,
1251        .minute = minute,
1252        .second = second,
1253    };
1254
1255    *out = new_offset;
1256    return 0;
1257}
1258
1259/* Function to calculate the local timestamp of a transition from the year.
1260 *
1261 * This translates the day of the year into a local timestamp — either a
1262 * 1-based Julian day, not including leap days, or the 0-based year-day,
1263 * including leap days.
1264 * */
1265int64_t
1266dayrule_year_to_timestamp(TransitionRuleType *base_self, int year)
1267{
1268    // The function signature requires a TransitionRuleType pointer, but this
1269    // function is only applicable to DayRule* objects.
1270    DayRule *self = (DayRule *)base_self;
1271
1272    // ymd_to_ord calculates the number of days since 0001-01-01, but we want
1273    // to know the number of days since 1970-01-01, so we must subtract off
1274    // the equivalent of ymd_to_ord(1970, 1, 1).
1275    //
1276    // We subtract off an additional 1 day to account for January 1st (we want
1277    // the number of full days *before* the date of the transition - partial
1278    // days are accounted for in the hour, minute and second portions.
1279    int64_t days_before_year = ymd_to_ord(year, 1, 1) - EPOCHORDINAL - 1;
1280
1281    // The Julian day specification skips over February 29th in leap years,
1282    // from the POSIX standard:
1283    //
1284    //   Leap days shall not be counted. That is, in all years-including leap
1285    //   years-February 28 is day 59 and March 1 is day 60. It is impossible to
1286    //   refer explicitly to the occasional February 29.
1287    //
1288    // This is actually more useful than you'd think — if you want a rule that
1289    // always transitions on a given calendar day (other than February 29th),
1290    // you would use a Julian day, e.g. J91 always refers to April 1st and J365
1291    // always refers to December 31st.
1292    unsigned int day = self->day;
1293    if (self->julian && day >= 59 && is_leap_year(year)) {
1294        day += 1;
1295    }
1296
1297    return ((days_before_year + day) * 86400) + (self->hour * 3600) +
1298           (self->minute * 60) + self->second;
1299}
1300
1301/* Constructor for DayRule. */
1302static int
1303dayrule_new(uint8_t julian, unsigned int day, int8_t hour, int8_t minute,
1304            int8_t second, DayRule *out)
1305{
1306    // The POSIX standard specifies that Julian days must be in the range (1 <=
1307    // n <= 365) and that non-Julian (they call it "0-based Julian") days must
1308    // be in the range (0 <= n <= 365).
1309    if (day < julian || day > 365) {
1310        PyErr_Format(PyExc_ValueError, "day must be in [%u, 365], not: %u",
1311                     julian, day);
1312        return -1;
1313    }
1314
1315    TransitionRuleType base = {
1316        &dayrule_year_to_timestamp,
1317    };
1318
1319    DayRule tmp = {
1320        .base = base,
1321        .julian = julian,
1322        .day = day,
1323        .hour = hour,
1324        .minute = minute,
1325        .second = second,
1326    };
1327
1328    *out = tmp;
1329
1330    return 0;
1331}
1332
1333/* Calculate the start and end rules for a _tzrule in the given year. */
1334static void
1335tzrule_transitions(_tzrule *rule, int year, int64_t *start, int64_t *end)
1336{
1337    assert(rule->start != NULL);
1338    assert(rule->end != NULL);
1339    *start = rule->start->year_to_timestamp(rule->start, year);
1340    *end = rule->end->year_to_timestamp(rule->end, year);
1341}
1342
1343/* Calculate the _ttinfo that applies at a given local time from a _tzrule.
1344 *
1345 * This takes a local timestamp and fold for disambiguation purposes; the year
1346 * could technically be calculated from the timestamp, but given that the
1347 * callers of this function already have the year information accessible from
1348 * the datetime struct, it is taken as an additional parameter to reduce
1349 * unnecessary calculation.
1350 * */
1351static _ttinfo *
1352find_tzrule_ttinfo(_tzrule *rule, int64_t ts, unsigned char fold, int year)
1353{
1354    if (rule->std_only) {
1355        return &(rule->std);
1356    }
1357
1358    int64_t start, end;
1359    uint8_t isdst;
1360
1361    tzrule_transitions(rule, year, &start, &end);
1362
1363    // With fold = 0, the period (denominated in local time) with the smaller
1364    // offset starts at the end of the gap and ends at the end of the fold;
1365    // with fold = 1, it runs from the start of the gap to the beginning of the
1366    // fold.
1367    //
1368    // So in order to determine the DST boundaries we need to know both the
1369    // fold and whether DST is positive or negative (rare), and it turns out
1370    // that this boils down to fold XOR is_positive.
1371    if (fold == (rule->dst_diff >= 0)) {
1372        end -= rule->dst_diff;
1373    }
1374    else {
1375        start += rule->dst_diff;
1376    }
1377
1378    if (start < end) {
1379        isdst = (ts >= start) && (ts < end);
1380    }
1381    else {
1382        isdst = (ts < end) || (ts >= start);
1383    }
1384
1385    if (isdst) {
1386        return &(rule->dst);
1387    }
1388    else {
1389        return &(rule->std);
1390    }
1391}
1392
1393/* Calculate the ttinfo and fold that applies for a _tzrule at an epoch time.
1394 *
1395 * This function can determine the _ttinfo that applies at a given epoch time,
1396 * (analogous to trans_list_utc), and whether or not the datetime is in a fold.
1397 * This is to be used in the .fromutc() function.
1398 *
1399 * The year is technically a redundant parameter, because it can be calculated
1400 * from the timestamp, but all callers of this function should have the year
1401 * in the datetime struct anyway, so taking it as a parameter saves unnecessary
1402 * calculation.
1403 **/
1404static _ttinfo *
1405find_tzrule_ttinfo_fromutc(_tzrule *rule, int64_t ts, int year,
1406                           unsigned char *fold)
1407{
1408    if (rule->std_only) {
1409        *fold = 0;
1410        return &(rule->std);
1411    }
1412
1413    int64_t start, end;
1414    uint8_t isdst;
1415    tzrule_transitions(rule, year, &start, &end);
1416    start -= rule->std.utcoff_seconds;
1417    end -= rule->dst.utcoff_seconds;
1418
1419    if (start < end) {
1420        isdst = (ts >= start) && (ts < end);
1421    }
1422    else {
1423        isdst = (ts < end) || (ts >= start);
1424    }
1425
1426    // For positive DST, the ambiguous period is one dst_diff after the end of
1427    // DST; for negative DST, the ambiguous period is one dst_diff before the
1428    // start of DST.
1429    int64_t ambig_start, ambig_end;
1430    if (rule->dst_diff > 0) {
1431        ambig_start = end;
1432        ambig_end = end + rule->dst_diff;
1433    }
1434    else {
1435        ambig_start = start;
1436        ambig_end = start - rule->dst_diff;
1437    }
1438
1439    *fold = (ts >= ambig_start) && (ts < ambig_end);
1440
1441    if (isdst) {
1442        return &(rule->dst);
1443    }
1444    else {
1445        return &(rule->std);
1446    }
1447}
1448
1449/* Parse a TZ string in the format specified by the POSIX standard:
1450 *
1451 *  std offset[dst[offset],start[/time],end[/time]]
1452 *
1453 *  std and dst must be 3 or more characters long and must not contain a
1454 *  leading colon, embedded digits, commas, nor a plus or minus signs; The
1455 *  spaces between "std" and "offset" are only for display and are not actually
1456 *  present in the string.
1457 *
1458 *  The format of the offset is ``[+|-]hh[:mm[:ss]]``
1459 *
1460 * See the POSIX.1 spec: IEE Std 1003.1-2018 §8.3:
1461 *
1462 * https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap08.html
1463 */
1464static int
1465parse_tz_str(PyObject *tz_str_obj, _tzrule *out)
1466{
1467    PyObject *std_abbr = NULL;
1468    PyObject *dst_abbr = NULL;
1469    TransitionRuleType *start = NULL;
1470    TransitionRuleType *end = NULL;
1471    // Initialize offsets to invalid value (> 24 hours)
1472    long std_offset = 1 << 20;
1473    long dst_offset = 1 << 20;
1474
1475    const char *tz_str = PyBytes_AsString(tz_str_obj);
1476    if (tz_str == NULL) {
1477        return -1;
1478    }
1479    const char *p = tz_str;
1480
1481    // Read the `std` abbreviation, which must be at least 3 characters long.
1482    Py_ssize_t num_chars = parse_abbr(p, &std_abbr);
1483    if (num_chars < 1) {
1484        PyErr_Format(PyExc_ValueError, "Invalid STD format in %R", tz_str_obj);
1485        goto error;
1486    }
1487
1488    p += num_chars;
1489
1490    // Now read the STD offset, which is required
1491    num_chars = parse_tz_delta(p, &std_offset);
1492    if (num_chars < 0) {
1493        PyErr_Format(PyExc_ValueError, "Invalid STD offset in %R", tz_str_obj);
1494        goto error;
1495    }
1496    p += num_chars;
1497
1498    // If the string ends here, there is no DST, otherwise we must parse the
1499    // DST abbreviation and start and end dates and times.
1500    if (*p == '\0') {
1501        goto complete;
1502    }
1503
1504    num_chars = parse_abbr(p, &dst_abbr);
1505    if (num_chars < 1) {
1506        PyErr_Format(PyExc_ValueError, "Invalid DST format in %R", tz_str_obj);
1507        goto error;
1508    }
1509    p += num_chars;
1510
1511    if (*p == ',') {
1512        // From the POSIX standard:
1513        //
1514        // If no offset follows dst, the alternative time is assumed to be one
1515        // hour ahead of standard time.
1516        dst_offset = std_offset + 3600;
1517    }
1518    else {
1519        num_chars = parse_tz_delta(p, &dst_offset);
1520        if (num_chars < 0) {
1521            PyErr_Format(PyExc_ValueError, "Invalid DST offset in %R",
1522                         tz_str_obj);
1523            goto error;
1524        }
1525
1526        p += num_chars;
1527    }
1528
1529    TransitionRuleType **transitions[2] = {&start, &end};
1530    for (size_t i = 0; i < 2; ++i) {
1531        if (*p != ',') {
1532            PyErr_Format(PyExc_ValueError,
1533                         "Missing transition rules in TZ string: %R",
1534                         tz_str_obj);
1535            goto error;
1536        }
1537        p++;
1538
1539        num_chars = parse_transition_rule(p, transitions[i]);
1540        if (num_chars < 0) {
1541            PyErr_Format(PyExc_ValueError,
1542                         "Malformed transition rule in TZ string: %R",
1543                         tz_str_obj);
1544            goto error;
1545        }
1546        p += num_chars;
1547    }
1548
1549    if (*p != '\0') {
1550        PyErr_Format(PyExc_ValueError,
1551                     "Extraneous characters at end of TZ string: %R",
1552                     tz_str_obj);
1553        goto error;
1554    }
1555
1556complete:
1557    build_tzrule(std_abbr, dst_abbr, std_offset, dst_offset, start, end, out);
1558    Py_DECREF(std_abbr);
1559    Py_XDECREF(dst_abbr);
1560
1561    return 0;
1562error:
1563    Py_XDECREF(std_abbr);
1564    if (dst_abbr != NULL && dst_abbr != Py_None) {
1565        Py_DECREF(dst_abbr);
1566    }
1567
1568    if (start != NULL) {
1569        PyMem_Free(start);
1570    }
1571
1572    if (end != NULL) {
1573        PyMem_Free(end);
1574    }
1575
1576    return -1;
1577}
1578
1579static int
1580parse_uint(const char *const p, uint8_t *value)
1581{
1582    if (!isdigit(*p)) {
1583        return -1;
1584    }
1585
1586    *value = (*p) - '0';
1587    return 0;
1588}
1589
1590/* Parse the STD and DST abbreviations from a TZ string. */
1591static Py_ssize_t
1592parse_abbr(const char *const p, PyObject **abbr)
1593{
1594    const char *ptr = p;
1595    char buff = *ptr;
1596    const char *str_start;
1597    const char *str_end;
1598
1599    if (*ptr == '<') {
1600        ptr++;
1601        str_start = ptr;
1602        while ((buff = *ptr) != '>') {
1603            // From the POSIX standard:
1604            //
1605            //   In the quoted form, the first character shall be the less-than
1606            //   ( '<' ) character and the last character shall be the
1607            //   greater-than ( '>' ) character. All characters between these
1608            //   quoting characters shall be alphanumeric characters from the
1609            //   portable character set in the current locale, the plus-sign (
1610            //   '+' ) character, or the minus-sign ( '-' ) character. The std
1611            //   and dst fields in this case shall not include the quoting
1612            //   characters.
1613            if (!isalpha(buff) && !isdigit(buff) && buff != '+' &&
1614                buff != '-') {
1615                return -1;
1616            }
1617            ptr++;
1618        }
1619        str_end = ptr;
1620        ptr++;
1621    }
1622    else {
1623        str_start = p;
1624        // From the POSIX standard:
1625        //
1626        //   In the unquoted form, all characters in these fields shall be
1627        //   alphabetic characters from the portable character set in the
1628        //   current locale.
1629        while (isalpha(*ptr)) {
1630            ptr++;
1631        }
1632        str_end = ptr;
1633    }
1634
1635    *abbr = PyUnicode_FromStringAndSize(str_start, str_end - str_start);
1636    if (*abbr == NULL) {
1637        return -1;
1638    }
1639
1640    return ptr - p;
1641}
1642
1643/* Parse a UTC offset from a TZ str. */
1644static Py_ssize_t
1645parse_tz_delta(const char *const p, long *total_seconds)
1646{
1647    // From the POSIX spec:
1648    //
1649    //   Indicates the value added to the local time to arrive at Coordinated
1650    //   Universal Time. The offset has the form:
1651    //
1652    //   hh[:mm[:ss]]
1653    //
1654    //   One or more digits may be used; the value is always interpreted as a
1655    //   decimal number.
1656    //
1657    // The POSIX spec says that the values for `hour` must be between 0 and 24
1658    // hours, but RFC 8536 §3.3.1 specifies that the hours part of the
1659    // transition times may be signed and range from -167 to 167.
1660    long sign = -1;
1661    long hours = 0;
1662    long minutes = 0;
1663    long seconds = 0;
1664
1665    const char *ptr = p;
1666    char buff = *ptr;
1667    if (buff == '-' || buff == '+') {
1668        // Negative numbers correspond to *positive* offsets, from the spec:
1669        //
1670        //   If preceded by a '-', the timezone shall be east of the Prime
1671        //   Meridian; otherwise, it shall be west (which may be indicated by
1672        //   an optional preceding '+' ).
1673        if (buff == '-') {
1674            sign = 1;
1675        }
1676
1677        ptr++;
1678    }
1679
1680    // The hour can be 1 or 2 numeric characters
1681    for (size_t i = 0; i < 2; ++i) {
1682        buff = *ptr;
1683        if (!isdigit(buff)) {
1684            if (i == 0) {
1685                return -1;
1686            }
1687            else {
1688                break;
1689            }
1690        }
1691
1692        hours *= 10;
1693        hours += buff - '0';
1694        ptr++;
1695    }
1696
1697    if (hours > 24 || hours < 0) {
1698        return -1;
1699    }
1700
1701    // Minutes and seconds always of the format ":dd"
1702    long *outputs[2] = {&minutes, &seconds};
1703    for (size_t i = 0; i < 2; ++i) {
1704        if (*ptr != ':') {
1705            goto complete;
1706        }
1707        ptr++;
1708
1709        for (size_t j = 0; j < 2; ++j) {
1710            buff = *ptr;
1711            if (!isdigit(buff)) {
1712                return -1;
1713            }
1714            *(outputs[i]) *= 10;
1715            *(outputs[i]) += buff - '0';
1716            ptr++;
1717        }
1718    }
1719
1720complete:
1721    *total_seconds = sign * ((hours * 3600) + (minutes * 60) + seconds);
1722
1723    return ptr - p;
1724}
1725
1726/* Parse the date portion of a transition rule. */
1727static Py_ssize_t
1728parse_transition_rule(const char *const p, TransitionRuleType **out)
1729{
1730    // The full transition rule indicates when to change back and forth between
1731    // STD and DST, and has the form:
1732    //
1733    //   date[/time],date[/time]
1734    //
1735    // This function parses an individual date[/time] section, and returns
1736    // the number of characters that contributed to the transition rule. This
1737    // does not include the ',' at the end of the first rule.
1738    //
1739    // The POSIX spec states that if *time* is not given, the default is 02:00.
1740    const char *ptr = p;
1741    int8_t hour = 2;
1742    int8_t minute = 0;
1743    int8_t second = 0;
1744
1745    // Rules come in one of three flavors:
1746    //
1747    //   1. Jn: Julian day n, with no leap days.
1748    //   2. n: Day of year (0-based, with leap days)
1749    //   3. Mm.n.d: Specifying by month, week and day-of-week.
1750
1751    if (*ptr == 'M') {
1752        uint8_t month, week, day;
1753        ptr++;
1754        if (parse_uint(ptr, &month)) {
1755            return -1;
1756        }
1757        ptr++;
1758        if (*ptr != '.') {
1759            uint8_t tmp;
1760            if (parse_uint(ptr, &tmp)) {
1761                return -1;
1762            }
1763
1764            month *= 10;
1765            month += tmp;
1766            ptr++;
1767        }
1768
1769        uint8_t *values[2] = {&week, &day};
1770        for (size_t i = 0; i < 2; ++i) {
1771            if (*ptr != '.') {
1772                return -1;
1773            }
1774            ptr++;
1775
1776            if (parse_uint(ptr, values[i])) {
1777                return -1;
1778            }
1779            ptr++;
1780        }
1781
1782        if (*ptr == '/') {
1783            ptr++;
1784            Py_ssize_t num_chars =
1785                parse_transition_time(ptr, &hour, &minute, &second);
1786            if (num_chars < 0) {
1787                return -1;
1788            }
1789            ptr += num_chars;
1790        }
1791
1792        CalendarRule *rv = PyMem_Calloc(1, sizeof(CalendarRule));
1793        if (rv == NULL) {
1794            return -1;
1795        }
1796
1797        if (calendarrule_new(month, week, day, hour, minute, second, rv)) {
1798            PyMem_Free(rv);
1799            return -1;
1800        }
1801
1802        *out = (TransitionRuleType *)rv;
1803    }
1804    else {
1805        uint8_t julian = 0;
1806        unsigned int day = 0;
1807        if (*ptr == 'J') {
1808            julian = 1;
1809            ptr++;
1810        }
1811
1812        for (size_t i = 0; i < 3; ++i) {
1813            if (!isdigit(*ptr)) {
1814                if (i == 0) {
1815                    return -1;
1816                }
1817                break;
1818            }
1819            day *= 10;
1820            day += (*ptr) - '0';
1821            ptr++;
1822        }
1823
1824        if (*ptr == '/') {
1825            ptr++;
1826            Py_ssize_t num_chars =
1827                parse_transition_time(ptr, &hour, &minute, &second);
1828            if (num_chars < 0) {
1829                return -1;
1830            }
1831            ptr += num_chars;
1832        }
1833
1834        DayRule *rv = PyMem_Calloc(1, sizeof(DayRule));
1835        if (rv == NULL) {
1836            return -1;
1837        }
1838
1839        if (dayrule_new(julian, day, hour, minute, second, rv)) {
1840            PyMem_Free(rv);
1841            return -1;
1842        }
1843        *out = (TransitionRuleType *)rv;
1844    }
1845
1846    return ptr - p;
1847}
1848
1849/* Parse the time portion of a transition rule (e.g. following an /) */
1850static Py_ssize_t
1851parse_transition_time(const char *const p, int8_t *hour, int8_t *minute,
1852                      int8_t *second)
1853{
1854    // From the spec:
1855    //
1856    //   The time has the same format as offset except that no leading sign
1857    //   ( '-' or '+' ) is allowed.
1858    //
1859    // The format for the offset is:
1860    //
1861    //   h[h][:mm[:ss]]
1862    //
1863    // RFC 8536 also allows transition times to be signed and to range from
1864    // -167 to +167, but the current version only supports [0, 99].
1865    //
1866    // TODO: Support the full range of transition hours.
1867    int8_t *components[3] = {hour, minute, second};
1868    const char *ptr = p;
1869    int8_t sign = 1;
1870
1871    if (*ptr == '-' || *ptr == '+') {
1872        if (*ptr == '-') {
1873            sign = -1;
1874        }
1875        ptr++;
1876    }
1877
1878    for (size_t i = 0; i < 3; ++i) {
1879        if (i > 0) {
1880            if (*ptr != ':') {
1881                break;
1882            }
1883            ptr++;
1884        }
1885
1886        uint8_t buff = 0;
1887        for (size_t j = 0; j < 2; j++) {
1888            if (!isdigit(*ptr)) {
1889                if (i == 0 && j > 0) {
1890                    break;
1891                }
1892                return -1;
1893            }
1894
1895            buff *= 10;
1896            buff += (*ptr) - '0';
1897            ptr++;
1898        }
1899
1900        *(components[i]) = sign * buff;
1901    }
1902
1903    return ptr - p;
1904}
1905
1906/* Constructor for a _tzrule.
1907 *
1908 * If `dst_abbr` is NULL, this will construct an "STD-only" _tzrule, in which
1909 * case `dst_offset` will be ignored and `start` and `end` are expected to be
1910 * NULL as well.
1911 *
1912 * Returns 0 on success.
1913 */
1914static int
1915build_tzrule(PyObject *std_abbr, PyObject *dst_abbr, long std_offset,
1916             long dst_offset, TransitionRuleType *start,
1917             TransitionRuleType *end, _tzrule *out)
1918{
1919    _tzrule rv = {{0}};
1920
1921    rv.start = start;
1922    rv.end = end;
1923
1924    if (build_ttinfo(std_offset, 0, std_abbr, &rv.std)) {
1925        goto error;
1926    }
1927
1928    if (dst_abbr != NULL) {
1929        rv.dst_diff = dst_offset - std_offset;
1930        if (build_ttinfo(dst_offset, rv.dst_diff, dst_abbr, &rv.dst)) {
1931            goto error;
1932        }
1933    }
1934    else {
1935        rv.std_only = 1;
1936    }
1937
1938    *out = rv;
1939
1940    return 0;
1941error:
1942    xdecref_ttinfo(&rv.std);
1943    xdecref_ttinfo(&rv.dst);
1944    return -1;
1945}
1946
1947/* Destructor for _tzrule. */
1948static void
1949free_tzrule(_tzrule *tzrule)
1950{
1951    xdecref_ttinfo(&(tzrule->std));
1952    if (!tzrule->std_only) {
1953        xdecref_ttinfo(&(tzrule->dst));
1954    }
1955
1956    if (tzrule->start != NULL) {
1957        PyMem_Free(tzrule->start);
1958    }
1959
1960    if (tzrule->end != NULL) {
1961        PyMem_Free(tzrule->end);
1962    }
1963}
1964
1965/* Calculate DST offsets from transitions and UTC offsets
1966 *
1967 * This is necessary because each C `ttinfo` only contains the UTC offset,
1968 * time zone abbreviation and an isdst boolean - it does not include the
1969 * amount of the DST offset, but we need the amount for the dst() function.
1970 *
1971 * Thus function uses heuristics to infer what the offset should be, so it
1972 * is not guaranteed that this will work for all zones. If we cannot assign
1973 * a value for a given DST offset, we'll assume it's 1H rather than 0H, so
1974 * bool(dt.dst()) will always match ttinfo.isdst.
1975 */
1976static void
1977utcoff_to_dstoff(size_t *trans_idx, long *utcoffs, long *dstoffs,
1978                 unsigned char *isdsts, size_t num_transitions,
1979                 size_t num_ttinfos)
1980{
1981    size_t dst_count = 0;
1982    size_t dst_found = 0;
1983    for (size_t i = 0; i < num_ttinfos; ++i) {
1984        dst_count++;
1985    }
1986
1987    for (size_t i = 1; i < num_transitions; ++i) {
1988        if (dst_count == dst_found) {
1989            break;
1990        }
1991
1992        size_t idx = trans_idx[i];
1993        size_t comp_idx = trans_idx[i - 1];
1994
1995        // Only look at DST offsets that have nto been assigned already
1996        if (!isdsts[idx] || dstoffs[idx] != 0) {
1997            continue;
1998        }
1999
2000        long dstoff = 0;
2001        long utcoff = utcoffs[idx];
2002
2003        if (!isdsts[comp_idx]) {
2004            dstoff = utcoff - utcoffs[comp_idx];
2005        }
2006
2007        if (!dstoff && idx < (num_ttinfos - 1)) {
2008            comp_idx = trans_idx[i + 1];
2009
2010            // If the following transition is also DST and we couldn't find
2011            // the DST offset by this point, we're going to have to skip it
2012            // and hope this transition gets assigned later
2013            if (isdsts[comp_idx]) {
2014                continue;
2015            }
2016
2017            dstoff = utcoff - utcoffs[comp_idx];
2018        }
2019
2020        if (dstoff) {
2021            dst_found++;
2022            dstoffs[idx] = dstoff;
2023        }
2024    }
2025
2026    if (dst_found < dst_count) {
2027        // If there are time zones we didn't find a value for, we'll end up
2028        // with dstoff = 0 for something where isdst=1. This is obviously
2029        // wrong — one hour will be a much better guess than 0.
2030        for (size_t idx = 0; idx < num_ttinfos; ++idx) {
2031            if (isdsts[idx] && !dstoffs[idx]) {
2032                dstoffs[idx] = 3600;
2033            }
2034        }
2035    }
2036}
2037
2038#define _swap(x, y, buffer) \
2039    buffer = x;             \
2040    x = y;                  \
2041    y = buffer;
2042
2043/* Calculate transitions in local time from UTC time and offsets.
2044 *
2045 * We want to know when each transition occurs, denominated in the number of
2046 * nominal wall-time seconds between 1970-01-01T00:00:00 and the transition in
2047 * *local time* (note: this is *not* equivalent to the output of
2048 * datetime.timestamp, which is the total number of seconds actual elapsed
2049 * since 1970-01-01T00:00:00Z in UTC).
2050 *
2051 * This is an ambiguous question because "local time" can be ambiguous — but it
2052 * is disambiguated by the `fold` parameter, so we allocate two arrays:
2053 *
2054 *  trans_local[0]: The wall-time transitions for fold=0
2055 *  trans_local[1]: The wall-time transitions for fold=1
2056 *
2057 * This returns 0 on success and a negative number of failure. The trans_local
2058 * arrays must be freed if they are not NULL.
2059 */
2060static int
2061ts_to_local(size_t *trans_idx, int64_t *trans_utc, long *utcoff,
2062            int64_t *trans_local[2], size_t num_ttinfos,
2063            size_t num_transitions)
2064{
2065    if (num_transitions == 0) {
2066        return 0;
2067    }
2068
2069    // Copy the UTC transitions into each array to be modified in place later
2070    for (size_t i = 0; i < 2; ++i) {
2071        trans_local[i] = PyMem_Malloc(num_transitions * sizeof(int64_t));
2072        if (trans_local[i] == NULL) {
2073            return -1;
2074        }
2075
2076        memcpy(trans_local[i], trans_utc, num_transitions * sizeof(int64_t));
2077    }
2078
2079    int64_t offset_0, offset_1, buff;
2080    if (num_ttinfos > 1) {
2081        offset_0 = utcoff[0];
2082        offset_1 = utcoff[trans_idx[0]];
2083
2084        if (offset_1 > offset_0) {
2085            _swap(offset_0, offset_1, buff);
2086        }
2087    }
2088    else {
2089        offset_0 = utcoff[0];
2090        offset_1 = utcoff[0];
2091    }
2092
2093    trans_local[0][0] += offset_0;
2094    trans_local[1][0] += offset_1;
2095
2096    for (size_t i = 1; i < num_transitions; ++i) {
2097        offset_0 = utcoff[trans_idx[i - 1]];
2098        offset_1 = utcoff[trans_idx[i]];
2099
2100        if (offset_1 > offset_0) {
2101            _swap(offset_1, offset_0, buff);
2102        }
2103
2104        trans_local[0][i] += offset_0;
2105        trans_local[1][i] += offset_1;
2106    }
2107
2108    return 0;
2109}
2110
2111/* Simple bisect_right binary search implementation */
2112static size_t
2113_bisect(const int64_t value, const int64_t *arr, size_t size)
2114{
2115    size_t lo = 0;
2116    size_t hi = size;
2117    size_t m;
2118
2119    while (lo < hi) {
2120        m = (lo + hi) / 2;
2121        if (arr[m] > value) {
2122            hi = m;
2123        }
2124        else {
2125            lo = m + 1;
2126        }
2127    }
2128
2129    return hi;
2130}
2131
2132/* Find the ttinfo rules that apply at a given local datetime. */
2133static _ttinfo *
2134find_ttinfo(PyZoneInfo_ZoneInfo *self, PyObject *dt)
2135{
2136    // datetime.time has a .tzinfo attribute that passes None as the dt
2137    // argument; it only really has meaning for fixed-offset zones.
2138    if (dt == Py_None) {
2139        if (self->fixed_offset) {
2140            return &(self->tzrule_after.std);
2141        }
2142        else {
2143            return &NO_TTINFO;
2144        }
2145    }
2146
2147    int64_t ts;
2148    if (get_local_timestamp(dt, &ts)) {
2149        return NULL;
2150    }
2151
2152    unsigned char fold = PyDateTime_DATE_GET_FOLD(dt);
2153    assert(fold < 2);
2154    int64_t *local_transitions = self->trans_list_wall[fold];
2155    size_t num_trans = self->num_transitions;
2156
2157    if (num_trans && ts < local_transitions[0]) {
2158        return self->ttinfo_before;
2159    }
2160    else if (!num_trans || ts > local_transitions[self->num_transitions - 1]) {
2161        return find_tzrule_ttinfo(&(self->tzrule_after), ts, fold,
2162                                  PyDateTime_GET_YEAR(dt));
2163    }
2164    else {
2165        size_t idx = _bisect(ts, local_transitions, self->num_transitions) - 1;
2166        assert(idx < self->num_transitions);
2167        return self->trans_ttinfos[idx];
2168    }
2169}
2170
2171static int
2172is_leap_year(int year)
2173{
2174    const unsigned int ayear = (unsigned int)year;
2175    return ayear % 4 == 0 && (ayear % 100 != 0 || ayear % 400 == 0);
2176}
2177
2178/* Calculates ordinal datetime from year, month and day. */
2179static int
2180ymd_to_ord(int y, int m, int d)
2181{
2182    y -= 1;
2183    int days_before_year = (y * 365) + (y / 4) - (y / 100) + (y / 400);
2184    int yearday = DAYS_BEFORE_MONTH[m];
2185    if (m > 2 && is_leap_year(y + 1)) {
2186        yearday += 1;
2187    }
2188
2189    return days_before_year + yearday + d;
2190}
2191
2192/* Calculate the number of seconds since 1970-01-01 in local time.
2193 *
2194 * This gets a datetime in the same "units" as self->trans_list_wall so that we
2195 * can easily determine which transitions a datetime falls between. See the
2196 * comment above ts_to_local for more information.
2197 * */
2198static int
2199get_local_timestamp(PyObject *dt, int64_t *local_ts)
2200{
2201    assert(local_ts != NULL);
2202
2203    int hour, minute, second;
2204    int ord;
2205    if (PyDateTime_CheckExact(dt)) {
2206        int y = PyDateTime_GET_YEAR(dt);
2207        int m = PyDateTime_GET_MONTH(dt);
2208        int d = PyDateTime_GET_DAY(dt);
2209        hour = PyDateTime_DATE_GET_HOUR(dt);
2210        minute = PyDateTime_DATE_GET_MINUTE(dt);
2211        second = PyDateTime_DATE_GET_SECOND(dt);
2212
2213        ord = ymd_to_ord(y, m, d);
2214    }
2215    else {
2216        PyObject *num = PyObject_CallMethod(dt, "toordinal", NULL);
2217        if (num == NULL) {
2218            return -1;
2219        }
2220
2221        ord = PyLong_AsLong(num);
2222        Py_DECREF(num);
2223        if (ord == -1 && PyErr_Occurred()) {
2224            return -1;
2225        }
2226
2227        num = PyObject_GetAttrString(dt, "hour");
2228        if (num == NULL) {
2229            return -1;
2230        }
2231        hour = PyLong_AsLong(num);
2232        Py_DECREF(num);
2233        if (hour == -1) {
2234            return -1;
2235        }
2236
2237        num = PyObject_GetAttrString(dt, "minute");
2238        if (num == NULL) {
2239            return -1;
2240        }
2241        minute = PyLong_AsLong(num);
2242        Py_DECREF(num);
2243        if (minute == -1) {
2244            return -1;
2245        }
2246
2247        num = PyObject_GetAttrString(dt, "second");
2248        if (num == NULL) {
2249            return -1;
2250        }
2251        second = PyLong_AsLong(num);
2252        Py_DECREF(num);
2253        if (second == -1) {
2254            return -1;
2255        }
2256    }
2257
2258    *local_ts = (int64_t)(ord - EPOCHORDINAL) * 86400 +
2259                (int64_t)(hour * 3600 + minute * 60 + second);
2260
2261    return 0;
2262}
2263
2264/////
2265// Functions for cache handling
2266
2267/* Constructor for StrongCacheNode */
2268static StrongCacheNode *
2269strong_cache_node_new(PyObject *key, PyObject *zone)
2270{
2271    StrongCacheNode *node = PyMem_Malloc(sizeof(StrongCacheNode));
2272    if (node == NULL) {
2273        return NULL;
2274    }
2275
2276    Py_INCREF(key);
2277    Py_INCREF(zone);
2278
2279    node->next = NULL;
2280    node->prev = NULL;
2281    node->key = key;
2282    node->zone = zone;
2283
2284    return node;
2285}
2286
2287/* Destructor for StrongCacheNode */
2288void
2289strong_cache_node_free(StrongCacheNode *node)
2290{
2291    Py_XDECREF(node->key);
2292    Py_XDECREF(node->zone);
2293
2294    PyMem_Free(node);
2295}
2296
2297/* Frees all nodes at or after a specified root in the strong cache.
2298 *
2299 * This can be used on the root node to free the entire cache or it can be used
2300 * to clear all nodes that have been expired (which, if everything is going
2301 * right, will actually only be 1 node at a time).
2302 */
2303void
2304strong_cache_free(StrongCacheNode *root)
2305{
2306    StrongCacheNode *node = root;
2307    StrongCacheNode *next_node;
2308    while (node != NULL) {
2309        next_node = node->next;
2310        strong_cache_node_free(node);
2311
2312        node = next_node;
2313    }
2314}
2315
2316/* Removes a node from the cache and update its neighbors.
2317 *
2318 * This is used both when ejecting a node from the cache and when moving it to
2319 * the front of the cache.
2320 */
2321static void
2322remove_from_strong_cache(StrongCacheNode *node)
2323{
2324    if (ZONEINFO_STRONG_CACHE == node) {
2325        ZONEINFO_STRONG_CACHE = node->next;
2326    }
2327
2328    if (node->prev != NULL) {
2329        node->prev->next = node->next;
2330    }
2331
2332    if (node->next != NULL) {
2333        node->next->prev = node->prev;
2334    }
2335
2336    node->next = NULL;
2337    node->prev = NULL;
2338}
2339
2340/* Retrieves the node associated with a key, if it exists.
2341 *
2342 * This traverses the strong cache until it finds a matching key and returns a
2343 * pointer to the relevant node if found. Returns NULL if no node is found.
2344 *
2345 * root may be NULL, indicating an empty cache.
2346 */
2347static StrongCacheNode *
2348find_in_strong_cache(const StrongCacheNode *const root, PyObject *const key)
2349{
2350    const StrongCacheNode *node = root;
2351    while (node != NULL) {
2352        int rv = PyObject_RichCompareBool(key, node->key, Py_EQ);
2353        if (rv < 0) {
2354            return NULL;
2355        }
2356        if (rv) {
2357            return (StrongCacheNode *)node;
2358        }
2359
2360        node = node->next;
2361    }
2362
2363    return NULL;
2364}
2365
2366/* Ejects a given key from the class's strong cache, if applicable.
2367 *
2368 * This function is used to enable the per-key functionality in clear_cache.
2369 */
2370static int
2371eject_from_strong_cache(const PyTypeObject *const type, PyObject *key)
2372{
2373    if (type != &PyZoneInfo_ZoneInfoType) {
2374        return 0;
2375    }
2376
2377    StrongCacheNode *node = find_in_strong_cache(ZONEINFO_STRONG_CACHE, key);
2378    if (node != NULL) {
2379        remove_from_strong_cache(node);
2380
2381        strong_cache_node_free(node);
2382    }
2383    else if (PyErr_Occurred()) {
2384        return -1;
2385    }
2386    return 0;
2387}
2388
2389/* Moves a node to the front of the LRU cache.
2390 *
2391 * The strong cache is an LRU cache, so whenever a given node is accessed, if
2392 * it is not at the front of the cache, it needs to be moved there.
2393 */
2394static void
2395move_strong_cache_node_to_front(StrongCacheNode **root, StrongCacheNode *node)
2396{
2397    StrongCacheNode *root_p = *root;
2398    if (root_p == node) {
2399        return;
2400    }
2401
2402    remove_from_strong_cache(node);
2403
2404    node->prev = NULL;
2405    node->next = root_p;
2406
2407    if (root_p != NULL) {
2408        root_p->prev = node;
2409    }
2410
2411    *root = node;
2412}
2413
2414/* Retrieves a ZoneInfo from the strong cache if it's present.
2415 *
2416 * This function finds the ZoneInfo by key and if found will move the node to
2417 * the front of the LRU cache and return a new reference to it. It returns NULL
2418 * if the key is not in the cache.
2419 *
2420 * The strong cache is currently only implemented for the base class, so this
2421 * always returns a cache miss for subclasses.
2422 */
2423static PyObject *
2424zone_from_strong_cache(const PyTypeObject *const type, PyObject *const key)
2425{
2426    if (type != &PyZoneInfo_ZoneInfoType) {
2427        return NULL;  // Strong cache currently only implemented for base class
2428    }
2429
2430    StrongCacheNode *node = find_in_strong_cache(ZONEINFO_STRONG_CACHE, key);
2431
2432    if (node != NULL) {
2433        move_strong_cache_node_to_front(&ZONEINFO_STRONG_CACHE, node);
2434        Py_INCREF(node->zone);
2435        return node->zone;
2436    }
2437
2438    return NULL;  // Cache miss
2439}
2440
2441/* Inserts a new key into the strong LRU cache.
2442 *
2443 * This function is only to be used after a cache miss — it creates a new node
2444 * at the front of the cache and ejects any stale entries (keeping the size of
2445 * the cache to at most ZONEINFO_STRONG_CACHE_MAX_SIZE).
2446 */
2447static void
2448update_strong_cache(const PyTypeObject *const type, PyObject *key,
2449                    PyObject *zone)
2450{
2451    if (type != &PyZoneInfo_ZoneInfoType) {
2452        return;
2453    }
2454
2455    StrongCacheNode *new_node = strong_cache_node_new(key, zone);
2456
2457    move_strong_cache_node_to_front(&ZONEINFO_STRONG_CACHE, new_node);
2458
2459    StrongCacheNode *node = new_node->next;
2460    for (size_t i = 1; i < ZONEINFO_STRONG_CACHE_MAX_SIZE; ++i) {
2461        if (node == NULL) {
2462            return;
2463        }
2464        node = node->next;
2465    }
2466
2467    // Everything beyond this point needs to be freed
2468    if (node != NULL) {
2469        if (node->prev != NULL) {
2470            node->prev->next = NULL;
2471        }
2472        strong_cache_free(node);
2473    }
2474}
2475
2476/* Clears all entries into a type's strong cache.
2477 *
2478 * Because the strong cache is not implemented for subclasses, this is a no-op
2479 * for everything except the base class.
2480 */
2481void
2482clear_strong_cache(const PyTypeObject *const type)
2483{
2484    if (type != &PyZoneInfo_ZoneInfoType) {
2485        return;
2486    }
2487
2488    strong_cache_free(ZONEINFO_STRONG_CACHE);
2489    ZONEINFO_STRONG_CACHE = NULL;
2490}
2491
2492static PyObject *
2493new_weak_cache(void)
2494{
2495    PyObject *weakref_module = PyImport_ImportModule("weakref");
2496    if (weakref_module == NULL) {
2497        return NULL;
2498    }
2499
2500    PyObject *weak_cache =
2501        PyObject_CallMethod(weakref_module, "WeakValueDictionary", "");
2502    Py_DECREF(weakref_module);
2503    return weak_cache;
2504}
2505
2506static int
2507initialize_caches(void)
2508{
2509    // TODO: Move to a PyModule_GetState / PEP 573 based caching system.
2510    if (TIMEDELTA_CACHE == NULL) {
2511        TIMEDELTA_CACHE = PyDict_New();
2512    }
2513    else {
2514        Py_INCREF(TIMEDELTA_CACHE);
2515    }
2516
2517    if (TIMEDELTA_CACHE == NULL) {
2518        return -1;
2519    }
2520
2521    if (ZONEINFO_WEAK_CACHE == NULL) {
2522        ZONEINFO_WEAK_CACHE = new_weak_cache();
2523    }
2524    else {
2525        Py_INCREF(ZONEINFO_WEAK_CACHE);
2526    }
2527
2528    if (ZONEINFO_WEAK_CACHE == NULL) {
2529        return -1;
2530    }
2531
2532    return 0;
2533}
2534
2535static PyObject *
2536zoneinfo_init_subclass(PyTypeObject *cls, PyObject *args, PyObject **kwargs)
2537{
2538    PyObject *weak_cache = new_weak_cache();
2539    if (weak_cache == NULL) {
2540        return NULL;
2541    }
2542
2543    if (PyObject_SetAttrString((PyObject *)cls, "_weak_cache",
2544                               weak_cache) < 0) {
2545        Py_DECREF(weak_cache);
2546        return NULL;
2547    }
2548    Py_DECREF(weak_cache);
2549    Py_RETURN_NONE;
2550}
2551
2552/////
2553// Specify the ZoneInfo type
2554static PyMethodDef zoneinfo_methods[] = {
2555    {"clear_cache", (PyCFunction)(void (*)(void))zoneinfo_clear_cache,
2556     METH_VARARGS | METH_KEYWORDS | METH_CLASS,
2557     PyDoc_STR("Clear the ZoneInfo cache.")},
2558    {"no_cache", (PyCFunction)(void (*)(void))zoneinfo_no_cache,
2559     METH_VARARGS | METH_KEYWORDS | METH_CLASS,
2560     PyDoc_STR("Get a new instance of ZoneInfo, bypassing the cache.")},
2561    {"from_file", (PyCFunction)(void (*)(void))zoneinfo_from_file,
2562     METH_VARARGS | METH_KEYWORDS | METH_CLASS,
2563     PyDoc_STR("Create a ZoneInfo file from a file object.")},
2564    {"utcoffset", (PyCFunction)zoneinfo_utcoffset, METH_O,
2565     PyDoc_STR("Retrieve a timedelta representing the UTC offset in a zone at "
2566               "the given datetime.")},
2567    {"dst", (PyCFunction)zoneinfo_dst, METH_O,
2568     PyDoc_STR("Retrieve a timedelta representing the amount of DST applied "
2569               "in a zone at the given datetime.")},
2570    {"tzname", (PyCFunction)zoneinfo_tzname, METH_O,
2571     PyDoc_STR("Retrieve a string containing the abbreviation for the time "
2572               "zone that applies in a zone at a given datetime.")},
2573    {"fromutc", (PyCFunction)zoneinfo_fromutc, METH_O,
2574     PyDoc_STR("Given a datetime with local time in UTC, retrieve an adjusted "
2575               "datetime in local time.")},
2576    {"__reduce__", (PyCFunction)zoneinfo_reduce, METH_NOARGS,
2577     PyDoc_STR("Function for serialization with the pickle protocol.")},
2578    {"_unpickle", (PyCFunction)zoneinfo__unpickle, METH_VARARGS | METH_CLASS,
2579     PyDoc_STR("Private method used in unpickling.")},
2580    {"__init_subclass__", (PyCFunction)(void (*)(void))zoneinfo_init_subclass,
2581     METH_VARARGS | METH_KEYWORDS | METH_CLASS,
2582     PyDoc_STR("Function to initialize subclasses.")},
2583    {NULL} /* Sentinel */
2584};
2585
2586static PyMemberDef zoneinfo_members[] = {
2587    {.name = "key",
2588     .offset = offsetof(PyZoneInfo_ZoneInfo, key),
2589     .type = T_OBJECT_EX,
2590     .flags = READONLY,
2591     .doc = NULL},
2592    {NULL}, /* Sentinel */
2593};
2594
2595static PyTypeObject PyZoneInfo_ZoneInfoType = {
2596    PyVarObject_HEAD_INIT(NULL, 0)  //
2597        .tp_name = "zoneinfo.ZoneInfo",
2598    .tp_basicsize = sizeof(PyZoneInfo_ZoneInfo),
2599    .tp_weaklistoffset = offsetof(PyZoneInfo_ZoneInfo, weakreflist),
2600    .tp_repr = (reprfunc)zoneinfo_repr,
2601    .tp_str = (reprfunc)zoneinfo_str,
2602    .tp_getattro = PyObject_GenericGetAttr,
2603    .tp_flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE),
2604    /* .tp_doc = zoneinfo_doc, */
2605    .tp_methods = zoneinfo_methods,
2606    .tp_members = zoneinfo_members,
2607    .tp_new = zoneinfo_new,
2608    .tp_dealloc = zoneinfo_dealloc,
2609};
2610
2611/////
2612// Specify the _zoneinfo module
2613static PyMethodDef module_methods[] = {{NULL, NULL}};
2614static void
2615module_free(void *m)
2616{
2617    Py_XDECREF(_tzpath_find_tzfile);
2618    _tzpath_find_tzfile = NULL;
2619
2620    Py_XDECREF(_common_mod);
2621    _common_mod = NULL;
2622
2623    Py_XDECREF(io_open);
2624    io_open = NULL;
2625
2626    xdecref_ttinfo(&NO_TTINFO);
2627
2628    if (TIMEDELTA_CACHE != NULL && Py_REFCNT(TIMEDELTA_CACHE) > 1) {
2629        Py_DECREF(TIMEDELTA_CACHE);
2630    } else {
2631        Py_CLEAR(TIMEDELTA_CACHE);
2632    }
2633
2634    if (ZONEINFO_WEAK_CACHE != NULL && Py_REFCNT(ZONEINFO_WEAK_CACHE) > 1) {
2635        Py_DECREF(ZONEINFO_WEAK_CACHE);
2636    } else {
2637        Py_CLEAR(ZONEINFO_WEAK_CACHE);
2638    }
2639
2640    clear_strong_cache(&PyZoneInfo_ZoneInfoType);
2641}
2642
2643static int
2644zoneinfomodule_exec(PyObject *m)
2645{
2646    PyDateTime_IMPORT;
2647    if (PyDateTimeAPI == NULL) {
2648        goto error;
2649    }
2650    PyZoneInfo_ZoneInfoType.tp_base = PyDateTimeAPI->TZInfoType;
2651    if (PyType_Ready(&PyZoneInfo_ZoneInfoType) < 0) {
2652        goto error;
2653    }
2654
2655    if (PyModule_AddObjectRef(m, "ZoneInfo", (PyObject *)&PyZoneInfo_ZoneInfoType) < 0) {
2656        goto error;
2657    }
2658
2659    /* Populate imports */
2660    PyObject *_tzpath_module = PyImport_ImportModule("zoneinfo._tzpath");
2661    if (_tzpath_module == NULL) {
2662        goto error;
2663    }
2664
2665    _tzpath_find_tzfile =
2666        PyObject_GetAttrString(_tzpath_module, "find_tzfile");
2667    Py_DECREF(_tzpath_module);
2668    if (_tzpath_find_tzfile == NULL) {
2669        goto error;
2670    }
2671
2672    PyObject *io_module = PyImport_ImportModule("io");
2673    if (io_module == NULL) {
2674        goto error;
2675    }
2676
2677    io_open = PyObject_GetAttrString(io_module, "open");
2678    Py_DECREF(io_module);
2679    if (io_open == NULL) {
2680        goto error;
2681    }
2682
2683    _common_mod = PyImport_ImportModule("zoneinfo._common");
2684    if (_common_mod == NULL) {
2685        goto error;
2686    }
2687
2688    if (NO_TTINFO.utcoff == NULL) {
2689        NO_TTINFO.utcoff = Py_None;
2690        NO_TTINFO.dstoff = Py_None;
2691        NO_TTINFO.tzname = Py_None;
2692
2693        for (size_t i = 0; i < 3; ++i) {
2694            Py_INCREF(Py_None);
2695        }
2696    }
2697
2698    if (initialize_caches()) {
2699        goto error;
2700    }
2701
2702    return 0;
2703
2704error:
2705    return -1;
2706}
2707
2708static PyModuleDef_Slot zoneinfomodule_slots[] = {
2709    {Py_mod_exec, zoneinfomodule_exec}, {0, NULL}};
2710
2711static struct PyModuleDef zoneinfomodule = {
2712    PyModuleDef_HEAD_INIT,
2713    .m_name = "_zoneinfo",
2714    .m_doc = "C implementation of the zoneinfo module",
2715    .m_size = 0,
2716    .m_methods = module_methods,
2717    .m_slots = zoneinfomodule_slots,
2718    .m_free = (freefunc)module_free};
2719
2720PyMODINIT_FUNC
2721PyInit__zoneinfo(void)
2722{
2723    return PyModuleDef_Init(&zoneinfomodule);
2724}
2725