xref: /third_party/python/Modules/_elementtree.c (revision 7db96d56)
1/*--------------------------------------------------------------------
2 * Licensed to PSF under a Contributor Agreement.
3 * See https://www.python.org/psf/license for licensing details.
4 *
5 * _elementtree - C accelerator for xml.etree.ElementTree
6 * Copyright (c) 1999-2009 by Secret Labs AB.  All rights reserved.
7 * Copyright (c) 1999-2009 by Fredrik Lundh.
8 *
9 * info@pythonware.com
10 * http://www.pythonware.com
11 *--------------------------------------------------------------------
12 */
13
14#define PY_SSIZE_T_CLEAN
15#define NEEDS_PY_IDENTIFIER
16
17#include "Python.h"
18#include "structmember.h"         // PyMemberDef
19
20/* -------------------------------------------------------------------- */
21/* configuration */
22
/* Number of child slots stored inline in an element's extra block; an
   element can hold this many children without extra memory
   allocations. */
#define STATIC_CHILDREN 4

/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */

/* Also note that pymalloc always allocates blocks in multiples of
   eight bytes.  For the current C version of ElementTree, this means
   that the number of children should be an even number, at least on
   32-bit platforms. */
36
37/* -------------------------------------------------------------------- */
38
/* compiler tweaks: LOCAL(type) declares a file-local function returning
   'type'; MSVC builds additionally request inlining and __fastcall. */
#if !defined(_MSC_VER)
#define LOCAL(type) static type
#else
#define LOCAL(type) static __inline type __fastcall
#endif
45
/* Macros that keep a 'join' flag in the lowest bit of a string object
   pointer.  Every use of text and tail as an object pointer must go
   through JOIN_OBJ; see the comments in the ElementObject definition
   for more info.

   JOIN_OBJ(p)       - the pointer with the flag bit masked off
   JOIN_GET(p)       - the flag bit (0 or 1)
   JOIN_SET(p, flag) - the pointer with its flag bit replaced by 'flag' */
#define JOIN_OBJ(p) ((PyObject*) ((uintptr_t) (p) & ~(uintptr_t)1))
#define JOIN_GET(p) ((uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((uintptr_t) (JOIN_OBJ(p)) | (flag)))
53
54/* Py_SETREF for a PyObject* that uses a join flag. */
55Py_LOCAL_INLINE(void)
56_set_joined_ptr(PyObject **p, PyObject *new_joined_ptr)
57{
58    PyObject *tmp = JOIN_OBJ(*p);
59    *p = new_joined_ptr;
60    Py_DECREF(tmp);
61}
62
63/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
64 * reference since this function sets it to NULL.
65*/
66static void _clear_joined_ptr(PyObject **p)
67{
68    if (*p) {
69        _set_joined_ptr(p, NULL);
70    }
71}
72
73/* Types defined by this extension */
74static PyTypeObject Element_Type;
75static PyTypeObject ElementIter_Type;
76static PyTypeObject TreeBuilder_Type;
77static PyTypeObject XMLParser_Type;
78
79
80/* Per-module state; PEP 3121 */
81typedef struct {
82    PyObject *parseerror_obj;
83    PyObject *deepcopy_obj;
84    PyObject *elementpath_obj;
85    PyObject *comment_factory;
86    PyObject *pi_factory;
87} elementtreestate;
88
89static struct PyModuleDef elementtreemodule;
90
91/* Given a module object (assumed to be _elementtree), get its per-module
92 * state.
93 */
94static inline elementtreestate*
95get_elementtree_state(PyObject *module)
96{
97    void *state = PyModule_GetState(module);
98    assert(state != NULL);
99    return (elementtreestate *)state;
100}
101
/* Look up the module instance imported in the currently running
 * sub-interpreter (via PyState_FindModule) and yield its state.
 */
#define ET_STATE_GLOBAL                                   \
    ((elementtreestate *) PyModule_GetState(              \
        PyState_FindModule(&elementtreemodule)))
107
108static int
109elementtree_clear(PyObject *m)
110{
111    elementtreestate *st = get_elementtree_state(m);
112    Py_CLEAR(st->parseerror_obj);
113    Py_CLEAR(st->deepcopy_obj);
114    Py_CLEAR(st->elementpath_obj);
115    Py_CLEAR(st->comment_factory);
116    Py_CLEAR(st->pi_factory);
117    return 0;
118}
119
120static int
121elementtree_traverse(PyObject *m, visitproc visit, void *arg)
122{
123    elementtreestate *st = get_elementtree_state(m);
124    Py_VISIT(st->parseerror_obj);
125    Py_VISIT(st->deepcopy_obj);
126    Py_VISIT(st->elementpath_obj);
127    Py_VISIT(st->comment_factory);
128    Py_VISIT(st->pi_factory);
129    return 0;
130}
131
132static void
133elementtree_free(void *m)
134{
135    elementtree_clear((PyObject *)m);
136}
137
138/* helpers */
139
140LOCAL(PyObject*)
141list_join(PyObject* list)
142{
143    /* join list elements */
144    PyObject* joiner;
145    PyObject* result;
146
147    joiner = PyUnicode_FromStringAndSize("", 0);
148    if (!joiner)
149        return NULL;
150    result = PyUnicode_Join(joiner, list);
151    Py_DECREF(joiner);
152    return result;
153}
154
155/* Is the given object an empty dictionary?
156*/
157static int
158is_empty_dict(PyObject *obj)
159{
160    return PyDict_CheckExact(obj) && PyDict_GET_SIZE(obj) == 0;
161}
162
163
164/* -------------------------------------------------------------------- */
165/* the Element type */
166
167typedef struct {
168
169    /* attributes (a dictionary object), or NULL if no attributes */
170    PyObject* attrib;
171
172    /* child elements */
173    Py_ssize_t length; /* actual number of items */
174    Py_ssize_t allocated; /* allocated items */
175
176    /* this either points to _children or to a malloced buffer */
177    PyObject* *children;
178
179    PyObject* _children[STATIC_CHILDREN];
180
181} ElementObjectExtra;
182
183typedef struct {
184    PyObject_HEAD
185
186    /* element tag (a string). */
187    PyObject* tag;
188
189    /* text before first child.  note that this is a tagged pointer;
190       use JOIN_OBJ to get the object pointer.  the join flag is used
191       to distinguish lists created by the tree builder from lists
192       assigned to the attribute by application code; the former
193       should be joined before being returned to the user, the latter
194       should be left intact. */
195    PyObject* text;
196
197    /* text after this element, in parent.  note that this is a tagged
198       pointer; use JOIN_OBJ to get the object pointer. */
199    PyObject* tail;
200
201    ElementObjectExtra* extra;
202
203    PyObject *weakreflist; /* For tp_weaklistoffset */
204
205} ElementObject;
206
207
/* Element_Check accepts Element and its subtypes; Element_CheckExact
   accepts only objects whose type is exactly Element_Type. */
#define Element_Check(op) PyObject_TypeCheck(op, &Element_Type)
#define Element_CheckExact(op) Py_IS_TYPE(op, &Element_Type)
210
211
212/* -------------------------------------------------------------------- */
213/* Element constructors and destructor */
214
215LOCAL(int)
216create_extra(ElementObject* self, PyObject* attrib)
217{
218    self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
219    if (!self->extra) {
220        PyErr_NoMemory();
221        return -1;
222    }
223
224    Py_XINCREF(attrib);
225    self->extra->attrib = attrib;
226
227    self->extra->length = 0;
228    self->extra->allocated = STATIC_CHILDREN;
229    self->extra->children = self->extra->_children;
230
231    return 0;
232}
233
234LOCAL(void)
235dealloc_extra(ElementObjectExtra *extra)
236{
237    Py_ssize_t i;
238
239    if (!extra)
240        return;
241
242    Py_XDECREF(extra->attrib);
243
244    for (i = 0; i < extra->length; i++)
245        Py_DECREF(extra->children[i]);
246
247    if (extra->children != extra->_children)
248        PyObject_Free(extra->children);
249
250    PyObject_Free(extra);
251}
252
253LOCAL(void)
254clear_extra(ElementObject* self)
255{
256    ElementObjectExtra *myextra;
257
258    if (!self->extra)
259        return;
260
261    /* Avoid DECREFs calling into this code again (cycles, etc.)
262    */
263    myextra = self->extra;
264    self->extra = NULL;
265
266    dealloc_extra(myextra);
267}
268
269/* Convenience internal function to create new Element objects with the given
270 * tag and attributes.
271*/
272LOCAL(PyObject*)
273create_new_element(PyObject* tag, PyObject* attrib)
274{
275    ElementObject* self;
276
277    self = PyObject_GC_New(ElementObject, &Element_Type);
278    if (self == NULL)
279        return NULL;
280    self->extra = NULL;
281
282    Py_INCREF(tag);
283    self->tag = tag;
284
285    Py_INCREF(Py_None);
286    self->text = Py_None;
287
288    Py_INCREF(Py_None);
289    self->tail = Py_None;
290
291    self->weakreflist = NULL;
292
293    PyObject_GC_Track(self);
294
295    if (attrib != NULL && !is_empty_dict(attrib)) {
296        if (create_extra(self, attrib) < 0) {
297            Py_DECREF(self);
298            return NULL;
299        }
300    }
301
302    return (PyObject*) self;
303}
304
305static PyObject *
306element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
307{
308    ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
309    if (e != NULL) {
310        Py_INCREF(Py_None);
311        e->tag = Py_None;
312
313        Py_INCREF(Py_None);
314        e->text = Py_None;
315
316        Py_INCREF(Py_None);
317        e->tail = Py_None;
318
319        e->extra = NULL;
320        e->weakreflist = NULL;
321    }
322    return (PyObject *)e;
323}
324
325/* Helper function for extracting the attrib dictionary from a keywords dict.
326 * This is required by some constructors/functions in this module that can
327 * either accept attrib as a keyword argument or all attributes splashed
328 * directly into *kwds.
329 *
330 * Return a dictionary with the content of kwds merged into the content of
331 * attrib. If there is no attrib keyword, return a copy of kwds.
332 */
333static PyObject*
334get_attrib_from_keywords(PyObject *kwds)
335{
336    PyObject *attrib_str = PyUnicode_FromString("attrib");
337    if (attrib_str == NULL) {
338        return NULL;
339    }
340    PyObject *attrib = PyDict_GetItemWithError(kwds, attrib_str);
341
342    if (attrib) {
343        /* If attrib was found in kwds, copy its value and remove it from
344         * kwds
345         */
346        if (!PyDict_Check(attrib)) {
347            Py_DECREF(attrib_str);
348            PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
349                         Py_TYPE(attrib)->tp_name);
350            return NULL;
351        }
352        attrib = PyDict_Copy(attrib);
353        if (attrib && PyDict_DelItem(kwds, attrib_str) < 0) {
354            Py_DECREF(attrib);
355            attrib = NULL;
356        }
357    }
358    else if (!PyErr_Occurred()) {
359        attrib = PyDict_New();
360    }
361
362    Py_DECREF(attrib_str);
363
364    if (attrib != NULL && PyDict_Update(attrib, kwds) < 0) {
365        Py_DECREF(attrib);
366        return NULL;
367    }
368    return attrib;
369}
370
371/*[clinic input]
372module _elementtree
373class _elementtree.Element "ElementObject *" "&Element_Type"
374class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type"
375class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type"
376[clinic start generated code]*/
377/*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/
378
379static int
380element_init(PyObject *self, PyObject *args, PyObject *kwds)
381{
382    PyObject *tag;
383    PyObject *attrib = NULL;
384    ElementObject *self_elem;
385
386    if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
387        return -1;
388
389    if (attrib) {
390        /* attrib passed as positional arg */
391        attrib = PyDict_Copy(attrib);
392        if (!attrib)
393            return -1;
394        if (kwds) {
395            if (PyDict_Update(attrib, kwds) < 0) {
396                Py_DECREF(attrib);
397                return -1;
398            }
399        }
400    } else if (kwds) {
401        /* have keywords args */
402        attrib = get_attrib_from_keywords(kwds);
403        if (!attrib)
404            return -1;
405    }
406
407    self_elem = (ElementObject *)self;
408
409    if (attrib != NULL && !is_empty_dict(attrib)) {
410        if (create_extra(self_elem, attrib) < 0) {
411            Py_DECREF(attrib);
412            return -1;
413        }
414    }
415
416    /* We own a reference to attrib here and it's no longer needed. */
417    Py_XDECREF(attrib);
418
419    /* Replace the objects already pointed to by tag, text and tail. */
420    Py_INCREF(tag);
421    Py_XSETREF(self_elem->tag, tag);
422
423    Py_INCREF(Py_None);
424    _set_joined_ptr(&self_elem->text, Py_None);
425
426    Py_INCREF(Py_None);
427    _set_joined_ptr(&self_elem->tail, Py_None);
428
429    return 0;
430}
431
432LOCAL(int)
433element_resize(ElementObject* self, Py_ssize_t extra)
434{
435    Py_ssize_t size;
436    PyObject* *children;
437
438    assert(extra >= 0);
439    /* make sure self->children can hold the given number of extra
440       elements.  set an exception and return -1 if allocation failed */
441
442    if (!self->extra) {
443        if (create_extra(self, NULL) < 0)
444            return -1;
445    }
446
447    size = self->extra->length + extra;  /* never overflows */
448
449    if (size > self->extra->allocated) {
450        /* use Python 2.4's list growth strategy */
451        size = (size >> 3) + (size < 9 ? 3 : 6) + size;
452        /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
453         * which needs at least 4 bytes.
454         * Although it's a false alarm always assume at least one child to
455         * be safe.
456         */
457        size = size ? size : 1;
458        if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
459            goto nomemory;
460        if (self->extra->children != self->extra->_children) {
461            /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
462             * "children", which needs at least 4 bytes. Although it's a
463             * false alarm always assume at least one child to be safe.
464             */
465            children = PyObject_Realloc(self->extra->children,
466                                        size * sizeof(PyObject*));
467            if (!children)
468                goto nomemory;
469        } else {
470            children = PyObject_Malloc(size * sizeof(PyObject*));
471            if (!children)
472                goto nomemory;
473            /* copy existing children from static area to malloc buffer */
474            memcpy(children, self->extra->children,
475                   self->extra->length * sizeof(PyObject*));
476        }
477        self->extra->children = children;
478        self->extra->allocated = size;
479    }
480
481    return 0;
482
483  nomemory:
484    PyErr_NoMemory();
485    return -1;
486}
487
488LOCAL(void)
489raise_type_error(PyObject *element)
490{
491    PyErr_Format(PyExc_TypeError,
492                 "expected an Element, not \"%.200s\"",
493                 Py_TYPE(element)->tp_name);
494}
495
496LOCAL(int)
497element_add_subelement(ElementObject* self, PyObject* element)
498{
499    /* add a child element to a parent */
500
501    if (!Element_Check(element)) {
502        raise_type_error(element);
503        return -1;
504    }
505
506    if (element_resize(self, 1) < 0)
507        return -1;
508
509    Py_INCREF(element);
510    self->extra->children[self->extra->length] = element;
511
512    self->extra->length++;
513
514    return 0;
515}
516
517LOCAL(PyObject*)
518element_get_attrib(ElementObject* self)
519{
520    /* return borrowed reference to attrib dictionary */
521    /* note: this function assumes that the extra section exists */
522
523    PyObject* res = self->extra->attrib;
524
525    if (!res) {
526        /* create missing dictionary */
527        res = self->extra->attrib = PyDict_New();
528    }
529
530    return res;
531}
532
533LOCAL(PyObject*)
534element_get_text(ElementObject* self)
535{
536    /* return borrowed reference to text attribute */
537
538    PyObject *res = self->text;
539
540    if (JOIN_GET(res)) {
541        res = JOIN_OBJ(res);
542        if (PyList_CheckExact(res)) {
543            PyObject *tmp = list_join(res);
544            if (!tmp)
545                return NULL;
546            self->text = tmp;
547            Py_DECREF(res);
548            res = tmp;
549        }
550    }
551
552    return res;
553}
554
555LOCAL(PyObject*)
556element_get_tail(ElementObject* self)
557{
558    /* return borrowed reference to text attribute */
559
560    PyObject *res = self->tail;
561
562    if (JOIN_GET(res)) {
563        res = JOIN_OBJ(res);
564        if (PyList_CheckExact(res)) {
565            PyObject *tmp = list_join(res);
566            if (!tmp)
567                return NULL;
568            self->tail = tmp;
569            Py_DECREF(res);
570            res = tmp;
571        }
572    }
573
574    return res;
575}
576
577static PyObject*
578subelement(PyObject *self, PyObject *args, PyObject *kwds)
579{
580    PyObject* elem;
581
582    ElementObject* parent;
583    PyObject* tag;
584    PyObject* attrib = NULL;
585    if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
586                          &Element_Type, &parent, &tag,
587                          &PyDict_Type, &attrib)) {
588        return NULL;
589    }
590
591    if (attrib) {
592        /* attrib passed as positional arg */
593        attrib = PyDict_Copy(attrib);
594        if (!attrib)
595            return NULL;
596        if (kwds != NULL && PyDict_Update(attrib, kwds) < 0) {
597            Py_DECREF(attrib);
598            return NULL;
599        }
600    } else if (kwds) {
601        /* have keyword args */
602        attrib = get_attrib_from_keywords(kwds);
603        if (!attrib)
604            return NULL;
605    } else {
606        /* no attrib arg, no kwds, so no attribute */
607    }
608
609    elem = create_new_element(tag, attrib);
610    Py_XDECREF(attrib);
611    if (elem == NULL)
612        return NULL;
613
614    if (element_add_subelement(parent, elem) < 0) {
615        Py_DECREF(elem);
616        return NULL;
617    }
618
619    return elem;
620}
621
622static int
623element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
624{
625    Py_VISIT(self->tag);
626    Py_VISIT(JOIN_OBJ(self->text));
627    Py_VISIT(JOIN_OBJ(self->tail));
628
629    if (self->extra) {
630        Py_ssize_t i;
631        Py_VISIT(self->extra->attrib);
632
633        for (i = 0; i < self->extra->length; ++i)
634            Py_VISIT(self->extra->children[i]);
635    }
636    return 0;
637}
638
639static int
640element_gc_clear(ElementObject *self)
641{
642    Py_CLEAR(self->tag);
643    _clear_joined_ptr(&self->text);
644    _clear_joined_ptr(&self->tail);
645
646    /* After dropping all references from extra, it's no longer valid anyway,
647     * so fully deallocate it.
648    */
649    clear_extra(self);
650    return 0;
651}
652
653static void
654element_dealloc(ElementObject* self)
655{
656    /* bpo-31095: UnTrack is needed before calling any callbacks */
657    PyObject_GC_UnTrack(self);
658    Py_TRASHCAN_BEGIN(self, element_dealloc)
659
660    if (self->weakreflist != NULL)
661        PyObject_ClearWeakRefs((PyObject *) self);
662
663    /* element_gc_clear clears all references and deallocates extra
664    */
665    element_gc_clear(self);
666
667    Py_TYPE(self)->tp_free((PyObject *)self);
668    Py_TRASHCAN_END
669}
670
671/* -------------------------------------------------------------------- */
672
673/*[clinic input]
674_elementtree.Element.append
675
676    subelement: object(subclass_of='&Element_Type')
677    /
678
679[clinic start generated code]*/
680
681static PyObject *
682_elementtree_Element_append_impl(ElementObject *self, PyObject *subelement)
683/*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/
684{
685    if (element_add_subelement(self, subelement) < 0)
686        return NULL;
687
688    Py_RETURN_NONE;
689}
690
691/*[clinic input]
692_elementtree.Element.clear
693
694[clinic start generated code]*/
695
696static PyObject *
697_elementtree_Element_clear_impl(ElementObject *self)
698/*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/
699{
700    clear_extra(self);
701
702    Py_INCREF(Py_None);
703    _set_joined_ptr(&self->text, Py_None);
704
705    Py_INCREF(Py_None);
706    _set_joined_ptr(&self->tail, Py_None);
707
708    Py_RETURN_NONE;
709}
710
711/*[clinic input]
712_elementtree.Element.__copy__
713
714[clinic start generated code]*/
715
716static PyObject *
717_elementtree_Element___copy___impl(ElementObject *self)
718/*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/
719{
720    Py_ssize_t i;
721    ElementObject* element;
722
723    element = (ElementObject*) create_new_element(
724        self->tag, self->extra ? self->extra->attrib : NULL);
725    if (!element)
726        return NULL;
727
728    Py_INCREF(JOIN_OBJ(self->text));
729    _set_joined_ptr(&element->text, self->text);
730
731    Py_INCREF(JOIN_OBJ(self->tail));
732    _set_joined_ptr(&element->tail, self->tail);
733
734    assert(!element->extra || !element->extra->length);
735    if (self->extra) {
736        if (element_resize(element, self->extra->length) < 0) {
737            Py_DECREF(element);
738            return NULL;
739        }
740
741        for (i = 0; i < self->extra->length; i++) {
742            Py_INCREF(self->extra->children[i]);
743            element->extra->children[i] = self->extra->children[i];
744        }
745
746        assert(!element->extra->length);
747        element->extra->length = self->extra->length;
748    }
749
750    return (PyObject*) element;
751}
752
753/* Helper for a deep copy. */
754LOCAL(PyObject *) deepcopy(PyObject *, PyObject *);
755
756/*[clinic input]
757_elementtree.Element.__deepcopy__
758
759    memo: object(subclass_of="&PyDict_Type")
760    /
761
762[clinic start generated code]*/
763
764static PyObject *
765_elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo)
766/*[clinic end generated code: output=eefc3df50465b642 input=a2d40348c0aade10]*/
767{
768    Py_ssize_t i;
769    ElementObject* element;
770    PyObject* tag;
771    PyObject* attrib;
772    PyObject* text;
773    PyObject* tail;
774    PyObject* id;
775
776    tag = deepcopy(self->tag, memo);
777    if (!tag)
778        return NULL;
779
780    if (self->extra && self->extra->attrib) {
781        attrib = deepcopy(self->extra->attrib, memo);
782        if (!attrib) {
783            Py_DECREF(tag);
784            return NULL;
785        }
786    } else {
787        attrib = NULL;
788    }
789
790    element = (ElementObject*) create_new_element(tag, attrib);
791
792    Py_DECREF(tag);
793    Py_XDECREF(attrib);
794
795    if (!element)
796        return NULL;
797
798    text = deepcopy(JOIN_OBJ(self->text), memo);
799    if (!text)
800        goto error;
801    _set_joined_ptr(&element->text, JOIN_SET(text, JOIN_GET(self->text)));
802
803    tail = deepcopy(JOIN_OBJ(self->tail), memo);
804    if (!tail)
805        goto error;
806    _set_joined_ptr(&element->tail, JOIN_SET(tail, JOIN_GET(self->tail)));
807
808    assert(!element->extra || !element->extra->length);
809    if (self->extra) {
810        if (element_resize(element, self->extra->length) < 0)
811            goto error;
812
813        for (i = 0; i < self->extra->length; i++) {
814            PyObject* child = deepcopy(self->extra->children[i], memo);
815            if (!child || !Element_Check(child)) {
816                if (child) {
817                    raise_type_error(child);
818                    Py_DECREF(child);
819                }
820                element->extra->length = i;
821                goto error;
822            }
823            element->extra->children[i] = child;
824        }
825
826        assert(!element->extra->length);
827        element->extra->length = self->extra->length;
828    }
829
830    /* add object to memo dictionary (so deepcopy won't visit it again) */
831    id = PyLong_FromSsize_t((uintptr_t) self);
832    if (!id)
833        goto error;
834
835    i = PyDict_SetItem(memo, id, (PyObject*) element);
836
837    Py_DECREF(id);
838
839    if (i < 0)
840        goto error;
841
842    return (PyObject*) element;
843
844  error:
845    Py_DECREF(element);
846    return NULL;
847}
848
849LOCAL(PyObject *)
850deepcopy(PyObject *object, PyObject *memo)
851{
852    /* do a deep copy of the given object */
853    elementtreestate *st;
854    PyObject *stack[2];
855
856    /* Fast paths */
857    if (object == Py_None || PyUnicode_CheckExact(object)) {
858        Py_INCREF(object);
859        return object;
860    }
861
862    if (Py_REFCNT(object) == 1) {
863        if (PyDict_CheckExact(object)) {
864            PyObject *key, *value;
865            Py_ssize_t pos = 0;
866            int simple = 1;
867            while (PyDict_Next(object, &pos, &key, &value)) {
868                if (!PyUnicode_CheckExact(key) || !PyUnicode_CheckExact(value)) {
869                    simple = 0;
870                    break;
871                }
872            }
873            if (simple)
874                return PyDict_Copy(object);
875            /* Fall through to general case */
876        }
877        else if (Element_CheckExact(object)) {
878            return _elementtree_Element___deepcopy___impl(
879                (ElementObject *)object, memo);
880        }
881    }
882
883    /* General case */
884    st = ET_STATE_GLOBAL;
885    if (!st->deepcopy_obj) {
886        PyErr_SetString(PyExc_RuntimeError,
887                        "deepcopy helper not found");
888        return NULL;
889    }
890
891    stack[0] = object;
892    stack[1] = memo;
893    return _PyObject_FastCall(st->deepcopy_obj, stack, 2);
894}
895
896
897/*[clinic input]
898_elementtree.Element.__sizeof__ -> Py_ssize_t
899
900[clinic start generated code]*/
901
902static Py_ssize_t
903_elementtree_Element___sizeof___impl(ElementObject *self)
904/*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/
905{
906    Py_ssize_t result = _PyObject_SIZE(Py_TYPE(self));
907    if (self->extra) {
908        result += sizeof(ElementObjectExtra);
909        if (self->extra->children != self->extra->_children)
910            result += sizeof(PyObject*) * self->extra->allocated;
911    }
912    return result;
913}
914
/* Keys of the state dict built by __getstate__ and read by __setstate__. */
#define PICKLED_ATTRIB "attrib"
#define PICKLED_CHILDREN "_children"
#define PICKLED_TAG "tag"
#define PICKLED_TAIL "tail"
#define PICKLED_TEXT "text"
921
922/* __getstate__ returns a fabricated instance dict as in the pure-Python
923 * Element implementation, for interoperability/interchangeability.  This
924 * makes the pure-Python implementation details an API, but (a) there aren't
925 * any unnecessary structures there; and (b) it buys compatibility with 3.2
926 * pickles.  See issue #16076.
927 */
928/*[clinic input]
929_elementtree.Element.__getstate__
930
931[clinic start generated code]*/
932
933static PyObject *
934_elementtree_Element___getstate___impl(ElementObject *self)
935/*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/
936{
937    Py_ssize_t i;
938    PyObject *children, *attrib;
939
940    /* Build a list of children. */
941    children = PyList_New(self->extra ? self->extra->length : 0);
942    if (!children)
943        return NULL;
944    for (i = 0; i < PyList_GET_SIZE(children); i++) {
945        PyObject *child = self->extra->children[i];
946        Py_INCREF(child);
947        PyList_SET_ITEM(children, i, child);
948    }
949
950    if (self->extra && self->extra->attrib) {
951        attrib = self->extra->attrib;
952        Py_INCREF(attrib);
953    }
954    else {
955        attrib = PyDict_New();
956        if (!attrib) {
957            Py_DECREF(children);
958            return NULL;
959        }
960    }
961
962    return Py_BuildValue("{sOsNsNsOsO}",
963                         PICKLED_TAG, self->tag,
964                         PICKLED_CHILDREN, children,
965                         PICKLED_ATTRIB, attrib,
966                         PICKLED_TEXT, JOIN_OBJ(self->text),
967                         PICKLED_TAIL, JOIN_OBJ(self->tail));
968}
969
970static PyObject *
971element_setstate_from_attributes(ElementObject *self,
972                                 PyObject *tag,
973                                 PyObject *attrib,
974                                 PyObject *text,
975                                 PyObject *tail,
976                                 PyObject *children)
977{
978    Py_ssize_t i, nchildren;
979    ElementObjectExtra *oldextra = NULL;
980
981    if (!tag) {
982        PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
983        return NULL;
984    }
985
986    Py_INCREF(tag);
987    Py_XSETREF(self->tag, tag);
988
989    text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
990    Py_INCREF(JOIN_OBJ(text));
991    _set_joined_ptr(&self->text, text);
992
993    tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
994    Py_INCREF(JOIN_OBJ(tail));
995    _set_joined_ptr(&self->tail, tail);
996
997    /* Handle ATTRIB and CHILDREN. */
998    if (!children && !attrib) {
999        Py_RETURN_NONE;
1000    }
1001
1002    /* Compute 'nchildren'. */
1003    if (children) {
1004        if (!PyList_Check(children)) {
1005            PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
1006            return NULL;
1007        }
1008        nchildren = PyList_GET_SIZE(children);
1009
1010        /* (Re-)allocate 'extra'.
1011           Avoid DECREFs calling into this code again (cycles, etc.)
1012         */
1013        oldextra = self->extra;
1014        self->extra = NULL;
1015        if (element_resize(self, nchildren)) {
1016            assert(!self->extra || !self->extra->length);
1017            clear_extra(self);
1018            self->extra = oldextra;
1019            return NULL;
1020        }
1021        assert(self->extra);
1022        assert(self->extra->allocated >= nchildren);
1023        if (oldextra) {
1024            assert(self->extra->attrib == NULL);
1025            self->extra->attrib = oldextra->attrib;
1026            oldextra->attrib = NULL;
1027        }
1028
1029        /* Copy children */
1030        for (i = 0; i < nchildren; i++) {
1031            PyObject *child = PyList_GET_ITEM(children, i);
1032            if (!Element_Check(child)) {
1033                raise_type_error(child);
1034                self->extra->length = i;
1035                dealloc_extra(oldextra);
1036                return NULL;
1037            }
1038            Py_INCREF(child);
1039            self->extra->children[i] = child;
1040        }
1041
1042        assert(!self->extra->length);
1043        self->extra->length = nchildren;
1044    }
1045    else {
1046        if (element_resize(self, 0)) {
1047            return NULL;
1048        }
1049    }
1050
1051    /* Stash attrib. */
1052    Py_XINCREF(attrib);
1053    Py_XSETREF(self->extra->attrib, attrib);
1054    dealloc_extra(oldextra);
1055
1056    Py_RETURN_NONE;
1057}
1058
1059/* __setstate__ for Element instance from the Python implementation.
1060 * 'state' should be the instance dict.
1061 */
1062
1063static PyObject *
1064element_setstate_from_Python(ElementObject *self, PyObject *state)
1065{
1066    static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
1067                             PICKLED_TAIL, PICKLED_CHILDREN, 0};
1068    PyObject *args;
1069    PyObject *tag, *attrib, *text, *tail, *children;
1070    PyObject *retval;
1071
1072    tag = attrib = text = tail = children = NULL;
1073    args = PyTuple_New(0);
1074    if (!args)
1075        return NULL;
1076
1077    if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
1078                                    &attrib, &text, &tail, &children))
1079        retval = element_setstate_from_attributes(self, tag, attrib, text,
1080                                                  tail, children);
1081    else
1082        retval = NULL;
1083
1084    Py_DECREF(args);
1085    return retval;
1086}
1087
1088/*[clinic input]
1089_elementtree.Element.__setstate__
1090
1091    state: object
1092    /
1093
1094[clinic start generated code]*/
1095
1096static PyObject *
1097_elementtree_Element___setstate__(ElementObject *self, PyObject *state)
1098/*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/
1099{
1100    if (!PyDict_CheckExact(state)) {
1101        PyErr_Format(PyExc_TypeError,
1102                     "Don't know how to unpickle \"%.200R\" as an Element",
1103                     state);
1104        return NULL;
1105    }
1106    else
1107        return element_setstate_from_Python(self, state);
1108}
1109
1110LOCAL(int)
1111checkpath(PyObject* tag)
1112{
1113    Py_ssize_t i;
1114    int check = 1;
1115
1116    /* check if a tag contains an xpath character */
1117
1118#define PATHCHAR(ch) \
1119    (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
1120
1121    if (PyUnicode_Check(tag)) {
1122        const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1123        const void *data = PyUnicode_DATA(tag);
1124        unsigned int kind = PyUnicode_KIND(tag);
1125        if (len >= 3 && PyUnicode_READ(kind, data, 0) == '{' && (
1126                PyUnicode_READ(kind, data, 1) == '}' || (
1127                PyUnicode_READ(kind, data, 1) == '*' &&
1128                PyUnicode_READ(kind, data, 2) == '}'))) {
1129            /* wildcard: '{}tag' or '{*}tag' */
1130            return 1;
1131        }
1132        for (i = 0; i < len; i++) {
1133            Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1134            if (ch == '{')
1135                check = 0;
1136            else if (ch == '}')
1137                check = 1;
1138            else if (check && PATHCHAR(ch))
1139                return 1;
1140        }
1141        return 0;
1142    }
1143    if (PyBytes_Check(tag)) {
1144        const char *p = PyBytes_AS_STRING(tag);
1145        const Py_ssize_t len = PyBytes_GET_SIZE(tag);
1146        if (len >= 3 && p[0] == '{' && (
1147                p[1] == '}' || (p[1] == '*' && p[2] == '}'))) {
1148            /* wildcard: '{}tag' or '{*}tag' */
1149            return 1;
1150        }
1151        for (i = 0; i < len; i++) {
1152            if (p[i] == '{')
1153                check = 0;
1154            else if (p[i] == '}')
1155                check = 1;
1156            else if (check && PATHCHAR(p[i]))
1157                return 1;
1158        }
1159        return 0;
1160    }
1161
1162    return 1; /* unknown type; might be path expression */
1163}
1164
1165/*[clinic input]
1166_elementtree.Element.extend
1167
1168    elements: object
1169    /
1170
1171[clinic start generated code]*/
1172
1173static PyObject *
1174_elementtree_Element_extend(ElementObject *self, PyObject *elements)
1175/*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/
1176{
1177    PyObject* seq;
1178    Py_ssize_t i;
1179
1180    seq = PySequence_Fast(elements, "");
1181    if (!seq) {
1182        PyErr_Format(
1183            PyExc_TypeError,
1184            "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name
1185            );
1186        return NULL;
1187    }
1188
1189    for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
1190        PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1191        Py_INCREF(element);
1192        if (element_add_subelement(self, element) < 0) {
1193            Py_DECREF(seq);
1194            Py_DECREF(element);
1195            return NULL;
1196        }
1197        Py_DECREF(element);
1198    }
1199
1200    Py_DECREF(seq);
1201
1202    Py_RETURN_NONE;
1203}
1204
1205/*[clinic input]
1206_elementtree.Element.find
1207
1208    path: object
1209    namespaces: object = None
1210
1211[clinic start generated code]*/
1212
1213static PyObject *
1214_elementtree_Element_find_impl(ElementObject *self, PyObject *path,
1215                               PyObject *namespaces)
1216/*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/
1217{
1218    Py_ssize_t i;
1219    elementtreestate *st = ET_STATE_GLOBAL;
1220
1221    if (checkpath(path) || namespaces != Py_None) {
1222        _Py_IDENTIFIER(find);
1223        return _PyObject_CallMethodIdObjArgs(
1224            st->elementpath_obj, &PyId_find, self, path, namespaces, NULL
1225            );
1226    }
1227
1228    if (!self->extra)
1229        Py_RETURN_NONE;
1230
1231    for (i = 0; i < self->extra->length; i++) {
1232        PyObject* item = self->extra->children[i];
1233        int rc;
1234        assert(Element_Check(item));
1235        Py_INCREF(item);
1236        rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
1237        if (rc > 0)
1238            return item;
1239        Py_DECREF(item);
1240        if (rc < 0)
1241            return NULL;
1242    }
1243
1244    Py_RETURN_NONE;
1245}
1246
1247/*[clinic input]
1248_elementtree.Element.findtext
1249
1250    path: object
1251    default: object = None
1252    namespaces: object = None
1253
1254[clinic start generated code]*/
1255
1256static PyObject *
1257_elementtree_Element_findtext_impl(ElementObject *self, PyObject *path,
1258                                   PyObject *default_value,
1259                                   PyObject *namespaces)
1260/*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/
1261{
1262    Py_ssize_t i;
1263    _Py_IDENTIFIER(findtext);
1264    elementtreestate *st = ET_STATE_GLOBAL;
1265
1266    if (checkpath(path) || namespaces != Py_None)
1267        return _PyObject_CallMethodIdObjArgs(
1268            st->elementpath_obj, &PyId_findtext,
1269            self, path, default_value, namespaces, NULL
1270            );
1271
1272    if (!self->extra) {
1273        Py_INCREF(default_value);
1274        return default_value;
1275    }
1276
1277    for (i = 0; i < self->extra->length; i++) {
1278        PyObject *item = self->extra->children[i];
1279        int rc;
1280        assert(Element_Check(item));
1281        Py_INCREF(item);
1282        rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
1283        if (rc > 0) {
1284            PyObject* text = element_get_text((ElementObject*)item);
1285            if (text == Py_None) {
1286                Py_DECREF(item);
1287                return PyUnicode_New(0, 0);
1288            }
1289            Py_XINCREF(text);
1290            Py_DECREF(item);
1291            return text;
1292        }
1293        Py_DECREF(item);
1294        if (rc < 0)
1295            return NULL;
1296    }
1297
1298    Py_INCREF(default_value);
1299    return default_value;
1300}
1301
1302/*[clinic input]
1303_elementtree.Element.findall
1304
1305    path: object
1306    namespaces: object = None
1307
1308[clinic start generated code]*/
1309
1310static PyObject *
1311_elementtree_Element_findall_impl(ElementObject *self, PyObject *path,
1312                                  PyObject *namespaces)
1313/*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/
1314{
1315    Py_ssize_t i;
1316    PyObject* out;
1317    elementtreestate *st = ET_STATE_GLOBAL;
1318
1319    if (checkpath(path) || namespaces != Py_None) {
1320        _Py_IDENTIFIER(findall);
1321        return _PyObject_CallMethodIdObjArgs(
1322            st->elementpath_obj, &PyId_findall, self, path, namespaces, NULL
1323            );
1324    }
1325
1326    out = PyList_New(0);
1327    if (!out)
1328        return NULL;
1329
1330    if (!self->extra)
1331        return out;
1332
1333    for (i = 0; i < self->extra->length; i++) {
1334        PyObject* item = self->extra->children[i];
1335        int rc;
1336        assert(Element_Check(item));
1337        Py_INCREF(item);
1338        rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
1339        if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1340            Py_DECREF(item);
1341            Py_DECREF(out);
1342            return NULL;
1343        }
1344        Py_DECREF(item);
1345    }
1346
1347    return out;
1348}
1349
1350/*[clinic input]
1351_elementtree.Element.iterfind
1352
1353    path: object
1354    namespaces: object = None
1355
1356[clinic start generated code]*/
1357
1358static PyObject *
1359_elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path,
1360                                   PyObject *namespaces)
1361/*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/
1362{
1363    PyObject* tag = path;
1364    _Py_IDENTIFIER(iterfind);
1365    elementtreestate *st = ET_STATE_GLOBAL;
1366
1367    return _PyObject_CallMethodIdObjArgs(
1368        st->elementpath_obj, &PyId_iterfind, self, tag, namespaces, NULL);
1369}
1370
1371/*[clinic input]
1372_elementtree.Element.get
1373
1374    key: object
1375    default: object = None
1376
1377[clinic start generated code]*/
1378
1379static PyObject *
1380_elementtree_Element_get_impl(ElementObject *self, PyObject *key,
1381                              PyObject *default_value)
1382/*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/
1383{
1384    if (self->extra && self->extra->attrib) {
1385        PyObject *attrib = self->extra->attrib;
1386        Py_INCREF(attrib);
1387        PyObject *value = PyDict_GetItemWithError(attrib, key);
1388        Py_XINCREF(value);
1389        Py_DECREF(attrib);
1390        if (value != NULL || PyErr_Occurred()) {
1391            return value;
1392        }
1393    }
1394
1395    Py_INCREF(default_value);
1396    return default_value;
1397}
1398
1399static PyObject *
1400create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1401
1402
1403/*[clinic input]
1404_elementtree.Element.iter
1405
1406    tag: object = None
1407
1408[clinic start generated code]*/
1409
1410static PyObject *
1411_elementtree_Element_iter_impl(ElementObject *self, PyObject *tag)
1412/*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/
1413{
1414    if (PyUnicode_Check(tag)) {
1415        if (PyUnicode_READY(tag) < 0)
1416            return NULL;
1417        if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
1418            tag = Py_None;
1419    }
1420    else if (PyBytes_Check(tag)) {
1421        if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
1422            tag = Py_None;
1423    }
1424
1425    return create_elementiter(self, tag, 0);
1426}
1427
1428
1429/*[clinic input]
1430_elementtree.Element.itertext
1431
1432[clinic start generated code]*/
1433
1434static PyObject *
1435_elementtree_Element_itertext_impl(ElementObject *self)
1436/*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/
1437{
1438    return create_elementiter(self, Py_None, 1);
1439}
1440
1441
1442static PyObject*
1443element_getitem(PyObject* self_, Py_ssize_t index)
1444{
1445    ElementObject* self = (ElementObject*) self_;
1446
1447    if (!self->extra || index < 0 || index >= self->extra->length) {
1448        PyErr_SetString(
1449            PyExc_IndexError,
1450            "child index out of range"
1451            );
1452        return NULL;
1453    }
1454
1455    Py_INCREF(self->extra->children[index]);
1456    return self->extra->children[index];
1457}
1458
1459/*[clinic input]
1460_elementtree.Element.insert
1461
1462    index: Py_ssize_t
1463    subelement: object(subclass_of='&Element_Type')
1464    /
1465
1466[clinic start generated code]*/
1467
1468static PyObject *
1469_elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index,
1470                                 PyObject *subelement)
1471/*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/
1472{
1473    Py_ssize_t i;
1474
1475    if (!self->extra) {
1476        if (create_extra(self, NULL) < 0)
1477            return NULL;
1478    }
1479
1480    if (index < 0) {
1481        index += self->extra->length;
1482        if (index < 0)
1483            index = 0;
1484    }
1485    if (index > self->extra->length)
1486        index = self->extra->length;
1487
1488    if (element_resize(self, 1) < 0)
1489        return NULL;
1490
1491    for (i = self->extra->length; i > index; i--)
1492        self->extra->children[i] = self->extra->children[i-1];
1493
1494    Py_INCREF(subelement);
1495    self->extra->children[index] = subelement;
1496
1497    self->extra->length++;
1498
1499    Py_RETURN_NONE;
1500}
1501
1502/*[clinic input]
1503_elementtree.Element.items
1504
1505[clinic start generated code]*/
1506
1507static PyObject *
1508_elementtree_Element_items_impl(ElementObject *self)
1509/*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/
1510{
1511    if (!self->extra || !self->extra->attrib)
1512        return PyList_New(0);
1513
1514    return PyDict_Items(self->extra->attrib);
1515}
1516
1517/*[clinic input]
1518_elementtree.Element.keys
1519
1520[clinic start generated code]*/
1521
1522static PyObject *
1523_elementtree_Element_keys_impl(ElementObject *self)
1524/*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/
1525{
1526    if (!self->extra || !self->extra->attrib)
1527        return PyList_New(0);
1528
1529    return PyDict_Keys(self->extra->attrib);
1530}
1531
1532static Py_ssize_t
1533element_length(ElementObject* self)
1534{
1535    if (!self->extra)
1536        return 0;
1537
1538    return self->extra->length;
1539}
1540
1541/*[clinic input]
1542_elementtree.Element.makeelement
1543
1544    tag: object
1545    attrib: object(subclass_of='&PyDict_Type')
1546    /
1547
1548[clinic start generated code]*/
1549
1550static PyObject *
1551_elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag,
1552                                      PyObject *attrib)
1553/*[clinic end generated code: output=4109832d5bb789ef input=2279d974529c3861]*/
1554{
1555    PyObject* elem;
1556
1557    attrib = PyDict_Copy(attrib);
1558    if (!attrib)
1559        return NULL;
1560
1561    elem = create_new_element(tag, attrib);
1562
1563    Py_DECREF(attrib);
1564
1565    return elem;
1566}
1567
1568/*[clinic input]
1569_elementtree.Element.remove
1570
1571    subelement: object(subclass_of='&Element_Type')
1572    /
1573
1574[clinic start generated code]*/
1575
1576static PyObject *
1577_elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
1578/*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/
1579{
1580    Py_ssize_t i;
1581    int rc;
1582    PyObject *found;
1583
1584    if (!self->extra) {
1585        /* element has no children, so raise exception */
1586        PyErr_SetString(
1587            PyExc_ValueError,
1588            "list.remove(x): x not in list"
1589            );
1590        return NULL;
1591    }
1592
1593    for (i = 0; i < self->extra->length; i++) {
1594        if (self->extra->children[i] == subelement)
1595            break;
1596        rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
1597        if (rc > 0)
1598            break;
1599        if (rc < 0)
1600            return NULL;
1601    }
1602
1603    if (i >= self->extra->length) {
1604        /* subelement is not in children, so raise exception */
1605        PyErr_SetString(
1606            PyExc_ValueError,
1607            "list.remove(x): x not in list"
1608            );
1609        return NULL;
1610    }
1611
1612    found = self->extra->children[i];
1613
1614    self->extra->length--;
1615    for (; i < self->extra->length; i++)
1616        self->extra->children[i] = self->extra->children[i+1];
1617
1618    Py_DECREF(found);
1619    Py_RETURN_NONE;
1620}
1621
1622static PyObject*
1623element_repr(ElementObject* self)
1624{
1625    int status;
1626
1627    if (self->tag == NULL)
1628        return PyUnicode_FromFormat("<Element at %p>", self);
1629
1630    status = Py_ReprEnter((PyObject *)self);
1631    if (status == 0) {
1632        PyObject *res;
1633        res = PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1634        Py_ReprLeave((PyObject *)self);
1635        return res;
1636    }
1637    if (status > 0)
1638        PyErr_Format(PyExc_RuntimeError,
1639                     "reentrant call inside %s.__repr__",
1640                     Py_TYPE(self)->tp_name);
1641    return NULL;
1642}
1643
1644/*[clinic input]
1645_elementtree.Element.set
1646
1647    key: object
1648    value: object
1649    /
1650
1651[clinic start generated code]*/
1652
1653static PyObject *
1654_elementtree_Element_set_impl(ElementObject *self, PyObject *key,
1655                              PyObject *value)
1656/*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/
1657{
1658    PyObject* attrib;
1659
1660    if (!self->extra) {
1661        if (create_extra(self, NULL) < 0)
1662            return NULL;
1663    }
1664
1665    attrib = element_get_attrib(self);
1666    if (!attrib)
1667        return NULL;
1668
1669    if (PyDict_SetItem(attrib, key, value) < 0)
1670        return NULL;
1671
1672    Py_RETURN_NONE;
1673}
1674
1675static int
1676element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
1677{
1678    ElementObject* self = (ElementObject*) self_;
1679    Py_ssize_t i;
1680    PyObject* old;
1681
1682    if (!self->extra || index < 0 || index >= self->extra->length) {
1683        PyErr_SetString(
1684            PyExc_IndexError,
1685            "child assignment index out of range");
1686        return -1;
1687    }
1688
1689    old = self->extra->children[index];
1690
1691    if (item) {
1692        if (!Element_Check(item)) {
1693            raise_type_error(item);
1694            return -1;
1695        }
1696        Py_INCREF(item);
1697        self->extra->children[index] = item;
1698    } else {
1699        self->extra->length--;
1700        for (i = index; i < self->extra->length; i++)
1701            self->extra->children[i] = self->extra->children[i+1];
1702    }
1703
1704    Py_DECREF(old);
1705
1706    return 0;
1707}
1708
1709static PyObject*
1710element_subscr(PyObject* self_, PyObject* item)
1711{
1712    ElementObject* self = (ElementObject*) self_;
1713
1714    if (PyIndex_Check(item)) {
1715        Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1716
1717        if (i == -1 && PyErr_Occurred()) {
1718            return NULL;
1719        }
1720        if (i < 0 && self->extra)
1721            i += self->extra->length;
1722        return element_getitem(self_, i);
1723    }
1724    else if (PySlice_Check(item)) {
1725        Py_ssize_t start, stop, step, slicelen, i;
1726        size_t cur;
1727        PyObject* list;
1728
1729        if (!self->extra)
1730            return PyList_New(0);
1731
1732        if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
1733            return NULL;
1734        }
1735        slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1736                                         step);
1737
1738        if (slicelen <= 0)
1739            return PyList_New(0);
1740        else {
1741            list = PyList_New(slicelen);
1742            if (!list)
1743                return NULL;
1744
1745            for (cur = start, i = 0; i < slicelen;
1746                 cur += step, i++) {
1747                PyObject* item = self->extra->children[cur];
1748                Py_INCREF(item);
1749                PyList_SET_ITEM(list, i, item);
1750            }
1751
1752            return list;
1753        }
1754    }
1755    else {
1756        PyErr_SetString(PyExc_TypeError,
1757                "element indices must be integers");
1758        return NULL;
1759    }
1760}
1761
1762static int
1763element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1764{
1765    ElementObject* self = (ElementObject*) self_;
1766
1767    if (PyIndex_Check(item)) {
1768        Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1769
1770        if (i == -1 && PyErr_Occurred()) {
1771            return -1;
1772        }
1773        if (i < 0 && self->extra)
1774            i += self->extra->length;
1775        return element_setitem(self_, i, value);
1776    }
1777    else if (PySlice_Check(item)) {
1778        Py_ssize_t start, stop, step, slicelen, newlen, i;
1779        size_t cur;
1780
1781        PyObject* recycle = NULL;
1782        PyObject* seq;
1783
1784        if (!self->extra) {
1785            if (create_extra(self, NULL) < 0)
1786                return -1;
1787        }
1788
1789        if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
1790            return -1;
1791        }
1792        slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1793                                         step);
1794
1795        if (value == NULL) {
1796            /* Delete slice */
1797            size_t cur;
1798            Py_ssize_t i;
1799
1800            if (slicelen <= 0)
1801                return 0;
1802
1803            /* Since we're deleting, the direction of the range doesn't matter,
1804             * so for simplicity make it always ascending.
1805            */
1806            if (step < 0) {
1807                stop = start + 1;
1808                start = stop + step * (slicelen - 1) - 1;
1809                step = -step;
1810            }
1811
1812            assert((size_t)slicelen <= SIZE_MAX / sizeof(PyObject *));
1813
1814            /* recycle is a list that will contain all the children
1815             * scheduled for removal.
1816            */
1817            if (!(recycle = PyList_New(slicelen))) {
1818                return -1;
1819            }
1820
1821            /* This loop walks over all the children that have to be deleted,
1822             * with cur pointing at them. num_moved is the amount of children
1823             * until the next deleted child that have to be "shifted down" to
1824             * occupy the deleted's places.
1825             * Note that in the ith iteration, shifting is done i+i places down
1826             * because i children were already removed.
1827            */
1828            for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1829                /* Compute how many children have to be moved, clipping at the
1830                 * list end.
1831                */
1832                Py_ssize_t num_moved = step - 1;
1833                if (cur + step >= (size_t)self->extra->length) {
1834                    num_moved = self->extra->length - cur - 1;
1835                }
1836
1837                PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1838
1839                memmove(
1840                    self->extra->children + cur - i,
1841                    self->extra->children + cur + 1,
1842                    num_moved * sizeof(PyObject *));
1843            }
1844
1845            /* Leftover "tail" after the last removed child */
1846            cur = start + (size_t)slicelen * step;
1847            if (cur < (size_t)self->extra->length) {
1848                memmove(
1849                    self->extra->children + cur - slicelen,
1850                    self->extra->children + cur,
1851                    (self->extra->length - cur) * sizeof(PyObject *));
1852            }
1853
1854            self->extra->length -= slicelen;
1855
1856            /* Discard the recycle list with all the deleted sub-elements */
1857            Py_DECREF(recycle);
1858            return 0;
1859        }
1860
1861        /* A new slice is actually being assigned */
1862        seq = PySequence_Fast(value, "");
1863        if (!seq) {
1864            PyErr_Format(
1865                PyExc_TypeError,
1866                "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1867                );
1868            return -1;
1869        }
1870        newlen = PySequence_Fast_GET_SIZE(seq);
1871
1872        if (step !=  1 && newlen != slicelen)
1873        {
1874            Py_DECREF(seq);
1875            PyErr_Format(PyExc_ValueError,
1876                "attempt to assign sequence of size %zd "
1877                "to extended slice of size %zd",
1878                newlen, slicelen
1879                );
1880            return -1;
1881        }
1882
1883        /* Resize before creating the recycle bin, to prevent refleaks. */
1884        if (newlen > slicelen) {
1885            if (element_resize(self, newlen - slicelen) < 0) {
1886                Py_DECREF(seq);
1887                return -1;
1888            }
1889        }
1890
1891        for (i = 0; i < newlen; i++) {
1892            PyObject *element = PySequence_Fast_GET_ITEM(seq, i);
1893            if (!Element_Check(element)) {
1894                raise_type_error(element);
1895                Py_DECREF(seq);
1896                return -1;
1897            }
1898        }
1899
1900        if (slicelen > 0) {
1901            /* to avoid recursive calls to this method (via decref), move
1902               old items to the recycle bin here, and get rid of them when
1903               we're done modifying the element */
1904            recycle = PyList_New(slicelen);
1905            if (!recycle) {
1906                Py_DECREF(seq);
1907                return -1;
1908            }
1909            for (cur = start, i = 0; i < slicelen;
1910                 cur += step, i++)
1911                PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1912        }
1913
1914        if (newlen < slicelen) {
1915            /* delete slice */
1916            for (i = stop; i < self->extra->length; i++)
1917                self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1918        } else if (newlen > slicelen) {
1919            /* insert slice */
1920            for (i = self->extra->length-1; i >= stop; i--)
1921                self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1922        }
1923
1924        /* replace the slice */
1925        for (cur = start, i = 0; i < newlen;
1926             cur += step, i++) {
1927            PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1928            Py_INCREF(element);
1929            self->extra->children[cur] = element;
1930        }
1931
1932        self->extra->length += newlen - slicelen;
1933
1934        Py_DECREF(seq);
1935
1936        /* discard the recycle bin, and everything in it */
1937        Py_XDECREF(recycle);
1938
1939        return 0;
1940    }
1941    else {
1942        PyErr_SetString(PyExc_TypeError,
1943                "element indices must be integers");
1944        return -1;
1945    }
1946}
1947
1948static PyObject*
1949element_tag_getter(ElementObject *self, void *closure)
1950{
1951    PyObject *res = self->tag;
1952    Py_INCREF(res);
1953    return res;
1954}
1955
1956static PyObject*
1957element_text_getter(ElementObject *self, void *closure)
1958{
1959    PyObject *res = element_get_text(self);
1960    Py_XINCREF(res);
1961    return res;
1962}
1963
1964static PyObject*
1965element_tail_getter(ElementObject *self, void *closure)
1966{
1967    PyObject *res = element_get_tail(self);
1968    Py_XINCREF(res);
1969    return res;
1970}
1971
1972static PyObject*
1973element_attrib_getter(ElementObject *self, void *closure)
1974{
1975    PyObject *res;
1976    if (!self->extra) {
1977        if (create_extra(self, NULL) < 0)
1978            return NULL;
1979    }
1980    res = element_get_attrib(self);
1981    Py_XINCREF(res);
1982    return res;
1983}
1984
/* macro for setter validation: a NULL value means "delete the attribute",
 * which is refused with AttributeError by returning -1 from the enclosing
 * setter.  Wrapped in do { } while (0) so the expansion is a single
 * statement and cannot capture a following 'else'. */
#define _VALIDATE_ATTR_VALUE(V)                     \
    do {                                            \
        if ((V) == NULL) {                          \
            PyErr_SetString(                        \
                PyExc_AttributeError,               \
                "can't delete element attribute");  \
            return -1;                              \
        }                                           \
    } while (0)
1993
1994static int
1995element_tag_setter(ElementObject *self, PyObject *value, void *closure)
1996{
1997    _VALIDATE_ATTR_VALUE(value);
1998    Py_INCREF(value);
1999    Py_SETREF(self->tag, value);
2000    return 0;
2001}
2002
2003static int
2004element_text_setter(ElementObject *self, PyObject *value, void *closure)
2005{
2006    _VALIDATE_ATTR_VALUE(value);
2007    Py_INCREF(value);
2008    _set_joined_ptr(&self->text, value);
2009    return 0;
2010}
2011
2012static int
2013element_tail_setter(ElementObject *self, PyObject *value, void *closure)
2014{
2015    _VALIDATE_ATTR_VALUE(value);
2016    Py_INCREF(value);
2017    _set_joined_ptr(&self->tail, value);
2018    return 0;
2019}
2020
2021static int
2022element_attrib_setter(ElementObject *self, PyObject *value, void *closure)
2023{
2024    _VALIDATE_ATTR_VALUE(value);
2025    if (!PyDict_Check(value)) {
2026        PyErr_Format(PyExc_TypeError,
2027                     "attrib must be dict, not %.200s",
2028                     Py_TYPE(value)->tp_name);
2029        return -1;
2030    }
2031    if (!self->extra) {
2032        if (create_extra(self, NULL) < 0)
2033            return -1;
2034    }
2035    Py_INCREF(value);
2036    Py_XSETREF(self->extra->attrib, value);
2037    return 0;
2038}
2039
2040static PySequenceMethods element_as_sequence = {
2041    (lenfunc) element_length,
2042    0, /* sq_concat */
2043    0, /* sq_repeat */
2044    element_getitem,
2045    0,
2046    element_setitem,
2047    0,
2048};
2049
2050/******************************* Element iterator ****************************/
2051
2052/* ElementIterObject represents the iteration state over an XML element in
2053 * pre-order traversal. To keep track of which sub-element should be returned
2054 * next, a stack of parents is maintained. This is a standard stack-based
2055 * iterative pre-order traversal of a tree.
2056 * The stack is managed using a contiguous array.
2057 * Each stack item contains the saved parent to which we should return after
2058 * the current one is exhausted, and the next child to examine in that parent.
2059 */
/* One entry of the element iterator's parent stack. */
2060typedef struct ParentLocator_t {
    /* Saved parent to return to; parent_stack_push_new() takes a reference
     * when the entry is pushed. */
2061    ElementObject *parent;
    /* Index of the next child to examine in 'parent'; starts at 0. */
2062    Py_ssize_t child_index;
2063} ParentLocator;
2064
/* State of one element iterator (shared by iter() and itertext()). */
2065typedef struct {
2066    PyObject_HEAD
    /* Array of stack entries; released with PyMem_Free() on dealloc. */
2067    ParentLocator *parent_stack;
    /* Number of entries of parent_stack currently in use. */
2068    Py_ssize_t parent_stack_used;
    /* Allocated capacity of parent_stack; doubled when the stack is full. */
2069    Py_ssize_t parent_stack_size;
    /* Element the iteration starts from; elementiter_next() sets it to NULL
     * once it has been picked up. */
2070    ElementObject *root_element;
    /* Tag passed to create_elementiter() - presumably the filter applied to
     * yielded elements; confirm against elementiter_next(). */
2071    PyObject *sought_tag;
    /* Non-zero when the iterator serves itertext() rather than iter(). */
2072    int gettext;
2073} ElementIterObject;
2074
2075
2076static void
2077elementiter_dealloc(ElementIterObject *it)
2078{
2079    Py_ssize_t i = it->parent_stack_used;
2080    it->parent_stack_used = 0;
2081    /* bpo-31095: UnTrack is needed before calling any callbacks */
2082    PyObject_GC_UnTrack(it);
2083    while (i--)
2084        Py_XDECREF(it->parent_stack[i].parent);
2085    PyMem_Free(it->parent_stack);
2086
2087    Py_XDECREF(it->sought_tag);
2088    Py_XDECREF(it->root_element);
2089
2090    PyObject_GC_Del(it);
2091}
2092
2093static int
2094elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
2095{
2096    Py_ssize_t i = it->parent_stack_used;
2097    while (i--)
2098        Py_VISIT(it->parent_stack[i].parent);
2099
2100    Py_VISIT(it->root_element);
2101    Py_VISIT(it->sought_tag);
2102    return 0;
2103}
2104
2105/* Helper function for elementiter_next. Add a new parent to the parent stack.
2106 */
2107static int
2108parent_stack_push_new(ElementIterObject *it, ElementObject *parent)
2109{
2110    ParentLocator *item;
2111
2112    if (it->parent_stack_used >= it->parent_stack_size) {
2113        Py_ssize_t new_size = it->parent_stack_size * 2;  /* never overflow */
2114        ParentLocator *parent_stack = it->parent_stack;
2115        PyMem_Resize(parent_stack, ParentLocator, new_size);
2116        if (parent_stack == NULL)
2117            return -1;
2118        it->parent_stack = parent_stack;
2119        it->parent_stack_size = new_size;
2120    }
2121    item = it->parent_stack + it->parent_stack_used++;
2122    Py_INCREF(parent);
2123    item->parent = parent;
2124    item->child_index = 0;
2125    return 0;
2126}
2127
/* tp_iternext implementation of the element iterator.
 *
 * Returns a new reference to the next element whose tag matches
 * it->sought_tag (every element when sought_tag is Py_None) or, when
 * it->gettext is set, to the next text/tail object that is true.  Returns
 * NULL with an exception set on exhaustion (StopIteration) or when a callee
 * fails.
 */
static PyObject *
elementiter_next(ElementIterObject *it)
{
    /* Sub-element iterator.
     *
     * A short note on gettext: this function serves both the iter() and
     * itertext() methods to avoid code duplication. However, there are a few
     * small differences in the way these iterations work. Namely:
     *   - itertext() only yields text from nodes that have it, and continues
     *     iterating when a node doesn't have text (so it doesn't return any
     *     node like iter())
     *   - itertext() also has to handle tail, after finishing with all the
     *     children of a node.
     */
    int rc;
    ElementObject *elem;
    PyObject *text;

    /* Invariant for the loop body: whenever control reaches the code after
     * the if/else below (or the gettext label), `elem` is an owned
     * reference that must be returned or released.
     */
    while (1) {
        /* Handle the case reached in the beginning and end of iteration, where
         * the parent stack is empty. If root_element is NULL and we're here, the
         * iterator is exhausted.
         */
        if (!it->parent_stack_used) {
            if (!it->root_element) {
                PyErr_SetNone(PyExc_StopIteration);
                return NULL;
            }

            elem = it->root_element;  /* steals a reference */
            it->root_element = NULL;
        }
        else {
            /* See if there are children left to traverse in the current parent. If
             * yes, visit the next child. If not, pop the stack and try again.
             */
            ParentLocator *item = &it->parent_stack[it->parent_stack_used - 1];
            Py_ssize_t child_index = item->child_index;
            ElementObjectExtra *extra;
            elem = item->parent;
            extra = elem->extra;
            if (!extra || child_index >= extra->length) {
                /* Pop: the reference the stack entry held now belongs to
                 * `elem`. */
                it->parent_stack_used--;
                /* Note that extra condition on it->parent_stack_used here;
                 * this is because itertext() is supposed to only return *inner*
                 * text, not text following the element it began iteration with.
                 */
                if (it->gettext && it->parent_stack_used) {
                    text = element_get_tail(elem);
                    goto gettext;
                }
                Py_DECREF(elem);
                continue;
            }

            /* Descend into the next child; take our own reference to it. */
            assert(Element_Check(extra->children[child_index]));
            elem = (ElementObject *)extra->children[child_index];
            item->child_index++;
            Py_INCREF(elem);
        }

        /* The stack takes its own reference to elem; ours is kept for the
         * return value (or released below). */
        if (parent_stack_push_new(it, elem) < 0) {
            Py_DECREF(elem);
            PyErr_NoMemory();
            return NULL;
        }
        if (it->gettext) {
            text = element_get_text(elem);
            goto gettext;
        }

        if (it->sought_tag == Py_None)
            return (PyObject *)elem;

        rc = PyObject_RichCompareBool(elem->tag, it->sought_tag, Py_EQ);
        if (rc > 0)
            return (PyObject *)elem;

        /* No match (rc == 0) or comparison error (rc < 0). */
        Py_DECREF(elem);
        if (rc < 0)
            return NULL;
        continue;

gettext:
        /* `text` is used here as a borrowed reference (it is only kept after
         * an explicit Py_INCREF); NULL means the getter failed. */
        if (!text) {
            Py_DECREF(elem);
            return NULL;
        }
        if (text == Py_None) {
            Py_DECREF(elem);
        }
        else {
            /* Own the text before releasing the element. */
            Py_INCREF(text);
            Py_DECREF(elem);
            rc = PyObject_IsTrue(text);
            if (rc > 0)
                return text;
            /* Empty text (rc == 0) is skipped; rc < 0 is an error. */
            Py_DECREF(text);
            if (rc < 0)
                return NULL;
        }
    }

    return NULL;
}
2233
2234
2235static PyTypeObject ElementIter_Type = {
2236    PyVarObject_HEAD_INIT(NULL, 0)
2237    /* Using the module's name since the pure-Python implementation does not
2238       have such a type. */
2239    "_elementtree._element_iterator",           /* tp_name */
2240    sizeof(ElementIterObject),                  /* tp_basicsize */
2241    0,                                          /* tp_itemsize */
2242    /* methods */
2243    (destructor)elementiter_dealloc,            /* tp_dealloc */
2244    0,                                          /* tp_vectorcall_offset */
2245    0,                                          /* tp_getattr */
2246    0,                                          /* tp_setattr */
2247    0,                                          /* tp_as_async */
2248    0,                                          /* tp_repr */
2249    0,                                          /* tp_as_number */
2250    0,                                          /* tp_as_sequence */
2251    0,                                          /* tp_as_mapping */
2252    0,                                          /* tp_hash */
2253    0,                                          /* tp_call */
2254    0,                                          /* tp_str */
2255    0,                                          /* tp_getattro */
2256    0,                                          /* tp_setattro */
2257    0,                                          /* tp_as_buffer */
2258    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,    /* tp_flags */
2259    0,                                          /* tp_doc */
2260    (traverseproc)elementiter_traverse,         /* tp_traverse */
2261    0,                                          /* tp_clear */
2262    0,                                          /* tp_richcompare */
2263    0,                                          /* tp_weaklistoffset */
2264    PyObject_SelfIter,                          /* tp_iter */
2265    (iternextfunc)elementiter_next,             /* tp_iternext */
2266    0,                                          /* tp_methods */
2267    0,                                          /* tp_members */
2268    0,                                          /* tp_getset */
2269    0,                                          /* tp_base */
2270    0,                                          /* tp_dict */
2271    0,                                          /* tp_descr_get */
2272    0,                                          /* tp_descr_set */
2273    0,                                          /* tp_dictoffset */
2274    0,                                          /* tp_init */
2275    0,                                          /* tp_alloc */
2276    0,                                          /* tp_new */
2277};
2278
2279#define INIT_PARENT_STACK_SIZE 8
2280
2281static PyObject *
2282create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2283{
2284    ElementIterObject *it;
2285
2286    it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2287    if (!it)
2288        return NULL;
2289
2290    Py_INCREF(tag);
2291    it->sought_tag = tag;
2292    it->gettext = gettext;
2293    Py_INCREF(self);
2294    it->root_element = self;
2295
2296    it->parent_stack = PyMem_New(ParentLocator, INIT_PARENT_STACK_SIZE);
2297    if (it->parent_stack == NULL) {
2298        Py_DECREF(it);
2299        PyErr_NoMemory();
2300        return NULL;
2301    }
2302    it->parent_stack_used = 0;
2303    it->parent_stack_size = INIT_PARENT_STACK_SIZE;
2304
2305    PyObject_GC_Track(it);
2306
2307    return (PyObject *)it;
2308}
2309
2310
2311/* ==================================================================== */
2312/* the tree builder type */
2313
/* Instance layout of TreeBuilder objects. */
typedef struct {
    PyObject_HEAD

    PyObject *root; /* root node (first created node) */

    PyObject *this; /* current node */
    PyObject *last; /* most recently created node */
    PyObject *last_for_tail; /* most recently created node that takes a tail */

    PyObject *data; /* data collector (string or list), or NULL */

    PyObject *stack; /* element stack */
    Py_ssize_t index; /* current stack size (0 means empty) */

    /* Factories; NULL means "not set".  With element_factory == NULL,
       treebuilder_handle_start() calls create_new_element() instead. */
    PyObject *element_factory;
    PyObject *comment_factory;
    PyObject *pi_factory;

    /* element tracing */
    PyObject *events_append; /* the append method of the list of events, or NULL */
    PyObject *start_event_obj; /* event objects (NULL to ignore) */
    PyObject *end_event_obj;
    PyObject *start_ns_event_obj;
    PyObject *end_ns_event_obj;
    PyObject *comment_event_obj;
    PyObject *pi_event_obj;

    /* Nonzero: nodes produced by the comment / PI factory are also added to
       the current element (see the comment and pi handlers). */
    char insert_comments;
    char insert_pis;
} TreeBuilderObject;

/* True when `op` is exactly a TreeBuilder (subclasses do not match). */
#define TreeBuilder_CheckExact(op) Py_IS_TYPE((op), &TreeBuilder_Type)
2346
2347/* -------------------------------------------------------------------- */
2348/* constructor and destructor */
2349
2350static PyObject *
2351treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2352{
2353    TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2354    if (t != NULL) {
2355        t->root = NULL;
2356
2357        Py_INCREF(Py_None);
2358        t->this = Py_None;
2359        Py_INCREF(Py_None);
2360        t->last = Py_None;
2361
2362        t->data = NULL;
2363        t->element_factory = NULL;
2364        t->comment_factory = NULL;
2365        t->pi_factory = NULL;
2366        t->stack = PyList_New(20);
2367        if (!t->stack) {
2368            Py_DECREF(t->this);
2369            Py_DECREF(t->last);
2370            Py_DECREF((PyObject *) t);
2371            return NULL;
2372        }
2373        t->index = 0;
2374
2375        t->events_append = NULL;
2376        t->start_event_obj = t->end_event_obj = NULL;
2377        t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2378        t->comment_event_obj = t->pi_event_obj = NULL;
2379        t->insert_comments = t->insert_pis = 0;
2380    }
2381    return (PyObject *)t;
2382}
2383
2384/*[clinic input]
2385_elementtree.TreeBuilder.__init__
2386
2387    element_factory: object = None
2388    *
2389    comment_factory: object = None
2390    pi_factory: object = None
2391    insert_comments: bool = False
2392    insert_pis: bool = False
2393
2394[clinic start generated code]*/
2395
2396static int
2397_elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
2398                                       PyObject *element_factory,
2399                                       PyObject *comment_factory,
2400                                       PyObject *pi_factory,
2401                                       int insert_comments, int insert_pis)
2402/*[clinic end generated code: output=8571d4dcadfdf952 input=ae98a94df20b5cc3]*/
2403{
2404    if (element_factory != Py_None) {
2405        Py_INCREF(element_factory);
2406        Py_XSETREF(self->element_factory, element_factory);
2407    } else {
2408        Py_CLEAR(self->element_factory);
2409    }
2410
2411    if (comment_factory == Py_None) {
2412        elementtreestate *st = ET_STATE_GLOBAL;
2413        comment_factory = st->comment_factory;
2414    }
2415    if (comment_factory) {
2416        Py_INCREF(comment_factory);
2417        Py_XSETREF(self->comment_factory, comment_factory);
2418        self->insert_comments = insert_comments;
2419    } else {
2420        Py_CLEAR(self->comment_factory);
2421        self->insert_comments = 0;
2422    }
2423
2424    if (pi_factory == Py_None) {
2425        elementtreestate *st = ET_STATE_GLOBAL;
2426        pi_factory = st->pi_factory;
2427    }
2428    if (pi_factory) {
2429        Py_INCREF(pi_factory);
2430        Py_XSETREF(self->pi_factory, pi_factory);
2431        self->insert_pis = insert_pis;
2432    } else {
2433        Py_CLEAR(self->pi_factory);
2434        self->insert_pis = 0;
2435    }
2436
2437    return 0;
2438}
2439
/* GC traversal helper: report every object reference stored in the builder
 * to `visit`.  Py_VISIT skips NULL fields and returns early from this
 * function if the visitor returns a nonzero value.
 */
static int
treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
{
    /* event hooks */
    Py_VISIT(self->pi_event_obj);
    Py_VISIT(self->comment_event_obj);
    Py_VISIT(self->end_ns_event_obj);
    Py_VISIT(self->start_ns_event_obj);
    Py_VISIT(self->end_event_obj);
    Py_VISIT(self->start_event_obj);
    Py_VISIT(self->events_append);
    /* tree state */
    Py_VISIT(self->root);
    Py_VISIT(self->this);
    Py_VISIT(self->last);
    Py_VISIT(self->last_for_tail);
    Py_VISIT(self->data);
    Py_VISIT(self->stack);
    /* factories */
    Py_VISIT(self->pi_factory);
    Py_VISIT(self->comment_factory);
    Py_VISIT(self->element_factory);
    return 0;
}
2461
/* Drop every object reference held by the builder.  Py_CLEAR resets each
 * field to NULL as it releases it, so fields that are already NULL are
 * skipped and a repeated call is harmless.  Also used by
 * treebuilder_dealloc().  Always returns 0.
 */
static int
treebuilder_gc_clear(TreeBuilderObject *self)
{
    /* event hooks */
    Py_CLEAR(self->pi_event_obj);
    Py_CLEAR(self->comment_event_obj);
    Py_CLEAR(self->end_ns_event_obj);
    Py_CLEAR(self->start_ns_event_obj);
    Py_CLEAR(self->end_event_obj);
    Py_CLEAR(self->start_event_obj);
    Py_CLEAR(self->events_append);
    /* tree state */
    Py_CLEAR(self->stack);
    Py_CLEAR(self->data);
    Py_CLEAR(self->last);
    Py_CLEAR(self->last_for_tail);
    Py_CLEAR(self->this);
    /* factories */
    Py_CLEAR(self->pi_factory);
    Py_CLEAR(self->comment_factory);
    Py_CLEAR(self->element_factory);
    /* the root is released last */
    Py_CLEAR(self->root);
    return 0;
}
2483
2484static void
2485treebuilder_dealloc(TreeBuilderObject *self)
2486{
2487    PyObject_GC_UnTrack(self);
2488    treebuilder_gc_clear(self);
2489    Py_TYPE(self)->tp_free((PyObject *)self);
2490}
2491
2492/* -------------------------------------------------------------------- */
2493/* helpers for handling of arbitrary element-like objects */
2494
2495/*[clinic input]
2496_elementtree._set_factories
2497
2498    comment_factory: object
2499    pi_factory: object
2500    /
2501
2502Change the factories used to create comments and processing instructions.
2503
2504For internal use only.
2505[clinic start generated code]*/
2506
2507static PyObject *
2508_elementtree__set_factories_impl(PyObject *module, PyObject *comment_factory,
2509                                 PyObject *pi_factory)
2510/*[clinic end generated code: output=813b408adee26535 input=99d17627aea7fb3b]*/
2511{
2512    elementtreestate *st = ET_STATE_GLOBAL;
2513    PyObject *old;
2514
2515    if (!PyCallable_Check(comment_factory) && comment_factory != Py_None) {
2516        PyErr_Format(PyExc_TypeError, "Comment factory must be callable, not %.100s",
2517                     Py_TYPE(comment_factory)->tp_name);
2518        return NULL;
2519    }
2520    if (!PyCallable_Check(pi_factory) && pi_factory != Py_None) {
2521        PyErr_Format(PyExc_TypeError, "PI factory must be callable, not %.100s",
2522                     Py_TYPE(pi_factory)->tp_name);
2523        return NULL;
2524    }
2525
2526    old = PyTuple_Pack(2,
2527        st->comment_factory ? st->comment_factory : Py_None,
2528        st->pi_factory ? st->pi_factory : Py_None);
2529
2530    if (comment_factory == Py_None) {
2531        Py_CLEAR(st->comment_factory);
2532    } else {
2533        Py_INCREF(comment_factory);
2534        Py_XSETREF(st->comment_factory, comment_factory);
2535    }
2536    if (pi_factory == Py_None) {
2537        Py_CLEAR(st->pi_factory);
2538    } else {
2539        Py_INCREF(pi_factory);
2540        Py_XSETREF(st->pi_factory, pi_factory);
2541    }
2542
2543    return old;
2544}
2545
2546static int
2547treebuilder_extend_element_text_or_tail(PyObject *element, PyObject **data,
2548                                        PyObject **dest, _Py_Identifier *name)
2549{
2550    /* Fast paths for the "almost always" cases. */
2551    if (Element_CheckExact(element)) {
2552        PyObject *dest_obj = JOIN_OBJ(*dest);
2553        if (dest_obj == Py_None) {
2554            *dest = JOIN_SET(*data, PyList_CheckExact(*data));
2555            *data = NULL;
2556            Py_DECREF(dest_obj);
2557            return 0;
2558        }
2559        else if (JOIN_GET(*dest)) {
2560            if (PyList_SetSlice(dest_obj, PY_SSIZE_T_MAX, PY_SSIZE_T_MAX, *data) < 0) {
2561                return -1;
2562            }
2563            Py_CLEAR(*data);
2564            return 0;
2565        }
2566    }
2567
2568    /*  Fallback for the non-Element / non-trivial cases. */
2569    {
2570        int r;
2571        PyObject* joined;
2572        PyObject* previous = _PyObject_GetAttrId(element, name);
2573        if (!previous)
2574            return -1;
2575        joined = list_join(*data);
2576        if (!joined) {
2577            Py_DECREF(previous);
2578            return -1;
2579        }
2580        if (previous != Py_None) {
2581            PyObject *tmp = PyNumber_Add(previous, joined);
2582            Py_DECREF(joined);
2583            Py_DECREF(previous);
2584            if (!tmp)
2585                return -1;
2586            joined = tmp;
2587        } else {
2588            Py_DECREF(previous);
2589        }
2590
2591        r = _PyObject_SetAttrId(element, name, joined);
2592        Py_DECREF(joined);
2593        if (r < 0)
2594            return -1;
2595        Py_CLEAR(*data);
2596        return 0;
2597    }
2598}
2599
2600LOCAL(int)
2601treebuilder_flush_data(TreeBuilderObject* self)
2602{
2603    if (!self->data) {
2604        return 0;
2605    }
2606
2607    if (!self->last_for_tail) {
2608        PyObject *element = self->last;
2609        _Py_IDENTIFIER(text);
2610        return treebuilder_extend_element_text_or_tail(
2611                element, &self->data,
2612                &((ElementObject *) element)->text, &PyId_text);
2613    }
2614    else {
2615        PyObject *element = self->last_for_tail;
2616        _Py_IDENTIFIER(tail);
2617        return treebuilder_extend_element_text_or_tail(
2618                element, &self->data,
2619                &((ElementObject *) element)->tail, &PyId_tail);
2620    }
2621}
2622
2623static int
2624treebuilder_add_subelement(PyObject *element, PyObject *child)
2625{
2626    _Py_IDENTIFIER(append);
2627    if (Element_CheckExact(element)) {
2628        ElementObject *elem = (ElementObject *) element;
2629        return element_add_subelement(elem, child);
2630    }
2631    else {
2632        PyObject *res;
2633        res = _PyObject_CallMethodIdOneArg(element, &PyId_append, child);
2634        if (res == NULL)
2635            return -1;
2636        Py_DECREF(res);
2637        return 0;
2638    }
2639}
2640
2641LOCAL(int)
2642treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
2643                         PyObject *node)
2644{
2645    if (action != NULL) {
2646        PyObject *res;
2647        PyObject *event = PyTuple_Pack(2, action, node);
2648        if (event == NULL)
2649            return -1;
2650        res = PyObject_CallOneArg(self->events_append, event);
2651        Py_DECREF(event);
2652        if (res == NULL)
2653            return -1;
2654        Py_DECREF(res);
2655    }
2656    return 0;
2657}
2658
2659/* -------------------------------------------------------------------- */
2660/* handlers */
2661
2662LOCAL(PyObject*)
2663treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2664                         PyObject* attrib)
2665{
2666    PyObject* node;
2667    PyObject* this;
2668    elementtreestate *st = ET_STATE_GLOBAL;
2669
2670    if (treebuilder_flush_data(self) < 0) {
2671        return NULL;
2672    }
2673
2674    if (!self->element_factory) {
2675        node = create_new_element(tag, attrib);
2676    } else if (attrib == NULL) {
2677        attrib = PyDict_New();
2678        if (!attrib)
2679            return NULL;
2680        node = PyObject_CallFunctionObjArgs(self->element_factory,
2681                                            tag, attrib, NULL);
2682        Py_DECREF(attrib);
2683    }
2684    else {
2685        node = PyObject_CallFunctionObjArgs(self->element_factory,
2686                                            tag, attrib, NULL);
2687    }
2688    if (!node) {
2689        return NULL;
2690    }
2691
2692    this = self->this;
2693    Py_CLEAR(self->last_for_tail);
2694
2695    if (this != Py_None) {
2696        if (treebuilder_add_subelement(this, node) < 0)
2697            goto error;
2698    } else {
2699        if (self->root) {
2700            PyErr_SetString(
2701                st->parseerror_obj,
2702                "multiple elements on top level"
2703                );
2704            goto error;
2705        }
2706        Py_INCREF(node);
2707        self->root = node;
2708    }
2709
2710    if (self->index < PyList_GET_SIZE(self->stack)) {
2711        if (PyList_SetItem(self->stack, self->index, this) < 0)
2712            goto error;
2713        Py_INCREF(this);
2714    } else {
2715        if (PyList_Append(self->stack, this) < 0)
2716            goto error;
2717    }
2718    self->index++;
2719
2720    Py_INCREF(node);
2721    Py_SETREF(self->this, node);
2722    Py_INCREF(node);
2723    Py_SETREF(self->last, node);
2724
2725    if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
2726        goto error;
2727
2728    return node;
2729
2730  error:
2731    Py_DECREF(node);
2732    return NULL;
2733}
2734
2735LOCAL(PyObject*)
2736treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2737{
2738    if (!self->data) {
2739        if (self->last == Py_None) {
2740            /* ignore calls to data before the first call to start */
2741            Py_RETURN_NONE;
2742        }
2743        /* store the first item as is */
2744        Py_INCREF(data); self->data = data;
2745    } else {
2746        /* more than one item; use a list to collect items */
2747        if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2748            PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
2749            /* XXX this code path unused in Python 3? */
2750            /* expat often generates single character data sections; handle
2751               the most common case by resizing the existing string... */
2752            Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2753            if (_PyBytes_Resize(&self->data, size + 1) < 0)
2754                return NULL;
2755            PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
2756        } else if (PyList_CheckExact(self->data)) {
2757            if (PyList_Append(self->data, data) < 0)
2758                return NULL;
2759        } else {
2760            PyObject* list = PyList_New(2);
2761            if (!list)
2762                return NULL;
2763            PyList_SET_ITEM(list, 0, self->data);
2764            Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2765            self->data = list;
2766        }
2767    }
2768
2769    Py_RETURN_NONE;
2770}
2771
/* Handle an end-tag event: flush pending character data, then pop the
 * element stack so that the element being closed becomes both `last` and
 * `last_for_tail` and its parent becomes `this` again.  `tag` is not used
 * here.
 *
 * Returns a new reference to the closed element, or NULL with an exception
 * set (IndexError when the element stack is empty).
 */
LOCAL(PyObject*)
treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
{
    PyObject* item;

    if (treebuilder_flush_data(self) < 0) {
        return NULL;
    }

    if (self->index == 0) {
        PyErr_SetString(
            PyExc_IndexError,
            "pop from empty stack"
            );
        return NULL;
    }

    item = self->last;
    /* `last` takes over the reference that `this` held ... */
    self->last = self->this;
    /* ... and a second reference is taken for last_for_tail. */
    Py_INCREF(self->last);
    Py_XSETREF(self->last_for_tail, self->last);
    self->index--;
    /* The stack entry stays in the list, so `this` needs its own reference. */
    self->this = PyList_GET_ITEM(self->stack, self->index);
    Py_INCREF(self->this);
    /* Finally release the reference the previous `last` was holding. */
    Py_DECREF(item);

    if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0)
        return NULL;

    Py_INCREF(self->last);
    return (PyObject*) self->last;
}
2804
2805LOCAL(PyObject*)
2806treebuilder_handle_comment(TreeBuilderObject* self, PyObject* text)
2807{
2808    PyObject* comment;
2809    PyObject* this;
2810
2811    if (treebuilder_flush_data(self) < 0) {
2812        return NULL;
2813    }
2814
2815    if (self->comment_factory) {
2816        comment = PyObject_CallOneArg(self->comment_factory, text);
2817        if (!comment)
2818            return NULL;
2819
2820        this = self->this;
2821        if (self->insert_comments && this != Py_None) {
2822            if (treebuilder_add_subelement(this, comment) < 0)
2823                goto error;
2824            Py_INCREF(comment);
2825            Py_XSETREF(self->last_for_tail, comment);
2826        }
2827    } else {
2828        Py_INCREF(text);
2829        comment = text;
2830    }
2831
2832    if (self->events_append && self->comment_event_obj) {
2833        if (treebuilder_append_event(self, self->comment_event_obj, comment) < 0)
2834            goto error;
2835    }
2836
2837    return comment;
2838
2839  error:
2840    Py_DECREF(comment);
2841    return NULL;
2842}
2843
2844LOCAL(PyObject*)
2845treebuilder_handle_pi(TreeBuilderObject* self, PyObject* target, PyObject* text)
2846{
2847    PyObject* pi;
2848    PyObject* this;
2849    PyObject* stack[2] = {target, text};
2850
2851    if (treebuilder_flush_data(self) < 0) {
2852        return NULL;
2853    }
2854
2855    if (self->pi_factory) {
2856        pi = _PyObject_FastCall(self->pi_factory, stack, 2);
2857        if (!pi) {
2858            return NULL;
2859        }
2860
2861        this = self->this;
2862        if (self->insert_pis && this != Py_None) {
2863            if (treebuilder_add_subelement(this, pi) < 0)
2864                goto error;
2865            Py_INCREF(pi);
2866            Py_XSETREF(self->last_for_tail, pi);
2867        }
2868    } else {
2869        pi = PyTuple_Pack(2, target, text);
2870        if (!pi) {
2871            return NULL;
2872        }
2873    }
2874
2875    if (self->events_append && self->pi_event_obj) {
2876        if (treebuilder_append_event(self, self->pi_event_obj, pi) < 0)
2877            goto error;
2878    }
2879
2880    return pi;
2881
2882  error:
2883    Py_DECREF(pi);
2884    return NULL;
2885}
2886
2887LOCAL(PyObject*)
2888treebuilder_handle_start_ns(TreeBuilderObject* self, PyObject* prefix, PyObject* uri)
2889{
2890    PyObject* parcel;
2891
2892    if (self->events_append && self->start_ns_event_obj) {
2893        parcel = PyTuple_Pack(2, prefix, uri);
2894        if (!parcel) {
2895            return NULL;
2896        }
2897
2898        if (treebuilder_append_event(self, self->start_ns_event_obj, parcel) < 0) {
2899            Py_DECREF(parcel);
2900            return NULL;
2901        }
2902        Py_DECREF(parcel);
2903    }
2904
2905    Py_RETURN_NONE;
2906}
2907
2908LOCAL(PyObject*)
2909treebuilder_handle_end_ns(TreeBuilderObject* self, PyObject* prefix)
2910{
2911    if (self->events_append && self->end_ns_event_obj) {
2912        if (treebuilder_append_event(self, self->end_ns_event_obj, prefix) < 0) {
2913            return NULL;
2914        }
2915    }
2916
2917    Py_RETURN_NONE;
2918}
2919
2920/* -------------------------------------------------------------------- */
2921/* methods (in alphabetical order) */
2922
2923/*[clinic input]
2924_elementtree.TreeBuilder.data
2925
2926    data: object
2927    /
2928
2929[clinic start generated code]*/
2930
2931static PyObject *
2932_elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data)
2933/*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/
2934{
2935    return treebuilder_handle_data(self, data);
2936}
2937
2938/*[clinic input]
2939_elementtree.TreeBuilder.end
2940
2941    tag: object
2942    /
2943
2944[clinic start generated code]*/
2945
2946static PyObject *
2947_elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
2948/*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/
2949{
2950    return treebuilder_handle_end(self, tag);
2951}
2952
2953/*[clinic input]
2954_elementtree.TreeBuilder.comment
2955
2956    text: object
2957    /
2958
2959[clinic start generated code]*/
2960
2961static PyObject *
2962_elementtree_TreeBuilder_comment(TreeBuilderObject *self, PyObject *text)
2963/*[clinic end generated code: output=22835be41deeaa27 input=47e7ebc48ed01dfa]*/
2964{
2965    return treebuilder_handle_comment(self, text);
2966}
2967
2968/*[clinic input]
2969_elementtree.TreeBuilder.pi
2970
2971    target: object
2972    text: object = None
2973    /
2974
2975[clinic start generated code]*/
2976
2977static PyObject *
2978_elementtree_TreeBuilder_pi_impl(TreeBuilderObject *self, PyObject *target,
2979                                 PyObject *text)
2980/*[clinic end generated code: output=21eb95ec9d04d1d9 input=349342bd79c35570]*/
2981{
2982    return treebuilder_handle_pi(self, target, text);
2983}
2984
2985LOCAL(PyObject*)
2986treebuilder_done(TreeBuilderObject* self)
2987{
2988    PyObject* res;
2989
2990    /* FIXME: check stack size? */
2991
2992    if (self->root)
2993        res = self->root;
2994    else
2995        res = Py_None;
2996
2997    Py_INCREF(res);
2998    return res;
2999}
3000
3001/*[clinic input]
3002_elementtree.TreeBuilder.close
3003
3004[clinic start generated code]*/
3005
3006static PyObject *
3007_elementtree_TreeBuilder_close_impl(TreeBuilderObject *self)
3008/*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/
3009{
3010    return treebuilder_done(self);
3011}
3012
3013/*[clinic input]
3014_elementtree.TreeBuilder.start
3015
3016    tag: object
3017    attrs: object(subclass_of='&PyDict_Type')
3018    /
3019
3020[clinic start generated code]*/
3021
3022static PyObject *
3023_elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag,
3024                                    PyObject *attrs)
3025/*[clinic end generated code: output=e7e9dc2861349411 input=7288e9e38e63b2b6]*/
3026{
3027    return treebuilder_handle_start(self, tag, attrs);
3028}
3029
3030/* ==================================================================== */
3031/* the expat interface */
3032
3033#include "expat.h"
3034#include "pyexpat.h"
3035
/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
 * cached globally without being in per-module state.
 */
static struct PyExpat_CAPI *expat_capi;
/* Call an expat function through the cached dispatch table, e.g.
 * EXPAT(ErrorString)(code).  expat_capi must have been set before use.
 */
#define EXPAT(func) (expat_capi->func)

/* Allocator suite built on Python's object allocator (malloc, realloc and
 * free, in that order).  NOTE(review): presumably handed to expat when a
 * parser is created; the use site is outside this section -- confirm.
 */
static XML_Memory_Handling_Suite ExpatMemoryHandler = {
    PyObject_Malloc, PyObject_Realloc, PyObject_Free};
3044
/* Instance layout of XMLParser objects. */
typedef struct {
    PyObject_HEAD

    XML_Parser parser;  /* the underlying expat parser */

    PyObject *target;
    PyObject *entity;

    /* Cache used by makeuniversal(): maps a raw name (bytes, as delivered by
       expat) to its decoded universal-name string. */
    PyObject *names;

    /* NOTE(review): the handle_* members look like cached callbacks for the
       corresponding parse events; they are set and used outside this
       section -- confirm there. */
    PyObject *handle_start_ns;
    PyObject *handle_end_ns;
    PyObject *handle_start;
    PyObject *handle_data;
    PyObject *handle_end;

    PyObject *handle_comment;
    PyObject *handle_pi;
    PyObject *handle_doctype;

    PyObject *handle_close;

} XMLParserObject;
3068
3069/* helpers */
3070
3071LOCAL(PyObject*)
3072makeuniversal(XMLParserObject* self, const char* string)
3073{
3074    /* convert a UTF-8 tag/attribute name from the expat parser
3075       to a universal name string */
3076
3077    Py_ssize_t size = (Py_ssize_t) strlen(string);
3078    PyObject* key;
3079    PyObject* value;
3080
3081    /* look the 'raw' name up in the names dictionary */
3082    key = PyBytes_FromStringAndSize(string, size);
3083    if (!key)
3084        return NULL;
3085
3086    value = PyDict_GetItemWithError(self->names, key);
3087
3088    if (value) {
3089        Py_INCREF(value);
3090    }
3091    else if (!PyErr_Occurred()) {
3092        /* new name.  convert to universal name, and decode as
3093           necessary */
3094
3095        PyObject* tag;
3096        char* p;
3097        Py_ssize_t i;
3098
3099        /* look for namespace separator */
3100        for (i = 0; i < size; i++)
3101            if (string[i] == '}')
3102                break;
3103        if (i != size) {
3104            /* convert to universal name */
3105            tag = PyBytes_FromStringAndSize(NULL, size+1);
3106            if (tag == NULL) {
3107                Py_DECREF(key);
3108                return NULL;
3109            }
3110            p = PyBytes_AS_STRING(tag);
3111            p[0] = '{';
3112            memcpy(p+1, string, size);
3113            size++;
3114        } else {
3115            /* plain name; use key as tag */
3116            Py_INCREF(key);
3117            tag = key;
3118        }
3119
3120        /* decode universal name */
3121        p = PyBytes_AS_STRING(tag);
3122        value = PyUnicode_DecodeUTF8(p, size, "strict");
3123        Py_DECREF(tag);
3124        if (!value) {
3125            Py_DECREF(key);
3126            return NULL;
3127        }
3128
3129        /* add to names dictionary */
3130        if (PyDict_SetItem(self->names, key, value) < 0) {
3131            Py_DECREF(key);
3132            Py_DECREF(value);
3133            return NULL;
3134        }
3135    }
3136
3137    Py_DECREF(key);
3138    return value;
3139}
3140
3141/* Set the ParseError exception with the given parameters.
3142 * If message is not NULL, it's used as the error string. Otherwise, the
3143 * message string is the default for the given error_code.
3144*/
3145static void
3146expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
3147                const char *message)
3148{
3149    PyObject *errmsg, *error, *position, *code;
3150    elementtreestate *st = ET_STATE_GLOBAL;
3151
3152    errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
3153                message ? message : EXPAT(ErrorString)(error_code),
3154                line, column);
3155    if (errmsg == NULL)
3156        return;
3157
3158    error = PyObject_CallOneArg(st->parseerror_obj, errmsg);
3159    Py_DECREF(errmsg);
3160    if (!error)
3161        return;
3162
3163    /* Add code and position attributes */
3164    code = PyLong_FromLong((long)error_code);
3165    if (!code) {
3166        Py_DECREF(error);
3167        return;
3168    }
3169    if (PyObject_SetAttrString(error, "code", code) == -1) {
3170        Py_DECREF(error);
3171        Py_DECREF(code);
3172        return;
3173    }
3174    Py_DECREF(code);
3175
3176    position = Py_BuildValue("(nn)", line, column);
3177    if (!position) {
3178        Py_DECREF(error);
3179        return;
3180    }
3181    if (PyObject_SetAttrString(error, "position", position) == -1) {
3182        Py_DECREF(error);
3183        Py_DECREF(position);
3184        return;
3185    }
3186    Py_DECREF(position);
3187
3188    PyErr_SetObject(st->parseerror_obj, error);
3189    Py_DECREF(error);
3190}
3191
3192/* -------------------------------------------------------------------- */
3193/* handlers */
3194
3195static void
3196expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
3197                      int data_len)
3198{
3199    PyObject* key;
3200    PyObject* value;
3201    PyObject* res;
3202
3203    if (data_len < 2 || data_in[0] != '&')
3204        return;
3205
3206    if (PyErr_Occurred())
3207        return;
3208
3209    key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
3210    if (!key)
3211        return;
3212
3213    value = PyDict_GetItemWithError(self->entity, key);
3214
3215    if (value) {
3216        if (TreeBuilder_CheckExact(self->target))
3217            res = treebuilder_handle_data(
3218                (TreeBuilderObject*) self->target, value
3219                );
3220        else if (self->handle_data)
3221            res = PyObject_CallOneArg(self->handle_data, value);
3222        else
3223            res = NULL;
3224        Py_XDECREF(res);
3225    } else if (!PyErr_Occurred()) {
3226        /* Report the first error, not the last */
3227        char message[128] = "undefined entity ";
3228        strncat(message, data_in, data_len < 100?data_len:100);
3229        expat_set_error(
3230            XML_ERROR_UNDEFINED_ENTITY,
3231            EXPAT(GetErrorLineNumber)(self->parser),
3232            EXPAT(GetErrorColumnNumber)(self->parser),
3233            message
3234            );
3235    }
3236
3237    Py_DECREF(key);
3238}
3239
3240static void
3241expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
3242                    const XML_Char **attrib_in)
3243{
3244    PyObject* res;
3245    PyObject* tag;
3246    PyObject* attrib;
3247    int ok;
3248
3249    if (PyErr_Occurred())
3250        return;
3251
3252    /* tag name */
3253    tag = makeuniversal(self, tag_in);
3254    if (!tag)
3255        return; /* parser will look for errors */
3256
3257    /* attributes */
3258    if (attrib_in[0]) {
3259        attrib = PyDict_New();
3260        if (!attrib) {
3261            Py_DECREF(tag);
3262            return;
3263        }
3264        while (attrib_in[0] && attrib_in[1]) {
3265            PyObject* key = makeuniversal(self, attrib_in[0]);
3266            PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
3267            if (!key || !value) {
3268                Py_XDECREF(value);
3269                Py_XDECREF(key);
3270                Py_DECREF(attrib);
3271                Py_DECREF(tag);
3272                return;
3273            }
3274            ok = PyDict_SetItem(attrib, key, value);
3275            Py_DECREF(value);
3276            Py_DECREF(key);
3277            if (ok < 0) {
3278                Py_DECREF(attrib);
3279                Py_DECREF(tag);
3280                return;
3281            }
3282            attrib_in += 2;
3283        }
3284    } else {
3285        attrib = NULL;
3286    }
3287
3288    if (TreeBuilder_CheckExact(self->target)) {
3289        /* shortcut */
3290        res = treebuilder_handle_start((TreeBuilderObject*) self->target,
3291                                       tag, attrib);
3292    }
3293    else if (self->handle_start) {
3294        if (attrib == NULL) {
3295            attrib = PyDict_New();
3296            if (!attrib) {
3297                Py_DECREF(tag);
3298                return;
3299            }
3300        }
3301        res = PyObject_CallFunctionObjArgs(self->handle_start,
3302                                           tag, attrib, NULL);
3303    } else
3304        res = NULL;
3305
3306    Py_DECREF(tag);
3307    Py_XDECREF(attrib);
3308
3309    Py_XDECREF(res);
3310}
3311
3312static void
3313expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
3314                   int data_len)
3315{
3316    PyObject* data;
3317    PyObject* res;
3318
3319    if (PyErr_Occurred())
3320        return;
3321
3322    data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
3323    if (!data)
3324        return; /* parser will look for errors */
3325
3326    if (TreeBuilder_CheckExact(self->target))
3327        /* shortcut */
3328        res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3329    else if (self->handle_data)
3330        res = PyObject_CallOneArg(self->handle_data, data);
3331    else
3332        res = NULL;
3333
3334    Py_DECREF(data);
3335
3336    Py_XDECREF(res);
3337}
3338
3339static void
3340expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3341{
3342    PyObject* tag;
3343    PyObject* res = NULL;
3344
3345    if (PyErr_Occurred())
3346        return;
3347
3348    if (TreeBuilder_CheckExact(self->target))
3349        /* shortcut */
3350        /* the standard tree builder doesn't look at the end tag */
3351        res = treebuilder_handle_end(
3352            (TreeBuilderObject*) self->target, Py_None
3353            );
3354    else if (self->handle_end) {
3355        tag = makeuniversal(self, tag_in);
3356        if (tag) {
3357            res = PyObject_CallOneArg(self->handle_end, tag);
3358            Py_DECREF(tag);
3359        }
3360    }
3361
3362    Py_XDECREF(res);
3363}
3364
3365static void
3366expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix_in,
3367                       const XML_Char *uri_in)
3368{
3369    PyObject* res = NULL;
3370    PyObject* uri;
3371    PyObject* prefix;
3372    PyObject* stack[2];
3373
3374    if (PyErr_Occurred())
3375        return;
3376
3377    if (!uri_in)
3378        uri_in = "";
3379    if (!prefix_in)
3380        prefix_in = "";
3381
3382    if (TreeBuilder_CheckExact(self->target)) {
3383        /* shortcut - TreeBuilder does not actually implement .start_ns() */
3384        TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3385
3386        if (target->events_append && target->start_ns_event_obj) {
3387            prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
3388            if (!prefix)
3389                return;
3390            uri = PyUnicode_DecodeUTF8(uri_in, strlen(uri_in), "strict");
3391            if (!uri) {
3392                Py_DECREF(prefix);
3393                return;
3394            }
3395
3396            res = treebuilder_handle_start_ns(target, prefix, uri);
3397            Py_DECREF(uri);
3398            Py_DECREF(prefix);
3399        }
3400    } else if (self->handle_start_ns) {
3401        prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
3402        if (!prefix)
3403            return;
3404        uri = PyUnicode_DecodeUTF8(uri_in, strlen(uri_in), "strict");
3405        if (!uri) {
3406            Py_DECREF(prefix);
3407            return;
3408        }
3409
3410        stack[0] = prefix;
3411        stack[1] = uri;
3412        res = _PyObject_FastCall(self->handle_start_ns, stack, 2);
3413        Py_DECREF(uri);
3414        Py_DECREF(prefix);
3415    }
3416
3417    Py_XDECREF(res);
3418}
3419
3420static void
3421expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3422{
3423    PyObject *res = NULL;
3424    PyObject* prefix;
3425
3426    if (PyErr_Occurred())
3427        return;
3428
3429    if (!prefix_in)
3430        prefix_in = "";
3431
3432    if (TreeBuilder_CheckExact(self->target)) {
3433        /* shortcut - TreeBuilder does not actually implement .end_ns() */
3434        TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3435
3436        if (target->events_append && target->end_ns_event_obj) {
3437            res = treebuilder_handle_end_ns(target, Py_None);
3438        }
3439    } else if (self->handle_end_ns) {
3440        prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
3441        if (!prefix)
3442            return;
3443
3444        res = PyObject_CallOneArg(self->handle_end_ns, prefix);
3445        Py_DECREF(prefix);
3446    }
3447
3448    Py_XDECREF(res);
3449}
3450
3451static void
3452expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3453{
3454    PyObject* comment;
3455    PyObject* res;
3456
3457    if (PyErr_Occurred())
3458        return;
3459
3460    if (TreeBuilder_CheckExact(self->target)) {
3461        /* shortcut */
3462        TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3463
3464        comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
3465        if (!comment)
3466            return; /* parser will look for errors */
3467
3468        res = treebuilder_handle_comment(target,  comment);
3469        Py_XDECREF(res);
3470        Py_DECREF(comment);
3471    } else if (self->handle_comment) {
3472        comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
3473        if (!comment)
3474            return;
3475
3476        res = PyObject_CallOneArg(self->handle_comment, comment);
3477        Py_XDECREF(res);
3478        Py_DECREF(comment);
3479    }
3480}
3481
3482static void
3483expat_start_doctype_handler(XMLParserObject *self,
3484                            const XML_Char *doctype_name,
3485                            const XML_Char *sysid,
3486                            const XML_Char *pubid,
3487                            int has_internal_subset)
3488{
3489    _Py_IDENTIFIER(doctype);
3490    PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
3491    PyObject *res;
3492
3493    if (PyErr_Occurred())
3494        return;
3495
3496    doctype_name_obj = makeuniversal(self, doctype_name);
3497    if (!doctype_name_obj)
3498        return;
3499
3500    if (sysid) {
3501        sysid_obj = makeuniversal(self, sysid);
3502        if (!sysid_obj) {
3503            Py_DECREF(doctype_name_obj);
3504            return;
3505        }
3506    } else {
3507        Py_INCREF(Py_None);
3508        sysid_obj = Py_None;
3509    }
3510
3511    if (pubid) {
3512        pubid_obj = makeuniversal(self, pubid);
3513        if (!pubid_obj) {
3514            Py_DECREF(doctype_name_obj);
3515            Py_DECREF(sysid_obj);
3516            return;
3517        }
3518    } else {
3519        Py_INCREF(Py_None);
3520        pubid_obj = Py_None;
3521    }
3522
3523    /* If the target has a handler for doctype, call it. */
3524    if (self->handle_doctype) {
3525        res = PyObject_CallFunctionObjArgs(self->handle_doctype,
3526                                           doctype_name_obj, pubid_obj,
3527                                           sysid_obj, NULL);
3528        Py_XDECREF(res);
3529    }
3530    else if (_PyObject_LookupAttrId((PyObject *)self, &PyId_doctype, &res) > 0) {
3531        (void)PyErr_WarnEx(PyExc_RuntimeWarning,
3532                "The doctype() method of XMLParser is ignored.  "
3533                "Define doctype() method on the TreeBuilder target.",
3534                1);
3535        Py_DECREF(res);
3536    }
3537
3538    Py_DECREF(doctype_name_obj);
3539    Py_DECREF(pubid_obj);
3540    Py_DECREF(sysid_obj);
3541}
3542
3543static void
3544expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3545                 const XML_Char* data_in)
3546{
3547    PyObject* pi_target;
3548    PyObject* data;
3549    PyObject* res;
3550    PyObject* stack[2];
3551
3552    if (PyErr_Occurred())
3553        return;
3554
3555    if (TreeBuilder_CheckExact(self->target)) {
3556        /* shortcut */
3557        TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3558
3559        if ((target->events_append && target->pi_event_obj) || target->insert_pis) {
3560            pi_target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3561            if (!pi_target)
3562                goto error;
3563            data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
3564            if (!data)
3565                goto error;
3566            res = treebuilder_handle_pi(target, pi_target, data);
3567            Py_XDECREF(res);
3568            Py_DECREF(data);
3569            Py_DECREF(pi_target);
3570        }
3571    } else if (self->handle_pi) {
3572        pi_target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3573        if (!pi_target)
3574            goto error;
3575        data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
3576        if (!data)
3577            goto error;
3578
3579        stack[0] = pi_target;
3580        stack[1] = data;
3581        res = _PyObject_FastCall(self->handle_pi, stack, 2);
3582        Py_XDECREF(res);
3583        Py_DECREF(data);
3584        Py_DECREF(pi_target);
3585    }
3586
3587    return;
3588
3589  error:
3590    Py_XDECREF(pi_target);
3591    return;
3592}
3593
3594/* -------------------------------------------------------------------- */
3595
3596static PyObject *
3597xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3598{
3599    XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3600    if (self) {
3601        self->parser = NULL;
3602        self->target = self->entity = self->names = NULL;
3603        self->handle_start_ns = self->handle_end_ns = NULL;
3604        self->handle_start = self->handle_data = self->handle_end = NULL;
3605        self->handle_comment = self->handle_pi = self->handle_close = NULL;
3606        self->handle_doctype = NULL;
3607    }
3608    return (PyObject *)self;
3609}
3610
3611static int
3612ignore_attribute_error(PyObject *value)
3613{
3614    if (value == NULL) {
3615        if (!PyErr_ExceptionMatches(PyExc_AttributeError)) {
3616            return -1;
3617        }
3618        PyErr_Clear();
3619    }
3620    return 0;
3621}
3622
3623/*[clinic input]
3624_elementtree.XMLParser.__init__
3625
3626    *
3627    target: object = None
3628    encoding: str(accept={str, NoneType}) = None
3629
3630[clinic start generated code]*/
3631
3632static int
3633_elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *target,
3634                                     const char *encoding)
3635/*[clinic end generated code: output=3ae45ec6cdf344e4 input=7e716dd6e4f3e439]*/
3636{
3637    self->entity = PyDict_New();
3638    if (!self->entity)
3639        return -1;
3640
3641    self->names = PyDict_New();
3642    if (!self->names) {
3643        Py_CLEAR(self->entity);
3644        return -1;
3645    }
3646
3647    self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3648    if (!self->parser) {
3649        Py_CLEAR(self->entity);
3650        Py_CLEAR(self->names);
3651        PyErr_NoMemory();
3652        return -1;
3653    }
3654    /* expat < 2.1.0 has no XML_SetHashSalt() */
3655    if (EXPAT(SetHashSalt) != NULL) {
3656        EXPAT(SetHashSalt)(self->parser,
3657                           (unsigned long)_Py_HashSecret.expat.hashsalt);
3658    }
3659
3660    if (target != Py_None) {
3661        Py_INCREF(target);
3662    } else {
3663        target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
3664        if (!target) {
3665            Py_CLEAR(self->entity);
3666            Py_CLEAR(self->names);
3667            return -1;
3668        }
3669    }
3670    self->target = target;
3671
3672    self->handle_start_ns = PyObject_GetAttrString(target, "start_ns");
3673    if (ignore_attribute_error(self->handle_start_ns)) {
3674        return -1;
3675    }
3676    self->handle_end_ns = PyObject_GetAttrString(target, "end_ns");
3677    if (ignore_attribute_error(self->handle_end_ns)) {
3678        return -1;
3679    }
3680    self->handle_start = PyObject_GetAttrString(target, "start");
3681    if (ignore_attribute_error(self->handle_start)) {
3682        return -1;
3683    }
3684    self->handle_data = PyObject_GetAttrString(target, "data");
3685    if (ignore_attribute_error(self->handle_data)) {
3686        return -1;
3687    }
3688    self->handle_end = PyObject_GetAttrString(target, "end");
3689    if (ignore_attribute_error(self->handle_end)) {
3690        return -1;
3691    }
3692    self->handle_comment = PyObject_GetAttrString(target, "comment");
3693    if (ignore_attribute_error(self->handle_comment)) {
3694        return -1;
3695    }
3696    self->handle_pi = PyObject_GetAttrString(target, "pi");
3697    if (ignore_attribute_error(self->handle_pi)) {
3698        return -1;
3699    }
3700    self->handle_close = PyObject_GetAttrString(target, "close");
3701    if (ignore_attribute_error(self->handle_close)) {
3702        return -1;
3703    }
3704    self->handle_doctype = PyObject_GetAttrString(target, "doctype");
3705    if (ignore_attribute_error(self->handle_doctype)) {
3706        return -1;
3707    }
3708
3709    /* configure parser */
3710    EXPAT(SetUserData)(self->parser, self);
3711    if (self->handle_start_ns || self->handle_end_ns)
3712        EXPAT(SetNamespaceDeclHandler)(
3713            self->parser,
3714            (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3715            (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3716            );
3717    EXPAT(SetElementHandler)(
3718        self->parser,
3719        (XML_StartElementHandler) expat_start_handler,
3720        (XML_EndElementHandler) expat_end_handler
3721        );
3722    EXPAT(SetDefaultHandlerExpand)(
3723        self->parser,
3724        (XML_DefaultHandler) expat_default_handler
3725        );
3726    EXPAT(SetCharacterDataHandler)(
3727        self->parser,
3728        (XML_CharacterDataHandler) expat_data_handler
3729        );
3730    if (self->handle_comment)
3731        EXPAT(SetCommentHandler)(
3732            self->parser,
3733            (XML_CommentHandler) expat_comment_handler
3734            );
3735    if (self->handle_pi)
3736        EXPAT(SetProcessingInstructionHandler)(
3737            self->parser,
3738            (XML_ProcessingInstructionHandler) expat_pi_handler
3739            );
3740    EXPAT(SetStartDoctypeDeclHandler)(
3741        self->parser,
3742        (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3743        );
3744    EXPAT(SetUnknownEncodingHandler)(
3745        self->parser,
3746        EXPAT(DefaultUnknownEncodingHandler), NULL
3747        );
3748
3749    return 0;
3750}
3751
3752static int
3753xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3754{
3755    Py_VISIT(self->handle_close);
3756    Py_VISIT(self->handle_pi);
3757    Py_VISIT(self->handle_comment);
3758    Py_VISIT(self->handle_end);
3759    Py_VISIT(self->handle_data);
3760    Py_VISIT(self->handle_start);
3761    Py_VISIT(self->handle_start_ns);
3762    Py_VISIT(self->handle_end_ns);
3763    Py_VISIT(self->handle_doctype);
3764
3765    Py_VISIT(self->target);
3766    Py_VISIT(self->entity);
3767    Py_VISIT(self->names);
3768
3769    return 0;
3770}
3771
3772static int
3773xmlparser_gc_clear(XMLParserObject *self)
3774{
3775    if (self->parser != NULL) {
3776        XML_Parser parser = self->parser;
3777        self->parser = NULL;
3778        EXPAT(ParserFree)(parser);
3779    }
3780
3781    Py_CLEAR(self->handle_close);
3782    Py_CLEAR(self->handle_pi);
3783    Py_CLEAR(self->handle_comment);
3784    Py_CLEAR(self->handle_end);
3785    Py_CLEAR(self->handle_data);
3786    Py_CLEAR(self->handle_start);
3787    Py_CLEAR(self->handle_start_ns);
3788    Py_CLEAR(self->handle_end_ns);
3789    Py_CLEAR(self->handle_doctype);
3790
3791    Py_CLEAR(self->target);
3792    Py_CLEAR(self->entity);
3793    Py_CLEAR(self->names);
3794
3795    return 0;
3796}
3797
3798static void
3799xmlparser_dealloc(XMLParserObject* self)
3800{
3801    PyObject_GC_UnTrack(self);
3802    xmlparser_gc_clear(self);
3803    Py_TYPE(self)->tp_free((PyObject *)self);
3804}
3805
3806Py_LOCAL_INLINE(int)
3807_check_xmlparser(XMLParserObject* self)
3808{
3809    if (self->target == NULL) {
3810        PyErr_SetString(PyExc_ValueError,
3811                        "XMLParser.__init__() wasn't called");
3812        return 0;
3813    }
3814    return 1;
3815}
3816
3817LOCAL(PyObject*)
3818expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
3819{
3820    int ok;
3821
3822    assert(!PyErr_Occurred());
3823    ok = EXPAT(Parse)(self->parser, data, data_len, final);
3824
3825    if (PyErr_Occurred())
3826        return NULL;
3827
3828    if (!ok) {
3829        expat_set_error(
3830            EXPAT(GetErrorCode)(self->parser),
3831            EXPAT(GetErrorLineNumber)(self->parser),
3832            EXPAT(GetErrorColumnNumber)(self->parser),
3833            NULL
3834            );
3835        return NULL;
3836    }
3837
3838    Py_RETURN_NONE;
3839}
3840
3841/*[clinic input]
3842_elementtree.XMLParser.close
3843
3844[clinic start generated code]*/
3845
3846static PyObject *
3847_elementtree_XMLParser_close_impl(XMLParserObject *self)
3848/*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/
3849{
3850    /* end feeding data to parser */
3851
3852    PyObject* res;
3853
3854    if (!_check_xmlparser(self)) {
3855        return NULL;
3856    }
3857    res = expat_parse(self, "", 0, 1);
3858    if (!res)
3859        return NULL;
3860
3861    if (TreeBuilder_CheckExact(self->target)) {
3862        Py_DECREF(res);
3863        return treebuilder_done((TreeBuilderObject*) self->target);
3864    }
3865    else if (self->handle_close) {
3866        Py_DECREF(res);
3867        return PyObject_CallNoArgs(self->handle_close);
3868    }
3869    else {
3870        return res;
3871    }
3872}
3873
3874/*[clinic input]
3875_elementtree.XMLParser.feed
3876
3877    data: object
3878    /
3879
3880[clinic start generated code]*/
3881
3882static PyObject *
3883_elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data)
3884/*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/
3885{
3886    /* feed data to parser */
3887
3888    if (!_check_xmlparser(self)) {
3889        return NULL;
3890    }
3891    if (PyUnicode_Check(data)) {
3892        Py_ssize_t data_len;
3893        const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len);
3894        if (data_ptr == NULL)
3895            return NULL;
3896        if (data_len > INT_MAX) {
3897            PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3898            return NULL;
3899        }
3900        /* Explicitly set UTF-8 encoding. Return code ignored. */
3901        (void)EXPAT(SetEncoding)(self->parser, "utf-8");
3902        return expat_parse(self, data_ptr, (int)data_len, 0);
3903    }
3904    else {
3905        Py_buffer view;
3906        PyObject *res;
3907        if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
3908            return NULL;
3909        if (view.len > INT_MAX) {
3910            PyBuffer_Release(&view);
3911            PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3912            return NULL;
3913        }
3914        res = expat_parse(self, view.buf, (int)view.len, 0);
3915        PyBuffer_Release(&view);
3916        return res;
3917    }
3918}
3919
3920/*[clinic input]
3921_elementtree.XMLParser._parse_whole
3922
3923    file: object
3924    /
3925
3926[clinic start generated code]*/
3927
3928static PyObject *
3929_elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file)
3930/*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/
3931{
3932    /* (internal) parse the whole input, until end of stream */
3933    PyObject* reader;
3934    PyObject* buffer;
3935    PyObject* temp;
3936    PyObject* res;
3937
3938    if (!_check_xmlparser(self)) {
3939        return NULL;
3940    }
3941    reader = PyObject_GetAttrString(file, "read");
3942    if (!reader)
3943        return NULL;
3944
3945    /* read from open file object */
3946    for (;;) {
3947
3948        buffer = PyObject_CallFunction(reader, "i", 64*1024);
3949
3950        if (!buffer) {
3951            /* read failed (e.g. due to KeyboardInterrupt) */
3952            Py_DECREF(reader);
3953            return NULL;
3954        }
3955
3956        if (PyUnicode_CheckExact(buffer)) {
3957            /* A unicode object is encoded into bytes using UTF-8 */
3958            if (PyUnicode_GET_LENGTH(buffer) == 0) {
3959                Py_DECREF(buffer);
3960                break;
3961            }
3962            temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
3963            Py_DECREF(buffer);
3964            if (!temp) {
3965                /* Propagate exception from PyUnicode_AsEncodedString */
3966                Py_DECREF(reader);
3967                return NULL;
3968            }
3969            buffer = temp;
3970        }
3971        else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
3972            Py_DECREF(buffer);
3973            break;
3974        }
3975
3976        if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
3977            Py_DECREF(buffer);
3978            Py_DECREF(reader);
3979            PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3980            return NULL;
3981        }
3982        res = expat_parse(
3983            self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
3984            );
3985
3986        Py_DECREF(buffer);
3987
3988        if (!res) {
3989            Py_DECREF(reader);
3990            return NULL;
3991        }
3992        Py_DECREF(res);
3993
3994    }
3995
3996    Py_DECREF(reader);
3997
3998    res = expat_parse(self, "", 0, 1);
3999
4000    if (res && TreeBuilder_CheckExact(self->target)) {
4001        Py_DECREF(res);
4002        return treebuilder_done((TreeBuilderObject*) self->target);
4003    }
4004
4005    return res;
4006}
4007
4008/*[clinic input]
4009_elementtree.XMLParser._setevents
4010
4011    events_queue: object
4012    events_to_report: object = None
4013    /
4014
4015[clinic start generated code]*/
4016
4017static PyObject *
4018_elementtree_XMLParser__setevents_impl(XMLParserObject *self,
4019                                       PyObject *events_queue,
4020                                       PyObject *events_to_report)
4021/*[clinic end generated code: output=1440092922b13ed1 input=abf90830a1c3b0fc]*/
4022{
4023    /* activate element event reporting */
4024    Py_ssize_t i;
4025    TreeBuilderObject *target;
4026    PyObject *events_append, *events_seq;
4027
4028    if (!_check_xmlparser(self)) {
4029        return NULL;
4030    }
4031    if (!TreeBuilder_CheckExact(self->target)) {
4032        PyErr_SetString(
4033            PyExc_TypeError,
4034            "event handling only supported for ElementTree.TreeBuilder "
4035            "targets"
4036            );
4037        return NULL;
4038    }
4039
4040    target = (TreeBuilderObject*) self->target;
4041
4042    events_append = PyObject_GetAttrString(events_queue, "append");
4043    if (events_append == NULL)
4044        return NULL;
4045    Py_XSETREF(target->events_append, events_append);
4046
4047    /* clear out existing events */
4048    Py_CLEAR(target->start_event_obj);
4049    Py_CLEAR(target->end_event_obj);
4050    Py_CLEAR(target->start_ns_event_obj);
4051    Py_CLEAR(target->end_ns_event_obj);
4052    Py_CLEAR(target->comment_event_obj);
4053    Py_CLEAR(target->pi_event_obj);
4054
4055    if (events_to_report == Py_None) {
4056        /* default is "end" only */
4057        target->end_event_obj = PyUnicode_FromString("end");
4058        Py_RETURN_NONE;
4059    }
4060
4061    if (!(events_seq = PySequence_Fast(events_to_report,
4062                                       "events must be a sequence"))) {
4063        return NULL;
4064    }
4065
4066    for (i = 0; i < PySequence_Fast_GET_SIZE(events_seq); ++i) {
4067        PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
4068        const char *event_name = NULL;
4069        if (PyUnicode_Check(event_name_obj)) {
4070            event_name = PyUnicode_AsUTF8(event_name_obj);
4071        } else if (PyBytes_Check(event_name_obj)) {
4072            event_name = PyBytes_AS_STRING(event_name_obj);
4073        }
4074        if (event_name == NULL) {
4075            Py_DECREF(events_seq);
4076            PyErr_Format(PyExc_ValueError, "invalid events sequence");
4077            return NULL;
4078        }
4079
4080        Py_INCREF(event_name_obj);
4081        if (strcmp(event_name, "start") == 0) {
4082            Py_XSETREF(target->start_event_obj, event_name_obj);
4083        } else if (strcmp(event_name, "end") == 0) {
4084            Py_XSETREF(target->end_event_obj, event_name_obj);
4085        } else if (strcmp(event_name, "start-ns") == 0) {
4086            Py_XSETREF(target->start_ns_event_obj, event_name_obj);
4087            EXPAT(SetNamespaceDeclHandler)(
4088                self->parser,
4089                (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
4090                (XML_EndNamespaceDeclHandler) expat_end_ns_handler
4091                );
4092        } else if (strcmp(event_name, "end-ns") == 0) {
4093            Py_XSETREF(target->end_ns_event_obj, event_name_obj);
4094            EXPAT(SetNamespaceDeclHandler)(
4095                self->parser,
4096                (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
4097                (XML_EndNamespaceDeclHandler) expat_end_ns_handler
4098                );
4099        } else if (strcmp(event_name, "comment") == 0) {
4100            Py_XSETREF(target->comment_event_obj, event_name_obj);
4101            EXPAT(SetCommentHandler)(
4102                self->parser,
4103                (XML_CommentHandler) expat_comment_handler
4104                );
4105        } else if (strcmp(event_name, "pi") == 0) {
4106            Py_XSETREF(target->pi_event_obj, event_name_obj);
4107            EXPAT(SetProcessingInstructionHandler)(
4108                self->parser,
4109                (XML_ProcessingInstructionHandler) expat_pi_handler
4110                );
4111        } else {
4112            Py_DECREF(event_name_obj);
4113            Py_DECREF(events_seq);
4114            PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
4115            return NULL;
4116        }
4117    }
4118
4119    Py_DECREF(events_seq);
4120    Py_RETURN_NONE;
4121}
4122
4123static PyMemberDef xmlparser_members[] = {
4124    {"entity", T_OBJECT, offsetof(XMLParserObject, entity), READONLY, NULL},
4125    {"target", T_OBJECT, offsetof(XMLParserObject, target), READONLY, NULL},
4126    {NULL}
4127};
4128
4129static PyObject*
4130xmlparser_version_getter(XMLParserObject *self, void *closure)
4131{
4132    return PyUnicode_FromFormat(
4133        "Expat %d.%d.%d", XML_MAJOR_VERSION,
4134        XML_MINOR_VERSION, XML_MICRO_VERSION);
4135}
4136
4137static PyGetSetDef xmlparser_getsetlist[] = {
4138    {"version", (getter)xmlparser_version_getter, NULL, NULL},
4139    {NULL},
4140};
4141
4142#include "clinic/_elementtree.c.h"
4143
4144static PyMethodDef element_methods[] = {
4145
4146    _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF
4147
4148    _ELEMENTTREE_ELEMENT_GET_METHODDEF
4149    _ELEMENTTREE_ELEMENT_SET_METHODDEF
4150
4151    _ELEMENTTREE_ELEMENT_FIND_METHODDEF
4152    _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF
4153    _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF
4154
4155    _ELEMENTTREE_ELEMENT_APPEND_METHODDEF
4156    _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF
4157    _ELEMENTTREE_ELEMENT_INSERT_METHODDEF
4158    _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF
4159
4160    _ELEMENTTREE_ELEMENT_ITER_METHODDEF
4161    _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF
4162    _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF
4163
4164    _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF
4165    _ELEMENTTREE_ELEMENT_KEYS_METHODDEF
4166
4167    _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF
4168
4169    _ELEMENTTREE_ELEMENT___COPY___METHODDEF
4170    _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
4171    _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
4172    _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF
4173    _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF
4174
4175    {NULL, NULL}
4176};
4177
4178static PyMappingMethods element_as_mapping = {
4179    (lenfunc) element_length,
4180    (binaryfunc) element_subscr,
4181    (objobjargproc) element_ass_subscr,
4182};
4183
4184static PyGetSetDef element_getsetlist[] = {
4185    {"tag",
4186        (getter)element_tag_getter,
4187        (setter)element_tag_setter,
4188        "A string identifying what kind of data this element represents"},
4189    {"text",
4190        (getter)element_text_getter,
4191        (setter)element_text_setter,
4192        "A string of text directly after the start tag, or None"},
4193    {"tail",
4194        (getter)element_tail_getter,
4195        (setter)element_tail_setter,
4196        "A string of text directly after the end tag, or None"},
4197    {"attrib",
4198        (getter)element_attrib_getter,
4199        (setter)element_attrib_setter,
4200        "A dictionary containing the element's attributes"},
4201    {NULL},
4202};
4203
4204static PyTypeObject Element_Type = {
4205    PyVarObject_HEAD_INIT(NULL, 0)
4206    "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
4207    /* methods */
4208    (destructor)element_dealloc,                    /* tp_dealloc */
4209    0,                                              /* tp_vectorcall_offset */
4210    0,                                              /* tp_getattr */
4211    0,                                              /* tp_setattr */
4212    0,                                              /* tp_as_async */
4213    (reprfunc)element_repr,                         /* tp_repr */
4214    0,                                              /* tp_as_number */
4215    &element_as_sequence,                           /* tp_as_sequence */
4216    &element_as_mapping,                            /* tp_as_mapping */
4217    0,                                              /* tp_hash */
4218    0,                                              /* tp_call */
4219    0,                                              /* tp_str */
4220    PyObject_GenericGetAttr,                        /* tp_getattro */
4221    0,                                              /* tp_setattro */
4222    0,                                              /* tp_as_buffer */
4223    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4224                                                    /* tp_flags */
4225    0,                                              /* tp_doc */
4226    (traverseproc)element_gc_traverse,              /* tp_traverse */
4227    (inquiry)element_gc_clear,                      /* tp_clear */
4228    0,                                              /* tp_richcompare */
4229    offsetof(ElementObject, weakreflist),           /* tp_weaklistoffset */
4230    0,                                              /* tp_iter */
4231    0,                                              /* tp_iternext */
4232    element_methods,                                /* tp_methods */
4233    0,                                              /* tp_members */
4234    element_getsetlist,                             /* tp_getset */
4235    0,                                              /* tp_base */
4236    0,                                              /* tp_dict */
4237    0,                                              /* tp_descr_get */
4238    0,                                              /* tp_descr_set */
4239    0,                                              /* tp_dictoffset */
4240    (initproc)element_init,                         /* tp_init */
4241    PyType_GenericAlloc,                            /* tp_alloc */
4242    element_new,                                    /* tp_new */
4243    0,                                              /* tp_free */
4244};
4245
4246static PyMethodDef treebuilder_methods[] = {
4247    _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
4248    _ELEMENTTREE_TREEBUILDER_START_METHODDEF
4249    _ELEMENTTREE_TREEBUILDER_END_METHODDEF
4250    _ELEMENTTREE_TREEBUILDER_COMMENT_METHODDEF
4251    _ELEMENTTREE_TREEBUILDER_PI_METHODDEF
4252    _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
4253    {NULL, NULL}
4254};
4255
4256static PyTypeObject TreeBuilder_Type = {
4257    PyVarObject_HEAD_INIT(NULL, 0)
4258    "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
4259    /* methods */
4260    (destructor)treebuilder_dealloc,                /* tp_dealloc */
4261    0,                                              /* tp_vectorcall_offset */
4262    0,                                              /* tp_getattr */
4263    0,                                              /* tp_setattr */
4264    0,                                              /* tp_as_async */
4265    0,                                              /* tp_repr */
4266    0,                                              /* tp_as_number */
4267    0,                                              /* tp_as_sequence */
4268    0,                                              /* tp_as_mapping */
4269    0,                                              /* tp_hash */
4270    0,                                              /* tp_call */
4271    0,                                              /* tp_str */
4272    0,                                              /* tp_getattro */
4273    0,                                              /* tp_setattro */
4274    0,                                              /* tp_as_buffer */
4275    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4276                                                    /* tp_flags */
4277    0,                                              /* tp_doc */
4278    (traverseproc)treebuilder_gc_traverse,          /* tp_traverse */
4279    (inquiry)treebuilder_gc_clear,                  /* tp_clear */
4280    0,                                              /* tp_richcompare */
4281    0,                                              /* tp_weaklistoffset */
4282    0,                                              /* tp_iter */
4283    0,                                              /* tp_iternext */
4284    treebuilder_methods,                            /* tp_methods */
4285    0,                                              /* tp_members */
4286    0,                                              /* tp_getset */
4287    0,                                              /* tp_base */
4288    0,                                              /* tp_dict */
4289    0,                                              /* tp_descr_get */
4290    0,                                              /* tp_descr_set */
4291    0,                                              /* tp_dictoffset */
4292    _elementtree_TreeBuilder___init__,              /* tp_init */
4293    PyType_GenericAlloc,                            /* tp_alloc */
4294    treebuilder_new,                                /* tp_new */
4295    0,                                              /* tp_free */
4296};
4297
4298static PyMethodDef xmlparser_methods[] = {
4299    _ELEMENTTREE_XMLPARSER_FEED_METHODDEF
4300    _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
4301    _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
4302    _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
4303    {NULL, NULL}
4304};
4305
4306static PyTypeObject XMLParser_Type = {
4307    PyVarObject_HEAD_INIT(NULL, 0)
4308    "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
4309    /* methods */
4310    (destructor)xmlparser_dealloc,                  /* tp_dealloc */
4311    0,                                              /* tp_vectorcall_offset */
4312    0,                                              /* tp_getattr */
4313    0,                                              /* tp_setattr */
4314    0,                                              /* tp_as_async */
4315    0,                                              /* tp_repr */
4316    0,                                              /* tp_as_number */
4317    0,                                              /* tp_as_sequence */
4318    0,                                              /* tp_as_mapping */
4319    0,                                              /* tp_hash */
4320    0,                                              /* tp_call */
4321    0,                                              /* tp_str */
4322    0,                                              /* tp_getattro */
4323    0,                                              /* tp_setattro */
4324    0,                                              /* tp_as_buffer */
4325    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4326                                                    /* tp_flags */
4327    0,                                              /* tp_doc */
4328    (traverseproc)xmlparser_gc_traverse,            /* tp_traverse */
4329    (inquiry)xmlparser_gc_clear,                    /* tp_clear */
4330    0,                                              /* tp_richcompare */
4331    0,                                              /* tp_weaklistoffset */
4332    0,                                              /* tp_iter */
4333    0,                                              /* tp_iternext */
4334    xmlparser_methods,                              /* tp_methods */
4335    xmlparser_members,                              /* tp_members */
4336    xmlparser_getsetlist,                           /* tp_getset */
4337    0,                                              /* tp_base */
4338    0,                                              /* tp_dict */
4339    0,                                              /* tp_descr_get */
4340    0,                                              /* tp_descr_set */
4341    0,                                              /* tp_dictoffset */
4342    _elementtree_XMLParser___init__,                /* tp_init */
4343    PyType_GenericAlloc,                            /* tp_alloc */
4344    xmlparser_new,                                  /* tp_new */
4345    0,                                              /* tp_free */
4346};
4347
4348/* ==================================================================== */
4349/* python module interface */
4350
4351static PyMethodDef _functions[] = {
4352    {"SubElement", _PyCFunction_CAST(subelement), METH_VARARGS | METH_KEYWORDS},
4353    _ELEMENTTREE__SET_FACTORIES_METHODDEF
4354    {NULL, NULL}
4355};
4356
4357
4358static struct PyModuleDef elementtreemodule = {
4359    PyModuleDef_HEAD_INIT,
4360    "_elementtree",
4361    NULL,
4362    sizeof(elementtreestate),
4363    _functions,
4364    NULL,
4365    elementtree_traverse,
4366    elementtree_clear,
4367    elementtree_free
4368};
4369
4370PyMODINIT_FUNC
4371PyInit__elementtree(void)
4372{
4373    PyObject *m, *temp;
4374    elementtreestate *st;
4375
4376    m = PyState_FindModule(&elementtreemodule);
4377    if (m) {
4378        Py_INCREF(m);
4379        return m;
4380    }
4381
4382    /* Initialize object types */
4383    if (PyType_Ready(&ElementIter_Type) < 0)
4384        return NULL;
4385    if (PyType_Ready(&TreeBuilder_Type) < 0)
4386        return NULL;
4387    if (PyType_Ready(&Element_Type) < 0)
4388        return NULL;
4389    if (PyType_Ready(&XMLParser_Type) < 0)
4390        return NULL;
4391
4392    m = PyModule_Create(&elementtreemodule);
4393    if (!m)
4394        return NULL;
4395    st = get_elementtree_state(m);
4396
4397    if (!(temp = PyImport_ImportModule("copy")))
4398        return NULL;
4399    st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
4400    Py_XDECREF(temp);
4401
4402    if (st->deepcopy_obj == NULL) {
4403        return NULL;
4404    }
4405
4406    assert(!PyErr_Occurred());
4407    if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
4408        return NULL;
4409
4410    /* link against pyexpat */
4411    expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
4412    if (expat_capi) {
4413        /* check that it's usable */
4414        if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
4415            (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) ||
4416            expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
4417            expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
4418            expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
4419            PyErr_SetString(PyExc_ImportError,
4420                            "pyexpat version is incompatible");
4421            return NULL;
4422        }
4423    } else {
4424        return NULL;
4425    }
4426
4427    st->parseerror_obj = PyErr_NewException(
4428        "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
4429        );
4430    Py_INCREF(st->parseerror_obj);
4431    if (PyModule_AddObject(m, "ParseError", st->parseerror_obj) < 0) {
4432        Py_DECREF(st->parseerror_obj);
4433        return NULL;
4434    }
4435
4436    PyTypeObject *types[] = {
4437        &Element_Type,
4438        &TreeBuilder_Type,
4439        &XMLParser_Type
4440    };
4441
4442    for (size_t i = 0; i < Py_ARRAY_LENGTH(types); i++) {
4443        if (PyModule_AddType(m, types[i]) < 0) {
4444            return NULL;
4445        }
4446    }
4447
4448    return m;
4449}
4450