1/*-------------------------------------------------------------------- 2 * Licensed to PSF under a Contributor Agreement. 3 * See https://www.python.org/psf/license for licensing details. 4 * 5 * _elementtree - C accelerator for xml.etree.ElementTree 6 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved. 7 * Copyright (c) 1999-2009 by Fredrik Lundh. 8 * 9 * info@pythonware.com 10 * http://www.pythonware.com 11 *-------------------------------------------------------------------- 12 */ 13 14#define PY_SSIZE_T_CLEAN 15#define NEEDS_PY_IDENTIFIER 16 17#include "Python.h" 18#include "structmember.h" // PyMemberDef 19 20/* -------------------------------------------------------------------- */ 21/* configuration */ 22 23/* An element can hold this many children without extra memory 24 allocations. */ 25#define STATIC_CHILDREN 4 26 27/* For best performance, chose a value so that 80-90% of all nodes 28 have no more than the given number of children. Set this to zero 29 to minimize the size of the element structure itself (this only 30 helps if you have lots of leaf nodes with attributes). */ 31 32/* Also note that pymalloc always allocates blocks in multiples of 33 eight bytes. For the current C version of ElementTree, this means 34 that the number of children should be an even number, at least on 35 32-bit platforms. */ 36 37/* -------------------------------------------------------------------- */ 38 39/* compiler tweaks */ 40#if defined(_MSC_VER) 41#define LOCAL(type) static __inline type __fastcall 42#else 43#define LOCAL(type) static type 44#endif 45 46/* macros used to store 'join' flags in string object pointers. note 47 that all use of text and tail as object pointers must be wrapped in 48 JOIN_OBJ. see comments in the ElementObject definition for more 49 info. 
*/ 50#define JOIN_GET(p) ((uintptr_t) (p) & 1) 51#define JOIN_SET(p, flag) ((void*) ((uintptr_t) (JOIN_OBJ(p)) | (flag))) 52#define JOIN_OBJ(p) ((PyObject*) ((uintptr_t) (p) & ~(uintptr_t)1)) 53 54/* Py_SETREF for a PyObject* that uses a join flag. */ 55Py_LOCAL_INLINE(void) 56_set_joined_ptr(PyObject **p, PyObject *new_joined_ptr) 57{ 58 PyObject *tmp = JOIN_OBJ(*p); 59 *p = new_joined_ptr; 60 Py_DECREF(tmp); 61} 62 63/* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by 64 * reference since this function sets it to NULL. 65*/ 66static void _clear_joined_ptr(PyObject **p) 67{ 68 if (*p) { 69 _set_joined_ptr(p, NULL); 70 } 71} 72 73/* Types defined by this extension */ 74static PyTypeObject Element_Type; 75static PyTypeObject ElementIter_Type; 76static PyTypeObject TreeBuilder_Type; 77static PyTypeObject XMLParser_Type; 78 79 80/* Per-module state; PEP 3121 */ 81typedef struct { 82 PyObject *parseerror_obj; 83 PyObject *deepcopy_obj; 84 PyObject *elementpath_obj; 85 PyObject *comment_factory; 86 PyObject *pi_factory; 87} elementtreestate; 88 89static struct PyModuleDef elementtreemodule; 90 91/* Given a module object (assumed to be _elementtree), get its per-module 92 * state. 93 */ 94static inline elementtreestate* 95get_elementtree_state(PyObject *module) 96{ 97 void *state = PyModule_GetState(module); 98 assert(state != NULL); 99 return (elementtreestate *)state; 100} 101 102/* Find the module instance imported in the currently running sub-interpreter 103 * and get its state. 
 */
#define ET_STATE_GLOBAL \
    ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))

/* m_clear handler: drop every object reference held in the module state. */
static int
elementtree_clear(PyObject *m)
{
    elementtreestate *st = get_elementtree_state(m);
    Py_CLEAR(st->parseerror_obj);
    Py_CLEAR(st->deepcopy_obj);
    Py_CLEAR(st->elementpath_obj);
    Py_CLEAR(st->comment_factory);
    Py_CLEAR(st->pi_factory);
    return 0;
}

/* m_traverse handler: report every object held in the module state to
 * the garbage collector.
 */
static int
elementtree_traverse(PyObject *m, visitproc visit, void *arg)
{
    elementtreestate *st = get_elementtree_state(m);
    Py_VISIT(st->parseerror_obj);
    Py_VISIT(st->deepcopy_obj);
    Py_VISIT(st->elementpath_obj);
    Py_VISIT(st->comment_factory);
    Py_VISIT(st->pi_factory);
    return 0;
}

/* m_free handler: releases the module state by delegating to
 * elementtree_clear().
 */
static void
elementtree_free(void *m)
{
    elementtree_clear((PyObject *)m);
}

/* helpers */

/* Join the items of 'list' with an empty separator.
 * Returns a new reference, or NULL with an exception set.
 */
LOCAL(PyObject*)
list_join(PyObject* list)
{
    /* join list elements */
    PyObject* joiner;
    PyObject* result;

    joiner = PyUnicode_FromStringAndSize("", 0);
    if (!joiner)
        return NULL;
    result = PyUnicode_Join(joiner, list);
    Py_DECREF(joiner);
    return result;
}

/* Is the given object an empty dictionary?
 * Only exact dict instances qualify; subclasses always yield 0.
*/
static int
is_empty_dict(PyObject *obj)
{
    return PyDict_CheckExact(obj) && PyDict_GET_SIZE(obj) == 0;
}


/* -------------------------------------------------------------------- */
/* the Element type */

/* Optional, separately allocated part of an element: attributes and
 * children.  Elements without either have extra == NULL.
 */
typedef struct {

    /* attributes (a dictionary object), or NULL if no attributes */
    PyObject* attrib;

    /* child elements */
    Py_ssize_t length; /* actual number of items */
    Py_ssize_t allocated; /* allocated items */

    /* this either points to _children or to a malloced buffer */
    PyObject* *children;

    PyObject* _children[STATIC_CHILDREN];

} ElementObjectExtra;

typedef struct {
    PyObject_HEAD

    /* element tag (a string). */
    PyObject* tag;

    /* text before first child.  note that this is a tagged pointer;
       use JOIN_OBJ to get the object pointer.  the join flag is used
       to distinguish lists created by the tree builder from lists
       assigned to the attribute by application code; the former
       should be joined before being returned to the user, the latter
       should be left intact. */
    PyObject* text;

    /* text after this element, in parent.  note that this is a tagged
       pointer; use JOIN_OBJ to get the object pointer. */
    PyObject* tail;

    /* attributes and children; NULL until first needed */
    ElementObjectExtra* extra;

    PyObject *weakreflist; /* For tp_weaklistoffset */

} ElementObject;


#define Element_CheckExact(op) Py_IS_TYPE(op, &Element_Type)
#define Element_Check(op) PyObject_TypeCheck(op, &Element_Type)


/* -------------------------------------------------------------------- */
/* Element constructors and destructor */

/* Allocate a fresh 'extra' block for self, with no children and with
 * 'attrib' (may be NULL; a new reference is taken otherwise) as the
 * attribute dictionary.  Any previous value of self->extra is simply
 * overwritten, not released.
 * Returns 0 on success, -1 with MemoryError set on failure.
 */
LOCAL(int)
create_extra(ElementObject* self, PyObject* attrib)
{
    self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
    if (!self->extra) {
        PyErr_NoMemory();
        return -1;
    }

    Py_XINCREF(attrib);
    self->extra->attrib = attrib;

    self->extra->length = 0;
    self->extra->allocated = STATIC_CHILDREN;
    /* start out using the inline child array */
    self->extra->children = self->extra->_children;

    return 0;
}

/* Release an 'extra' block: drop the attrib and child references, free
 * the child array if it was heap-allocated, then free the block itself.
 * A NULL argument is a no-op.
 */
LOCAL(void)
dealloc_extra(ElementObjectExtra *extra)
{
    Py_ssize_t i;

    if (!extra)
        return;

    Py_XDECREF(extra->attrib);

    for (i = 0; i < extra->length; i++)
        Py_DECREF(extra->children[i]);

    if (extra->children != extra->_children)
        PyObject_Free(extra->children);

    PyObject_Free(extra);
}

/* Detach and release self->extra, if any.  self->extra is set to NULL
 * before anything is released.
 */
LOCAL(void)
clear_extra(ElementObject* self)
{
    ElementObjectExtra *myextra;

    if (!self->extra)
        return;

    /* Avoid DECREFs calling into this code again (cycles, etc.)
     */
    myextra = self->extra;
    self->extra = NULL;

    dealloc_extra(myextra);
}

/* Convenience internal function to create new Element objects with the given
 * tag and attributes.
 * 'attrib' may be NULL; an 'extra' block is only created when attrib is
 * given and is not an empty exact dict.  text and tail start out as None.
 * Returns a new reference, or NULL on failure.
*/
LOCAL(PyObject*)
create_new_element(PyObject* tag, PyObject* attrib)
{
    ElementObject* self;

    self = PyObject_GC_New(ElementObject, &Element_Type);
    if (self == NULL)
        return NULL;
    self->extra = NULL;

    Py_INCREF(tag);
    self->tag = tag;

    Py_INCREF(Py_None);
    self->text = Py_None;

    Py_INCREF(Py_None);
    self->tail = Py_None;

    self->weakreflist = NULL;

    /* all fields are initialized at this point */
    PyObject_GC_Track(self);

    if (attrib != NULL && !is_empty_dict(attrib)) {
        if (create_extra(self, attrib) < 0) {
            Py_DECREF(self);
            return NULL;
        }
    }

    return (PyObject*) self;
}

/* tp_new: allocate an Element whose tag, text and tail are all None and
 * which has no 'extra' block.  'args' and 'kwds' are not used here.
 */
static PyObject *
element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
    ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
    if (e != NULL) {
        Py_INCREF(Py_None);
        e->tag = Py_None;

        Py_INCREF(Py_None);
        e->text = Py_None;

        Py_INCREF(Py_None);
        e->tail = Py_None;

        e->extra = NULL;
        e->weakreflist = NULL;
    }
    return (PyObject *)e;
}

/* Helper function for extracting the attrib dictionary from a keywords dict.
 * This is required by some constructors/functions in this module that can
 * either accept attrib as a keyword argument or all attributes splashed
 * directly into *kwds.
 *
 * Return a dictionary with the content of kwds merged into the content of
 * attrib. If there is no attrib keyword, return a copy of kwds.
332 */ 333static PyObject* 334get_attrib_from_keywords(PyObject *kwds) 335{ 336 PyObject *attrib_str = PyUnicode_FromString("attrib"); 337 if (attrib_str == NULL) { 338 return NULL; 339 } 340 PyObject *attrib = PyDict_GetItemWithError(kwds, attrib_str); 341 342 if (attrib) { 343 /* If attrib was found in kwds, copy its value and remove it from 344 * kwds 345 */ 346 if (!PyDict_Check(attrib)) { 347 Py_DECREF(attrib_str); 348 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s", 349 Py_TYPE(attrib)->tp_name); 350 return NULL; 351 } 352 attrib = PyDict_Copy(attrib); 353 if (attrib && PyDict_DelItem(kwds, attrib_str) < 0) { 354 Py_DECREF(attrib); 355 attrib = NULL; 356 } 357 } 358 else if (!PyErr_Occurred()) { 359 attrib = PyDict_New(); 360 } 361 362 Py_DECREF(attrib_str); 363 364 if (attrib != NULL && PyDict_Update(attrib, kwds) < 0) { 365 Py_DECREF(attrib); 366 return NULL; 367 } 368 return attrib; 369} 370 371/*[clinic input] 372module _elementtree 373class _elementtree.Element "ElementObject *" "&Element_Type" 374class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type" 375class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type" 376[clinic start generated code]*/ 377/*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/ 378 379static int 380element_init(PyObject *self, PyObject *args, PyObject *kwds) 381{ 382 PyObject *tag; 383 PyObject *attrib = NULL; 384 ElementObject *self_elem; 385 386 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib)) 387 return -1; 388 389 if (attrib) { 390 /* attrib passed as positional arg */ 391 attrib = PyDict_Copy(attrib); 392 if (!attrib) 393 return -1; 394 if (kwds) { 395 if (PyDict_Update(attrib, kwds) < 0) { 396 Py_DECREF(attrib); 397 return -1; 398 } 399 } 400 } else if (kwds) { 401 /* have keywords args */ 402 attrib = get_attrib_from_keywords(kwds); 403 if (!attrib) 404 return -1; 405 } 406 407 self_elem = (ElementObject *)self; 408 409 if (attrib 
!= NULL && !is_empty_dict(attrib)) { 410 if (create_extra(self_elem, attrib) < 0) { 411 Py_DECREF(attrib); 412 return -1; 413 } 414 } 415 416 /* We own a reference to attrib here and it's no longer needed. */ 417 Py_XDECREF(attrib); 418 419 /* Replace the objects already pointed to by tag, text and tail. */ 420 Py_INCREF(tag); 421 Py_XSETREF(self_elem->tag, tag); 422 423 Py_INCREF(Py_None); 424 _set_joined_ptr(&self_elem->text, Py_None); 425 426 Py_INCREF(Py_None); 427 _set_joined_ptr(&self_elem->tail, Py_None); 428 429 return 0; 430} 431 432LOCAL(int) 433element_resize(ElementObject* self, Py_ssize_t extra) 434{ 435 Py_ssize_t size; 436 PyObject* *children; 437 438 assert(extra >= 0); 439 /* make sure self->children can hold the given number of extra 440 elements. set an exception and return -1 if allocation failed */ 441 442 if (!self->extra) { 443 if (create_extra(self, NULL) < 0) 444 return -1; 445 } 446 447 size = self->extra->length + extra; /* never overflows */ 448 449 if (size > self->extra->allocated) { 450 /* use Python 2.4's list growth strategy */ 451 size = (size >> 3) + (size < 9 ? 3 : 6) + size; 452 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children" 453 * which needs at least 4 bytes. 454 * Although it's a false alarm always assume at least one child to 455 * be safe. 456 */ 457 size = size ? size : 1; 458 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*)) 459 goto nomemory; 460 if (self->extra->children != self->extra->_children) { 461 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer 462 * "children", which needs at least 4 bytes. Although it's a 463 * false alarm always assume at least one child to be safe. 
464 */ 465 children = PyObject_Realloc(self->extra->children, 466 size * sizeof(PyObject*)); 467 if (!children) 468 goto nomemory; 469 } else { 470 children = PyObject_Malloc(size * sizeof(PyObject*)); 471 if (!children) 472 goto nomemory; 473 /* copy existing children from static area to malloc buffer */ 474 memcpy(children, self->extra->children, 475 self->extra->length * sizeof(PyObject*)); 476 } 477 self->extra->children = children; 478 self->extra->allocated = size; 479 } 480 481 return 0; 482 483 nomemory: 484 PyErr_NoMemory(); 485 return -1; 486} 487 488LOCAL(void) 489raise_type_error(PyObject *element) 490{ 491 PyErr_Format(PyExc_TypeError, 492 "expected an Element, not \"%.200s\"", 493 Py_TYPE(element)->tp_name); 494} 495 496LOCAL(int) 497element_add_subelement(ElementObject* self, PyObject* element) 498{ 499 /* add a child element to a parent */ 500 501 if (!Element_Check(element)) { 502 raise_type_error(element); 503 return -1; 504 } 505 506 if (element_resize(self, 1) < 0) 507 return -1; 508 509 Py_INCREF(element); 510 self->extra->children[self->extra->length] = element; 511 512 self->extra->length++; 513 514 return 0; 515} 516 517LOCAL(PyObject*) 518element_get_attrib(ElementObject* self) 519{ 520 /* return borrowed reference to attrib dictionary */ 521 /* note: this function assumes that the extra section exists */ 522 523 PyObject* res = self->extra->attrib; 524 525 if (!res) { 526 /* create missing dictionary */ 527 res = self->extra->attrib = PyDict_New(); 528 } 529 530 return res; 531} 532 533LOCAL(PyObject*) 534element_get_text(ElementObject* self) 535{ 536 /* return borrowed reference to text attribute */ 537 538 PyObject *res = self->text; 539 540 if (JOIN_GET(res)) { 541 res = JOIN_OBJ(res); 542 if (PyList_CheckExact(res)) { 543 PyObject *tmp = list_join(res); 544 if (!tmp) 545 return NULL; 546 self->text = tmp; 547 Py_DECREF(res); 548 res = tmp; 549 } 550 } 551 552 return res; 553} 554 555LOCAL(PyObject*) 556element_get_tail(ElementObject* 
self) 557{ 558 /* return borrowed reference to text attribute */ 559 560 PyObject *res = self->tail; 561 562 if (JOIN_GET(res)) { 563 res = JOIN_OBJ(res); 564 if (PyList_CheckExact(res)) { 565 PyObject *tmp = list_join(res); 566 if (!tmp) 567 return NULL; 568 self->tail = tmp; 569 Py_DECREF(res); 570 res = tmp; 571 } 572 } 573 574 return res; 575} 576 577static PyObject* 578subelement(PyObject *self, PyObject *args, PyObject *kwds) 579{ 580 PyObject* elem; 581 582 ElementObject* parent; 583 PyObject* tag; 584 PyObject* attrib = NULL; 585 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement", 586 &Element_Type, &parent, &tag, 587 &PyDict_Type, &attrib)) { 588 return NULL; 589 } 590 591 if (attrib) { 592 /* attrib passed as positional arg */ 593 attrib = PyDict_Copy(attrib); 594 if (!attrib) 595 return NULL; 596 if (kwds != NULL && PyDict_Update(attrib, kwds) < 0) { 597 Py_DECREF(attrib); 598 return NULL; 599 } 600 } else if (kwds) { 601 /* have keyword args */ 602 attrib = get_attrib_from_keywords(kwds); 603 if (!attrib) 604 return NULL; 605 } else { 606 /* no attrib arg, no kwds, so no attribute */ 607 } 608 609 elem = create_new_element(tag, attrib); 610 Py_XDECREF(attrib); 611 if (elem == NULL) 612 return NULL; 613 614 if (element_add_subelement(parent, elem) < 0) { 615 Py_DECREF(elem); 616 return NULL; 617 } 618 619 return elem; 620} 621 622static int 623element_gc_traverse(ElementObject *self, visitproc visit, void *arg) 624{ 625 Py_VISIT(self->tag); 626 Py_VISIT(JOIN_OBJ(self->text)); 627 Py_VISIT(JOIN_OBJ(self->tail)); 628 629 if (self->extra) { 630 Py_ssize_t i; 631 Py_VISIT(self->extra->attrib); 632 633 for (i = 0; i < self->extra->length; ++i) 634 Py_VISIT(self->extra->children[i]); 635 } 636 return 0; 637} 638 639static int 640element_gc_clear(ElementObject *self) 641{ 642 Py_CLEAR(self->tag); 643 _clear_joined_ptr(&self->text); 644 _clear_joined_ptr(&self->tail); 645 646 /* After dropping all references from extra, it's no longer valid anyway, 647 * so fully 
deallocate it. 648 */ 649 clear_extra(self); 650 return 0; 651} 652 653static void 654element_dealloc(ElementObject* self) 655{ 656 /* bpo-31095: UnTrack is needed before calling any callbacks */ 657 PyObject_GC_UnTrack(self); 658 Py_TRASHCAN_BEGIN(self, element_dealloc) 659 660 if (self->weakreflist != NULL) 661 PyObject_ClearWeakRefs((PyObject *) self); 662 663 /* element_gc_clear clears all references and deallocates extra 664 */ 665 element_gc_clear(self); 666 667 Py_TYPE(self)->tp_free((PyObject *)self); 668 Py_TRASHCAN_END 669} 670 671/* -------------------------------------------------------------------- */ 672 673/*[clinic input] 674_elementtree.Element.append 675 676 subelement: object(subclass_of='&Element_Type') 677 / 678 679[clinic start generated code]*/ 680 681static PyObject * 682_elementtree_Element_append_impl(ElementObject *self, PyObject *subelement) 683/*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/ 684{ 685 if (element_add_subelement(self, subelement) < 0) 686 return NULL; 687 688 Py_RETURN_NONE; 689} 690 691/*[clinic input] 692_elementtree.Element.clear 693 694[clinic start generated code]*/ 695 696static PyObject * 697_elementtree_Element_clear_impl(ElementObject *self) 698/*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/ 699{ 700 clear_extra(self); 701 702 Py_INCREF(Py_None); 703 _set_joined_ptr(&self->text, Py_None); 704 705 Py_INCREF(Py_None); 706 _set_joined_ptr(&self->tail, Py_None); 707 708 Py_RETURN_NONE; 709} 710 711/*[clinic input] 712_elementtree.Element.__copy__ 713 714[clinic start generated code]*/ 715 716static PyObject * 717_elementtree_Element___copy___impl(ElementObject *self) 718/*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/ 719{ 720 Py_ssize_t i; 721 ElementObject* element; 722 723 element = (ElementObject*) create_new_element( 724 self->tag, self->extra ? 
self->extra->attrib : NULL); 725 if (!element) 726 return NULL; 727 728 Py_INCREF(JOIN_OBJ(self->text)); 729 _set_joined_ptr(&element->text, self->text); 730 731 Py_INCREF(JOIN_OBJ(self->tail)); 732 _set_joined_ptr(&element->tail, self->tail); 733 734 assert(!element->extra || !element->extra->length); 735 if (self->extra) { 736 if (element_resize(element, self->extra->length) < 0) { 737 Py_DECREF(element); 738 return NULL; 739 } 740 741 for (i = 0; i < self->extra->length; i++) { 742 Py_INCREF(self->extra->children[i]); 743 element->extra->children[i] = self->extra->children[i]; 744 } 745 746 assert(!element->extra->length); 747 element->extra->length = self->extra->length; 748 } 749 750 return (PyObject*) element; 751} 752 753/* Helper for a deep copy. */ 754LOCAL(PyObject *) deepcopy(PyObject *, PyObject *); 755 756/*[clinic input] 757_elementtree.Element.__deepcopy__ 758 759 memo: object(subclass_of="&PyDict_Type") 760 / 761 762[clinic start generated code]*/ 763 764static PyObject * 765_elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo) 766/*[clinic end generated code: output=eefc3df50465b642 input=a2d40348c0aade10]*/ 767{ 768 Py_ssize_t i; 769 ElementObject* element; 770 PyObject* tag; 771 PyObject* attrib; 772 PyObject* text; 773 PyObject* tail; 774 PyObject* id; 775 776 tag = deepcopy(self->tag, memo); 777 if (!tag) 778 return NULL; 779 780 if (self->extra && self->extra->attrib) { 781 attrib = deepcopy(self->extra->attrib, memo); 782 if (!attrib) { 783 Py_DECREF(tag); 784 return NULL; 785 } 786 } else { 787 attrib = NULL; 788 } 789 790 element = (ElementObject*) create_new_element(tag, attrib); 791 792 Py_DECREF(tag); 793 Py_XDECREF(attrib); 794 795 if (!element) 796 return NULL; 797 798 text = deepcopy(JOIN_OBJ(self->text), memo); 799 if (!text) 800 goto error; 801 _set_joined_ptr(&element->text, JOIN_SET(text, JOIN_GET(self->text))); 802 803 tail = deepcopy(JOIN_OBJ(self->tail), memo); 804 if (!tail) 805 goto error; 806 
_set_joined_ptr(&element->tail, JOIN_SET(tail, JOIN_GET(self->tail))); 807 808 assert(!element->extra || !element->extra->length); 809 if (self->extra) { 810 if (element_resize(element, self->extra->length) < 0) 811 goto error; 812 813 for (i = 0; i < self->extra->length; i++) { 814 PyObject* child = deepcopy(self->extra->children[i], memo); 815 if (!child || !Element_Check(child)) { 816 if (child) { 817 raise_type_error(child); 818 Py_DECREF(child); 819 } 820 element->extra->length = i; 821 goto error; 822 } 823 element->extra->children[i] = child; 824 } 825 826 assert(!element->extra->length); 827 element->extra->length = self->extra->length; 828 } 829 830 /* add object to memo dictionary (so deepcopy won't visit it again) */ 831 id = PyLong_FromSsize_t((uintptr_t) self); 832 if (!id) 833 goto error; 834 835 i = PyDict_SetItem(memo, id, (PyObject*) element); 836 837 Py_DECREF(id); 838 839 if (i < 0) 840 goto error; 841 842 return (PyObject*) element; 843 844 error: 845 Py_DECREF(element); 846 return NULL; 847} 848 849LOCAL(PyObject *) 850deepcopy(PyObject *object, PyObject *memo) 851{ 852 /* do a deep copy of the given object */ 853 elementtreestate *st; 854 PyObject *stack[2]; 855 856 /* Fast paths */ 857 if (object == Py_None || PyUnicode_CheckExact(object)) { 858 Py_INCREF(object); 859 return object; 860 } 861 862 if (Py_REFCNT(object) == 1) { 863 if (PyDict_CheckExact(object)) { 864 PyObject *key, *value; 865 Py_ssize_t pos = 0; 866 int simple = 1; 867 while (PyDict_Next(object, &pos, &key, &value)) { 868 if (!PyUnicode_CheckExact(key) || !PyUnicode_CheckExact(value)) { 869 simple = 0; 870 break; 871 } 872 } 873 if (simple) 874 return PyDict_Copy(object); 875 /* Fall through to general case */ 876 } 877 else if (Element_CheckExact(object)) { 878 return _elementtree_Element___deepcopy___impl( 879 (ElementObject *)object, memo); 880 } 881 } 882 883 /* General case */ 884 st = ET_STATE_GLOBAL; 885 if (!st->deepcopy_obj) { 886 PyErr_SetString(PyExc_RuntimeError, 
887 "deepcopy helper not found"); 888 return NULL; 889 } 890 891 stack[0] = object; 892 stack[1] = memo; 893 return _PyObject_FastCall(st->deepcopy_obj, stack, 2); 894} 895 896 897/*[clinic input] 898_elementtree.Element.__sizeof__ -> Py_ssize_t 899 900[clinic start generated code]*/ 901 902static Py_ssize_t 903_elementtree_Element___sizeof___impl(ElementObject *self) 904/*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/ 905{ 906 Py_ssize_t result = _PyObject_SIZE(Py_TYPE(self)); 907 if (self->extra) { 908 result += sizeof(ElementObjectExtra); 909 if (self->extra->children != self->extra->_children) 910 result += sizeof(PyObject*) * self->extra->allocated; 911 } 912 return result; 913} 914 915/* dict keys for getstate/setstate. */ 916#define PICKLED_TAG "tag" 917#define PICKLED_CHILDREN "_children" 918#define PICKLED_ATTRIB "attrib" 919#define PICKLED_TAIL "tail" 920#define PICKLED_TEXT "text" 921 922/* __getstate__ returns a fabricated instance dict as in the pure-Python 923 * Element implementation, for interoperability/interchangeability. This 924 * makes the pure-Python implementation details an API, but (a) there aren't 925 * any unnecessary structures there; and (b) it buys compatibility with 3.2 926 * pickles. See issue #16076. 927 */ 928/*[clinic input] 929_elementtree.Element.__getstate__ 930 931[clinic start generated code]*/ 932 933static PyObject * 934_elementtree_Element___getstate___impl(ElementObject *self) 935/*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/ 936{ 937 Py_ssize_t i; 938 PyObject *children, *attrib; 939 940 /* Build a list of children. */ 941 children = PyList_New(self->extra ? 
self->extra->length : 0); 942 if (!children) 943 return NULL; 944 for (i = 0; i < PyList_GET_SIZE(children); i++) { 945 PyObject *child = self->extra->children[i]; 946 Py_INCREF(child); 947 PyList_SET_ITEM(children, i, child); 948 } 949 950 if (self->extra && self->extra->attrib) { 951 attrib = self->extra->attrib; 952 Py_INCREF(attrib); 953 } 954 else { 955 attrib = PyDict_New(); 956 if (!attrib) { 957 Py_DECREF(children); 958 return NULL; 959 } 960 } 961 962 return Py_BuildValue("{sOsNsNsOsO}", 963 PICKLED_TAG, self->tag, 964 PICKLED_CHILDREN, children, 965 PICKLED_ATTRIB, attrib, 966 PICKLED_TEXT, JOIN_OBJ(self->text), 967 PICKLED_TAIL, JOIN_OBJ(self->tail)); 968} 969 970static PyObject * 971element_setstate_from_attributes(ElementObject *self, 972 PyObject *tag, 973 PyObject *attrib, 974 PyObject *text, 975 PyObject *tail, 976 PyObject *children) 977{ 978 Py_ssize_t i, nchildren; 979 ElementObjectExtra *oldextra = NULL; 980 981 if (!tag) { 982 PyErr_SetString(PyExc_TypeError, "tag may not be NULL"); 983 return NULL; 984 } 985 986 Py_INCREF(tag); 987 Py_XSETREF(self->tag, tag); 988 989 text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None; 990 Py_INCREF(JOIN_OBJ(text)); 991 _set_joined_ptr(&self->text, text); 992 993 tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None; 994 Py_INCREF(JOIN_OBJ(tail)); 995 _set_joined_ptr(&self->tail, tail); 996 997 /* Handle ATTRIB and CHILDREN. */ 998 if (!children && !attrib) { 999 Py_RETURN_NONE; 1000 } 1001 1002 /* Compute 'nchildren'. */ 1003 if (children) { 1004 if (!PyList_Check(children)) { 1005 PyErr_SetString(PyExc_TypeError, "'_children' is not a list"); 1006 return NULL; 1007 } 1008 nchildren = PyList_GET_SIZE(children); 1009 1010 /* (Re-)allocate 'extra'. 1011 Avoid DECREFs calling into this code again (cycles, etc.) 
1012 */ 1013 oldextra = self->extra; 1014 self->extra = NULL; 1015 if (element_resize(self, nchildren)) { 1016 assert(!self->extra || !self->extra->length); 1017 clear_extra(self); 1018 self->extra = oldextra; 1019 return NULL; 1020 } 1021 assert(self->extra); 1022 assert(self->extra->allocated >= nchildren); 1023 if (oldextra) { 1024 assert(self->extra->attrib == NULL); 1025 self->extra->attrib = oldextra->attrib; 1026 oldextra->attrib = NULL; 1027 } 1028 1029 /* Copy children */ 1030 for (i = 0; i < nchildren; i++) { 1031 PyObject *child = PyList_GET_ITEM(children, i); 1032 if (!Element_Check(child)) { 1033 raise_type_error(child); 1034 self->extra->length = i; 1035 dealloc_extra(oldextra); 1036 return NULL; 1037 } 1038 Py_INCREF(child); 1039 self->extra->children[i] = child; 1040 } 1041 1042 assert(!self->extra->length); 1043 self->extra->length = nchildren; 1044 } 1045 else { 1046 if (element_resize(self, 0)) { 1047 return NULL; 1048 } 1049 } 1050 1051 /* Stash attrib. */ 1052 Py_XINCREF(attrib); 1053 Py_XSETREF(self->extra->attrib, attrib); 1054 dealloc_extra(oldextra); 1055 1056 Py_RETURN_NONE; 1057} 1058 1059/* __setstate__ for Element instance from the Python implementation. 1060 * 'state' should be the instance dict. 
1061 */ 1062 1063static PyObject * 1064element_setstate_from_Python(ElementObject *self, PyObject *state) 1065{ 1066 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT, 1067 PICKLED_TAIL, PICKLED_CHILDREN, 0}; 1068 PyObject *args; 1069 PyObject *tag, *attrib, *text, *tail, *children; 1070 PyObject *retval; 1071 1072 tag = attrib = text = tail = children = NULL; 1073 args = PyTuple_New(0); 1074 if (!args) 1075 return NULL; 1076 1077 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag, 1078 &attrib, &text, &tail, &children)) 1079 retval = element_setstate_from_attributes(self, tag, attrib, text, 1080 tail, children); 1081 else 1082 retval = NULL; 1083 1084 Py_DECREF(args); 1085 return retval; 1086} 1087 1088/*[clinic input] 1089_elementtree.Element.__setstate__ 1090 1091 state: object 1092 / 1093 1094[clinic start generated code]*/ 1095 1096static PyObject * 1097_elementtree_Element___setstate__(ElementObject *self, PyObject *state) 1098/*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/ 1099{ 1100 if (!PyDict_CheckExact(state)) { 1101 PyErr_Format(PyExc_TypeError, 1102 "Don't know how to unpickle \"%.200R\" as an Element", 1103 state); 1104 return NULL; 1105 } 1106 else 1107 return element_setstate_from_Python(self, state); 1108} 1109 1110LOCAL(int) 1111checkpath(PyObject* tag) 1112{ 1113 Py_ssize_t i; 1114 int check = 1; 1115 1116 /* check if a tag contains an xpath character */ 1117 1118#define PATHCHAR(ch) \ 1119 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.') 1120 1121 if (PyUnicode_Check(tag)) { 1122 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag); 1123 const void *data = PyUnicode_DATA(tag); 1124 unsigned int kind = PyUnicode_KIND(tag); 1125 if (len >= 3 && PyUnicode_READ(kind, data, 0) == '{' && ( 1126 PyUnicode_READ(kind, data, 1) == '}' || ( 1127 PyUnicode_READ(kind, data, 1) == '*' && 1128 PyUnicode_READ(kind, data, 2) == '}'))) { 1129 /* wildcard: '{}tag' or '{*}tag' */ 1130 
return 1; 1131 } 1132 for (i = 0; i < len; i++) { 1133 Py_UCS4 ch = PyUnicode_READ(kind, data, i); 1134 if (ch == '{') 1135 check = 0; 1136 else if (ch == '}') 1137 check = 1; 1138 else if (check && PATHCHAR(ch)) 1139 return 1; 1140 } 1141 return 0; 1142 } 1143 if (PyBytes_Check(tag)) { 1144 const char *p = PyBytes_AS_STRING(tag); 1145 const Py_ssize_t len = PyBytes_GET_SIZE(tag); 1146 if (len >= 3 && p[0] == '{' && ( 1147 p[1] == '}' || (p[1] == '*' && p[2] == '}'))) { 1148 /* wildcard: '{}tag' or '{*}tag' */ 1149 return 1; 1150 } 1151 for (i = 0; i < len; i++) { 1152 if (p[i] == '{') 1153 check = 0; 1154 else if (p[i] == '}') 1155 check = 1; 1156 else if (check && PATHCHAR(p[i])) 1157 return 1; 1158 } 1159 return 0; 1160 } 1161 1162 return 1; /* unknown type; might be path expression */ 1163} 1164 1165/*[clinic input] 1166_elementtree.Element.extend 1167 1168 elements: object 1169 / 1170 1171[clinic start generated code]*/ 1172 1173static PyObject * 1174_elementtree_Element_extend(ElementObject *self, PyObject *elements) 1175/*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/ 1176{ 1177 PyObject* seq; 1178 Py_ssize_t i; 1179 1180 seq = PySequence_Fast(elements, ""); 1181 if (!seq) { 1182 PyErr_Format( 1183 PyExc_TypeError, 1184 "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name 1185 ); 1186 return NULL; 1187 } 1188 1189 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) { 1190 PyObject* element = PySequence_Fast_GET_ITEM(seq, i); 1191 Py_INCREF(element); 1192 if (element_add_subelement(self, element) < 0) { 1193 Py_DECREF(seq); 1194 Py_DECREF(element); 1195 return NULL; 1196 } 1197 Py_DECREF(element); 1198 } 1199 1200 Py_DECREF(seq); 1201 1202 Py_RETURN_NONE; 1203} 1204 1205/*[clinic input] 1206_elementtree.Element.find 1207 1208 path: object 1209 namespaces: object = None 1210 1211[clinic start generated code]*/ 1212 1213static PyObject * 1214_elementtree_Element_find_impl(ElementObject *self, PyObject *path, 1215 
PyObject *namespaces) 1216/*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/ 1217{ 1218 Py_ssize_t i; 1219 elementtreestate *st = ET_STATE_GLOBAL; 1220 1221 if (checkpath(path) || namespaces != Py_None) { 1222 _Py_IDENTIFIER(find); 1223 return _PyObject_CallMethodIdObjArgs( 1224 st->elementpath_obj, &PyId_find, self, path, namespaces, NULL 1225 ); 1226 } 1227 1228 if (!self->extra) 1229 Py_RETURN_NONE; 1230 1231 for (i = 0; i < self->extra->length; i++) { 1232 PyObject* item = self->extra->children[i]; 1233 int rc; 1234 assert(Element_Check(item)); 1235 Py_INCREF(item); 1236 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ); 1237 if (rc > 0) 1238 return item; 1239 Py_DECREF(item); 1240 if (rc < 0) 1241 return NULL; 1242 } 1243 1244 Py_RETURN_NONE; 1245} 1246 1247/*[clinic input] 1248_elementtree.Element.findtext 1249 1250 path: object 1251 default: object = None 1252 namespaces: object = None 1253 1254[clinic start generated code]*/ 1255 1256static PyObject * 1257_elementtree_Element_findtext_impl(ElementObject *self, PyObject *path, 1258 PyObject *default_value, 1259 PyObject *namespaces) 1260/*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/ 1261{ 1262 Py_ssize_t i; 1263 _Py_IDENTIFIER(findtext); 1264 elementtreestate *st = ET_STATE_GLOBAL; 1265 1266 if (checkpath(path) || namespaces != Py_None) 1267 return _PyObject_CallMethodIdObjArgs( 1268 st->elementpath_obj, &PyId_findtext, 1269 self, path, default_value, namespaces, NULL 1270 ); 1271 1272 if (!self->extra) { 1273 Py_INCREF(default_value); 1274 return default_value; 1275 } 1276 1277 for (i = 0; i < self->extra->length; i++) { 1278 PyObject *item = self->extra->children[i]; 1279 int rc; 1280 assert(Element_Check(item)); 1281 Py_INCREF(item); 1282 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ); 1283 if (rc > 0) { 1284 PyObject* text = element_get_text((ElementObject*)item); 1285 if (text == Py_None) { 1286 
Py_DECREF(item); 1287 return PyUnicode_New(0, 0); 1288 } 1289 Py_XINCREF(text); 1290 Py_DECREF(item); 1291 return text; 1292 } 1293 Py_DECREF(item); 1294 if (rc < 0) 1295 return NULL; 1296 } 1297 1298 Py_INCREF(default_value); 1299 return default_value; 1300} 1301 1302/*[clinic input] 1303_elementtree.Element.findall 1304 1305 path: object 1306 namespaces: object = None 1307 1308[clinic start generated code]*/ 1309 1310static PyObject * 1311_elementtree_Element_findall_impl(ElementObject *self, PyObject *path, 1312 PyObject *namespaces) 1313/*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/ 1314{ 1315 Py_ssize_t i; 1316 PyObject* out; 1317 elementtreestate *st = ET_STATE_GLOBAL; 1318 1319 if (checkpath(path) || namespaces != Py_None) { 1320 _Py_IDENTIFIER(findall); 1321 return _PyObject_CallMethodIdObjArgs( 1322 st->elementpath_obj, &PyId_findall, self, path, namespaces, NULL 1323 ); 1324 } 1325 1326 out = PyList_New(0); 1327 if (!out) 1328 return NULL; 1329 1330 if (!self->extra) 1331 return out; 1332 1333 for (i = 0; i < self->extra->length; i++) { 1334 PyObject* item = self->extra->children[i]; 1335 int rc; 1336 assert(Element_Check(item)); 1337 Py_INCREF(item); 1338 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ); 1339 if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) { 1340 Py_DECREF(item); 1341 Py_DECREF(out); 1342 return NULL; 1343 } 1344 Py_DECREF(item); 1345 } 1346 1347 return out; 1348} 1349 1350/*[clinic input] 1351_elementtree.Element.iterfind 1352 1353 path: object 1354 namespaces: object = None 1355 1356[clinic start generated code]*/ 1357 1358static PyObject * 1359_elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path, 1360 PyObject *namespaces) 1361/*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/ 1362{ 1363 PyObject* tag = path; 1364 _Py_IDENTIFIER(iterfind); 1365 elementtreestate *st = ET_STATE_GLOBAL; 1366 1367 return 
_PyObject_CallMethodIdObjArgs( 1368 st->elementpath_obj, &PyId_iterfind, self, tag, namespaces, NULL); 1369} 1370 1371/*[clinic input] 1372_elementtree.Element.get 1373 1374 key: object 1375 default: object = None 1376 1377[clinic start generated code]*/ 1378 1379static PyObject * 1380_elementtree_Element_get_impl(ElementObject *self, PyObject *key, 1381 PyObject *default_value) 1382/*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/ 1383{ 1384 if (self->extra && self->extra->attrib) { 1385 PyObject *attrib = self->extra->attrib; 1386 Py_INCREF(attrib); 1387 PyObject *value = PyDict_GetItemWithError(attrib, key); 1388 Py_XINCREF(value); 1389 Py_DECREF(attrib); 1390 if (value != NULL || PyErr_Occurred()) { 1391 return value; 1392 } 1393 } 1394 1395 Py_INCREF(default_value); 1396 return default_value; 1397} 1398 1399static PyObject * 1400create_elementiter(ElementObject *self, PyObject *tag, int gettext); 1401 1402 1403/*[clinic input] 1404_elementtree.Element.iter 1405 1406 tag: object = None 1407 1408[clinic start generated code]*/ 1409 1410static PyObject * 1411_elementtree_Element_iter_impl(ElementObject *self, PyObject *tag) 1412/*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/ 1413{ 1414 if (PyUnicode_Check(tag)) { 1415 if (PyUnicode_READY(tag) < 0) 1416 return NULL; 1417 if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*') 1418 tag = Py_None; 1419 } 1420 else if (PyBytes_Check(tag)) { 1421 if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*') 1422 tag = Py_None; 1423 } 1424 1425 return create_elementiter(self, tag, 0); 1426} 1427 1428 1429/*[clinic input] 1430_elementtree.Element.itertext 1431 1432[clinic start generated code]*/ 1433 1434static PyObject * 1435_elementtree_Element_itertext_impl(ElementObject *self) 1436/*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/ 1437{ 1438 return create_elementiter(self, Py_None, 1); 1439} 1440 1441 
1442static PyObject* 1443element_getitem(PyObject* self_, Py_ssize_t index) 1444{ 1445 ElementObject* self = (ElementObject*) self_; 1446 1447 if (!self->extra || index < 0 || index >= self->extra->length) { 1448 PyErr_SetString( 1449 PyExc_IndexError, 1450 "child index out of range" 1451 ); 1452 return NULL; 1453 } 1454 1455 Py_INCREF(self->extra->children[index]); 1456 return self->extra->children[index]; 1457} 1458 1459/*[clinic input] 1460_elementtree.Element.insert 1461 1462 index: Py_ssize_t 1463 subelement: object(subclass_of='&Element_Type') 1464 / 1465 1466[clinic start generated code]*/ 1467 1468static PyObject * 1469_elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index, 1470 PyObject *subelement) 1471/*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/ 1472{ 1473 Py_ssize_t i; 1474 1475 if (!self->extra) { 1476 if (create_extra(self, NULL) < 0) 1477 return NULL; 1478 } 1479 1480 if (index < 0) { 1481 index += self->extra->length; 1482 if (index < 0) 1483 index = 0; 1484 } 1485 if (index > self->extra->length) 1486 index = self->extra->length; 1487 1488 if (element_resize(self, 1) < 0) 1489 return NULL; 1490 1491 for (i = self->extra->length; i > index; i--) 1492 self->extra->children[i] = self->extra->children[i-1]; 1493 1494 Py_INCREF(subelement); 1495 self->extra->children[index] = subelement; 1496 1497 self->extra->length++; 1498 1499 Py_RETURN_NONE; 1500} 1501 1502/*[clinic input] 1503_elementtree.Element.items 1504 1505[clinic start generated code]*/ 1506 1507static PyObject * 1508_elementtree_Element_items_impl(ElementObject *self) 1509/*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/ 1510{ 1511 if (!self->extra || !self->extra->attrib) 1512 return PyList_New(0); 1513 1514 return PyDict_Items(self->extra->attrib); 1515} 1516 1517/*[clinic input] 1518_elementtree.Element.keys 1519 1520[clinic start generated code]*/ 1521 1522static PyObject * 
1523_elementtree_Element_keys_impl(ElementObject *self) 1524/*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/ 1525{ 1526 if (!self->extra || !self->extra->attrib) 1527 return PyList_New(0); 1528 1529 return PyDict_Keys(self->extra->attrib); 1530} 1531 1532static Py_ssize_t 1533element_length(ElementObject* self) 1534{ 1535 if (!self->extra) 1536 return 0; 1537 1538 return self->extra->length; 1539} 1540 1541/*[clinic input] 1542_elementtree.Element.makeelement 1543 1544 tag: object 1545 attrib: object(subclass_of='&PyDict_Type') 1546 / 1547 1548[clinic start generated code]*/ 1549 1550static PyObject * 1551_elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag, 1552 PyObject *attrib) 1553/*[clinic end generated code: output=4109832d5bb789ef input=2279d974529c3861]*/ 1554{ 1555 PyObject* elem; 1556 1557 attrib = PyDict_Copy(attrib); 1558 if (!attrib) 1559 return NULL; 1560 1561 elem = create_new_element(tag, attrib); 1562 1563 Py_DECREF(attrib); 1564 1565 return elem; 1566} 1567 1568/*[clinic input] 1569_elementtree.Element.remove 1570 1571 subelement: object(subclass_of='&Element_Type') 1572 / 1573 1574[clinic start generated code]*/ 1575 1576static PyObject * 1577_elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement) 1578/*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/ 1579{ 1580 Py_ssize_t i; 1581 int rc; 1582 PyObject *found; 1583 1584 if (!self->extra) { 1585 /* element has no children, so raise exception */ 1586 PyErr_SetString( 1587 PyExc_ValueError, 1588 "list.remove(x): x not in list" 1589 ); 1590 return NULL; 1591 } 1592 1593 for (i = 0; i < self->extra->length; i++) { 1594 if (self->extra->children[i] == subelement) 1595 break; 1596 rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ); 1597 if (rc > 0) 1598 break; 1599 if (rc < 0) 1600 return NULL; 1601 } 1602 1603 if (i >= self->extra->length) { 1604 /* subelement is not in children, 
so raise exception */ 1605 PyErr_SetString( 1606 PyExc_ValueError, 1607 "list.remove(x): x not in list" 1608 ); 1609 return NULL; 1610 } 1611 1612 found = self->extra->children[i]; 1613 1614 self->extra->length--; 1615 for (; i < self->extra->length; i++) 1616 self->extra->children[i] = self->extra->children[i+1]; 1617 1618 Py_DECREF(found); 1619 Py_RETURN_NONE; 1620} 1621 1622static PyObject* 1623element_repr(ElementObject* self) 1624{ 1625 int status; 1626 1627 if (self->tag == NULL) 1628 return PyUnicode_FromFormat("<Element at %p>", self); 1629 1630 status = Py_ReprEnter((PyObject *)self); 1631 if (status == 0) { 1632 PyObject *res; 1633 res = PyUnicode_FromFormat("<Element %R at %p>", self->tag, self); 1634 Py_ReprLeave((PyObject *)self); 1635 return res; 1636 } 1637 if (status > 0) 1638 PyErr_Format(PyExc_RuntimeError, 1639 "reentrant call inside %s.__repr__", 1640 Py_TYPE(self)->tp_name); 1641 return NULL; 1642} 1643 1644/*[clinic input] 1645_elementtree.Element.set 1646 1647 key: object 1648 value: object 1649 / 1650 1651[clinic start generated code]*/ 1652 1653static PyObject * 1654_elementtree_Element_set_impl(ElementObject *self, PyObject *key, 1655 PyObject *value) 1656/*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/ 1657{ 1658 PyObject* attrib; 1659 1660 if (!self->extra) { 1661 if (create_extra(self, NULL) < 0) 1662 return NULL; 1663 } 1664 1665 attrib = element_get_attrib(self); 1666 if (!attrib) 1667 return NULL; 1668 1669 if (PyDict_SetItem(attrib, key, value) < 0) 1670 return NULL; 1671 1672 Py_RETURN_NONE; 1673} 1674 1675static int 1676element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item) 1677{ 1678 ElementObject* self = (ElementObject*) self_; 1679 Py_ssize_t i; 1680 PyObject* old; 1681 1682 if (!self->extra || index < 0 || index >= self->extra->length) { 1683 PyErr_SetString( 1684 PyExc_IndexError, 1685 "child assignment index out of range"); 1686 return -1; 1687 } 1688 1689 old = 
self->extra->children[index]; 1690 1691 if (item) { 1692 if (!Element_Check(item)) { 1693 raise_type_error(item); 1694 return -1; 1695 } 1696 Py_INCREF(item); 1697 self->extra->children[index] = item; 1698 } else { 1699 self->extra->length--; 1700 for (i = index; i < self->extra->length; i++) 1701 self->extra->children[i] = self->extra->children[i+1]; 1702 } 1703 1704 Py_DECREF(old); 1705 1706 return 0; 1707} 1708 1709static PyObject* 1710element_subscr(PyObject* self_, PyObject* item) 1711{ 1712 ElementObject* self = (ElementObject*) self_; 1713 1714 if (PyIndex_Check(item)) { 1715 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError); 1716 1717 if (i == -1 && PyErr_Occurred()) { 1718 return NULL; 1719 } 1720 if (i < 0 && self->extra) 1721 i += self->extra->length; 1722 return element_getitem(self_, i); 1723 } 1724 else if (PySlice_Check(item)) { 1725 Py_ssize_t start, stop, step, slicelen, i; 1726 size_t cur; 1727 PyObject* list; 1728 1729 if (!self->extra) 1730 return PyList_New(0); 1731 1732 if (PySlice_Unpack(item, &start, &stop, &step) < 0) { 1733 return NULL; 1734 } 1735 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop, 1736 step); 1737 1738 if (slicelen <= 0) 1739 return PyList_New(0); 1740 else { 1741 list = PyList_New(slicelen); 1742 if (!list) 1743 return NULL; 1744 1745 for (cur = start, i = 0; i < slicelen; 1746 cur += step, i++) { 1747 PyObject* item = self->extra->children[cur]; 1748 Py_INCREF(item); 1749 PyList_SET_ITEM(list, i, item); 1750 } 1751 1752 return list; 1753 } 1754 } 1755 else { 1756 PyErr_SetString(PyExc_TypeError, 1757 "element indices must be integers"); 1758 return NULL; 1759 } 1760} 1761 1762static int 1763element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value) 1764{ 1765 ElementObject* self = (ElementObject*) self_; 1766 1767 if (PyIndex_Check(item)) { 1768 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError); 1769 1770 if (i == -1 && PyErr_Occurred()) { 1771 return -1; 1772 } 1773 if (i < 0 
&& self->extra) 1774 i += self->extra->length; 1775 return element_setitem(self_, i, value); 1776 } 1777 else if (PySlice_Check(item)) { 1778 Py_ssize_t start, stop, step, slicelen, newlen, i; 1779 size_t cur; 1780 1781 PyObject* recycle = NULL; 1782 PyObject* seq; 1783 1784 if (!self->extra) { 1785 if (create_extra(self, NULL) < 0) 1786 return -1; 1787 } 1788 1789 if (PySlice_Unpack(item, &start, &stop, &step) < 0) { 1790 return -1; 1791 } 1792 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop, 1793 step); 1794 1795 if (value == NULL) { 1796 /* Delete slice */ 1797 size_t cur; 1798 Py_ssize_t i; 1799 1800 if (slicelen <= 0) 1801 return 0; 1802 1803 /* Since we're deleting, the direction of the range doesn't matter, 1804 * so for simplicity make it always ascending. 1805 */ 1806 if (step < 0) { 1807 stop = start + 1; 1808 start = stop + step * (slicelen - 1) - 1; 1809 step = -step; 1810 } 1811 1812 assert((size_t)slicelen <= SIZE_MAX / sizeof(PyObject *)); 1813 1814 /* recycle is a list that will contain all the children 1815 * scheduled for removal. 1816 */ 1817 if (!(recycle = PyList_New(slicelen))) { 1818 return -1; 1819 } 1820 1821 /* This loop walks over all the children that have to be deleted, 1822 * with cur pointing at them. num_moved is the amount of children 1823 * until the next deleted child that have to be "shifted down" to 1824 * occupy the deleted's places. 1825 * Note that in the ith iteration, shifting is done i+i places down 1826 * because i children were already removed. 1827 */ 1828 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) { 1829 /* Compute how many children have to be moved, clipping at the 1830 * list end. 
1831 */ 1832 Py_ssize_t num_moved = step - 1; 1833 if (cur + step >= (size_t)self->extra->length) { 1834 num_moved = self->extra->length - cur - 1; 1835 } 1836 1837 PyList_SET_ITEM(recycle, i, self->extra->children[cur]); 1838 1839 memmove( 1840 self->extra->children + cur - i, 1841 self->extra->children + cur + 1, 1842 num_moved * sizeof(PyObject *)); 1843 } 1844 1845 /* Leftover "tail" after the last removed child */ 1846 cur = start + (size_t)slicelen * step; 1847 if (cur < (size_t)self->extra->length) { 1848 memmove( 1849 self->extra->children + cur - slicelen, 1850 self->extra->children + cur, 1851 (self->extra->length - cur) * sizeof(PyObject *)); 1852 } 1853 1854 self->extra->length -= slicelen; 1855 1856 /* Discard the recycle list with all the deleted sub-elements */ 1857 Py_DECREF(recycle); 1858 return 0; 1859 } 1860 1861 /* A new slice is actually being assigned */ 1862 seq = PySequence_Fast(value, ""); 1863 if (!seq) { 1864 PyErr_Format( 1865 PyExc_TypeError, 1866 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name 1867 ); 1868 return -1; 1869 } 1870 newlen = PySequence_Fast_GET_SIZE(seq); 1871 1872 if (step != 1 && newlen != slicelen) 1873 { 1874 Py_DECREF(seq); 1875 PyErr_Format(PyExc_ValueError, 1876 "attempt to assign sequence of size %zd " 1877 "to extended slice of size %zd", 1878 newlen, slicelen 1879 ); 1880 return -1; 1881 } 1882 1883 /* Resize before creating the recycle bin, to prevent refleaks. 
*/ 1884 if (newlen > slicelen) { 1885 if (element_resize(self, newlen - slicelen) < 0) { 1886 Py_DECREF(seq); 1887 return -1; 1888 } 1889 } 1890 1891 for (i = 0; i < newlen; i++) { 1892 PyObject *element = PySequence_Fast_GET_ITEM(seq, i); 1893 if (!Element_Check(element)) { 1894 raise_type_error(element); 1895 Py_DECREF(seq); 1896 return -1; 1897 } 1898 } 1899 1900 if (slicelen > 0) { 1901 /* to avoid recursive calls to this method (via decref), move 1902 old items to the recycle bin here, and get rid of them when 1903 we're done modifying the element */ 1904 recycle = PyList_New(slicelen); 1905 if (!recycle) { 1906 Py_DECREF(seq); 1907 return -1; 1908 } 1909 for (cur = start, i = 0; i < slicelen; 1910 cur += step, i++) 1911 PyList_SET_ITEM(recycle, i, self->extra->children[cur]); 1912 } 1913 1914 if (newlen < slicelen) { 1915 /* delete slice */ 1916 for (i = stop; i < self->extra->length; i++) 1917 self->extra->children[i + newlen - slicelen] = self->extra->children[i]; 1918 } else if (newlen > slicelen) { 1919 /* insert slice */ 1920 for (i = self->extra->length-1; i >= stop; i--) 1921 self->extra->children[i + newlen - slicelen] = self->extra->children[i]; 1922 } 1923 1924 /* replace the slice */ 1925 for (cur = start, i = 0; i < newlen; 1926 cur += step, i++) { 1927 PyObject* element = PySequence_Fast_GET_ITEM(seq, i); 1928 Py_INCREF(element); 1929 self->extra->children[cur] = element; 1930 } 1931 1932 self->extra->length += newlen - slicelen; 1933 1934 Py_DECREF(seq); 1935 1936 /* discard the recycle bin, and everything in it */ 1937 Py_XDECREF(recycle); 1938 1939 return 0; 1940 } 1941 else { 1942 PyErr_SetString(PyExc_TypeError, 1943 "element indices must be integers"); 1944 return -1; 1945 } 1946} 1947 1948static PyObject* 1949element_tag_getter(ElementObject *self, void *closure) 1950{ 1951 PyObject *res = self->tag; 1952 Py_INCREF(res); 1953 return res; 1954} 1955 1956static PyObject* 1957element_text_getter(ElementObject *self, void *closure) 1958{ 1959 
PyObject *res = element_get_text(self); 1960 Py_XINCREF(res); 1961 return res; 1962} 1963 1964static PyObject* 1965element_tail_getter(ElementObject *self, void *closure) 1966{ 1967 PyObject *res = element_get_tail(self); 1968 Py_XINCREF(res); 1969 return res; 1970} 1971 1972static PyObject* 1973element_attrib_getter(ElementObject *self, void *closure) 1974{ 1975 PyObject *res; 1976 if (!self->extra) { 1977 if (create_extra(self, NULL) < 0) 1978 return NULL; 1979 } 1980 res = element_get_attrib(self); 1981 Py_XINCREF(res); 1982 return res; 1983} 1984 1985/* macro for setter validation */ 1986#define _VALIDATE_ATTR_VALUE(V) \ 1987 if ((V) == NULL) { \ 1988 PyErr_SetString( \ 1989 PyExc_AttributeError, \ 1990 "can't delete element attribute"); \ 1991 return -1; \ 1992 } 1993 1994static int 1995element_tag_setter(ElementObject *self, PyObject *value, void *closure) 1996{ 1997 _VALIDATE_ATTR_VALUE(value); 1998 Py_INCREF(value); 1999 Py_SETREF(self->tag, value); 2000 return 0; 2001} 2002 2003static int 2004element_text_setter(ElementObject *self, PyObject *value, void *closure) 2005{ 2006 _VALIDATE_ATTR_VALUE(value); 2007 Py_INCREF(value); 2008 _set_joined_ptr(&self->text, value); 2009 return 0; 2010} 2011 2012static int 2013element_tail_setter(ElementObject *self, PyObject *value, void *closure) 2014{ 2015 _VALIDATE_ATTR_VALUE(value); 2016 Py_INCREF(value); 2017 _set_joined_ptr(&self->tail, value); 2018 return 0; 2019} 2020 2021static int 2022element_attrib_setter(ElementObject *self, PyObject *value, void *closure) 2023{ 2024 _VALIDATE_ATTR_VALUE(value); 2025 if (!PyDict_Check(value)) { 2026 PyErr_Format(PyExc_TypeError, 2027 "attrib must be dict, not %.200s", 2028 Py_TYPE(value)->tp_name); 2029 return -1; 2030 } 2031 if (!self->extra) { 2032 if (create_extra(self, NULL) < 0) 2033 return -1; 2034 } 2035 Py_INCREF(value); 2036 Py_XSETREF(self->extra->attrib, value); 2037 return 0; 2038} 2039 2040static PySequenceMethods element_as_sequence = { 2041 (lenfunc) 
element_length, 2042 0, /* sq_concat */ 2043 0, /* sq_repeat */ 2044 element_getitem, 2045 0, 2046 element_setitem, 2047 0, 2048}; 2049 2050/******************************* Element iterator ****************************/ 2051 2052/* ElementIterObject represents the iteration state over an XML element in 2053 * pre-order traversal. To keep track of which sub-element should be returned 2054 * next, a stack of parents is maintained. This is a standard stack-based 2055 * iterative pre-order traversal of a tree. 2056 * The stack is managed using a continuous array. 2057 * Each stack item contains the saved parent to which we should return after 2058 * the current one is exhausted, and the next child to examine in that parent. 2059 */ 2060typedef struct ParentLocator_t { 2061 ElementObject *parent; 2062 Py_ssize_t child_index; 2063} ParentLocator; 2064 2065typedef struct { 2066 PyObject_HEAD 2067 ParentLocator *parent_stack; 2068 Py_ssize_t parent_stack_used; 2069 Py_ssize_t parent_stack_size; 2070 ElementObject *root_element; 2071 PyObject *sought_tag; 2072 int gettext; 2073} ElementIterObject; 2074 2075 2076static void 2077elementiter_dealloc(ElementIterObject *it) 2078{ 2079 Py_ssize_t i = it->parent_stack_used; 2080 it->parent_stack_used = 0; 2081 /* bpo-31095: UnTrack is needed before calling any callbacks */ 2082 PyObject_GC_UnTrack(it); 2083 while (i--) 2084 Py_XDECREF(it->parent_stack[i].parent); 2085 PyMem_Free(it->parent_stack); 2086 2087 Py_XDECREF(it->sought_tag); 2088 Py_XDECREF(it->root_element); 2089 2090 PyObject_GC_Del(it); 2091} 2092 2093static int 2094elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg) 2095{ 2096 Py_ssize_t i = it->parent_stack_used; 2097 while (i--) 2098 Py_VISIT(it->parent_stack[i].parent); 2099 2100 Py_VISIT(it->root_element); 2101 Py_VISIT(it->sought_tag); 2102 return 0; 2103} 2104 2105/* Helper function for elementiter_next. Add a new parent to the parent stack. 
2106 */ 2107static int 2108parent_stack_push_new(ElementIterObject *it, ElementObject *parent) 2109{ 2110 ParentLocator *item; 2111 2112 if (it->parent_stack_used >= it->parent_stack_size) { 2113 Py_ssize_t new_size = it->parent_stack_size * 2; /* never overflow */ 2114 ParentLocator *parent_stack = it->parent_stack; 2115 PyMem_Resize(parent_stack, ParentLocator, new_size); 2116 if (parent_stack == NULL) 2117 return -1; 2118 it->parent_stack = parent_stack; 2119 it->parent_stack_size = new_size; 2120 } 2121 item = it->parent_stack + it->parent_stack_used++; 2122 Py_INCREF(parent); 2123 item->parent = parent; 2124 item->child_index = 0; 2125 return 0; 2126} 2127 2128static PyObject * 2129elementiter_next(ElementIterObject *it) 2130{ 2131 /* Sub-element iterator. 2132 * 2133 * A short note on gettext: this function serves both the iter() and 2134 * itertext() methods to avoid code duplication. However, there are a few 2135 * small differences in the way these iterations work. Namely: 2136 * - itertext() only yields text from nodes that have it, and continues 2137 * iterating when a node doesn't have text (so it doesn't return any 2138 * node like iter()) 2139 * - itertext() also has to handle tail, after finishing with all the 2140 * children of a node. 2141 */ 2142 int rc; 2143 ElementObject *elem; 2144 PyObject *text; 2145 2146 while (1) { 2147 /* Handle the case reached in the beginning and end of iteration, where 2148 * the parent stack is empty. If root_element is NULL and we're here, the 2149 * iterator is exhausted. 2150 */ 2151 if (!it->parent_stack_used) { 2152 if (!it->root_element) { 2153 PyErr_SetNone(PyExc_StopIteration); 2154 return NULL; 2155 } 2156 2157 elem = it->root_element; /* steals a reference */ 2158 it->root_element = NULL; 2159 } 2160 else { 2161 /* See if there are children left to traverse in the current parent. If 2162 * yes, visit the next child. If not, pop the stack and try again. 
2163 */ 2164 ParentLocator *item = &it->parent_stack[it->parent_stack_used - 1]; 2165 Py_ssize_t child_index = item->child_index; 2166 ElementObjectExtra *extra; 2167 elem = item->parent; 2168 extra = elem->extra; 2169 if (!extra || child_index >= extra->length) { 2170 it->parent_stack_used--; 2171 /* Note that extra condition on it->parent_stack_used here; 2172 * this is because itertext() is supposed to only return *inner* 2173 * text, not text following the element it began iteration with. 2174 */ 2175 if (it->gettext && it->parent_stack_used) { 2176 text = element_get_tail(elem); 2177 goto gettext; 2178 } 2179 Py_DECREF(elem); 2180 continue; 2181 } 2182 2183 assert(Element_Check(extra->children[child_index])); 2184 elem = (ElementObject *)extra->children[child_index]; 2185 item->child_index++; 2186 Py_INCREF(elem); 2187 } 2188 2189 if (parent_stack_push_new(it, elem) < 0) { 2190 Py_DECREF(elem); 2191 PyErr_NoMemory(); 2192 return NULL; 2193 } 2194 if (it->gettext) { 2195 text = element_get_text(elem); 2196 goto gettext; 2197 } 2198 2199 if (it->sought_tag == Py_None) 2200 return (PyObject *)elem; 2201 2202 rc = PyObject_RichCompareBool(elem->tag, it->sought_tag, Py_EQ); 2203 if (rc > 0) 2204 return (PyObject *)elem; 2205 2206 Py_DECREF(elem); 2207 if (rc < 0) 2208 return NULL; 2209 continue; 2210 2211gettext: 2212 if (!text) { 2213 Py_DECREF(elem); 2214 return NULL; 2215 } 2216 if (text == Py_None) { 2217 Py_DECREF(elem); 2218 } 2219 else { 2220 Py_INCREF(text); 2221 Py_DECREF(elem); 2222 rc = PyObject_IsTrue(text); 2223 if (rc > 0) 2224 return text; 2225 Py_DECREF(text); 2226 if (rc < 0) 2227 return NULL; 2228 } 2229 } 2230 2231 return NULL; 2232} 2233 2234 2235static PyTypeObject ElementIter_Type = { 2236 PyVarObject_HEAD_INIT(NULL, 0) 2237 /* Using the module's name since the pure-Python implementation does not 2238 have such a type. 
*/ 2239 "_elementtree._element_iterator", /* tp_name */ 2240 sizeof(ElementIterObject), /* tp_basicsize */ 2241 0, /* tp_itemsize */ 2242 /* methods */ 2243 (destructor)elementiter_dealloc, /* tp_dealloc */ 2244 0, /* tp_vectorcall_offset */ 2245 0, /* tp_getattr */ 2246 0, /* tp_setattr */ 2247 0, /* tp_as_async */ 2248 0, /* tp_repr */ 2249 0, /* tp_as_number */ 2250 0, /* tp_as_sequence */ 2251 0, /* tp_as_mapping */ 2252 0, /* tp_hash */ 2253 0, /* tp_call */ 2254 0, /* tp_str */ 2255 0, /* tp_getattro */ 2256 0, /* tp_setattro */ 2257 0, /* tp_as_buffer */ 2258 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ 2259 0, /* tp_doc */ 2260 (traverseproc)elementiter_traverse, /* tp_traverse */ 2261 0, /* tp_clear */ 2262 0, /* tp_richcompare */ 2263 0, /* tp_weaklistoffset */ 2264 PyObject_SelfIter, /* tp_iter */ 2265 (iternextfunc)elementiter_next, /* tp_iternext */ 2266 0, /* tp_methods */ 2267 0, /* tp_members */ 2268 0, /* tp_getset */ 2269 0, /* tp_base */ 2270 0, /* tp_dict */ 2271 0, /* tp_descr_get */ 2272 0, /* tp_descr_set */ 2273 0, /* tp_dictoffset */ 2274 0, /* tp_init */ 2275 0, /* tp_alloc */ 2276 0, /* tp_new */ 2277}; 2278 2279#define INIT_PARENT_STACK_SIZE 8 2280 2281static PyObject * 2282create_elementiter(ElementObject *self, PyObject *tag, int gettext) 2283{ 2284 ElementIterObject *it; 2285 2286 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type); 2287 if (!it) 2288 return NULL; 2289 2290 Py_INCREF(tag); 2291 it->sought_tag = tag; 2292 it->gettext = gettext; 2293 Py_INCREF(self); 2294 it->root_element = self; 2295 2296 it->parent_stack = PyMem_New(ParentLocator, INIT_PARENT_STACK_SIZE); 2297 if (it->parent_stack == NULL) { 2298 Py_DECREF(it); 2299 PyErr_NoMemory(); 2300 return NULL; 2301 } 2302 it->parent_stack_used = 0; 2303 it->parent_stack_size = INIT_PARENT_STACK_SIZE; 2304 2305 PyObject_GC_Track(it); 2306 2307 return (PyObject *)it; 2308} 2309 2310 2311/* ==================================================================== */ 
2312/* the tree builder type */ 2313 2314typedef struct { 2315 PyObject_HEAD 2316 2317 PyObject *root; /* root node (first created node) */ 2318 2319 PyObject *this; /* current node */ 2320 PyObject *last; /* most recently created node */ 2321 PyObject *last_for_tail; /* most recently created node that takes a tail */ 2322 2323 PyObject *data; /* data collector (string or list), or NULL */ 2324 2325 PyObject *stack; /* element stack */ 2326 Py_ssize_t index; /* current stack size (0 means empty) */ 2327 2328 PyObject *element_factory; 2329 PyObject *comment_factory; 2330 PyObject *pi_factory; 2331 2332 /* element tracing */ 2333 PyObject *events_append; /* the append method of the list of events, or NULL */ 2334 PyObject *start_event_obj; /* event objects (NULL to ignore) */ 2335 PyObject *end_event_obj; 2336 PyObject *start_ns_event_obj; 2337 PyObject *end_ns_event_obj; 2338 PyObject *comment_event_obj; 2339 PyObject *pi_event_obj; 2340 2341 char insert_comments; 2342 char insert_pis; 2343} TreeBuilderObject; 2344 2345#define TreeBuilder_CheckExact(op) Py_IS_TYPE((op), &TreeBuilder_Type) 2346 2347/* -------------------------------------------------------------------- */ 2348/* constructor and destructor */ 2349 2350static PyObject * 2351treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) 2352{ 2353 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0); 2354 if (t != NULL) { 2355 t->root = NULL; 2356 2357 Py_INCREF(Py_None); 2358 t->this = Py_None; 2359 Py_INCREF(Py_None); 2360 t->last = Py_None; 2361 2362 t->data = NULL; 2363 t->element_factory = NULL; 2364 t->comment_factory = NULL; 2365 t->pi_factory = NULL; 2366 t->stack = PyList_New(20); 2367 if (!t->stack) { 2368 Py_DECREF(t->this); 2369 Py_DECREF(t->last); 2370 Py_DECREF((PyObject *) t); 2371 return NULL; 2372 } 2373 t->index = 0; 2374 2375 t->events_append = NULL; 2376 t->start_event_obj = t->end_event_obj = NULL; 2377 t->start_ns_event_obj = t->end_ns_event_obj = NULL; 2378 
t->comment_event_obj = t->pi_event_obj = NULL; 2379 t->insert_comments = t->insert_pis = 0; 2380 } 2381 return (PyObject *)t; 2382} 2383 2384/*[clinic input] 2385_elementtree.TreeBuilder.__init__ 2386 2387 element_factory: object = None 2388 * 2389 comment_factory: object = None 2390 pi_factory: object = None 2391 insert_comments: bool = False 2392 insert_pis: bool = False 2393 2394[clinic start generated code]*/ 2395 2396static int 2397_elementtree_TreeBuilder___init___impl(TreeBuilderObject *self, 2398 PyObject *element_factory, 2399 PyObject *comment_factory, 2400 PyObject *pi_factory, 2401 int insert_comments, int insert_pis) 2402/*[clinic end generated code: output=8571d4dcadfdf952 input=ae98a94df20b5cc3]*/ 2403{ 2404 if (element_factory != Py_None) { 2405 Py_INCREF(element_factory); 2406 Py_XSETREF(self->element_factory, element_factory); 2407 } else { 2408 Py_CLEAR(self->element_factory); 2409 } 2410 2411 if (comment_factory == Py_None) { 2412 elementtreestate *st = ET_STATE_GLOBAL; 2413 comment_factory = st->comment_factory; 2414 } 2415 if (comment_factory) { 2416 Py_INCREF(comment_factory); 2417 Py_XSETREF(self->comment_factory, comment_factory); 2418 self->insert_comments = insert_comments; 2419 } else { 2420 Py_CLEAR(self->comment_factory); 2421 self->insert_comments = 0; 2422 } 2423 2424 if (pi_factory == Py_None) { 2425 elementtreestate *st = ET_STATE_GLOBAL; 2426 pi_factory = st->pi_factory; 2427 } 2428 if (pi_factory) { 2429 Py_INCREF(pi_factory); 2430 Py_XSETREF(self->pi_factory, pi_factory); 2431 self->insert_pis = insert_pis; 2432 } else { 2433 Py_CLEAR(self->pi_factory); 2434 self->insert_pis = 0; 2435 } 2436 2437 return 0; 2438} 2439 2440static int 2441treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg) 2442{ 2443 Py_VISIT(self->pi_event_obj); 2444 Py_VISIT(self->comment_event_obj); 2445 Py_VISIT(self->end_ns_event_obj); 2446 Py_VISIT(self->start_ns_event_obj); 2447 Py_VISIT(self->end_event_obj); 2448 
Py_VISIT(self->start_event_obj); 2449 Py_VISIT(self->events_append); 2450 Py_VISIT(self->root); 2451 Py_VISIT(self->this); 2452 Py_VISIT(self->last); 2453 Py_VISIT(self->last_for_tail); 2454 Py_VISIT(self->data); 2455 Py_VISIT(self->stack); 2456 Py_VISIT(self->pi_factory); 2457 Py_VISIT(self->comment_factory); 2458 Py_VISIT(self->element_factory); 2459 return 0; 2460} 2461 2462static int 2463treebuilder_gc_clear(TreeBuilderObject *self) 2464{ 2465 Py_CLEAR(self->pi_event_obj); 2466 Py_CLEAR(self->comment_event_obj); 2467 Py_CLEAR(self->end_ns_event_obj); 2468 Py_CLEAR(self->start_ns_event_obj); 2469 Py_CLEAR(self->end_event_obj); 2470 Py_CLEAR(self->start_event_obj); 2471 Py_CLEAR(self->events_append); 2472 Py_CLEAR(self->stack); 2473 Py_CLEAR(self->data); 2474 Py_CLEAR(self->last); 2475 Py_CLEAR(self->last_for_tail); 2476 Py_CLEAR(self->this); 2477 Py_CLEAR(self->pi_factory); 2478 Py_CLEAR(self->comment_factory); 2479 Py_CLEAR(self->element_factory); 2480 Py_CLEAR(self->root); 2481 return 0; 2482} 2483 2484static void 2485treebuilder_dealloc(TreeBuilderObject *self) 2486{ 2487 PyObject_GC_UnTrack(self); 2488 treebuilder_gc_clear(self); 2489 Py_TYPE(self)->tp_free((PyObject *)self); 2490} 2491 2492/* -------------------------------------------------------------------- */ 2493/* helpers for handling of arbitrary element-like objects */ 2494 2495/*[clinic input] 2496_elementtree._set_factories 2497 2498 comment_factory: object 2499 pi_factory: object 2500 / 2501 2502Change the factories used to create comments and processing instructions. 2503 2504For internal use only. 
2505[clinic start generated code]*/ 2506 2507static PyObject * 2508_elementtree__set_factories_impl(PyObject *module, PyObject *comment_factory, 2509 PyObject *pi_factory) 2510/*[clinic end generated code: output=813b408adee26535 input=99d17627aea7fb3b]*/ 2511{ 2512 elementtreestate *st = ET_STATE_GLOBAL; 2513 PyObject *old; 2514 2515 if (!PyCallable_Check(comment_factory) && comment_factory != Py_None) { 2516 PyErr_Format(PyExc_TypeError, "Comment factory must be callable, not %.100s", 2517 Py_TYPE(comment_factory)->tp_name); 2518 return NULL; 2519 } 2520 if (!PyCallable_Check(pi_factory) && pi_factory != Py_None) { 2521 PyErr_Format(PyExc_TypeError, "PI factory must be callable, not %.100s", 2522 Py_TYPE(pi_factory)->tp_name); 2523 return NULL; 2524 } 2525 2526 old = PyTuple_Pack(2, 2527 st->comment_factory ? st->comment_factory : Py_None, 2528 st->pi_factory ? st->pi_factory : Py_None); 2529 2530 if (comment_factory == Py_None) { 2531 Py_CLEAR(st->comment_factory); 2532 } else { 2533 Py_INCREF(comment_factory); 2534 Py_XSETREF(st->comment_factory, comment_factory); 2535 } 2536 if (pi_factory == Py_None) { 2537 Py_CLEAR(st->pi_factory); 2538 } else { 2539 Py_INCREF(pi_factory); 2540 Py_XSETREF(st->pi_factory, pi_factory); 2541 } 2542 2543 return old; 2544} 2545 2546static int 2547treebuilder_extend_element_text_or_tail(PyObject *element, PyObject **data, 2548 PyObject **dest, _Py_Identifier *name) 2549{ 2550 /* Fast paths for the "almost always" cases. */ 2551 if (Element_CheckExact(element)) { 2552 PyObject *dest_obj = JOIN_OBJ(*dest); 2553 if (dest_obj == Py_None) { 2554 *dest = JOIN_SET(*data, PyList_CheckExact(*data)); 2555 *data = NULL; 2556 Py_DECREF(dest_obj); 2557 return 0; 2558 } 2559 else if (JOIN_GET(*dest)) { 2560 if (PyList_SetSlice(dest_obj, PY_SSIZE_T_MAX, PY_SSIZE_T_MAX, *data) < 0) { 2561 return -1; 2562 } 2563 Py_CLEAR(*data); 2564 return 0; 2565 } 2566 } 2567 2568 /* Fallback for the non-Element / non-trivial cases. 
*/ 2569 { 2570 int r; 2571 PyObject* joined; 2572 PyObject* previous = _PyObject_GetAttrId(element, name); 2573 if (!previous) 2574 return -1; 2575 joined = list_join(*data); 2576 if (!joined) { 2577 Py_DECREF(previous); 2578 return -1; 2579 } 2580 if (previous != Py_None) { 2581 PyObject *tmp = PyNumber_Add(previous, joined); 2582 Py_DECREF(joined); 2583 Py_DECREF(previous); 2584 if (!tmp) 2585 return -1; 2586 joined = tmp; 2587 } else { 2588 Py_DECREF(previous); 2589 } 2590 2591 r = _PyObject_SetAttrId(element, name, joined); 2592 Py_DECREF(joined); 2593 if (r < 0) 2594 return -1; 2595 Py_CLEAR(*data); 2596 return 0; 2597 } 2598} 2599 2600LOCAL(int) 2601treebuilder_flush_data(TreeBuilderObject* self) 2602{ 2603 if (!self->data) { 2604 return 0; 2605 } 2606 2607 if (!self->last_for_tail) { 2608 PyObject *element = self->last; 2609 _Py_IDENTIFIER(text); 2610 return treebuilder_extend_element_text_or_tail( 2611 element, &self->data, 2612 &((ElementObject *) element)->text, &PyId_text); 2613 } 2614 else { 2615 PyObject *element = self->last_for_tail; 2616 _Py_IDENTIFIER(tail); 2617 return treebuilder_extend_element_text_or_tail( 2618 element, &self->data, 2619 &((ElementObject *) element)->tail, &PyId_tail); 2620 } 2621} 2622 2623static int 2624treebuilder_add_subelement(PyObject *element, PyObject *child) 2625{ 2626 _Py_IDENTIFIER(append); 2627 if (Element_CheckExact(element)) { 2628 ElementObject *elem = (ElementObject *) element; 2629 return element_add_subelement(elem, child); 2630 } 2631 else { 2632 PyObject *res; 2633 res = _PyObject_CallMethodIdOneArg(element, &PyId_append, child); 2634 if (res == NULL) 2635 return -1; 2636 Py_DECREF(res); 2637 return 0; 2638 } 2639} 2640 2641LOCAL(int) 2642treebuilder_append_event(TreeBuilderObject *self, PyObject *action, 2643 PyObject *node) 2644{ 2645 if (action != NULL) { 2646 PyObject *res; 2647 PyObject *event = PyTuple_Pack(2, action, node); 2648 if (event == NULL) 2649 return -1; 2650 res = 
PyObject_CallOneArg(self->events_append, event); 2651 Py_DECREF(event); 2652 if (res == NULL) 2653 return -1; 2654 Py_DECREF(res); 2655 } 2656 return 0; 2657} 2658 2659/* -------------------------------------------------------------------- */ 2660/* handlers */ 2661 2662LOCAL(PyObject*) 2663treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag, 2664 PyObject* attrib) 2665{ 2666 PyObject* node; 2667 PyObject* this; 2668 elementtreestate *st = ET_STATE_GLOBAL; 2669 2670 if (treebuilder_flush_data(self) < 0) { 2671 return NULL; 2672 } 2673 2674 if (!self->element_factory) { 2675 node = create_new_element(tag, attrib); 2676 } else if (attrib == NULL) { 2677 attrib = PyDict_New(); 2678 if (!attrib) 2679 return NULL; 2680 node = PyObject_CallFunctionObjArgs(self->element_factory, 2681 tag, attrib, NULL); 2682 Py_DECREF(attrib); 2683 } 2684 else { 2685 node = PyObject_CallFunctionObjArgs(self->element_factory, 2686 tag, attrib, NULL); 2687 } 2688 if (!node) { 2689 return NULL; 2690 } 2691 2692 this = self->this; 2693 Py_CLEAR(self->last_for_tail); 2694 2695 if (this != Py_None) { 2696 if (treebuilder_add_subelement(this, node) < 0) 2697 goto error; 2698 } else { 2699 if (self->root) { 2700 PyErr_SetString( 2701 st->parseerror_obj, 2702 "multiple elements on top level" 2703 ); 2704 goto error; 2705 } 2706 Py_INCREF(node); 2707 self->root = node; 2708 } 2709 2710 if (self->index < PyList_GET_SIZE(self->stack)) { 2711 if (PyList_SetItem(self->stack, self->index, this) < 0) 2712 goto error; 2713 Py_INCREF(this); 2714 } else { 2715 if (PyList_Append(self->stack, this) < 0) 2716 goto error; 2717 } 2718 self->index++; 2719 2720 Py_INCREF(node); 2721 Py_SETREF(self->this, node); 2722 Py_INCREF(node); 2723 Py_SETREF(self->last, node); 2724 2725 if (treebuilder_append_event(self, self->start_event_obj, node) < 0) 2726 goto error; 2727 2728 return node; 2729 2730 error: 2731 Py_DECREF(node); 2732 return NULL; 2733} 2734 2735LOCAL(PyObject*) 
2736treebuilder_handle_data(TreeBuilderObject* self, PyObject* data) 2737{ 2738 if (!self->data) { 2739 if (self->last == Py_None) { 2740 /* ignore calls to data before the first call to start */ 2741 Py_RETURN_NONE; 2742 } 2743 /* store the first item as is */ 2744 Py_INCREF(data); self->data = data; 2745 } else { 2746 /* more than one item; use a list to collect items */ 2747 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 && 2748 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) { 2749 /* XXX this code path unused in Python 3? */ 2750 /* expat often generates single character data sections; handle 2751 the most common case by resizing the existing string... */ 2752 Py_ssize_t size = PyBytes_GET_SIZE(self->data); 2753 if (_PyBytes_Resize(&self->data, size + 1) < 0) 2754 return NULL; 2755 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0]; 2756 } else if (PyList_CheckExact(self->data)) { 2757 if (PyList_Append(self->data, data) < 0) 2758 return NULL; 2759 } else { 2760 PyObject* list = PyList_New(2); 2761 if (!list) 2762 return NULL; 2763 PyList_SET_ITEM(list, 0, self->data); 2764 Py_INCREF(data); PyList_SET_ITEM(list, 1, data); 2765 self->data = list; 2766 } 2767 } 2768 2769 Py_RETURN_NONE; 2770} 2771 2772LOCAL(PyObject*) 2773treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag) 2774{ 2775 PyObject* item; 2776 2777 if (treebuilder_flush_data(self) < 0) { 2778 return NULL; 2779 } 2780 2781 if (self->index == 0) { 2782 PyErr_SetString( 2783 PyExc_IndexError, 2784 "pop from empty stack" 2785 ); 2786 return NULL; 2787 } 2788 2789 item = self->last; 2790 self->last = self->this; 2791 Py_INCREF(self->last); 2792 Py_XSETREF(self->last_for_tail, self->last); 2793 self->index--; 2794 self->this = PyList_GET_ITEM(self->stack, self->index); 2795 Py_INCREF(self->this); 2796 Py_DECREF(item); 2797 2798 if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0) 2799 return NULL; 2800 2801 Py_INCREF(self->last); 2802 
return (PyObject*) self->last; 2803} 2804 2805LOCAL(PyObject*) 2806treebuilder_handle_comment(TreeBuilderObject* self, PyObject* text) 2807{ 2808 PyObject* comment; 2809 PyObject* this; 2810 2811 if (treebuilder_flush_data(self) < 0) { 2812 return NULL; 2813 } 2814 2815 if (self->comment_factory) { 2816 comment = PyObject_CallOneArg(self->comment_factory, text); 2817 if (!comment) 2818 return NULL; 2819 2820 this = self->this; 2821 if (self->insert_comments && this != Py_None) { 2822 if (treebuilder_add_subelement(this, comment) < 0) 2823 goto error; 2824 Py_INCREF(comment); 2825 Py_XSETREF(self->last_for_tail, comment); 2826 } 2827 } else { 2828 Py_INCREF(text); 2829 comment = text; 2830 } 2831 2832 if (self->events_append && self->comment_event_obj) { 2833 if (treebuilder_append_event(self, self->comment_event_obj, comment) < 0) 2834 goto error; 2835 } 2836 2837 return comment; 2838 2839 error: 2840 Py_DECREF(comment); 2841 return NULL; 2842} 2843 2844LOCAL(PyObject*) 2845treebuilder_handle_pi(TreeBuilderObject* self, PyObject* target, PyObject* text) 2846{ 2847 PyObject* pi; 2848 PyObject* this; 2849 PyObject* stack[2] = {target, text}; 2850 2851 if (treebuilder_flush_data(self) < 0) { 2852 return NULL; 2853 } 2854 2855 if (self->pi_factory) { 2856 pi = _PyObject_FastCall(self->pi_factory, stack, 2); 2857 if (!pi) { 2858 return NULL; 2859 } 2860 2861 this = self->this; 2862 if (self->insert_pis && this != Py_None) { 2863 if (treebuilder_add_subelement(this, pi) < 0) 2864 goto error; 2865 Py_INCREF(pi); 2866 Py_XSETREF(self->last_for_tail, pi); 2867 } 2868 } else { 2869 pi = PyTuple_Pack(2, target, text); 2870 if (!pi) { 2871 return NULL; 2872 } 2873 } 2874 2875 if (self->events_append && self->pi_event_obj) { 2876 if (treebuilder_append_event(self, self->pi_event_obj, pi) < 0) 2877 goto error; 2878 } 2879 2880 return pi; 2881 2882 error: 2883 Py_DECREF(pi); 2884 return NULL; 2885} 2886 2887LOCAL(PyObject*) 2888treebuilder_handle_start_ns(TreeBuilderObject* self, 
PyObject* prefix, PyObject* uri) 2889{ 2890 PyObject* parcel; 2891 2892 if (self->events_append && self->start_ns_event_obj) { 2893 parcel = PyTuple_Pack(2, prefix, uri); 2894 if (!parcel) { 2895 return NULL; 2896 } 2897 2898 if (treebuilder_append_event(self, self->start_ns_event_obj, parcel) < 0) { 2899 Py_DECREF(parcel); 2900 return NULL; 2901 } 2902 Py_DECREF(parcel); 2903 } 2904 2905 Py_RETURN_NONE; 2906} 2907 2908LOCAL(PyObject*) 2909treebuilder_handle_end_ns(TreeBuilderObject* self, PyObject* prefix) 2910{ 2911 if (self->events_append && self->end_ns_event_obj) { 2912 if (treebuilder_append_event(self, self->end_ns_event_obj, prefix) < 0) { 2913 return NULL; 2914 } 2915 } 2916 2917 Py_RETURN_NONE; 2918} 2919 2920/* -------------------------------------------------------------------- */ 2921/* methods (in alphabetical order) */ 2922 2923/*[clinic input] 2924_elementtree.TreeBuilder.data 2925 2926 data: object 2927 / 2928 2929[clinic start generated code]*/ 2930 2931static PyObject * 2932_elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data) 2933/*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/ 2934{ 2935 return treebuilder_handle_data(self, data); 2936} 2937 2938/*[clinic input] 2939_elementtree.TreeBuilder.end 2940 2941 tag: object 2942 / 2943 2944[clinic start generated code]*/ 2945 2946static PyObject * 2947_elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag) 2948/*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/ 2949{ 2950 return treebuilder_handle_end(self, tag); 2951} 2952 2953/*[clinic input] 2954_elementtree.TreeBuilder.comment 2955 2956 text: object 2957 / 2958 2959[clinic start generated code]*/ 2960 2961static PyObject * 2962_elementtree_TreeBuilder_comment(TreeBuilderObject *self, PyObject *text) 2963/*[clinic end generated code: output=22835be41deeaa27 input=47e7ebc48ed01dfa]*/ 2964{ 2965 return treebuilder_handle_comment(self, text); 2966} 2967 
2968/*[clinic input] 2969_elementtree.TreeBuilder.pi 2970 2971 target: object 2972 text: object = None 2973 / 2974 2975[clinic start generated code]*/ 2976 2977static PyObject * 2978_elementtree_TreeBuilder_pi_impl(TreeBuilderObject *self, PyObject *target, 2979 PyObject *text) 2980/*[clinic end generated code: output=21eb95ec9d04d1d9 input=349342bd79c35570]*/ 2981{ 2982 return treebuilder_handle_pi(self, target, text); 2983} 2984 2985LOCAL(PyObject*) 2986treebuilder_done(TreeBuilderObject* self) 2987{ 2988 PyObject* res; 2989 2990 /* FIXME: check stack size? */ 2991 2992 if (self->root) 2993 res = self->root; 2994 else 2995 res = Py_None; 2996 2997 Py_INCREF(res); 2998 return res; 2999} 3000 3001/*[clinic input] 3002_elementtree.TreeBuilder.close 3003 3004[clinic start generated code]*/ 3005 3006static PyObject * 3007_elementtree_TreeBuilder_close_impl(TreeBuilderObject *self) 3008/*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/ 3009{ 3010 return treebuilder_done(self); 3011} 3012 3013/*[clinic input] 3014_elementtree.TreeBuilder.start 3015 3016 tag: object 3017 attrs: object(subclass_of='&PyDict_Type') 3018 / 3019 3020[clinic start generated code]*/ 3021 3022static PyObject * 3023_elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag, 3024 PyObject *attrs) 3025/*[clinic end generated code: output=e7e9dc2861349411 input=7288e9e38e63b2b6]*/ 3026{ 3027 return treebuilder_handle_start(self, tag, attrs); 3028} 3029 3030/* ==================================================================== */ 3031/* the expat interface */ 3032 3033#include "expat.h" 3034#include "pyexpat.h" 3035 3036/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be 3037 * cached globally without being in per-module state. 
3038 */ 3039static struct PyExpat_CAPI *expat_capi; 3040#define EXPAT(func) (expat_capi->func) 3041 3042static XML_Memory_Handling_Suite ExpatMemoryHandler = { 3043 PyObject_Malloc, PyObject_Realloc, PyObject_Free}; 3044 3045typedef struct { 3046 PyObject_HEAD 3047 3048 XML_Parser parser; 3049 3050 PyObject *target; 3051 PyObject *entity; 3052 3053 PyObject *names; 3054 3055 PyObject *handle_start_ns; 3056 PyObject *handle_end_ns; 3057 PyObject *handle_start; 3058 PyObject *handle_data; 3059 PyObject *handle_end; 3060 3061 PyObject *handle_comment; 3062 PyObject *handle_pi; 3063 PyObject *handle_doctype; 3064 3065 PyObject *handle_close; 3066 3067} XMLParserObject; 3068 3069/* helpers */ 3070 3071LOCAL(PyObject*) 3072makeuniversal(XMLParserObject* self, const char* string) 3073{ 3074 /* convert a UTF-8 tag/attribute name from the expat parser 3075 to a universal name string */ 3076 3077 Py_ssize_t size = (Py_ssize_t) strlen(string); 3078 PyObject* key; 3079 PyObject* value; 3080 3081 /* look the 'raw' name up in the names dictionary */ 3082 key = PyBytes_FromStringAndSize(string, size); 3083 if (!key) 3084 return NULL; 3085 3086 value = PyDict_GetItemWithError(self->names, key); 3087 3088 if (value) { 3089 Py_INCREF(value); 3090 } 3091 else if (!PyErr_Occurred()) { 3092 /* new name. 
convert to universal name, and decode as 3093 necessary */ 3094 3095 PyObject* tag; 3096 char* p; 3097 Py_ssize_t i; 3098 3099 /* look for namespace separator */ 3100 for (i = 0; i < size; i++) 3101 if (string[i] == '}') 3102 break; 3103 if (i != size) { 3104 /* convert to universal name */ 3105 tag = PyBytes_FromStringAndSize(NULL, size+1); 3106 if (tag == NULL) { 3107 Py_DECREF(key); 3108 return NULL; 3109 } 3110 p = PyBytes_AS_STRING(tag); 3111 p[0] = '{'; 3112 memcpy(p+1, string, size); 3113 size++; 3114 } else { 3115 /* plain name; use key as tag */ 3116 Py_INCREF(key); 3117 tag = key; 3118 } 3119 3120 /* decode universal name */ 3121 p = PyBytes_AS_STRING(tag); 3122 value = PyUnicode_DecodeUTF8(p, size, "strict"); 3123 Py_DECREF(tag); 3124 if (!value) { 3125 Py_DECREF(key); 3126 return NULL; 3127 } 3128 3129 /* add to names dictionary */ 3130 if (PyDict_SetItem(self->names, key, value) < 0) { 3131 Py_DECREF(key); 3132 Py_DECREF(value); 3133 return NULL; 3134 } 3135 } 3136 3137 Py_DECREF(key); 3138 return value; 3139} 3140 3141/* Set the ParseError exception with the given parameters. 3142 * If message is not NULL, it's used as the error string. Otherwise, the 3143 * message string is the default for the given error_code. 3144*/ 3145static void 3146expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column, 3147 const char *message) 3148{ 3149 PyObject *errmsg, *error, *position, *code; 3150 elementtreestate *st = ET_STATE_GLOBAL; 3151 3152 errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd", 3153 message ? 
message : EXPAT(ErrorString)(error_code), 3154 line, column); 3155 if (errmsg == NULL) 3156 return; 3157 3158 error = PyObject_CallOneArg(st->parseerror_obj, errmsg); 3159 Py_DECREF(errmsg); 3160 if (!error) 3161 return; 3162 3163 /* Add code and position attributes */ 3164 code = PyLong_FromLong((long)error_code); 3165 if (!code) { 3166 Py_DECREF(error); 3167 return; 3168 } 3169 if (PyObject_SetAttrString(error, "code", code) == -1) { 3170 Py_DECREF(error); 3171 Py_DECREF(code); 3172 return; 3173 } 3174 Py_DECREF(code); 3175 3176 position = Py_BuildValue("(nn)", line, column); 3177 if (!position) { 3178 Py_DECREF(error); 3179 return; 3180 } 3181 if (PyObject_SetAttrString(error, "position", position) == -1) { 3182 Py_DECREF(error); 3183 Py_DECREF(position); 3184 return; 3185 } 3186 Py_DECREF(position); 3187 3188 PyErr_SetObject(st->parseerror_obj, error); 3189 Py_DECREF(error); 3190} 3191 3192/* -------------------------------------------------------------------- */ 3193/* handlers */ 3194 3195static void 3196expat_default_handler(XMLParserObject* self, const XML_Char* data_in, 3197 int data_len) 3198{ 3199 PyObject* key; 3200 PyObject* value; 3201 PyObject* res; 3202 3203 if (data_len < 2 || data_in[0] != '&') 3204 return; 3205 3206 if (PyErr_Occurred()) 3207 return; 3208 3209 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict"); 3210 if (!key) 3211 return; 3212 3213 value = PyDict_GetItemWithError(self->entity, key); 3214 3215 if (value) { 3216 if (TreeBuilder_CheckExact(self->target)) 3217 res = treebuilder_handle_data( 3218 (TreeBuilderObject*) self->target, value 3219 ); 3220 else if (self->handle_data) 3221 res = PyObject_CallOneArg(self->handle_data, value); 3222 else 3223 res = NULL; 3224 Py_XDECREF(res); 3225 } else if (!PyErr_Occurred()) { 3226 /* Report the first error, not the last */ 3227 char message[128] = "undefined entity "; 3228 strncat(message, data_in, data_len < 100?data_len:100); 3229 expat_set_error( 3230 
XML_ERROR_UNDEFINED_ENTITY, 3231 EXPAT(GetErrorLineNumber)(self->parser), 3232 EXPAT(GetErrorColumnNumber)(self->parser), 3233 message 3234 ); 3235 } 3236 3237 Py_DECREF(key); 3238} 3239 3240static void 3241expat_start_handler(XMLParserObject* self, const XML_Char* tag_in, 3242 const XML_Char **attrib_in) 3243{ 3244 PyObject* res; 3245 PyObject* tag; 3246 PyObject* attrib; 3247 int ok; 3248 3249 if (PyErr_Occurred()) 3250 return; 3251 3252 /* tag name */ 3253 tag = makeuniversal(self, tag_in); 3254 if (!tag) 3255 return; /* parser will look for errors */ 3256 3257 /* attributes */ 3258 if (attrib_in[0]) { 3259 attrib = PyDict_New(); 3260 if (!attrib) { 3261 Py_DECREF(tag); 3262 return; 3263 } 3264 while (attrib_in[0] && attrib_in[1]) { 3265 PyObject* key = makeuniversal(self, attrib_in[0]); 3266 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict"); 3267 if (!key || !value) { 3268 Py_XDECREF(value); 3269 Py_XDECREF(key); 3270 Py_DECREF(attrib); 3271 Py_DECREF(tag); 3272 return; 3273 } 3274 ok = PyDict_SetItem(attrib, key, value); 3275 Py_DECREF(value); 3276 Py_DECREF(key); 3277 if (ok < 0) { 3278 Py_DECREF(attrib); 3279 Py_DECREF(tag); 3280 return; 3281 } 3282 attrib_in += 2; 3283 } 3284 } else { 3285 attrib = NULL; 3286 } 3287 3288 if (TreeBuilder_CheckExact(self->target)) { 3289 /* shortcut */ 3290 res = treebuilder_handle_start((TreeBuilderObject*) self->target, 3291 tag, attrib); 3292 } 3293 else if (self->handle_start) { 3294 if (attrib == NULL) { 3295 attrib = PyDict_New(); 3296 if (!attrib) { 3297 Py_DECREF(tag); 3298 return; 3299 } 3300 } 3301 res = PyObject_CallFunctionObjArgs(self->handle_start, 3302 tag, attrib, NULL); 3303 } else 3304 res = NULL; 3305 3306 Py_DECREF(tag); 3307 Py_XDECREF(attrib); 3308 3309 Py_XDECREF(res); 3310} 3311 3312static void 3313expat_data_handler(XMLParserObject* self, const XML_Char* data_in, 3314 int data_len) 3315{ 3316 PyObject* data; 3317 PyObject* res; 3318 3319 if (PyErr_Occurred()) 3320 
return; 3321 3322 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict"); 3323 if (!data) 3324 return; /* parser will look for errors */ 3325 3326 if (TreeBuilder_CheckExact(self->target)) 3327 /* shortcut */ 3328 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data); 3329 else if (self->handle_data) 3330 res = PyObject_CallOneArg(self->handle_data, data); 3331 else 3332 res = NULL; 3333 3334 Py_DECREF(data); 3335 3336 Py_XDECREF(res); 3337} 3338 3339static void 3340expat_end_handler(XMLParserObject* self, const XML_Char* tag_in) 3341{ 3342 PyObject* tag; 3343 PyObject* res = NULL; 3344 3345 if (PyErr_Occurred()) 3346 return; 3347 3348 if (TreeBuilder_CheckExact(self->target)) 3349 /* shortcut */ 3350 /* the standard tree builder doesn't look at the end tag */ 3351 res = treebuilder_handle_end( 3352 (TreeBuilderObject*) self->target, Py_None 3353 ); 3354 else if (self->handle_end) { 3355 tag = makeuniversal(self, tag_in); 3356 if (tag) { 3357 res = PyObject_CallOneArg(self->handle_end, tag); 3358 Py_DECREF(tag); 3359 } 3360 } 3361 3362 Py_XDECREF(res); 3363} 3364 3365static void 3366expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix_in, 3367 const XML_Char *uri_in) 3368{ 3369 PyObject* res = NULL; 3370 PyObject* uri; 3371 PyObject* prefix; 3372 PyObject* stack[2]; 3373 3374 if (PyErr_Occurred()) 3375 return; 3376 3377 if (!uri_in) 3378 uri_in = ""; 3379 if (!prefix_in) 3380 prefix_in = ""; 3381 3382 if (TreeBuilder_CheckExact(self->target)) { 3383 /* shortcut - TreeBuilder does not actually implement .start_ns() */ 3384 TreeBuilderObject *target = (TreeBuilderObject*) self->target; 3385 3386 if (target->events_append && target->start_ns_event_obj) { 3387 prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict"); 3388 if (!prefix) 3389 return; 3390 uri = PyUnicode_DecodeUTF8(uri_in, strlen(uri_in), "strict"); 3391 if (!uri) { 3392 Py_DECREF(prefix); 3393 return; 3394 } 3395 3396 res = 
treebuilder_handle_start_ns(target, prefix, uri); 3397 Py_DECREF(uri); 3398 Py_DECREF(prefix); 3399 } 3400 } else if (self->handle_start_ns) { 3401 prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict"); 3402 if (!prefix) 3403 return; 3404 uri = PyUnicode_DecodeUTF8(uri_in, strlen(uri_in), "strict"); 3405 if (!uri) { 3406 Py_DECREF(prefix); 3407 return; 3408 } 3409 3410 stack[0] = prefix; 3411 stack[1] = uri; 3412 res = _PyObject_FastCall(self->handle_start_ns, stack, 2); 3413 Py_DECREF(uri); 3414 Py_DECREF(prefix); 3415 } 3416 3417 Py_XDECREF(res); 3418} 3419 3420static void 3421expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in) 3422{ 3423 PyObject *res = NULL; 3424 PyObject* prefix; 3425 3426 if (PyErr_Occurred()) 3427 return; 3428 3429 if (!prefix_in) 3430 prefix_in = ""; 3431 3432 if (TreeBuilder_CheckExact(self->target)) { 3433 /* shortcut - TreeBuilder does not actually implement .end_ns() */ 3434 TreeBuilderObject *target = (TreeBuilderObject*) self->target; 3435 3436 if (target->events_append && target->end_ns_event_obj) { 3437 res = treebuilder_handle_end_ns(target, Py_None); 3438 } 3439 } else if (self->handle_end_ns) { 3440 prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict"); 3441 if (!prefix) 3442 return; 3443 3444 res = PyObject_CallOneArg(self->handle_end_ns, prefix); 3445 Py_DECREF(prefix); 3446 } 3447 3448 Py_XDECREF(res); 3449} 3450 3451static void 3452expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in) 3453{ 3454 PyObject* comment; 3455 PyObject* res; 3456 3457 if (PyErr_Occurred()) 3458 return; 3459 3460 if (TreeBuilder_CheckExact(self->target)) { 3461 /* shortcut */ 3462 TreeBuilderObject *target = (TreeBuilderObject*) self->target; 3463 3464 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict"); 3465 if (!comment) 3466 return; /* parser will look for errors */ 3467 3468 res = treebuilder_handle_comment(target, comment); 3469 Py_XDECREF(res); 3470 
Py_DECREF(comment); 3471 } else if (self->handle_comment) { 3472 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict"); 3473 if (!comment) 3474 return; 3475 3476 res = PyObject_CallOneArg(self->handle_comment, comment); 3477 Py_XDECREF(res); 3478 Py_DECREF(comment); 3479 } 3480} 3481 3482static void 3483expat_start_doctype_handler(XMLParserObject *self, 3484 const XML_Char *doctype_name, 3485 const XML_Char *sysid, 3486 const XML_Char *pubid, 3487 int has_internal_subset) 3488{ 3489 _Py_IDENTIFIER(doctype); 3490 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj; 3491 PyObject *res; 3492 3493 if (PyErr_Occurred()) 3494 return; 3495 3496 doctype_name_obj = makeuniversal(self, doctype_name); 3497 if (!doctype_name_obj) 3498 return; 3499 3500 if (sysid) { 3501 sysid_obj = makeuniversal(self, sysid); 3502 if (!sysid_obj) { 3503 Py_DECREF(doctype_name_obj); 3504 return; 3505 } 3506 } else { 3507 Py_INCREF(Py_None); 3508 sysid_obj = Py_None; 3509 } 3510 3511 if (pubid) { 3512 pubid_obj = makeuniversal(self, pubid); 3513 if (!pubid_obj) { 3514 Py_DECREF(doctype_name_obj); 3515 Py_DECREF(sysid_obj); 3516 return; 3517 } 3518 } else { 3519 Py_INCREF(Py_None); 3520 pubid_obj = Py_None; 3521 } 3522 3523 /* If the target has a handler for doctype, call it. */ 3524 if (self->handle_doctype) { 3525 res = PyObject_CallFunctionObjArgs(self->handle_doctype, 3526 doctype_name_obj, pubid_obj, 3527 sysid_obj, NULL); 3528 Py_XDECREF(res); 3529 } 3530 else if (_PyObject_LookupAttrId((PyObject *)self, &PyId_doctype, &res) > 0) { 3531 (void)PyErr_WarnEx(PyExc_RuntimeWarning, 3532 "The doctype() method of XMLParser is ignored. 
" 3533 "Define doctype() method on the TreeBuilder target.", 3534 1); 3535 Py_DECREF(res); 3536 } 3537 3538 Py_DECREF(doctype_name_obj); 3539 Py_DECREF(pubid_obj); 3540 Py_DECREF(sysid_obj); 3541} 3542 3543static void 3544expat_pi_handler(XMLParserObject* self, const XML_Char* target_in, 3545 const XML_Char* data_in) 3546{ 3547 PyObject* pi_target; 3548 PyObject* data; 3549 PyObject* res; 3550 PyObject* stack[2]; 3551 3552 if (PyErr_Occurred()) 3553 return; 3554 3555 if (TreeBuilder_CheckExact(self->target)) { 3556 /* shortcut */ 3557 TreeBuilderObject *target = (TreeBuilderObject*) self->target; 3558 3559 if ((target->events_append && target->pi_event_obj) || target->insert_pis) { 3560 pi_target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict"); 3561 if (!pi_target) 3562 goto error; 3563 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict"); 3564 if (!data) 3565 goto error; 3566 res = treebuilder_handle_pi(target, pi_target, data); 3567 Py_XDECREF(res); 3568 Py_DECREF(data); 3569 Py_DECREF(pi_target); 3570 } 3571 } else if (self->handle_pi) { 3572 pi_target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict"); 3573 if (!pi_target) 3574 goto error; 3575 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict"); 3576 if (!data) 3577 goto error; 3578 3579 stack[0] = pi_target; 3580 stack[1] = data; 3581 res = _PyObject_FastCall(self->handle_pi, stack, 2); 3582 Py_XDECREF(res); 3583 Py_DECREF(data); 3584 Py_DECREF(pi_target); 3585 } 3586 3587 return; 3588 3589 error: 3590 Py_XDECREF(pi_target); 3591 return; 3592} 3593 3594/* -------------------------------------------------------------------- */ 3595 3596static PyObject * 3597xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds) 3598{ 3599 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0); 3600 if (self) { 3601 self->parser = NULL; 3602 self->target = self->entity = self->names = NULL; 3603 self->handle_start_ns = self->handle_end_ns = NULL; 3604 
self->handle_start = self->handle_data = self->handle_end = NULL; 3605 self->handle_comment = self->handle_pi = self->handle_close = NULL; 3606 self->handle_doctype = NULL; 3607 } 3608 return (PyObject *)self; 3609} 3610 3611static int 3612ignore_attribute_error(PyObject *value) 3613{ 3614 if (value == NULL) { 3615 if (!PyErr_ExceptionMatches(PyExc_AttributeError)) { 3616 return -1; 3617 } 3618 PyErr_Clear(); 3619 } 3620 return 0; 3621} 3622 3623/*[clinic input] 3624_elementtree.XMLParser.__init__ 3625 3626 * 3627 target: object = None 3628 encoding: str(accept={str, NoneType}) = None 3629 3630[clinic start generated code]*/ 3631 3632static int 3633_elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *target, 3634 const char *encoding) 3635/*[clinic end generated code: output=3ae45ec6cdf344e4 input=7e716dd6e4f3e439]*/ 3636{ 3637 self->entity = PyDict_New(); 3638 if (!self->entity) 3639 return -1; 3640 3641 self->names = PyDict_New(); 3642 if (!self->names) { 3643 Py_CLEAR(self->entity); 3644 return -1; 3645 } 3646 3647 self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}"); 3648 if (!self->parser) { 3649 Py_CLEAR(self->entity); 3650 Py_CLEAR(self->names); 3651 PyErr_NoMemory(); 3652 return -1; 3653 } 3654 /* expat < 2.1.0 has no XML_SetHashSalt() */ 3655 if (EXPAT(SetHashSalt) != NULL) { 3656 EXPAT(SetHashSalt)(self->parser, 3657 (unsigned long)_Py_HashSecret.expat.hashsalt); 3658 } 3659 3660 if (target != Py_None) { 3661 Py_INCREF(target); 3662 } else { 3663 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL); 3664 if (!target) { 3665 Py_CLEAR(self->entity); 3666 Py_CLEAR(self->names); 3667 return -1; 3668 } 3669 } 3670 self->target = target; 3671 3672 self->handle_start_ns = PyObject_GetAttrString(target, "start_ns"); 3673 if (ignore_attribute_error(self->handle_start_ns)) { 3674 return -1; 3675 } 3676 self->handle_end_ns = PyObject_GetAttrString(target, "end_ns"); 3677 if (ignore_attribute_error(self->handle_end_ns)) { 3678 
return -1; 3679 } 3680 self->handle_start = PyObject_GetAttrString(target, "start"); 3681 if (ignore_attribute_error(self->handle_start)) { 3682 return -1; 3683 } 3684 self->handle_data = PyObject_GetAttrString(target, "data"); 3685 if (ignore_attribute_error(self->handle_data)) { 3686 return -1; 3687 } 3688 self->handle_end = PyObject_GetAttrString(target, "end"); 3689 if (ignore_attribute_error(self->handle_end)) { 3690 return -1; 3691 } 3692 self->handle_comment = PyObject_GetAttrString(target, "comment"); 3693 if (ignore_attribute_error(self->handle_comment)) { 3694 return -1; 3695 } 3696 self->handle_pi = PyObject_GetAttrString(target, "pi"); 3697 if (ignore_attribute_error(self->handle_pi)) { 3698 return -1; 3699 } 3700 self->handle_close = PyObject_GetAttrString(target, "close"); 3701 if (ignore_attribute_error(self->handle_close)) { 3702 return -1; 3703 } 3704 self->handle_doctype = PyObject_GetAttrString(target, "doctype"); 3705 if (ignore_attribute_error(self->handle_doctype)) { 3706 return -1; 3707 } 3708 3709 /* configure parser */ 3710 EXPAT(SetUserData)(self->parser, self); 3711 if (self->handle_start_ns || self->handle_end_ns) 3712 EXPAT(SetNamespaceDeclHandler)( 3713 self->parser, 3714 (XML_StartNamespaceDeclHandler) expat_start_ns_handler, 3715 (XML_EndNamespaceDeclHandler) expat_end_ns_handler 3716 ); 3717 EXPAT(SetElementHandler)( 3718 self->parser, 3719 (XML_StartElementHandler) expat_start_handler, 3720 (XML_EndElementHandler) expat_end_handler 3721 ); 3722 EXPAT(SetDefaultHandlerExpand)( 3723 self->parser, 3724 (XML_DefaultHandler) expat_default_handler 3725 ); 3726 EXPAT(SetCharacterDataHandler)( 3727 self->parser, 3728 (XML_CharacterDataHandler) expat_data_handler 3729 ); 3730 if (self->handle_comment) 3731 EXPAT(SetCommentHandler)( 3732 self->parser, 3733 (XML_CommentHandler) expat_comment_handler 3734 ); 3735 if (self->handle_pi) 3736 EXPAT(SetProcessingInstructionHandler)( 3737 self->parser, 3738 (XML_ProcessingInstructionHandler) 
expat_pi_handler 3739 ); 3740 EXPAT(SetStartDoctypeDeclHandler)( 3741 self->parser, 3742 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler 3743 ); 3744 EXPAT(SetUnknownEncodingHandler)( 3745 self->parser, 3746 EXPAT(DefaultUnknownEncodingHandler), NULL 3747 ); 3748 3749 return 0; 3750} 3751 3752static int 3753xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg) 3754{ 3755 Py_VISIT(self->handle_close); 3756 Py_VISIT(self->handle_pi); 3757 Py_VISIT(self->handle_comment); 3758 Py_VISIT(self->handle_end); 3759 Py_VISIT(self->handle_data); 3760 Py_VISIT(self->handle_start); 3761 Py_VISIT(self->handle_start_ns); 3762 Py_VISIT(self->handle_end_ns); 3763 Py_VISIT(self->handle_doctype); 3764 3765 Py_VISIT(self->target); 3766 Py_VISIT(self->entity); 3767 Py_VISIT(self->names); 3768 3769 return 0; 3770} 3771 3772static int 3773xmlparser_gc_clear(XMLParserObject *self) 3774{ 3775 if (self->parser != NULL) { 3776 XML_Parser parser = self->parser; 3777 self->parser = NULL; 3778 EXPAT(ParserFree)(parser); 3779 } 3780 3781 Py_CLEAR(self->handle_close); 3782 Py_CLEAR(self->handle_pi); 3783 Py_CLEAR(self->handle_comment); 3784 Py_CLEAR(self->handle_end); 3785 Py_CLEAR(self->handle_data); 3786 Py_CLEAR(self->handle_start); 3787 Py_CLEAR(self->handle_start_ns); 3788 Py_CLEAR(self->handle_end_ns); 3789 Py_CLEAR(self->handle_doctype); 3790 3791 Py_CLEAR(self->target); 3792 Py_CLEAR(self->entity); 3793 Py_CLEAR(self->names); 3794 3795 return 0; 3796} 3797 3798static void 3799xmlparser_dealloc(XMLParserObject* self) 3800{ 3801 PyObject_GC_UnTrack(self); 3802 xmlparser_gc_clear(self); 3803 Py_TYPE(self)->tp_free((PyObject *)self); 3804} 3805 3806Py_LOCAL_INLINE(int) 3807_check_xmlparser(XMLParserObject* self) 3808{ 3809 if (self->target == NULL) { 3810 PyErr_SetString(PyExc_ValueError, 3811 "XMLParser.__init__() wasn't called"); 3812 return 0; 3813 } 3814 return 1; 3815} 3816 3817LOCAL(PyObject*) 3818expat_parse(XMLParserObject* self, const char* data, int 
data_len, int final) 3819{ 3820 int ok; 3821 3822 assert(!PyErr_Occurred()); 3823 ok = EXPAT(Parse)(self->parser, data, data_len, final); 3824 3825 if (PyErr_Occurred()) 3826 return NULL; 3827 3828 if (!ok) { 3829 expat_set_error( 3830 EXPAT(GetErrorCode)(self->parser), 3831 EXPAT(GetErrorLineNumber)(self->parser), 3832 EXPAT(GetErrorColumnNumber)(self->parser), 3833 NULL 3834 ); 3835 return NULL; 3836 } 3837 3838 Py_RETURN_NONE; 3839} 3840 3841/*[clinic input] 3842_elementtree.XMLParser.close 3843 3844[clinic start generated code]*/ 3845 3846static PyObject * 3847_elementtree_XMLParser_close_impl(XMLParserObject *self) 3848/*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/ 3849{ 3850 /* end feeding data to parser */ 3851 3852 PyObject* res; 3853 3854 if (!_check_xmlparser(self)) { 3855 return NULL; 3856 } 3857 res = expat_parse(self, "", 0, 1); 3858 if (!res) 3859 return NULL; 3860 3861 if (TreeBuilder_CheckExact(self->target)) { 3862 Py_DECREF(res); 3863 return treebuilder_done((TreeBuilderObject*) self->target); 3864 } 3865 else if (self->handle_close) { 3866 Py_DECREF(res); 3867 return PyObject_CallNoArgs(self->handle_close); 3868 } 3869 else { 3870 return res; 3871 } 3872} 3873 3874/*[clinic input] 3875_elementtree.XMLParser.feed 3876 3877 data: object 3878 / 3879 3880[clinic start generated code]*/ 3881 3882static PyObject * 3883_elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data) 3884/*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/ 3885{ 3886 /* feed data to parser */ 3887 3888 if (!_check_xmlparser(self)) { 3889 return NULL; 3890 } 3891 if (PyUnicode_Check(data)) { 3892 Py_ssize_t data_len; 3893 const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len); 3894 if (data_ptr == NULL) 3895 return NULL; 3896 if (data_len > INT_MAX) { 3897 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int"); 3898 return NULL; 3899 } 3900 /* Explicitly set UTF-8 encoding. 
Return code ignored. */ 3901 (void)EXPAT(SetEncoding)(self->parser, "utf-8"); 3902 return expat_parse(self, data_ptr, (int)data_len, 0); 3903 } 3904 else { 3905 Py_buffer view; 3906 PyObject *res; 3907 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0) 3908 return NULL; 3909 if (view.len > INT_MAX) { 3910 PyBuffer_Release(&view); 3911 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int"); 3912 return NULL; 3913 } 3914 res = expat_parse(self, view.buf, (int)view.len, 0); 3915 PyBuffer_Release(&view); 3916 return res; 3917 } 3918} 3919 3920/*[clinic input] 3921_elementtree.XMLParser._parse_whole 3922 3923 file: object 3924 / 3925 3926[clinic start generated code]*/ 3927 3928static PyObject * 3929_elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file) 3930/*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/ 3931{ 3932 /* (internal) parse the whole input, until end of stream */ 3933 PyObject* reader; 3934 PyObject* buffer; 3935 PyObject* temp; 3936 PyObject* res; 3937 3938 if (!_check_xmlparser(self)) { 3939 return NULL; 3940 } 3941 reader = PyObject_GetAttrString(file, "read"); 3942 if (!reader) 3943 return NULL; 3944 3945 /* read from open file object */ 3946 for (;;) { 3947 3948 buffer = PyObject_CallFunction(reader, "i", 64*1024); 3949 3950 if (!buffer) { 3951 /* read failed (e.g. 
due to KeyboardInterrupt) */ 3952 Py_DECREF(reader); 3953 return NULL; 3954 } 3955 3956 if (PyUnicode_CheckExact(buffer)) { 3957 /* A unicode object is encoded into bytes using UTF-8 */ 3958 if (PyUnicode_GET_LENGTH(buffer) == 0) { 3959 Py_DECREF(buffer); 3960 break; 3961 } 3962 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass"); 3963 Py_DECREF(buffer); 3964 if (!temp) { 3965 /* Propagate exception from PyUnicode_AsEncodedString */ 3966 Py_DECREF(reader); 3967 return NULL; 3968 } 3969 buffer = temp; 3970 } 3971 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) { 3972 Py_DECREF(buffer); 3973 break; 3974 } 3975 3976 if (PyBytes_GET_SIZE(buffer) > INT_MAX) { 3977 Py_DECREF(buffer); 3978 Py_DECREF(reader); 3979 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int"); 3980 return NULL; 3981 } 3982 res = expat_parse( 3983 self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0 3984 ); 3985 3986 Py_DECREF(buffer); 3987 3988 if (!res) { 3989 Py_DECREF(reader); 3990 return NULL; 3991 } 3992 Py_DECREF(res); 3993 3994 } 3995 3996 Py_DECREF(reader); 3997 3998 res = expat_parse(self, "", 0, 1); 3999 4000 if (res && TreeBuilder_CheckExact(self->target)) { 4001 Py_DECREF(res); 4002 return treebuilder_done((TreeBuilderObject*) self->target); 4003 } 4004 4005 return res; 4006} 4007 4008/*[clinic input] 4009_elementtree.XMLParser._setevents 4010 4011 events_queue: object 4012 events_to_report: object = None 4013 / 4014 4015[clinic start generated code]*/ 4016 4017static PyObject * 4018_elementtree_XMLParser__setevents_impl(XMLParserObject *self, 4019 PyObject *events_queue, 4020 PyObject *events_to_report) 4021/*[clinic end generated code: output=1440092922b13ed1 input=abf90830a1c3b0fc]*/ 4022{ 4023 /* activate element event reporting */ 4024 Py_ssize_t i; 4025 TreeBuilderObject *target; 4026 PyObject *events_append, *events_seq; 4027 4028 if (!_check_xmlparser(self)) { 4029 return NULL; 4030 } 4031 if 
(!TreeBuilder_CheckExact(self->target)) { 4032 PyErr_SetString( 4033 PyExc_TypeError, 4034 "event handling only supported for ElementTree.TreeBuilder " 4035 "targets" 4036 ); 4037 return NULL; 4038 } 4039 4040 target = (TreeBuilderObject*) self->target; 4041 4042 events_append = PyObject_GetAttrString(events_queue, "append"); 4043 if (events_append == NULL) 4044 return NULL; 4045 Py_XSETREF(target->events_append, events_append); 4046 4047 /* clear out existing events */ 4048 Py_CLEAR(target->start_event_obj); 4049 Py_CLEAR(target->end_event_obj); 4050 Py_CLEAR(target->start_ns_event_obj); 4051 Py_CLEAR(target->end_ns_event_obj); 4052 Py_CLEAR(target->comment_event_obj); 4053 Py_CLEAR(target->pi_event_obj); 4054 4055 if (events_to_report == Py_None) { 4056 /* default is "end" only */ 4057 target->end_event_obj = PyUnicode_FromString("end"); 4058 Py_RETURN_NONE; 4059 } 4060 4061 if (!(events_seq = PySequence_Fast(events_to_report, 4062 "events must be a sequence"))) { 4063 return NULL; 4064 } 4065 4066 for (i = 0; i < PySequence_Fast_GET_SIZE(events_seq); ++i) { 4067 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i); 4068 const char *event_name = NULL; 4069 if (PyUnicode_Check(event_name_obj)) { 4070 event_name = PyUnicode_AsUTF8(event_name_obj); 4071 } else if (PyBytes_Check(event_name_obj)) { 4072 event_name = PyBytes_AS_STRING(event_name_obj); 4073 } 4074 if (event_name == NULL) { 4075 Py_DECREF(events_seq); 4076 PyErr_Format(PyExc_ValueError, "invalid events sequence"); 4077 return NULL; 4078 } 4079 4080 Py_INCREF(event_name_obj); 4081 if (strcmp(event_name, "start") == 0) { 4082 Py_XSETREF(target->start_event_obj, event_name_obj); 4083 } else if (strcmp(event_name, "end") == 0) { 4084 Py_XSETREF(target->end_event_obj, event_name_obj); 4085 } else if (strcmp(event_name, "start-ns") == 0) { 4086 Py_XSETREF(target->start_ns_event_obj, event_name_obj); 4087 EXPAT(SetNamespaceDeclHandler)( 4088 self->parser, 4089 (XML_StartNamespaceDeclHandler) 
expat_start_ns_handler, 4090 (XML_EndNamespaceDeclHandler) expat_end_ns_handler 4091 ); 4092 } else if (strcmp(event_name, "end-ns") == 0) { 4093 Py_XSETREF(target->end_ns_event_obj, event_name_obj); 4094 EXPAT(SetNamespaceDeclHandler)( 4095 self->parser, 4096 (XML_StartNamespaceDeclHandler) expat_start_ns_handler, 4097 (XML_EndNamespaceDeclHandler) expat_end_ns_handler 4098 ); 4099 } else if (strcmp(event_name, "comment") == 0) { 4100 Py_XSETREF(target->comment_event_obj, event_name_obj); 4101 EXPAT(SetCommentHandler)( 4102 self->parser, 4103 (XML_CommentHandler) expat_comment_handler 4104 ); 4105 } else if (strcmp(event_name, "pi") == 0) { 4106 Py_XSETREF(target->pi_event_obj, event_name_obj); 4107 EXPAT(SetProcessingInstructionHandler)( 4108 self->parser, 4109 (XML_ProcessingInstructionHandler) expat_pi_handler 4110 ); 4111 } else { 4112 Py_DECREF(event_name_obj); 4113 Py_DECREF(events_seq); 4114 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name); 4115 return NULL; 4116 } 4117 } 4118 4119 Py_DECREF(events_seq); 4120 Py_RETURN_NONE; 4121} 4122 4123static PyMemberDef xmlparser_members[] = { 4124 {"entity", T_OBJECT, offsetof(XMLParserObject, entity), READONLY, NULL}, 4125 {"target", T_OBJECT, offsetof(XMLParserObject, target), READONLY, NULL}, 4126 {NULL} 4127}; 4128 4129static PyObject* 4130xmlparser_version_getter(XMLParserObject *self, void *closure) 4131{ 4132 return PyUnicode_FromFormat( 4133 "Expat %d.%d.%d", XML_MAJOR_VERSION, 4134 XML_MINOR_VERSION, XML_MICRO_VERSION); 4135} 4136 4137static PyGetSetDef xmlparser_getsetlist[] = { 4138 {"version", (getter)xmlparser_version_getter, NULL, NULL}, 4139 {NULL}, 4140}; 4141 4142#include "clinic/_elementtree.c.h" 4143 4144static PyMethodDef element_methods[] = { 4145 4146 _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF 4147 4148 _ELEMENTTREE_ELEMENT_GET_METHODDEF 4149 _ELEMENTTREE_ELEMENT_SET_METHODDEF 4150 4151 _ELEMENTTREE_ELEMENT_FIND_METHODDEF 4152 _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF 4153 
_ELEMENTTREE_ELEMENT_FINDALL_METHODDEF 4154 4155 _ELEMENTTREE_ELEMENT_APPEND_METHODDEF 4156 _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF 4157 _ELEMENTTREE_ELEMENT_INSERT_METHODDEF 4158 _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF 4159 4160 _ELEMENTTREE_ELEMENT_ITER_METHODDEF 4161 _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF 4162 _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF 4163 4164 _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF 4165 _ELEMENTTREE_ELEMENT_KEYS_METHODDEF 4166 4167 _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF 4168 4169 _ELEMENTTREE_ELEMENT___COPY___METHODDEF 4170 _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF 4171 _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF 4172 _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF 4173 _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF 4174 4175 {NULL, NULL} 4176}; 4177 4178static PyMappingMethods element_as_mapping = { 4179 (lenfunc) element_length, 4180 (binaryfunc) element_subscr, 4181 (objobjargproc) element_ass_subscr, 4182}; 4183 4184static PyGetSetDef element_getsetlist[] = { 4185 {"tag", 4186 (getter)element_tag_getter, 4187 (setter)element_tag_setter, 4188 "A string identifying what kind of data this element represents"}, 4189 {"text", 4190 (getter)element_text_getter, 4191 (setter)element_text_setter, 4192 "A string of text directly after the start tag, or None"}, 4193 {"tail", 4194 (getter)element_tail_getter, 4195 (setter)element_tail_setter, 4196 "A string of text directly after the end tag, or None"}, 4197 {"attrib", 4198 (getter)element_attrib_getter, 4199 (setter)element_attrib_setter, 4200 "A dictionary containing the element's attributes"}, 4201 {NULL}, 4202}; 4203 4204static PyTypeObject Element_Type = { 4205 PyVarObject_HEAD_INIT(NULL, 0) 4206 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0, 4207 /* methods */ 4208 (destructor)element_dealloc, /* tp_dealloc */ 4209 0, /* tp_vectorcall_offset */ 4210 0, /* tp_getattr */ 4211 0, /* tp_setattr */ 4212 0, /* tp_as_async */ 4213 (reprfunc)element_repr, /* tp_repr */ 4214 0, /* tp_as_number */ 4215 
&element_as_sequence, /* tp_as_sequence */ 4216 &element_as_mapping, /* tp_as_mapping */ 4217 0, /* tp_hash */ 4218 0, /* tp_call */ 4219 0, /* tp_str */ 4220 PyObject_GenericGetAttr, /* tp_getattro */ 4221 0, /* tp_setattro */ 4222 0, /* tp_as_buffer */ 4223 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, 4224 /* tp_flags */ 4225 0, /* tp_doc */ 4226 (traverseproc)element_gc_traverse, /* tp_traverse */ 4227 (inquiry)element_gc_clear, /* tp_clear */ 4228 0, /* tp_richcompare */ 4229 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */ 4230 0, /* tp_iter */ 4231 0, /* tp_iternext */ 4232 element_methods, /* tp_methods */ 4233 0, /* tp_members */ 4234 element_getsetlist, /* tp_getset */ 4235 0, /* tp_base */ 4236 0, /* tp_dict */ 4237 0, /* tp_descr_get */ 4238 0, /* tp_descr_set */ 4239 0, /* tp_dictoffset */ 4240 (initproc)element_init, /* tp_init */ 4241 PyType_GenericAlloc, /* tp_alloc */ 4242 element_new, /* tp_new */ 4243 0, /* tp_free */ 4244}; 4245 4246static PyMethodDef treebuilder_methods[] = { 4247 _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF 4248 _ELEMENTTREE_TREEBUILDER_START_METHODDEF 4249 _ELEMENTTREE_TREEBUILDER_END_METHODDEF 4250 _ELEMENTTREE_TREEBUILDER_COMMENT_METHODDEF 4251 _ELEMENTTREE_TREEBUILDER_PI_METHODDEF 4252 _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF 4253 {NULL, NULL} 4254}; 4255 4256static PyTypeObject TreeBuilder_Type = { 4257 PyVarObject_HEAD_INIT(NULL, 0) 4258 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0, 4259 /* methods */ 4260 (destructor)treebuilder_dealloc, /* tp_dealloc */ 4261 0, /* tp_vectorcall_offset */ 4262 0, /* tp_getattr */ 4263 0, /* tp_setattr */ 4264 0, /* tp_as_async */ 4265 0, /* tp_repr */ 4266 0, /* tp_as_number */ 4267 0, /* tp_as_sequence */ 4268 0, /* tp_as_mapping */ 4269 0, /* tp_hash */ 4270 0, /* tp_call */ 4271 0, /* tp_str */ 4272 0, /* tp_getattro */ 4273 0, /* tp_setattro */ 4274 0, /* tp_as_buffer */ 4275 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, 
4276 /* tp_flags */ 4277 0, /* tp_doc */ 4278 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */ 4279 (inquiry)treebuilder_gc_clear, /* tp_clear */ 4280 0, /* tp_richcompare */ 4281 0, /* tp_weaklistoffset */ 4282 0, /* tp_iter */ 4283 0, /* tp_iternext */ 4284 treebuilder_methods, /* tp_methods */ 4285 0, /* tp_members */ 4286 0, /* tp_getset */ 4287 0, /* tp_base */ 4288 0, /* tp_dict */ 4289 0, /* tp_descr_get */ 4290 0, /* tp_descr_set */ 4291 0, /* tp_dictoffset */ 4292 _elementtree_TreeBuilder___init__, /* tp_init */ 4293 PyType_GenericAlloc, /* tp_alloc */ 4294 treebuilder_new, /* tp_new */ 4295 0, /* tp_free */ 4296}; 4297 4298static PyMethodDef xmlparser_methods[] = { 4299 _ELEMENTTREE_XMLPARSER_FEED_METHODDEF 4300 _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF 4301 _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF 4302 _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF 4303 {NULL, NULL} 4304}; 4305 4306static PyTypeObject XMLParser_Type = { 4307 PyVarObject_HEAD_INIT(NULL, 0) 4308 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0, 4309 /* methods */ 4310 (destructor)xmlparser_dealloc, /* tp_dealloc */ 4311 0, /* tp_vectorcall_offset */ 4312 0, /* tp_getattr */ 4313 0, /* tp_setattr */ 4314 0, /* tp_as_async */ 4315 0, /* tp_repr */ 4316 0, /* tp_as_number */ 4317 0, /* tp_as_sequence */ 4318 0, /* tp_as_mapping */ 4319 0, /* tp_hash */ 4320 0, /* tp_call */ 4321 0, /* tp_str */ 4322 0, /* tp_getattro */ 4323 0, /* tp_setattro */ 4324 0, /* tp_as_buffer */ 4325 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, 4326 /* tp_flags */ 4327 0, /* tp_doc */ 4328 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */ 4329 (inquiry)xmlparser_gc_clear, /* tp_clear */ 4330 0, /* tp_richcompare */ 4331 0, /* tp_weaklistoffset */ 4332 0, /* tp_iter */ 4333 0, /* tp_iternext */ 4334 xmlparser_methods, /* tp_methods */ 4335 xmlparser_members, /* tp_members */ 4336 xmlparser_getsetlist, /* tp_getset */ 4337 0, /* tp_base */ 4338 0, /* tp_dict */ 4339 0, /* 
tp_descr_get */ 4340 0, /* tp_descr_set */ 4341 0, /* tp_dictoffset */ 4342 _elementtree_XMLParser___init__, /* tp_init */ 4343 PyType_GenericAlloc, /* tp_alloc */ 4344 xmlparser_new, /* tp_new */ 4345 0, /* tp_free */ 4346}; 4347 4348/* ==================================================================== */ 4349/* python module interface */ 4350 4351static PyMethodDef _functions[] = { 4352 {"SubElement", _PyCFunction_CAST(subelement), METH_VARARGS | METH_KEYWORDS}, 4353 _ELEMENTTREE__SET_FACTORIES_METHODDEF 4354 {NULL, NULL} 4355}; 4356 4357 4358static struct PyModuleDef elementtreemodule = { 4359 PyModuleDef_HEAD_INIT, 4360 "_elementtree", 4361 NULL, 4362 sizeof(elementtreestate), 4363 _functions, 4364 NULL, 4365 elementtree_traverse, 4366 elementtree_clear, 4367 elementtree_free 4368}; 4369 4370PyMODINIT_FUNC 4371PyInit__elementtree(void) 4372{ 4373 PyObject *m, *temp; 4374 elementtreestate *st; 4375 4376 m = PyState_FindModule(&elementtreemodule); 4377 if (m) { 4378 Py_INCREF(m); 4379 return m; 4380 } 4381 4382 /* Initialize object types */ 4383 if (PyType_Ready(&ElementIter_Type) < 0) 4384 return NULL; 4385 if (PyType_Ready(&TreeBuilder_Type) < 0) 4386 return NULL; 4387 if (PyType_Ready(&Element_Type) < 0) 4388 return NULL; 4389 if (PyType_Ready(&XMLParser_Type) < 0) 4390 return NULL; 4391 4392 m = PyModule_Create(&elementtreemodule); 4393 if (!m) 4394 return NULL; 4395 st = get_elementtree_state(m); 4396 4397 if (!(temp = PyImport_ImportModule("copy"))) 4398 return NULL; 4399 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy"); 4400 Py_XDECREF(temp); 4401 4402 if (st->deepcopy_obj == NULL) { 4403 return NULL; 4404 } 4405 4406 assert(!PyErr_Occurred()); 4407 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath"))) 4408 return NULL; 4409 4410 /* link against pyexpat */ 4411 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0); 4412 if (expat_capi) { 4413 /* check that it's usable */ 4414 if (strcmp(expat_capi->magic, 
PyExpat_CAPI_MAGIC) != 0 || 4415 (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) || 4416 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION || 4417 expat_capi->MINOR_VERSION != XML_MINOR_VERSION || 4418 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) { 4419 PyErr_SetString(PyExc_ImportError, 4420 "pyexpat version is incompatible"); 4421 return NULL; 4422 } 4423 } else { 4424 return NULL; 4425 } 4426 4427 st->parseerror_obj = PyErr_NewException( 4428 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL 4429 ); 4430 Py_INCREF(st->parseerror_obj); 4431 if (PyModule_AddObject(m, "ParseError", st->parseerror_obj) < 0) { 4432 Py_DECREF(st->parseerror_obj); 4433 return NULL; 4434 } 4435 4436 PyTypeObject *types[] = { 4437 &Element_Type, 4438 &TreeBuilder_Type, 4439 &XMLParser_Type 4440 }; 4441 4442 for (size_t i = 0; i < Py_ARRAY_LENGTH(types); i++) { 4443 if (PyModule_AddType(m, types[i]) < 0) { 4444 return NULL; 4445 } 4446 } 4447 4448 return m; 4449} 4450