1/* 2 * Secret Labs' Regular Expression Engine 3 * 4 * regular expression matching engine 5 * 6 * partial history: 7 * 1999-10-24 fl created (based on existing template matcher code) 8 * 2000-03-06 fl first alpha, sort of 9 * 2000-08-01 fl fixes for 1.6b1 10 * 2000-08-07 fl use PyOS_CheckStack() if available 11 * 2000-09-20 fl added expand method 12 * 2001-03-20 fl lots of fixes for 2.1b2 13 * 2001-04-15 fl export copyright as Python attribute, not global 14 * 2001-04-28 fl added __copy__ methods (work in progress) 15 * 2001-05-14 fl fixes for 1.5.2 compatibility 16 * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis) 17 * 2001-10-18 fl fixed group reset issue (from Matthew Mueller) 18 * 2001-10-20 fl added split primitive; re-enable unicode for 1.6/2.0/2.1 19 * 2001-10-21 fl added sub/subn primitive 20 * 2001-10-24 fl added finditer primitive (for 2.2 only) 21 * 2001-12-07 fl fixed memory leak in sub/subn (Guido van Rossum) 22 * 2002-11-09 fl fixed empty sub/subn return type 23 * 2003-04-18 mvl fully support 4-byte codes 24 * 2003-10-17 gn implemented non recursive scheme 25 * 2013-02-04 mrab added fullmatch primitive 26 * 27 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved. 28 * 29 * This version of the SRE library can be redistributed under CNRI's 30 * Python 1.6 license. For any other use, please contact Secret Labs 31 * AB (info@pythonware.com). 32 * 33 * Portions of this engine have been developed in cooperation with 34 * CNRI. Hewlett-Packard provided funding for 1.6 integration and 35 * other compatibility work. 
36 */ 37 38static const char copyright[] = 39 " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB "; 40 41#define PY_SSIZE_T_CLEAN 42 43#include "Python.h" 44#include "pycore_long.h" // _PyLong_GetZero() 45#include "pycore_moduleobject.h" // _PyModule_GetState() 46#include "structmember.h" // PyMemberDef 47 48#include "sre.h" 49 50#define SRE_CODE_BITS (8 * sizeof(SRE_CODE)) 51 52#include <ctype.h> 53 54/* name of this module, minus the leading underscore */ 55#if !defined(SRE_MODULE) 56#define SRE_MODULE "sre" 57#endif 58 59#define SRE_PY_MODULE "re" 60 61/* defining this one enables tracing */ 62#undef VERBOSE 63 64/* -------------------------------------------------------------------- */ 65 66#if defined(_MSC_VER) 67#pragma optimize("agtw", on) /* doesn't seem to make much difference... */ 68#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */ 69/* fastest possible local call under MSVC */ 70#define LOCAL(type) static __inline type __fastcall 71#else 72#define LOCAL(type) static inline type 73#endif 74 75/* error codes */ 76#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */ 77#define SRE_ERROR_STATE -2 /* illegal state */ 78#define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */ 79#define SRE_ERROR_MEMORY -9 /* out of memory */ 80#define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */ 81 82#if defined(VERBOSE) 83#define TRACE(v) printf v 84#else 85#define TRACE(v) 86#endif 87 88/* -------------------------------------------------------------------- */ 89/* search engine state */ 90 91#define SRE_IS_DIGIT(ch)\ 92 ((ch) <= '9' && Py_ISDIGIT(ch)) 93#define SRE_IS_SPACE(ch)\ 94 ((ch) <= ' ' && Py_ISSPACE(ch)) 95#define SRE_IS_LINEBREAK(ch)\ 96 ((ch) == '\n') 97#define SRE_IS_WORD(ch)\ 98 ((ch) <= 'z' && (Py_ISALNUM(ch) || (ch) == '_')) 99 100static unsigned int sre_lower_ascii(unsigned int ch) 101{ 102 return ((ch) < 128 ? 
Py_TOLOWER(ch) : ch); 103} 104 105/* locale-specific character predicates */ 106/* !(c & ~N) == (c < N+1) for any unsigned c, this avoids 107 * warnings when c's type supports only numbers < N+1 */ 108#define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? isalnum((ch)) : 0) 109#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_') 110 111static unsigned int sre_lower_locale(unsigned int ch) 112{ 113 return ((ch) < 256 ? (unsigned int)tolower((ch)) : ch); 114} 115 116static unsigned int sre_upper_locale(unsigned int ch) 117{ 118 return ((ch) < 256 ? (unsigned int)toupper((ch)) : ch); 119} 120 121/* unicode-specific character predicates */ 122 123#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL(ch) 124#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE(ch) 125#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK(ch) 126#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM(ch) 127#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM(ch) || (ch) == '_') 128 129static unsigned int sre_lower_unicode(unsigned int ch) 130{ 131 return (unsigned int) Py_UNICODE_TOLOWER(ch); 132} 133 134static unsigned int sre_upper_unicode(unsigned int ch) 135{ 136 return (unsigned int) Py_UNICODE_TOUPPER(ch); 137} 138 139LOCAL(int) 140sre_category(SRE_CODE category, unsigned int ch) 141{ 142 switch (category) { 143 144 case SRE_CATEGORY_DIGIT: 145 return SRE_IS_DIGIT(ch); 146 case SRE_CATEGORY_NOT_DIGIT: 147 return !SRE_IS_DIGIT(ch); 148 case SRE_CATEGORY_SPACE: 149 return SRE_IS_SPACE(ch); 150 case SRE_CATEGORY_NOT_SPACE: 151 return !SRE_IS_SPACE(ch); 152 case SRE_CATEGORY_WORD: 153 return SRE_IS_WORD(ch); 154 case SRE_CATEGORY_NOT_WORD: 155 return !SRE_IS_WORD(ch); 156 case SRE_CATEGORY_LINEBREAK: 157 return SRE_IS_LINEBREAK(ch); 158 case SRE_CATEGORY_NOT_LINEBREAK: 159 return !SRE_IS_LINEBREAK(ch); 160 161 case SRE_CATEGORY_LOC_WORD: 162 return SRE_LOC_IS_WORD(ch); 163 case SRE_CATEGORY_LOC_NOT_WORD: 164 return !SRE_LOC_IS_WORD(ch); 165 166 case SRE_CATEGORY_UNI_DIGIT: 167 return 
SRE_UNI_IS_DIGIT(ch); 168 case SRE_CATEGORY_UNI_NOT_DIGIT: 169 return !SRE_UNI_IS_DIGIT(ch); 170 case SRE_CATEGORY_UNI_SPACE: 171 return SRE_UNI_IS_SPACE(ch); 172 case SRE_CATEGORY_UNI_NOT_SPACE: 173 return !SRE_UNI_IS_SPACE(ch); 174 case SRE_CATEGORY_UNI_WORD: 175 return SRE_UNI_IS_WORD(ch); 176 case SRE_CATEGORY_UNI_NOT_WORD: 177 return !SRE_UNI_IS_WORD(ch); 178 case SRE_CATEGORY_UNI_LINEBREAK: 179 return SRE_UNI_IS_LINEBREAK(ch); 180 case SRE_CATEGORY_UNI_NOT_LINEBREAK: 181 return !SRE_UNI_IS_LINEBREAK(ch); 182 } 183 return 0; 184} 185 186LOCAL(int) 187char_loc_ignore(SRE_CODE pattern, SRE_CODE ch) 188{ 189 return ch == pattern 190 || (SRE_CODE) sre_lower_locale(ch) == pattern 191 || (SRE_CODE) sre_upper_locale(ch) == pattern; 192} 193 194 195/* helpers */ 196 197static void 198data_stack_dealloc(SRE_STATE* state) 199{ 200 if (state->data_stack) { 201 PyMem_Free(state->data_stack); 202 state->data_stack = NULL; 203 } 204 state->data_stack_size = state->data_stack_base = 0; 205} 206 207static int 208data_stack_grow(SRE_STATE* state, Py_ssize_t size) 209{ 210 Py_ssize_t minsize, cursize; 211 minsize = state->data_stack_base+size; 212 cursize = state->data_stack_size; 213 if (cursize < minsize) { 214 void* stack; 215 cursize = minsize+minsize/4+1024; 216 TRACE(("allocate/grow stack %zd\n", cursize)); 217 stack = PyMem_Realloc(state->data_stack, cursize); 218 if (!stack) { 219 data_stack_dealloc(state); 220 return SRE_ERROR_MEMORY; 221 } 222 state->data_stack = (char *)stack; 223 state->data_stack_size = cursize; 224 } 225 return 0; 226} 227 228/* generate 8-bit version */ 229 230#define SRE_CHAR Py_UCS1 231#define SIZEOF_SRE_CHAR 1 232#define SRE(F) sre_ucs1_##F 233#include "sre_lib.h" 234 235/* generate 16-bit unicode version */ 236 237#define SRE_CHAR Py_UCS2 238#define SIZEOF_SRE_CHAR 2 239#define SRE(F) sre_ucs2_##F 240#include "sre_lib.h" 241 242/* generate 32-bit unicode version */ 243 244#define SRE_CHAR Py_UCS4 245#define SIZEOF_SRE_CHAR 4 246#define SRE(F) 
sre_ucs4_##F 247#include "sre_lib.h" 248 249/* -------------------------------------------------------------------- */ 250/* factories and destructors */ 251 252/* module state */ 253typedef struct { 254 PyTypeObject *Pattern_Type; 255 PyTypeObject *Match_Type; 256 PyTypeObject *Scanner_Type; 257} _sremodulestate; 258 259static _sremodulestate * 260get_sre_module_state(PyObject *m) 261{ 262 _sremodulestate *state = (_sremodulestate *)_PyModule_GetState(m); 263 assert(state); 264 return state; 265} 266 267static struct PyModuleDef sremodule; 268#define get_sre_module_state_by_class(cls) \ 269 (get_sre_module_state(PyType_GetModule(cls))) 270 271/* see sre.h for object declarations */ 272static PyObject*pattern_new_match(_sremodulestate *, PatternObject*, SRE_STATE*, Py_ssize_t); 273static PyObject *pattern_scanner(_sremodulestate *, PatternObject *, PyObject *, Py_ssize_t, Py_ssize_t); 274 275/*[clinic input] 276module _sre 277class _sre.SRE_Pattern "PatternObject *" "get_sre_module_state_by_class(tp)->Pattern_Type" 278class _sre.SRE_Match "MatchObject *" "get_sre_module_state_by_class(tp)->Match_Type" 279class _sre.SRE_Scanner "ScannerObject *" "get_sre_module_state_by_class(tp)->Scanner_Type" 280[clinic start generated code]*/ 281/*[clinic end generated code: output=da39a3ee5e6b4b0d input=fe2966e32b66a231]*/ 282 283/*[clinic input] 284_sre.getcodesize -> int 285[clinic start generated code]*/ 286 287static int 288_sre_getcodesize_impl(PyObject *module) 289/*[clinic end generated code: output=e0db7ce34a6dd7b1 input=bd6f6ecf4916bb2b]*/ 290{ 291 return sizeof(SRE_CODE); 292} 293 294/*[clinic input] 295_sre.ascii_iscased -> bool 296 297 character: int 298 / 299 300[clinic start generated code]*/ 301 302static int 303_sre_ascii_iscased_impl(PyObject *module, int character) 304/*[clinic end generated code: output=4f454b630fbd19a2 input=9f0bd952812c7ed3]*/ 305{ 306 unsigned int ch = (unsigned int)character; 307 return ch < 128 && Py_ISALPHA(ch); 308} 309 310/*[clinic 
input] 311_sre.unicode_iscased -> bool 312 313 character: int 314 / 315 316[clinic start generated code]*/ 317 318static int 319_sre_unicode_iscased_impl(PyObject *module, int character) 320/*[clinic end generated code: output=9c5ddee0dc2bc258 input=51e42c3b8dddb78e]*/ 321{ 322 unsigned int ch = (unsigned int)character; 323 return ch != sre_lower_unicode(ch) || ch != sre_upper_unicode(ch); 324} 325 326/*[clinic input] 327_sre.ascii_tolower -> int 328 329 character: int 330 / 331 332[clinic start generated code]*/ 333 334static int 335_sre_ascii_tolower_impl(PyObject *module, int character) 336/*[clinic end generated code: output=228294ed6ff2a612 input=272c609b5b61f136]*/ 337{ 338 return sre_lower_ascii(character); 339} 340 341/*[clinic input] 342_sre.unicode_tolower -> int 343 344 character: int 345 / 346 347[clinic start generated code]*/ 348 349static int 350_sre_unicode_tolower_impl(PyObject *module, int character) 351/*[clinic end generated code: output=6422272d7d7fee65 input=91d708c5f3c2045a]*/ 352{ 353 return sre_lower_unicode(character); 354} 355 356LOCAL(void) 357state_reset(SRE_STATE* state) 358{ 359 /* state->mark will be set to 0 in SRE_OP_MARK dynamically. */ 360 /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/ 361 362 state->lastmark = -1; 363 state->lastindex = -1; 364 365 state->repeat = NULL; 366 367 data_stack_dealloc(state); 368} 369 370static const void* 371getstring(PyObject* string, Py_ssize_t* p_length, 372 int* p_isbytes, int* p_charsize, 373 Py_buffer *view) 374{ 375 /* given a python object, return a data pointer, a length (in 376 characters), and a character size. return NULL if the object 377 is not a string (or not compatible) */ 378 379 /* Unicode objects do not support the buffer API. So, get the data 380 directly instead. 
*/ 381 if (PyUnicode_Check(string)) { 382 if (PyUnicode_READY(string) == -1) 383 return NULL; 384 *p_length = PyUnicode_GET_LENGTH(string); 385 *p_charsize = PyUnicode_KIND(string); 386 *p_isbytes = 0; 387 return PyUnicode_DATA(string); 388 } 389 390 /* get pointer to byte string buffer */ 391 if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) { 392 PyErr_Format(PyExc_TypeError, "expected string or bytes-like " 393 "object, got '%.200s'", Py_TYPE(string)->tp_name); 394 return NULL; 395 } 396 397 *p_length = view->len; 398 *p_charsize = 1; 399 *p_isbytes = 1; 400 401 if (view->buf == NULL) { 402 PyErr_SetString(PyExc_ValueError, "Buffer is NULL"); 403 PyBuffer_Release(view); 404 view->buf = NULL; 405 return NULL; 406 } 407 return view->buf; 408} 409 410LOCAL(PyObject*) 411state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string, 412 Py_ssize_t start, Py_ssize_t end) 413{ 414 /* prepare state object */ 415 416 Py_ssize_t length; 417 int isbytes, charsize; 418 const void* ptr; 419 420 memset(state, 0, sizeof(SRE_STATE)); 421 422 state->mark = PyMem_New(const void *, pattern->groups * 2); 423 if (!state->mark) { 424 PyErr_NoMemory(); 425 goto err; 426 } 427 state->lastmark = -1; 428 state->lastindex = -1; 429 430 state->buffer.buf = NULL; 431 ptr = getstring(string, &length, &isbytes, &charsize, &state->buffer); 432 if (!ptr) 433 goto err; 434 435 if (isbytes && pattern->isbytes == 0) { 436 PyErr_SetString(PyExc_TypeError, 437 "cannot use a string pattern on a bytes-like object"); 438 goto err; 439 } 440 if (!isbytes && pattern->isbytes > 0) { 441 PyErr_SetString(PyExc_TypeError, 442 "cannot use a bytes pattern on a string-like object"); 443 goto err; 444 } 445 446 /* adjust boundaries */ 447 if (start < 0) 448 start = 0; 449 else if (start > length) 450 start = length; 451 452 if (end < 0) 453 end = 0; 454 else if (end > length) 455 end = length; 456 457 state->isbytes = isbytes; 458 state->charsize = charsize; 459 state->match_all = 0; 460 
state->must_advance = 0; 461 462 state->beginning = ptr; 463 464 state->start = (void*) ((char*) ptr + start * state->charsize); 465 state->end = (void*) ((char*) ptr + end * state->charsize); 466 467 Py_INCREF(string); 468 state->string = string; 469 state->pos = start; 470 state->endpos = end; 471 472 return string; 473 err: 474 /* We add an explicit cast here because MSVC has a bug when 475 compiling C code where it believes that `const void**` cannot be 476 safely casted to `void*`, see bpo-39943 for details. */ 477 PyMem_Free((void*) state->mark); 478 state->mark = NULL; 479 if (state->buffer.buf) 480 PyBuffer_Release(&state->buffer); 481 return NULL; 482} 483 484LOCAL(void) 485state_fini(SRE_STATE* state) 486{ 487 if (state->buffer.buf) 488 PyBuffer_Release(&state->buffer); 489 Py_XDECREF(state->string); 490 data_stack_dealloc(state); 491 /* See above PyMem_Del for why we explicitly cast here. */ 492 PyMem_Free((void*) state->mark); 493 state->mark = NULL; 494} 495 496/* calculate offset from start of string */ 497#define STATE_OFFSET(state, member)\ 498 (((char*)(member) - (char*)(state)->beginning) / (state)->charsize) 499 500LOCAL(PyObject*) 501getslice(int isbytes, const void *ptr, 502 PyObject* string, Py_ssize_t start, Py_ssize_t end) 503{ 504 if (isbytes) { 505 if (PyBytes_CheckExact(string) && 506 start == 0 && end == PyBytes_GET_SIZE(string)) { 507 Py_INCREF(string); 508 return string; 509 } 510 return PyBytes_FromStringAndSize( 511 (const char *)ptr + start, end - start); 512 } 513 else { 514 return PyUnicode_Substring(string, start, end); 515 } 516} 517 518LOCAL(PyObject*) 519state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty) 520{ 521 Py_ssize_t i, j; 522 523 index = (index - 1) * 2; 524 525 if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) { 526 if (empty) 527 /* want empty string */ 528 i = j = 0; 529 else { 530 Py_RETURN_NONE; 531 } 532 } else { 533 i = 
STATE_OFFSET(state, state->mark[index]); 534 j = STATE_OFFSET(state, state->mark[index+1]); 535 536 /* check wrong span */ 537 if (i > j) { 538 PyErr_SetString(PyExc_SystemError, 539 "The span of capturing group is wrong," 540 " please report a bug for the re module."); 541 return NULL; 542 } 543 } 544 545 return getslice(state->isbytes, state->beginning, string, i, j); 546} 547 548static void 549pattern_error(Py_ssize_t status) 550{ 551 switch (status) { 552 case SRE_ERROR_RECURSION_LIMIT: 553 /* This error code seems to be unused. */ 554 PyErr_SetString( 555 PyExc_RecursionError, 556 "maximum recursion limit exceeded" 557 ); 558 break; 559 case SRE_ERROR_MEMORY: 560 PyErr_NoMemory(); 561 break; 562 case SRE_ERROR_INTERRUPTED: 563 /* An exception has already been raised, so let it fly */ 564 break; 565 default: 566 /* other error codes indicate compiler/engine bugs */ 567 PyErr_SetString( 568 PyExc_RuntimeError, 569 "internal error in regular expression engine" 570 ); 571 } 572} 573 574static int 575pattern_traverse(PatternObject *self, visitproc visit, void *arg) 576{ 577 Py_VISIT(Py_TYPE(self)); 578 Py_VISIT(self->groupindex); 579 Py_VISIT(self->indexgroup); 580 Py_VISIT(self->pattern); 581 return 0; 582} 583 584static int 585pattern_clear(PatternObject *self) 586{ 587 Py_CLEAR(self->groupindex); 588 Py_CLEAR(self->indexgroup); 589 Py_CLEAR(self->pattern); 590 return 0; 591} 592 593static void 594pattern_dealloc(PatternObject* self) 595{ 596 PyTypeObject *tp = Py_TYPE(self); 597 598 PyObject_GC_UnTrack(self); 599 if (self->weakreflist != NULL) { 600 PyObject_ClearWeakRefs((PyObject *) self); 601 } 602 (void)pattern_clear(self); 603 tp->tp_free(self); 604 Py_DECREF(tp); 605} 606 607LOCAL(Py_ssize_t) 608sre_match(SRE_STATE* state, SRE_CODE* pattern) 609{ 610 if (state->charsize == 1) 611 return sre_ucs1_match(state, pattern, 1); 612 if (state->charsize == 2) 613 return sre_ucs2_match(state, pattern, 1); 614 assert(state->charsize == 4); 615 return 
sre_ucs4_match(state, pattern, 1); 616} 617 618LOCAL(Py_ssize_t) 619sre_search(SRE_STATE* state, SRE_CODE* pattern) 620{ 621 if (state->charsize == 1) 622 return sre_ucs1_search(state, pattern); 623 if (state->charsize == 2) 624 return sre_ucs2_search(state, pattern); 625 assert(state->charsize == 4); 626 return sre_ucs4_search(state, pattern); 627} 628 629/*[clinic input] 630_sre.SRE_Pattern.match 631 632 cls: defining_class 633 / 634 string: object 635 pos: Py_ssize_t = 0 636 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize 637 638Matches zero or more characters at the beginning of the string. 639[clinic start generated code]*/ 640 641static PyObject * 642_sre_SRE_Pattern_match_impl(PatternObject *self, PyTypeObject *cls, 643 PyObject *string, Py_ssize_t pos, 644 Py_ssize_t endpos) 645/*[clinic end generated code: output=ec6208ea58a0cca0 input=4bdb9c3e564d13ac]*/ 646{ 647 _sremodulestate *module_state = get_sre_module_state_by_class(cls); 648 SRE_STATE state; 649 Py_ssize_t status; 650 PyObject *match; 651 652 if (!state_init(&state, (PatternObject *)self, string, pos, endpos)) 653 return NULL; 654 655 state.ptr = state.start; 656 657 TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr)); 658 659 status = sre_match(&state, PatternObject_GetCode(self)); 660 661 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr)); 662 if (PyErr_Occurred()) { 663 state_fini(&state); 664 return NULL; 665 } 666 667 match = pattern_new_match(module_state, self, &state, status); 668 state_fini(&state); 669 return match; 670} 671 672/*[clinic input] 673_sre.SRE_Pattern.fullmatch 674 675 cls: defining_class 676 / 677 string: object 678 pos: Py_ssize_t = 0 679 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize 680 681Matches against all of the string. 
682[clinic start generated code]*/ 683 684static PyObject * 685_sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyTypeObject *cls, 686 PyObject *string, Py_ssize_t pos, 687 Py_ssize_t endpos) 688/*[clinic end generated code: output=625b75b027ef94da input=50981172ab0fcfdd]*/ 689{ 690 _sremodulestate *module_state = get_sre_module_state_by_class(cls); 691 SRE_STATE state; 692 Py_ssize_t status; 693 PyObject *match; 694 695 if (!state_init(&state, self, string, pos, endpos)) 696 return NULL; 697 698 state.ptr = state.start; 699 700 TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr)); 701 702 state.match_all = 1; 703 status = sre_match(&state, PatternObject_GetCode(self)); 704 705 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr)); 706 if (PyErr_Occurred()) { 707 state_fini(&state); 708 return NULL; 709 } 710 711 match = pattern_new_match(module_state, self, &state, status); 712 state_fini(&state); 713 return match; 714} 715 716/*[clinic input] 717_sre.SRE_Pattern.search 718 719 cls: defining_class 720 / 721 string: object 722 pos: Py_ssize_t = 0 723 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize 724 725Scan through string looking for a match, and return a corresponding match object instance. 726 727Return None if no position in the string matches. 
728[clinic start generated code]*/ 729 730static PyObject * 731_sre_SRE_Pattern_search_impl(PatternObject *self, PyTypeObject *cls, 732 PyObject *string, Py_ssize_t pos, 733 Py_ssize_t endpos) 734/*[clinic end generated code: output=bd7f2d9d583e1463 input=afa9afb66a74a4b3]*/ 735{ 736 _sremodulestate *module_state = get_sre_module_state_by_class(cls); 737 SRE_STATE state; 738 Py_ssize_t status; 739 PyObject *match; 740 741 if (!state_init(&state, self, string, pos, endpos)) 742 return NULL; 743 744 TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr)); 745 746 status = sre_search(&state, PatternObject_GetCode(self)); 747 748 TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr)); 749 750 if (PyErr_Occurred()) { 751 state_fini(&state); 752 return NULL; 753 } 754 755 match = pattern_new_match(module_state, self, &state, status); 756 state_fini(&state); 757 return match; 758} 759 760static PyObject* 761call(const char* module, const char* function, PyObject* args) 762{ 763 PyObject* name; 764 PyObject* mod; 765 PyObject* func; 766 PyObject* result; 767 768 if (!args) 769 return NULL; 770 name = PyUnicode_FromString(module); 771 if (!name) 772 return NULL; 773 mod = PyImport_Import(name); 774 Py_DECREF(name); 775 if (!mod) 776 return NULL; 777 func = PyObject_GetAttrString(mod, function); 778 Py_DECREF(mod); 779 if (!func) 780 return NULL; 781 result = PyObject_CallObject(func, args); 782 Py_DECREF(func); 783 Py_DECREF(args); 784 return result; 785} 786 787/*[clinic input] 788_sre.SRE_Pattern.findall 789 790 string: object 791 pos: Py_ssize_t = 0 792 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize 793 794Return a list of all non-overlapping matches of pattern in string. 
795[clinic start generated code]*/ 796 797static PyObject * 798_sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string, 799 Py_ssize_t pos, Py_ssize_t endpos) 800/*[clinic end generated code: output=f4966baceea60aca input=5b6a4ee799741563]*/ 801{ 802 SRE_STATE state; 803 PyObject* list; 804 Py_ssize_t status; 805 Py_ssize_t i, b, e; 806 807 if (!state_init(&state, self, string, pos, endpos)) 808 return NULL; 809 810 list = PyList_New(0); 811 if (!list) { 812 state_fini(&state); 813 return NULL; 814 } 815 816 while (state.start <= state.end) { 817 818 PyObject* item; 819 820 state_reset(&state); 821 822 state.ptr = state.start; 823 824 status = sre_search(&state, PatternObject_GetCode(self)); 825 if (PyErr_Occurred()) 826 goto error; 827 828 if (status <= 0) { 829 if (status == 0) 830 break; 831 pattern_error(status); 832 goto error; 833 } 834 835 /* don't bother to build a match object */ 836 switch (self->groups) { 837 case 0: 838 b = STATE_OFFSET(&state, state.start); 839 e = STATE_OFFSET(&state, state.ptr); 840 item = getslice(state.isbytes, state.beginning, 841 string, b, e); 842 if (!item) 843 goto error; 844 break; 845 case 1: 846 item = state_getslice(&state, 1, string, 1); 847 if (!item) 848 goto error; 849 break; 850 default: 851 item = PyTuple_New(self->groups); 852 if (!item) 853 goto error; 854 for (i = 0; i < self->groups; i++) { 855 PyObject* o = state_getslice(&state, i+1, string, 1); 856 if (!o) { 857 Py_DECREF(item); 858 goto error; 859 } 860 PyTuple_SET_ITEM(item, i, o); 861 } 862 break; 863 } 864 865 status = PyList_Append(list, item); 866 Py_DECREF(item); 867 if (status < 0) 868 goto error; 869 870 state.must_advance = (state.ptr == state.start); 871 state.start = state.ptr; 872 } 873 874 state_fini(&state); 875 return list; 876 877error: 878 Py_DECREF(list); 879 state_fini(&state); 880 return NULL; 881 882} 883 884/*[clinic input] 885_sre.SRE_Pattern.finditer 886 887 cls: defining_class 888 / 889 string: object 890 pos: Py_ssize_t = 
0 891 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize 892 893Return an iterator over all non-overlapping matches for the RE pattern in string. 894 895For each match, the iterator returns a match object. 896[clinic start generated code]*/ 897 898static PyObject * 899_sre_SRE_Pattern_finditer_impl(PatternObject *self, PyTypeObject *cls, 900 PyObject *string, Py_ssize_t pos, 901 Py_ssize_t endpos) 902/*[clinic end generated code: output=1791dbf3618ade56 input=812e332a4848cbaf]*/ 903{ 904 _sremodulestate *module_state = get_sre_module_state_by_class(cls); 905 PyObject* scanner; 906 PyObject* search; 907 PyObject* iterator; 908 909 scanner = pattern_scanner(module_state, self, string, pos, endpos); 910 if (!scanner) 911 return NULL; 912 913 search = PyObject_GetAttrString(scanner, "search"); 914 Py_DECREF(scanner); 915 if (!search) 916 return NULL; 917 918 iterator = PyCallIter_New(search, Py_None); 919 Py_DECREF(search); 920 921 return iterator; 922} 923 924/*[clinic input] 925_sre.SRE_Pattern.scanner 926 927 cls: defining_class 928 / 929 string: object 930 pos: Py_ssize_t = 0 931 endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize 932 933[clinic start generated code]*/ 934 935static PyObject * 936_sre_SRE_Pattern_scanner_impl(PatternObject *self, PyTypeObject *cls, 937 PyObject *string, Py_ssize_t pos, 938 Py_ssize_t endpos) 939/*[clinic end generated code: output=f70cd506112f1bd9 input=2e487e5151bcee4c]*/ 940{ 941 _sremodulestate *module_state = get_sre_module_state_by_class(cls); 942 943 return pattern_scanner(module_state, self, string, pos, endpos); 944} 945 946/*[clinic input] 947_sre.SRE_Pattern.split 948 949 string: object 950 maxsplit: Py_ssize_t = 0 951 952Split string by the occurrences of pattern. 
953[clinic start generated code]*/ 954 955static PyObject * 956_sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string, 957 Py_ssize_t maxsplit) 958/*[clinic end generated code: output=7ac66f381c45e0be input=1eeeb10dafc9947a]*/ 959{ 960 SRE_STATE state; 961 PyObject* list; 962 PyObject* item; 963 Py_ssize_t status; 964 Py_ssize_t n; 965 Py_ssize_t i; 966 const void* last; 967 968 assert(self->codesize != 0); 969 970 if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX)) 971 return NULL; 972 973 list = PyList_New(0); 974 if (!list) { 975 state_fini(&state); 976 return NULL; 977 } 978 979 n = 0; 980 last = state.start; 981 982 while (!maxsplit || n < maxsplit) { 983 984 state_reset(&state); 985 986 state.ptr = state.start; 987 988 status = sre_search(&state, PatternObject_GetCode(self)); 989 if (PyErr_Occurred()) 990 goto error; 991 992 if (status <= 0) { 993 if (status == 0) 994 break; 995 pattern_error(status); 996 goto error; 997 } 998 999 /* get segment before this match */ 1000 item = getslice(state.isbytes, state.beginning, 1001 string, STATE_OFFSET(&state, last), 1002 STATE_OFFSET(&state, state.start) 1003 ); 1004 if (!item) 1005 goto error; 1006 status = PyList_Append(list, item); 1007 Py_DECREF(item); 1008 if (status < 0) 1009 goto error; 1010 1011 /* add groups (if any) */ 1012 for (i = 0; i < self->groups; i++) { 1013 item = state_getslice(&state, i+1, string, 0); 1014 if (!item) 1015 goto error; 1016 status = PyList_Append(list, item); 1017 Py_DECREF(item); 1018 if (status < 0) 1019 goto error; 1020 } 1021 1022 n = n + 1; 1023 state.must_advance = (state.ptr == state.start); 1024 last = state.start = state.ptr; 1025 1026 } 1027 1028 /* get segment following last match (even if empty) */ 1029 item = getslice(state.isbytes, state.beginning, 1030 string, STATE_OFFSET(&state, last), state.endpos 1031 ); 1032 if (!item) 1033 goto error; 1034 status = PyList_Append(list, item); 1035 Py_DECREF(item); 1036 if (status < 0) 1037 goto error; 1038 1039 
state_fini(&state); 1040 return list; 1041 1042error: 1043 Py_DECREF(list); 1044 state_fini(&state); 1045 return NULL; 1046 1047} 1048 1049static PyObject* 1050pattern_subx(_sremodulestate* module_state, 1051 PatternObject* self, 1052 PyObject* ptemplate, 1053 PyObject* string, 1054 Py_ssize_t count, 1055 Py_ssize_t subn) 1056{ 1057 SRE_STATE state; 1058 PyObject* list; 1059 PyObject* joiner; 1060 PyObject* item; 1061 PyObject* filter; 1062 PyObject* match; 1063 const void* ptr; 1064 Py_ssize_t status; 1065 Py_ssize_t n; 1066 Py_ssize_t i, b, e; 1067 int isbytes, charsize; 1068 int filter_is_callable; 1069 Py_buffer view; 1070 1071 if (PyCallable_Check(ptemplate)) { 1072 /* sub/subn takes either a function or a template */ 1073 filter = ptemplate; 1074 Py_INCREF(filter); 1075 filter_is_callable = 1; 1076 } else { 1077 /* if not callable, check if it's a literal string */ 1078 int literal; 1079 view.buf = NULL; 1080 ptr = getstring(ptemplate, &n, &isbytes, &charsize, &view); 1081 if (ptr) { 1082 if (charsize == 1) 1083 literal = memchr(ptr, '\\', n) == NULL; 1084 else 1085 literal = PyUnicode_FindChar(ptemplate, '\\', 0, n, 1) == -1; 1086 } else { 1087 PyErr_Clear(); 1088 literal = 0; 1089 } 1090 if (view.buf) 1091 PyBuffer_Release(&view); 1092 if (literal) { 1093 filter = ptemplate; 1094 Py_INCREF(filter); 1095 filter_is_callable = 0; 1096 } else { 1097 /* not a literal; hand it over to the template compiler */ 1098 filter = call( 1099 SRE_PY_MODULE, "_subx", 1100 PyTuple_Pack(2, self, ptemplate) 1101 ); 1102 if (!filter) 1103 return NULL; 1104 filter_is_callable = PyCallable_Check(filter); 1105 } 1106 } 1107 1108 if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX)) { 1109 Py_DECREF(filter); 1110 return NULL; 1111 } 1112 1113 list = PyList_New(0); 1114 if (!list) { 1115 Py_DECREF(filter); 1116 state_fini(&state); 1117 return NULL; 1118 } 1119 1120 n = i = 0; 1121 1122 while (!count || n < count) { 1123 1124 state_reset(&state); 1125 1126 state.ptr = 
state.start; 1127 1128 status = sre_search(&state, PatternObject_GetCode(self)); 1129 if (PyErr_Occurred()) 1130 goto error; 1131 1132 if (status <= 0) { 1133 if (status == 0) 1134 break; 1135 pattern_error(status); 1136 goto error; 1137 } 1138 1139 b = STATE_OFFSET(&state, state.start); 1140 e = STATE_OFFSET(&state, state.ptr); 1141 1142 if (i < b) { 1143 /* get segment before this match */ 1144 item = getslice(state.isbytes, state.beginning, 1145 string, i, b); 1146 if (!item) 1147 goto error; 1148 status = PyList_Append(list, item); 1149 Py_DECREF(item); 1150 if (status < 0) 1151 goto error; 1152 1153 } 1154 1155 if (filter_is_callable) { 1156 /* pass match object through filter */ 1157 match = pattern_new_match(module_state, self, &state, 1); 1158 if (!match) 1159 goto error; 1160 item = PyObject_CallOneArg(filter, match); 1161 Py_DECREF(match); 1162 if (!item) 1163 goto error; 1164 } else { 1165 /* filter is literal string */ 1166 item = filter; 1167 Py_INCREF(item); 1168 } 1169 1170 /* add to list */ 1171 if (item != Py_None) { 1172 status = PyList_Append(list, item); 1173 Py_DECREF(item); 1174 if (status < 0) 1175 goto error; 1176 } 1177 1178 i = e; 1179 n = n + 1; 1180 state.must_advance = (state.ptr == state.start); 1181 state.start = state.ptr; 1182 } 1183 1184 /* get segment following last match */ 1185 if (i < state.endpos) { 1186 item = getslice(state.isbytes, state.beginning, 1187 string, i, state.endpos); 1188 if (!item) 1189 goto error; 1190 status = PyList_Append(list, item); 1191 Py_DECREF(item); 1192 if (status < 0) 1193 goto error; 1194 } 1195 1196 state_fini(&state); 1197 1198 Py_DECREF(filter); 1199 1200 /* convert list to single string (also removes list) */ 1201 joiner = getslice(state.isbytes, state.beginning, string, 0, 0); 1202 if (!joiner) { 1203 Py_DECREF(list); 1204 return NULL; 1205 } 1206 if (PyList_GET_SIZE(list) == 0) { 1207 Py_DECREF(list); 1208 item = joiner; 1209 } 1210 else { 1211 if (state.isbytes) 1212 item = 
_PyBytes_Join(joiner, list); 1213 else 1214 item = PyUnicode_Join(joiner, list); 1215 Py_DECREF(joiner); 1216 Py_DECREF(list); 1217 if (!item) 1218 return NULL; 1219 } 1220 1221 if (subn) 1222 return Py_BuildValue("Nn", item, n); 1223 1224 return item; 1225 1226error: 1227 Py_DECREF(list); 1228 state_fini(&state); 1229 Py_DECREF(filter); 1230 return NULL; 1231 1232} 1233 1234/*[clinic input] 1235_sre.SRE_Pattern.sub 1236 1237 cls: defining_class 1238 / 1239 repl: object 1240 string: object 1241 count: Py_ssize_t = 0 1242 1243Return the string obtained by replacing the leftmost non-overlapping occurrences of pattern in string by the replacement repl. 1244[clinic start generated code]*/ 1245 1246static PyObject * 1247_sre_SRE_Pattern_sub_impl(PatternObject *self, PyTypeObject *cls, 1248 PyObject *repl, PyObject *string, Py_ssize_t count) 1249/*[clinic end generated code: output=4be141ab04bca60d input=d8d1d4ac2311a07c]*/ 1250{ 1251 _sremodulestate *module_state = get_sre_module_state_by_class(cls); 1252 1253 return pattern_subx(module_state, self, repl, string, count, 0); 1254} 1255 1256/*[clinic input] 1257_sre.SRE_Pattern.subn 1258 1259 cls: defining_class 1260 / 1261 repl: object 1262 string: object 1263 count: Py_ssize_t = 0 1264 1265Return the tuple (new_string, number_of_subs_made) found by replacing the leftmost non-overlapping occurrences of pattern with the replacement repl. 
1266[clinic start generated code]*/ 1267 1268static PyObject * 1269_sre_SRE_Pattern_subn_impl(PatternObject *self, PyTypeObject *cls, 1270 PyObject *repl, PyObject *string, 1271 Py_ssize_t count) 1272/*[clinic end generated code: output=da02fd85258b1e1f input=8b78a65b8302e58d]*/ 1273{ 1274 _sremodulestate *module_state = get_sre_module_state_by_class(cls); 1275 1276 return pattern_subx(module_state, self, repl, string, count, 1); 1277} 1278 1279/*[clinic input] 1280_sre.SRE_Pattern.__copy__ 1281 1282[clinic start generated code]*/ 1283 1284static PyObject * 1285_sre_SRE_Pattern___copy___impl(PatternObject *self) 1286/*[clinic end generated code: output=85dedc2db1bd8694 input=a730a59d863bc9f5]*/ 1287{ 1288 Py_INCREF(self); 1289 return (PyObject *)self; 1290} 1291 1292/*[clinic input] 1293_sre.SRE_Pattern.__deepcopy__ 1294 1295 memo: object 1296 / 1297 1298[clinic start generated code]*/ 1299 1300static PyObject * 1301_sre_SRE_Pattern___deepcopy__(PatternObject *self, PyObject *memo) 1302/*[clinic end generated code: output=2ad25679c1f1204a input=a465b1602f997bed]*/ 1303{ 1304 Py_INCREF(self); 1305 return (PyObject *)self; 1306} 1307 1308static PyObject * 1309pattern_repr(PatternObject *obj) 1310{ 1311 static const struct { 1312 const char *name; 1313 int value; 1314 } flag_names[] = { 1315 {"re.TEMPLATE", SRE_FLAG_TEMPLATE}, 1316 {"re.IGNORECASE", SRE_FLAG_IGNORECASE}, 1317 {"re.LOCALE", SRE_FLAG_LOCALE}, 1318 {"re.MULTILINE", SRE_FLAG_MULTILINE}, 1319 {"re.DOTALL", SRE_FLAG_DOTALL}, 1320 {"re.UNICODE", SRE_FLAG_UNICODE}, 1321 {"re.VERBOSE", SRE_FLAG_VERBOSE}, 1322 {"re.DEBUG", SRE_FLAG_DEBUG}, 1323 {"re.ASCII", SRE_FLAG_ASCII}, 1324 }; 1325 PyObject *result = NULL; 1326 PyObject *flag_items; 1327 size_t i; 1328 int flags = obj->flags; 1329 1330 /* Omit re.UNICODE for valid string patterns. 
*/ 1331 if (obj->isbytes == 0 && 1332 (flags & (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII)) == 1333 SRE_FLAG_UNICODE) 1334 flags &= ~SRE_FLAG_UNICODE; 1335 1336 flag_items = PyList_New(0); 1337 if (!flag_items) 1338 return NULL; 1339 1340 for (i = 0; i < Py_ARRAY_LENGTH(flag_names); i++) { 1341 if (flags & flag_names[i].value) { 1342 PyObject *item = PyUnicode_FromString(flag_names[i].name); 1343 if (!item) 1344 goto done; 1345 1346 if (PyList_Append(flag_items, item) < 0) { 1347 Py_DECREF(item); 1348 goto done; 1349 } 1350 Py_DECREF(item); 1351 flags &= ~flag_names[i].value; 1352 } 1353 } 1354 if (flags) { 1355 PyObject *item = PyUnicode_FromFormat("0x%x", flags); 1356 if (!item) 1357 goto done; 1358 1359 if (PyList_Append(flag_items, item) < 0) { 1360 Py_DECREF(item); 1361 goto done; 1362 } 1363 Py_DECREF(item); 1364 } 1365 1366 if (PyList_Size(flag_items) > 0) { 1367 PyObject *flags_result; 1368 PyObject *sep = PyUnicode_FromString("|"); 1369 if (!sep) 1370 goto done; 1371 flags_result = PyUnicode_Join(sep, flag_items); 1372 Py_DECREF(sep); 1373 if (!flags_result) 1374 goto done; 1375 result = PyUnicode_FromFormat("re.compile(%.200R, %S)", 1376 obj->pattern, flags_result); 1377 Py_DECREF(flags_result); 1378 } 1379 else { 1380 result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern); 1381 } 1382 1383done: 1384 Py_DECREF(flag_items); 1385 return result; 1386} 1387 1388PyDoc_STRVAR(pattern_doc, "Compiled regular expression object."); 1389 1390/* PatternObject's 'groupindex' method. 
*/ 1391static PyObject * 1392pattern_groupindex(PatternObject *self, void *Py_UNUSED(ignored)) 1393{ 1394 if (self->groupindex == NULL) 1395 return PyDict_New(); 1396 return PyDictProxy_New(self->groupindex); 1397} 1398 1399static int _validate(PatternObject *self); /* Forward */ 1400 1401/*[clinic input] 1402_sre.compile 1403 1404 pattern: object 1405 flags: int 1406 code: object(subclass_of='&PyList_Type') 1407 groups: Py_ssize_t 1408 groupindex: object(subclass_of='&PyDict_Type') 1409 indexgroup: object(subclass_of='&PyTuple_Type') 1410 1411[clinic start generated code]*/ 1412 1413static PyObject * 1414_sre_compile_impl(PyObject *module, PyObject *pattern, int flags, 1415 PyObject *code, Py_ssize_t groups, PyObject *groupindex, 1416 PyObject *indexgroup) 1417/*[clinic end generated code: output=ef9c2b3693776404 input=0a68476dbbe5db30]*/ 1418{ 1419 /* "compile" pattern descriptor to pattern object */ 1420 1421 _sremodulestate *module_state = get_sre_module_state(module); 1422 PatternObject* self; 1423 Py_ssize_t i, n; 1424 1425 n = PyList_GET_SIZE(code); 1426 /* coverity[ampersand_in_size] */ 1427 self = PyObject_GC_NewVar(PatternObject, module_state->Pattern_Type, n); 1428 if (!self) 1429 return NULL; 1430 self->weakreflist = NULL; 1431 self->pattern = NULL; 1432 self->groupindex = NULL; 1433 self->indexgroup = NULL; 1434 1435 self->codesize = n; 1436 1437 for (i = 0; i < n; i++) { 1438 PyObject *o = PyList_GET_ITEM(code, i); 1439 unsigned long value = PyLong_AsUnsignedLong(o); 1440 self->code[i] = (SRE_CODE) value; 1441 if ((unsigned long) self->code[i] != value) { 1442 PyErr_SetString(PyExc_OverflowError, 1443 "regular expression code size limit exceeded"); 1444 break; 1445 } 1446 } 1447 PyObject_GC_Track(self); 1448 1449 if (PyErr_Occurred()) { 1450 Py_DECREF(self); 1451 return NULL; 1452 } 1453 1454 if (pattern == Py_None) { 1455 self->isbytes = -1; 1456 } 1457 else { 1458 Py_ssize_t p_length; 1459 int charsize; 1460 Py_buffer view; 1461 view.buf = NULL; 
1462 if (!getstring(pattern, &p_length, &self->isbytes, 1463 &charsize, &view)) { 1464 Py_DECREF(self); 1465 return NULL; 1466 } 1467 if (view.buf) 1468 PyBuffer_Release(&view); 1469 } 1470 1471 Py_INCREF(pattern); 1472 self->pattern = pattern; 1473 1474 self->flags = flags; 1475 1476 self->groups = groups; 1477 1478 if (PyDict_GET_SIZE(groupindex) > 0) { 1479 Py_INCREF(groupindex); 1480 self->groupindex = groupindex; 1481 if (PyTuple_GET_SIZE(indexgroup) > 0) { 1482 Py_INCREF(indexgroup); 1483 self->indexgroup = indexgroup; 1484 } 1485 } 1486 1487 if (!_validate(self)) { 1488 Py_DECREF(self); 1489 return NULL; 1490 } 1491 1492 return (PyObject*) self; 1493} 1494 1495/* -------------------------------------------------------------------- */ 1496/* Code validation */ 1497 1498/* To learn more about this code, have a look at the _compile() function in 1499 Lib/sre_compile.py. The validation functions below checks the code array 1500 for conformance with the code patterns generated there. 1501 1502 The nice thing about the generated code is that it is position-independent: 1503 all jumps are relative jumps forward. Also, jumps don't cross each other: 1504 the target of a later jump is always earlier than the target of an earlier 1505 jump. IOW, this is okay: 1506 1507 J---------J-------T--------T 1508 \ \_____/ / 1509 \______________________/ 1510 1511 but this is not: 1512 1513 J---------J-------T--------T 1514 \_________\_____/ / 1515 \____________/ 1516 1517 It also helps that SRE_CODE is always an unsigned type. 
*/

/* Defining this one enables tracing of the validator */
#undef VVERBOSE

/* Trace macro for the validator */
#if defined(VVERBOSE)
#define VTRACE(v) printf v
#else
#define VTRACE(v) do {} while(0)  /* do nothing */
#endif

/* Report failure */
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return -1; } while (0)

/* Extract opcode, argument, or skip count from code array */
/* All three macros rely on locals named `code` and `end` (plus `op`, `arg`
   or `skip` respectively) in the calling function, and make that function
   return -1 through FAIL when `code` has already reached `end`.
   GET_SKIP_ADJ additionally rejects a skip that reaches beyond `end`. */
#define GET_OP                                          \
    do {                                                \
        VTRACE(("%p: ", code));                         \
        if (code >= end) FAIL;                          \
        op = *code++;                                   \
        VTRACE(("%lu (op)\n", (unsigned long)op));      \
    } while (0)
#define GET_ARG                                         \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        arg = *code++;                                  \
        VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
    } while (0)
#define GET_SKIP_ADJ(adj)                               \
    do {                                                \
        VTRACE(("%p= ", code));                         \
        if (code >= end) FAIL;                          \
        skip = *code;                                   \
        VTRACE(("%lu (skip to %p)\n",                   \
               (unsigned long)skip, code+skip));        \
        if (skip-adj > (uintptr_t)(end - code))         \
            FAIL;                                       \
        code++;                                         \
    } while (0)
#define GET_SKIP GET_SKIP_ADJ(0)

/* Validate the set items found in [code, end): every opcode must be one of
   the set opcodes handled below and its operands must fit before `end`.
   Returns 0 when the whole range is well formed, -1 otherwise. */
static int
_validate_charset(SRE_CODE *code, SRE_CODE *end)
{
    /* Some variables are manipulated by the macros above */
    SRE_CODE op;
    SRE_CODE arg;
    SRE_CODE offset;
    int i;

    while (code < end) {
        GET_OP;
        switch (op) {

        case SRE_OP_NEGATE:
            break;

        case SRE_OP_LITERAL:
            GET_ARG;
            break;

        case SRE_OP_RANGE:
        case SRE_OP_RANGE_UNI_IGNORE:
            /* two operands, consumed without further checks */
            GET_ARG;
            GET_ARG;
            break;

        case SRE_OP_CHARSET:
            offset = 256/SRE_CODE_BITS; /* 256-bit bitmap */
            if (offset > (uintptr_t)(end - code))
                FAIL;
            code += offset;
            break;

        case SRE_OP_BIGCHARSET:
            GET_ARG; /* Number of blocks */
            offset = 256/sizeof(SRE_CODE); /* 256-byte table */
            if (offset > (uintptr_t)(end - code))
                FAIL;
            /* Make sure that each byte points to a valid block */
            for (i = 0; i < 256; i++) {
                if (((unsigned char *)code)[i] >= arg)
                    FAIL;
            }
            code += offset;
            offset = arg * (256/SRE_CODE_BITS); /* 256-bit bitmap times arg */
            if (offset > (uintptr_t)(end - code))
                FAIL;
            code += offset;
            break;

        case SRE_OP_CATEGORY:
            GET_ARG;
            /* only the category codes listed here are accepted */
            switch (arg) {
            case SRE_CATEGORY_DIGIT:
            case SRE_CATEGORY_NOT_DIGIT:
            case SRE_CATEGORY_SPACE:
            case SRE_CATEGORY_NOT_SPACE:
            case SRE_CATEGORY_WORD:
            case SRE_CATEGORY_NOT_WORD:
            case SRE_CATEGORY_LINEBREAK:
            case SRE_CATEGORY_NOT_LINEBREAK:
            case SRE_CATEGORY_LOC_WORD:
            case SRE_CATEGORY_LOC_NOT_WORD:
            case SRE_CATEGORY_UNI_DIGIT:
            case SRE_CATEGORY_UNI_NOT_DIGIT:
            case SRE_CATEGORY_UNI_SPACE:
            case SRE_CATEGORY_UNI_NOT_SPACE:
            case SRE_CATEGORY_UNI_WORD:
            case SRE_CATEGORY_UNI_NOT_WORD:
            case SRE_CATEGORY_UNI_LINEBREAK:
            case SRE_CATEGORY_UNI_NOT_LINEBREAK:
                break;
            default:
                FAIL;
            }
            break;

        default:
            FAIL;

        }
    }

    return 0;
}

/* Returns 0 on success, -1 on failure, and 1 if the last op is JUMP. */
/* Validates the opcode sequence in [code, end).  `groups` is the number of
   capturing groups and bounds MARK and GROUPREF operands.  Nested
   constructs are validated by recursive calls on their sub-ranges. */
static int
_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
{
    /* Some variables are manipulated by the macros above */
    SRE_CODE op;
    SRE_CODE arg;
    SRE_CODE skip;

    VTRACE(("code=%p, end=%p\n", code, end));

    if (code > end)
        FAIL;

    while (code < end) {
        GET_OP;
        switch (op) {

        case SRE_OP_MARK:
            /* We don't check whether marks are properly nested; the
               sre_match() code is robust even if they don't, and the worst
               you can get is nonsensical match results. */
            GET_ARG;
            if (arg > 2 * (size_t)groups + 1) {
                VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
                FAIL;
            }
            break;

        case SRE_OP_LITERAL:
        case SRE_OP_NOT_LITERAL:
        case SRE_OP_LITERAL_IGNORE:
        case SRE_OP_NOT_LITERAL_IGNORE:
        case SRE_OP_LITERAL_UNI_IGNORE:
        case SRE_OP_NOT_LITERAL_UNI_IGNORE:
        case SRE_OP_LITERAL_LOC_IGNORE:
        case SRE_OP_NOT_LITERAL_LOC_IGNORE:
            GET_ARG;
            /* The arg is just a character, nothing to check */
            break;

        case SRE_OP_SUCCESS:
        case SRE_OP_FAILURE:
            /* Nothing to check; these normally end the matching process */
            break;

        case SRE_OP_AT:
            GET_ARG;
            /* only the position codes listed here are accepted */
            switch (arg) {
            case SRE_AT_BEGINNING:
            case SRE_AT_BEGINNING_STRING:
            case SRE_AT_BEGINNING_LINE:
            case SRE_AT_END:
            case SRE_AT_END_LINE:
            case SRE_AT_END_STRING:
            case SRE_AT_BOUNDARY:
            case SRE_AT_NON_BOUNDARY:
            case SRE_AT_LOC_BOUNDARY:
            case SRE_AT_LOC_NON_BOUNDARY:
            case SRE_AT_UNI_BOUNDARY:
            case SRE_AT_UNI_NON_BOUNDARY:
                break;
            default:
                FAIL;
            }
            break;

        case SRE_OP_ANY:
        case SRE_OP_ANY_ALL:
            /* These have no operands */
            break;

        case SRE_OP_IN:
        case SRE_OP_IN_IGNORE:
        case SRE_OP_IN_UNI_IGNORE:
        case SRE_OP_IN_LOC_IGNORE:
            GET_SKIP;
            /* Stop 1 before the end; we check the FAILURE below */
            if (_validate_charset(code, code+skip-2))
                FAIL;
            if (code[skip-2] != SRE_OP_FAILURE)
                FAIL;
            code += skip-1;
            break;

        case SRE_OP_INFO:
            {
                /* A minimal info field is
                   <INFO> <1=skip> <2=flags> <3=min> <4=max>;
                   If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
                   more follows. */
                SRE_CODE flags, i;
                SRE_CODE *newcode;
                GET_SKIP;
                /* newcode: where the INFO block ends according to skip */
                newcode = code+skip-1;
                GET_ARG; flags = arg;
                /* min and max are consumed but not checked */
                GET_ARG;
                GET_ARG;
                /* Check that only valid flags are present */
                if ((flags & ~(SRE_INFO_PREFIX |
                               SRE_INFO_LITERAL |
                               SRE_INFO_CHARSET)) != 0)
                    FAIL;
                /* PREFIX and CHARSET are mutually exclusive */
                if ((flags & SRE_INFO_PREFIX) &&
                    (flags & SRE_INFO_CHARSET))
                    FAIL;
                /* LITERAL implies PREFIX */
                if ((flags & SRE_INFO_LITERAL) &&
                    !(flags & SRE_INFO_PREFIX))
                    FAIL;
                /* Validate the prefix */
                if (flags & SRE_INFO_PREFIX) {
                    SRE_CODE prefix_len;
                    GET_ARG; prefix_len = arg;
                    /* one more word is consumed here without being checked */
                    GET_ARG;
                    /* Here comes the prefix string */
                    if (prefix_len > (uintptr_t)(newcode - code))
                        FAIL;
                    code += prefix_len;
                    /* And here comes the overlap table */
                    if (prefix_len > (uintptr_t)(newcode - code))
                        FAIL;
                    /* Each overlap value should be < prefix_len */
                    for (i = 0; i < prefix_len; i++) {
                        if (code[i] >= prefix_len)
                            FAIL;
                    }
                    code += prefix_len;
                }
                /* Validate the charset */
                if (flags & SRE_INFO_CHARSET) {
                    if (_validate_charset(code, newcode-1))
                        FAIL;
                    if (newcode[-1] != SRE_OP_FAILURE)
                        FAIL;
                    code = newcode;
                }
                else if (code != newcode) {
                  VTRACE(("code=%p, newcode=%p\n", code, newcode));
                    FAIL;
                }
            }
            break;

        case SRE_OP_BRANCH:
            {
                /* common target of the JUMP that ends every alternative */
                SRE_CODE *target = NULL;
                for (;;) {
                    GET_SKIP;
                    /* a zero skip terminates the list of alternatives */
                    if (skip == 0)
                        break;
                    /* Stop 2 before the end; we check the JUMP below */
                    if (_validate_inner(code, code+skip-3, groups))
                        FAIL;
                    code += skip-3;
                    /* Check that it ends with a JUMP, and that each JUMP
                       has the same target */
                    GET_OP;
                    if (op != SRE_OP_JUMP)
                        FAIL;
                    GET_SKIP;
                    if (target == NULL)
                        target = code+skip-1;
                    else if (code+skip-1 != target)
                        FAIL;
                }
                if (code != target)
                    FAIL;
            }
            break;

        case SRE_OP_REPEAT_ONE:
        case SRE_OP_MIN_REPEAT_ONE:
        case SRE_OP_POSSESSIVE_REPEAT_ONE:
            {
                SRE_CODE min, max;
                GET_SKIP;
                GET_ARG; min = arg;
                GET_ARG; max = arg;
                if (min > max)
                    FAIL;
                if (max > SRE_MAXREPEAT)
                    FAIL;
                /* the repeated body must be followed by SUCCESS */
                if (_validate_inner(code, code+skip-4, groups))
                    FAIL;
                code += skip-4;
                GET_OP;
                if (op != SRE_OP_SUCCESS)
                    FAIL;
            }
            break;

        case SRE_OP_REPEAT:
        case SRE_OP_POSSESSIVE_REPEAT:
            {
                /* op1 remembers the opcode; GET_OP below overwrites op */
                SRE_CODE op1 = op, min, max;
                GET_SKIP;
                GET_ARG; min = arg;
                GET_ARG; max = arg;
                if (min > max)
                    FAIL;
                if (max > SRE_MAXREPEAT)
                    FAIL;
                if (_validate_inner(code, code+skip-3, groups))
                    FAIL;
                code += skip-3;
                GET_OP;
                /* POSSESSIVE_REPEAT ends with SUCCESS, REPEAT with one of
                   the UNTIL opcodes */
                if (op1 == SRE_OP_POSSESSIVE_REPEAT) {
                    if (op != SRE_OP_SUCCESS)
                        FAIL;
                }
                else {
                    if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
                        FAIL;
                }
            }
            break;

        case SRE_OP_ATOMIC_GROUP:
            {
                GET_SKIP;
                /* the group body must be followed by SUCCESS */
                if (_validate_inner(code, code+skip-2, groups))
                    FAIL;
                code += skip-2;
                GET_OP;
                if (op != SRE_OP_SUCCESS)
                    FAIL;
            }
            break;

        case SRE_OP_GROUPREF:
        case SRE_OP_GROUPREF_IGNORE:
        case SRE_OP_GROUPREF_UNI_IGNORE:
        case SRE_OP_GROUPREF_LOC_IGNORE:
            GET_ARG;
            if (arg >= (size_t)groups)
                FAIL;
            break;

        case SRE_OP_GROUPREF_EXISTS:
            /* The regex syntax for this is: '(?(group)then|else)', where
               'group' is either an integer group number or a group name,
               'then' and 'else' are sub-regexes, and 'else' is optional. */
            GET_ARG;
            if (arg >= (size_t)groups)
                FAIL;
            GET_SKIP_ADJ(1);
            code--; /* The skip is relative to the first arg! */
            /* There are two possibilities here: if there is both a 'then'
               part and an 'else' part, the generated code looks like:

               GROUPREF_EXISTS
               <group>
               <skipyes>
               ...then part...
               JUMP
               <skipno>
               (<skipyes> jumps here)
               ...else part...
               (<skipno> jumps here)

               If there is only a 'then' part, it looks like:

               GROUPREF_EXISTS
               <group>
               <skip>
               ...then part...
               (<skip> jumps here)

               There is no direct way to decide which it is, and we don't want
               to allow arbitrary jumps anywhere in the code; so we just look
               for a JUMP opcode preceding our skip target.
            */
            VTRACE(("then part:\n"));
            /* rc == 1 means the 'then' part ended in a JUMP, i.e. an
               'else' part follows */
            int rc = _validate_inner(code+1, code+skip-1, groups);
            if (rc == 1) {
                VTRACE(("else part:\n"));
                code += skip-2; /* Position after JUMP, at <skipno> */
                GET_SKIP;
                rc = _validate_inner(code, code+skip-1, groups);
            }
            if (rc)
                FAIL;
            code += skip-1;
            break;

        case SRE_OP_ASSERT:
        case SRE_OP_ASSERT_NOT:
            GET_SKIP;
            GET_ARG; /* 0 for lookahead, width for lookbehind */
            code--; /* Back up over arg to simplify math below */
            if (arg & 0x80000000)
                FAIL; /* Width too large */
            /* Stop 1 before the end; we check the SUCCESS below */
            if (_validate_inner(code+1, code+skip-2, groups))
                FAIL;
            code += skip-2;
            GET_OP;
            if (op != SRE_OP_SUCCESS)
                FAIL;
            break;

        case SRE_OP_JUMP:
            /* accepted only when its single operand is the last word of
               the range being validated */
            if (code + 1 != end)
                FAIL;
            VTRACE(("JUMP: %d\n", __LINE__));
            return 1;

        default:
            FAIL;

        }
    }

    VTRACE(("okay\n"));
    return 0;
}

/* Validate a complete code array: `groups` must lie in
   [0, SRE_MAXGROUPS], the array must be non-empty and end with SUCCESS;
   everything before that final SUCCESS is checked by _validate_inner(),
   whose result is returned.  Fails with -1 otherwise. */
static int
_validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
{
    if (groups < 0 || (size_t)groups > SRE_MAXGROUPS ||
        code >= end || end[-1] != SRE_OP_SUCCESS)
        FAIL;
    return _validate_inner(code, end-1, groups);
}

/* Validate the code stored in `self`.  Returns 1 when the code is
   accepted; returns 0 with RuntimeError("invalid SRE code") set when
   _validate_outer() reports any non-zero result. */
static int
_validate(PatternObject *self)
{
    if (_validate_outer(self->code, self->code+self->codesize, self->groups))
    {
        PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
        return 0;
    }
    else
        VTRACE(("Success!\n"));
    return 1;
}

/* -------------------------------------------------------------------- */
/* match methods */

/* GC traverse hook: visits the type object and the three object
   references held by a match (string, regs, pattern). */
static int
match_traverse(MatchObject *self, visitproc visit, void *arg)
{
    Py_VISIT(Py_TYPE(self));
    Py_VISIT(self->string);
    Py_VISIT(self->regs);
    Py_VISIT(self->pattern);
    return 0;
}

/* GC clear hook: drops the references to string, regs and pattern. */
static int
match_clear(MatchObject *self)
{
    Py_CLEAR(self->string);
    Py_CLEAR(self->regs);
    Py_CLEAR(self->pattern);
    return 0;
}

/* Deallocator: untracks the object, clears its references, frees the
   memory through tp_free and finally releases the reference to the type
   (fetched beforehand, since `self` is gone by then). */
static void
match_dealloc(MatchObject* self)
{
    PyTypeObject *tp = Py_TYPE(self);

    PyObject_GC_UnTrack(self);
    (void)match_clear(self);
    tp->tp_free(self);
    Py_DECREF(tp);
}

/* Return the text of group `index` (already validated by the caller, see
   the assert) as a new reference.  `def` is returned, with a new
   reference, when the match has no string or the group's start mark is
   negative.  Returns NULL when getstring() or getslice() fails. */
static PyObject*
match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
{
    Py_ssize_t length;
    int isbytes, charsize;
    Py_buffer view;
    PyObject *result;
    const void* ptr;
    Py_ssize_t i, j;

    assert(0 <= index && index < self->groups);
    /* each group owns two consecutive mark slots: start and end */
    index *= 2;

    if (self->string == Py_None || self->mark[index] < 0) {
        /* return default value if the string or group is undefined */
        Py_INCREF(def);
        return def;
    }

    ptr = getstring(self->string, &length, &isbytes, &charsize, &view);
    if (ptr == NULL)
        return NULL;

    i = self->mark[index];
    j = self->mark[index+1];
    /* clamp both ends to the length getstring() reported for the string */
    i = Py_MIN(i, length);
    j = Py_MIN(j, length);
    result = getslice(isbytes, ptr, self->string, i, j);
    if (isbytes && view.buf != NULL)
        PyBuffer_Release(&view);
    return result;
}

/* Translate a group designator into a group number.  NULL means group 0;
   an object that passes PyIndex_Check() is used as a number; anything
   else is looked up in the pattern's groupindex mapping, if there is one.
   Returns the number, or -1 with an exception set; IndexError("no such
   group") is used when no other exception is already pending. */
static Py_ssize_t
match_getindex(MatchObject* self, PyObject* index)
{
    Py_ssize_t i;

    if (index == NULL)
        /* Default value */
        return 0;

    if (PyIndex_Check(index)) {
        i = PyNumber_AsSsize_t(index, NULL);
    }
    else {
        i = -1;

        if (self->pattern->groupindex) {
            index = PyDict_GetItemWithError(self->pattern->groupindex, index);
            if (index && PyLong_Check(index)) {
                i = PyLong_AsSsize_t(index);
            }
        }
    }
    if (i < 0 || i >= self->groups) {
        /* raise IndexError if we were given a bad group number */
        if (!PyErr_Occurred()) {
            PyErr_SetString(PyExc_IndexError, "no such group");
        }
        return -1;
    }

    return i;
}

/* Resolve `index` with match_getindex() and return that group's text via
   match_getslice_by_index(); NULL when the designator cannot be
   resolved. */
static PyObject*
match_getslice(MatchObject* self, PyObject* index, PyObject* def)
{
    Py_ssize_t i = match_getindex(self, index);

    if (i < 0) {
        return NULL;
    }

    return match_getslice_by_index(self, i, def);
}

/*[clinic input]
_sre.SRE_Match.expand

    template: object

Return the string obtained by doing backslash substitution on the string template, as done by the sub() method.
2105[clinic start generated code]*/ 2106 2107static PyObject * 2108_sre_SRE_Match_expand_impl(MatchObject *self, PyObject *template) 2109/*[clinic end generated code: output=931b58ccc323c3a1 input=4bfdb22c2f8b146a]*/ 2110{ 2111 /* delegate to Python code */ 2112 return call( 2113 SRE_PY_MODULE, "_expand", 2114 PyTuple_Pack(3, self->pattern, self, template) 2115 ); 2116} 2117 2118static PyObject* 2119match_group(MatchObject* self, PyObject* args) 2120{ 2121 PyObject* result; 2122 Py_ssize_t i, size; 2123 2124 size = PyTuple_GET_SIZE(args); 2125 2126 switch (size) { 2127 case 0: 2128 result = match_getslice(self, _PyLong_GetZero(), Py_None); 2129 break; 2130 case 1: 2131 result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None); 2132 break; 2133 default: 2134 /* fetch multiple items */ 2135 result = PyTuple_New(size); 2136 if (!result) 2137 return NULL; 2138 for (i = 0; i < size; i++) { 2139 PyObject* item = match_getslice( 2140 self, PyTuple_GET_ITEM(args, i), Py_None 2141 ); 2142 if (!item) { 2143 Py_DECREF(result); 2144 return NULL; 2145 } 2146 PyTuple_SET_ITEM(result, i, item); 2147 } 2148 break; 2149 } 2150 return result; 2151} 2152 2153static PyObject* 2154match_getitem(MatchObject* self, PyObject* name) 2155{ 2156 return match_getslice(self, name, Py_None); 2157} 2158 2159/*[clinic input] 2160_sre.SRE_Match.groups 2161 2162 default: object = None 2163 Is used for groups that did not participate in the match. 2164 2165Return a tuple containing all the subgroups of the match, from 1. 
2166[clinic start generated code]*/ 2167 2168static PyObject * 2169_sre_SRE_Match_groups_impl(MatchObject *self, PyObject *default_value) 2170/*[clinic end generated code: output=daf8e2641537238a input=bb069ef55dabca91]*/ 2171{ 2172 PyObject* result; 2173 Py_ssize_t index; 2174 2175 result = PyTuple_New(self->groups-1); 2176 if (!result) 2177 return NULL; 2178 2179 for (index = 1; index < self->groups; index++) { 2180 PyObject* item; 2181 item = match_getslice_by_index(self, index, default_value); 2182 if (!item) { 2183 Py_DECREF(result); 2184 return NULL; 2185 } 2186 PyTuple_SET_ITEM(result, index-1, item); 2187 } 2188 2189 return result; 2190} 2191 2192/*[clinic input] 2193_sre.SRE_Match.groupdict 2194 2195 default: object = None 2196 Is used for groups that did not participate in the match. 2197 2198Return a dictionary containing all the named subgroups of the match, keyed by the subgroup name. 2199[clinic start generated code]*/ 2200 2201static PyObject * 2202_sre_SRE_Match_groupdict_impl(MatchObject *self, PyObject *default_value) 2203/*[clinic end generated code: output=29917c9073e41757 input=0ded7960b23780aa]*/ 2204{ 2205 PyObject *result; 2206 PyObject *key; 2207 PyObject *value; 2208 Py_ssize_t pos = 0; 2209 Py_hash_t hash; 2210 2211 result = PyDict_New(); 2212 if (!result || !self->pattern->groupindex) 2213 return result; 2214 2215 while (_PyDict_Next(self->pattern->groupindex, &pos, &key, &value, &hash)) { 2216 int status; 2217 Py_INCREF(key); 2218 value = match_getslice(self, key, default_value); 2219 if (!value) { 2220 Py_DECREF(key); 2221 goto failed; 2222 } 2223 status = _PyDict_SetItem_KnownHash(result, key, value, hash); 2224 Py_DECREF(value); 2225 Py_DECREF(key); 2226 if (status < 0) 2227 goto failed; 2228 } 2229 2230 return result; 2231 2232failed: 2233 Py_DECREF(result); 2234 return NULL; 2235} 2236 2237/*[clinic input] 2238_sre.SRE_Match.start -> Py_ssize_t 2239 2240 group: object(c_default="NULL") = 0 2241 / 2242 2243Return index of the start 
of the substring matched by group. 2244[clinic start generated code]*/ 2245 2246static Py_ssize_t 2247_sre_SRE_Match_start_impl(MatchObject *self, PyObject *group) 2248/*[clinic end generated code: output=3f6e7f9df2fb5201 input=ced8e4ed4b33ee6c]*/ 2249{ 2250 Py_ssize_t index = match_getindex(self, group); 2251 2252 if (index < 0) { 2253 return -1; 2254 } 2255 2256 /* mark is -1 if group is undefined */ 2257 return self->mark[index*2]; 2258} 2259 2260/*[clinic input] 2261_sre.SRE_Match.end -> Py_ssize_t 2262 2263 group: object(c_default="NULL") = 0 2264 / 2265 2266Return index of the end of the substring matched by group. 2267[clinic start generated code]*/ 2268 2269static Py_ssize_t 2270_sre_SRE_Match_end_impl(MatchObject *self, PyObject *group) 2271/*[clinic end generated code: output=f4240b09911f7692 input=1b799560c7f3d7e6]*/ 2272{ 2273 Py_ssize_t index = match_getindex(self, group); 2274 2275 if (index < 0) { 2276 return -1; 2277 } 2278 2279 /* mark is -1 if group is undefined */ 2280 return self->mark[index*2+1]; 2281} 2282 2283LOCAL(PyObject*) 2284_pair(Py_ssize_t i1, Py_ssize_t i2) 2285{ 2286 PyObject* pair; 2287 PyObject* item; 2288 2289 pair = PyTuple_New(2); 2290 if (!pair) 2291 return NULL; 2292 2293 item = PyLong_FromSsize_t(i1); 2294 if (!item) 2295 goto error; 2296 PyTuple_SET_ITEM(pair, 0, item); 2297 2298 item = PyLong_FromSsize_t(i2); 2299 if (!item) 2300 goto error; 2301 PyTuple_SET_ITEM(pair, 1, item); 2302 2303 return pair; 2304 2305 error: 2306 Py_DECREF(pair); 2307 return NULL; 2308} 2309 2310/*[clinic input] 2311_sre.SRE_Match.span 2312 2313 group: object(c_default="NULL") = 0 2314 / 2315 2316For match object m, return the 2-tuple (m.start(group), m.end(group)). 
2317[clinic start generated code]*/ 2318 2319static PyObject * 2320_sre_SRE_Match_span_impl(MatchObject *self, PyObject *group) 2321/*[clinic end generated code: output=f02ae40594d14fe6 input=8fa6014e982d71d4]*/ 2322{ 2323 Py_ssize_t index = match_getindex(self, group); 2324 2325 if (index < 0) { 2326 return NULL; 2327 } 2328 2329 /* marks are -1 if group is undefined */ 2330 return _pair(self->mark[index*2], self->mark[index*2+1]); 2331} 2332 2333static PyObject* 2334match_regs(MatchObject* self) 2335{ 2336 PyObject* regs; 2337 PyObject* item; 2338 Py_ssize_t index; 2339 2340 regs = PyTuple_New(self->groups); 2341 if (!regs) 2342 return NULL; 2343 2344 for (index = 0; index < self->groups; index++) { 2345 item = _pair(self->mark[index*2], self->mark[index*2+1]); 2346 if (!item) { 2347 Py_DECREF(regs); 2348 return NULL; 2349 } 2350 PyTuple_SET_ITEM(regs, index, item); 2351 } 2352 2353 Py_INCREF(regs); 2354 self->regs = regs; 2355 2356 return regs; 2357} 2358 2359/*[clinic input] 2360_sre.SRE_Match.__copy__ 2361 2362[clinic start generated code]*/ 2363 2364static PyObject * 2365_sre_SRE_Match___copy___impl(MatchObject *self) 2366/*[clinic end generated code: output=a779c5fc8b5b4eb4 input=3bb4d30b6baddb5b]*/ 2367{ 2368 Py_INCREF(self); 2369 return (PyObject *)self; 2370} 2371 2372/*[clinic input] 2373_sre.SRE_Match.__deepcopy__ 2374 2375 memo: object 2376 / 2377 2378[clinic start generated code]*/ 2379 2380static PyObject * 2381_sre_SRE_Match___deepcopy__(MatchObject *self, PyObject *memo) 2382/*[clinic end generated code: output=ba7cb46d655e4ee2 input=779d12a31c2c325e]*/ 2383{ 2384 Py_INCREF(self); 2385 return (PyObject *)self; 2386} 2387 2388PyDoc_STRVAR(match_doc, 2389"The result of re.match() and re.search().\n\ 2390Match objects always have a boolean value of True."); 2391 2392PyDoc_STRVAR(match_group_doc, 2393"group([group1, ...]) -> str or tuple.\n\ 2394 Return subgroup(s) of the match by indices or names.\n\ 2395 For 0 returns the entire match."); 2396 
2397static PyObject * 2398match_lastindex_get(MatchObject *self, void *Py_UNUSED(ignored)) 2399{ 2400 if (self->lastindex >= 0) 2401 return PyLong_FromSsize_t(self->lastindex); 2402 Py_RETURN_NONE; 2403} 2404 2405static PyObject * 2406match_lastgroup_get(MatchObject *self, void *Py_UNUSED(ignored)) 2407{ 2408 if (self->pattern->indexgroup && 2409 self->lastindex >= 0 && 2410 self->lastindex < PyTuple_GET_SIZE(self->pattern->indexgroup)) 2411 { 2412 PyObject *result = PyTuple_GET_ITEM(self->pattern->indexgroup, 2413 self->lastindex); 2414 Py_INCREF(result); 2415 return result; 2416 } 2417 Py_RETURN_NONE; 2418} 2419 2420static PyObject * 2421match_regs_get(MatchObject *self, void *Py_UNUSED(ignored)) 2422{ 2423 if (self->regs) { 2424 Py_INCREF(self->regs); 2425 return self->regs; 2426 } else 2427 return match_regs(self); 2428} 2429 2430static PyObject * 2431match_repr(MatchObject *self) 2432{ 2433 PyObject *result; 2434 PyObject *group0 = match_getslice_by_index(self, 0, Py_None); 2435 if (group0 == NULL) 2436 return NULL; 2437 result = PyUnicode_FromFormat( 2438 "<%s object; span=(%zd, %zd), match=%.50R>", 2439 Py_TYPE(self)->tp_name, 2440 self->mark[0], self->mark[1], group0); 2441 Py_DECREF(group0); 2442 return result; 2443} 2444 2445 2446static PyObject* 2447pattern_new_match(_sremodulestate* module_state, 2448 PatternObject* pattern, 2449 SRE_STATE* state, 2450 Py_ssize_t status) 2451{ 2452 /* create match object (from state object) */ 2453 2454 MatchObject* match; 2455 Py_ssize_t i, j; 2456 char* base; 2457 int n; 2458 2459 if (status > 0) { 2460 2461 /* create match object (with room for extra group marks) */ 2462 /* coverity[ampersand_in_size] */ 2463 match = PyObject_GC_NewVar(MatchObject, 2464 module_state->Match_Type, 2465 2*(pattern->groups+1)); 2466 if (!match) 2467 return NULL; 2468 2469 Py_INCREF(pattern); 2470 match->pattern = pattern; 2471 2472 Py_INCREF(state->string); 2473 match->string = state->string; 2474 2475 match->regs = NULL; 2476 
match->groups = pattern->groups+1; 2477 2478 /* fill in group slices */ 2479 2480 base = (char*) state->beginning; 2481 n = state->charsize; 2482 2483 match->mark[0] = ((char*) state->start - base) / n; 2484 match->mark[1] = ((char*) state->ptr - base) / n; 2485 2486 for (i = j = 0; i < pattern->groups; i++, j+=2) 2487 if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) { 2488 match->mark[j+2] = ((char*) state->mark[j] - base) / n; 2489 match->mark[j+3] = ((char*) state->mark[j+1] - base) / n; 2490 2491 /* check wrong span */ 2492 if (match->mark[j+2] > match->mark[j+3]) { 2493 PyErr_SetString(PyExc_SystemError, 2494 "The span of capturing group is wrong," 2495 " please report a bug for the re module."); 2496 Py_DECREF(match); 2497 return NULL; 2498 } 2499 } else 2500 match->mark[j+2] = match->mark[j+3] = -1; /* undefined */ 2501 2502 match->pos = state->pos; 2503 match->endpos = state->endpos; 2504 2505 match->lastindex = state->lastindex; 2506 2507 PyObject_GC_Track(match); 2508 return (PyObject*) match; 2509 2510 } else if (status == 0) { 2511 2512 /* no match */ 2513 Py_RETURN_NONE; 2514 2515 } 2516 2517 /* internal error */ 2518 pattern_error(status); 2519 return NULL; 2520} 2521 2522 2523/* -------------------------------------------------------------------- */ 2524/* scanner methods (experimental) */ 2525 2526static int 2527scanner_traverse(ScannerObject *self, visitproc visit, void *arg) 2528{ 2529 Py_VISIT(Py_TYPE(self)); 2530 Py_VISIT(self->pattern); 2531 return 0; 2532} 2533 2534static int 2535scanner_clear(ScannerObject *self) 2536{ 2537 Py_CLEAR(self->pattern); 2538 return 0; 2539} 2540 2541static void 2542scanner_dealloc(ScannerObject* self) 2543{ 2544 PyTypeObject *tp = Py_TYPE(self); 2545 2546 PyObject_GC_UnTrack(self); 2547 state_fini(&self->state); 2548 (void)scanner_clear(self); 2549 tp->tp_free(self); 2550 Py_DECREF(tp); 2551} 2552 2553static int 2554scanner_begin(ScannerObject* self) 2555{ 2556 if (self->executing) { 2557 
PyErr_SetString(PyExc_ValueError, 2558 "regular expression scanner already executing"); 2559 return 0; 2560 } 2561 self->executing = 1; 2562 return 1; 2563} 2564 2565static void 2566scanner_end(ScannerObject* self) 2567{ 2568 assert(self->executing); 2569 self->executing = 0; 2570} 2571 2572/*[clinic input] 2573_sre.SRE_Scanner.match 2574 2575 cls: defining_class 2576 / 2577 2578[clinic start generated code]*/ 2579 2580static PyObject * 2581_sre_SRE_Scanner_match_impl(ScannerObject *self, PyTypeObject *cls) 2582/*[clinic end generated code: output=6e22c149dc0f0325 input=b5146e1f30278cb7]*/ 2583{ 2584 _sremodulestate *module_state = get_sre_module_state_by_class(cls); 2585 SRE_STATE* state = &self->state; 2586 PyObject* match; 2587 Py_ssize_t status; 2588 2589 if (!scanner_begin(self)) { 2590 return NULL; 2591 } 2592 if (state->start == NULL) { 2593 scanner_end(self); 2594 Py_RETURN_NONE; 2595 } 2596 2597 state_reset(state); 2598 2599 state->ptr = state->start; 2600 2601 status = sre_match(state, PatternObject_GetCode(self->pattern)); 2602 if (PyErr_Occurred()) { 2603 scanner_end(self); 2604 return NULL; 2605 } 2606 2607 match = pattern_new_match(module_state, (PatternObject*) self->pattern, 2608 state, status); 2609 2610 if (status == 0) 2611 state->start = NULL; 2612 else { 2613 state->must_advance = (state->ptr == state->start); 2614 state->start = state->ptr; 2615 } 2616 2617 scanner_end(self); 2618 return match; 2619} 2620 2621 2622/*[clinic input] 2623_sre.SRE_Scanner.search 2624 2625 cls: defining_class 2626 / 2627 2628[clinic start generated code]*/ 2629 2630static PyObject * 2631_sre_SRE_Scanner_search_impl(ScannerObject *self, PyTypeObject *cls) 2632/*[clinic end generated code: output=23e8fc78013f9161 input=056c2d37171d0bf2]*/ 2633{ 2634 _sremodulestate *module_state = get_sre_module_state_by_class(cls); 2635 SRE_STATE* state = &self->state; 2636 PyObject* match; 2637 Py_ssize_t status; 2638 2639 if (!scanner_begin(self)) { 2640 return NULL; 2641 } 2642 
if (state->start == NULL) { 2643 scanner_end(self); 2644 Py_RETURN_NONE; 2645 } 2646 2647 state_reset(state); 2648 2649 state->ptr = state->start; 2650 2651 status = sre_search(state, PatternObject_GetCode(self->pattern)); 2652 if (PyErr_Occurred()) { 2653 scanner_end(self); 2654 return NULL; 2655 } 2656 2657 match = pattern_new_match(module_state, (PatternObject*) self->pattern, 2658 state, status); 2659 2660 if (status == 0) 2661 state->start = NULL; 2662 else { 2663 state->must_advance = (state->ptr == state->start); 2664 state->start = state->ptr; 2665 } 2666 2667 scanner_end(self); 2668 return match; 2669} 2670 2671static PyObject * 2672pattern_scanner(_sremodulestate *module_state, 2673 PatternObject *self, 2674 PyObject *string, 2675 Py_ssize_t pos, 2676 Py_ssize_t endpos) 2677{ 2678 ScannerObject* scanner; 2679 2680 /* create scanner object */ 2681 scanner = PyObject_GC_New(ScannerObject, module_state->Scanner_Type); 2682 if (!scanner) 2683 return NULL; 2684 scanner->pattern = NULL; 2685 scanner->executing = 0; 2686 2687 /* create search state object */ 2688 if (!state_init(&scanner->state, self, string, pos, endpos)) { 2689 Py_DECREF(scanner); 2690 return NULL; 2691 } 2692 2693 Py_INCREF(self); 2694 scanner->pattern = (PyObject*) self; 2695 2696 PyObject_GC_Track(scanner); 2697 return (PyObject*) scanner; 2698} 2699 2700static Py_hash_t 2701pattern_hash(PatternObject *self) 2702{ 2703 Py_hash_t hash, hash2; 2704 2705 hash = PyObject_Hash(self->pattern); 2706 if (hash == -1) { 2707 return -1; 2708 } 2709 2710 hash2 = _Py_HashBytes(self->code, sizeof(self->code[0]) * self->codesize); 2711 hash ^= hash2; 2712 2713 hash ^= self->flags; 2714 hash ^= self->isbytes; 2715 hash ^= self->codesize; 2716 2717 if (hash == -1) { 2718 hash = -2; 2719 } 2720 return hash; 2721} 2722 2723static PyObject* 2724pattern_richcompare(PyObject *lefto, PyObject *righto, int op) 2725{ 2726 PyTypeObject *tp = Py_TYPE(lefto); 2727 _sremodulestate *module_state = 
get_sre_module_state_by_class(tp); 2728 PatternObject *left, *right; 2729 int cmp; 2730 2731 if (op != Py_EQ && op != Py_NE) { 2732 Py_RETURN_NOTIMPLEMENTED; 2733 } 2734 2735 if (!Py_IS_TYPE(righto, module_state->Pattern_Type)) 2736 { 2737 Py_RETURN_NOTIMPLEMENTED; 2738 } 2739 2740 if (lefto == righto) { 2741 /* a pattern is equal to itself */ 2742 return PyBool_FromLong(op == Py_EQ); 2743 } 2744 2745 left = (PatternObject *)lefto; 2746 right = (PatternObject *)righto; 2747 2748 cmp = (left->flags == right->flags 2749 && left->isbytes == right->isbytes 2750 && left->codesize == right->codesize); 2751 if (cmp) { 2752 /* Compare the code and the pattern because the same pattern can 2753 produce different codes depending on the locale used to compile the 2754 pattern when the re.LOCALE flag is used. Don't compare groups, 2755 indexgroup nor groupindex: they are derivated from the pattern. */ 2756 cmp = (memcmp(left->code, right->code, 2757 sizeof(left->code[0]) * left->codesize) == 0); 2758 } 2759 if (cmp) { 2760 cmp = PyObject_RichCompareBool(left->pattern, right->pattern, 2761 Py_EQ); 2762 if (cmp < 0) { 2763 return NULL; 2764 } 2765 } 2766 if (op == Py_NE) { 2767 cmp = !cmp; 2768 } 2769 return PyBool_FromLong(cmp); 2770} 2771 2772#include "clinic/sre.c.h" 2773 2774static PyMethodDef pattern_methods[] = { 2775 _SRE_SRE_PATTERN_MATCH_METHODDEF 2776 _SRE_SRE_PATTERN_FULLMATCH_METHODDEF 2777 _SRE_SRE_PATTERN_SEARCH_METHODDEF 2778 _SRE_SRE_PATTERN_SUB_METHODDEF 2779 _SRE_SRE_PATTERN_SUBN_METHODDEF 2780 _SRE_SRE_PATTERN_FINDALL_METHODDEF 2781 _SRE_SRE_PATTERN_SPLIT_METHODDEF 2782 _SRE_SRE_PATTERN_FINDITER_METHODDEF 2783 _SRE_SRE_PATTERN_SCANNER_METHODDEF 2784 _SRE_SRE_PATTERN___COPY___METHODDEF 2785 _SRE_SRE_PATTERN___DEEPCOPY___METHODDEF 2786 {"__class_getitem__", Py_GenericAlias, METH_O|METH_CLASS, 2787 PyDoc_STR("See PEP 585")}, 2788 {NULL, NULL} 2789}; 2790 2791static PyGetSetDef pattern_getset[] = { 2792 {"groupindex", (getter)pattern_groupindex, (setter)NULL, 2793 
"A dictionary mapping group names to group numbers."}, 2794 {NULL} /* Sentinel */ 2795}; 2796 2797#define PAT_OFF(x) offsetof(PatternObject, x) 2798static PyMemberDef pattern_members[] = { 2799 {"pattern", T_OBJECT, PAT_OFF(pattern), READONLY, 2800 "The pattern string from which the RE object was compiled."}, 2801 {"flags", T_INT, PAT_OFF(flags), READONLY, 2802 "The regex matching flags."}, 2803 {"groups", T_PYSSIZET, PAT_OFF(groups), READONLY, 2804 "The number of capturing groups in the pattern."}, 2805 {"__weaklistoffset__", T_PYSSIZET, offsetof(PatternObject, weakreflist), READONLY}, 2806 {NULL} /* Sentinel */ 2807}; 2808 2809static PyType_Slot pattern_slots[] = { 2810 {Py_tp_dealloc, (destructor)pattern_dealloc}, 2811 {Py_tp_repr, (reprfunc)pattern_repr}, 2812 {Py_tp_hash, (hashfunc)pattern_hash}, 2813 {Py_tp_doc, (void *)pattern_doc}, 2814 {Py_tp_richcompare, pattern_richcompare}, 2815 {Py_tp_methods, pattern_methods}, 2816 {Py_tp_members, pattern_members}, 2817 {Py_tp_getset, pattern_getset}, 2818 {Py_tp_traverse, pattern_traverse}, 2819 {Py_tp_clear, pattern_clear}, 2820 {0, NULL}, 2821}; 2822 2823static PyType_Spec pattern_spec = { 2824 .name = "re.Pattern", 2825 .basicsize = sizeof(PatternObject), 2826 .itemsize = sizeof(SRE_CODE), 2827 .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE | 2828 Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC), 2829 .slots = pattern_slots, 2830}; 2831 2832static PyMethodDef match_methods[] = { 2833 {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc}, 2834 _SRE_SRE_MATCH_START_METHODDEF 2835 _SRE_SRE_MATCH_END_METHODDEF 2836 _SRE_SRE_MATCH_SPAN_METHODDEF 2837 _SRE_SRE_MATCH_GROUPS_METHODDEF 2838 _SRE_SRE_MATCH_GROUPDICT_METHODDEF 2839 _SRE_SRE_MATCH_EXPAND_METHODDEF 2840 _SRE_SRE_MATCH___COPY___METHODDEF 2841 _SRE_SRE_MATCH___DEEPCOPY___METHODDEF 2842 {"__class_getitem__", Py_GenericAlias, METH_O|METH_CLASS, 2843 PyDoc_STR("See PEP 585")}, 2844 {NULL, NULL} 2845}; 2846 2847static PyGetSetDef 
match_getset[] = { 2848 {"lastindex", (getter)match_lastindex_get, (setter)NULL, 2849 "The integer index of the last matched capturing group."}, 2850 {"lastgroup", (getter)match_lastgroup_get, (setter)NULL, 2851 "The name of the last matched capturing group."}, 2852 {"regs", (getter)match_regs_get, (setter)NULL}, 2853 {NULL} 2854}; 2855 2856#define MATCH_OFF(x) offsetof(MatchObject, x) 2857static PyMemberDef match_members[] = { 2858 {"string", T_OBJECT, MATCH_OFF(string), READONLY, 2859 "The string passed to match() or search()."}, 2860 {"re", T_OBJECT, MATCH_OFF(pattern), READONLY, 2861 "The regular expression object."}, 2862 {"pos", T_PYSSIZET, MATCH_OFF(pos), READONLY, 2863 "The index into the string at which the RE engine started looking for a match."}, 2864 {"endpos", T_PYSSIZET, MATCH_OFF(endpos), READONLY, 2865 "The index into the string beyond which the RE engine will not go."}, 2866 {NULL} 2867}; 2868 2869/* FIXME: implement setattr("string", None) as a special case (to 2870 detach the associated string, if any */ 2871static PyType_Slot match_slots[] = { 2872 {Py_tp_dealloc, match_dealloc}, 2873 {Py_tp_repr, match_repr}, 2874 {Py_tp_doc, (void *)match_doc}, 2875 {Py_tp_methods, match_methods}, 2876 {Py_tp_members, match_members}, 2877 {Py_tp_getset, match_getset}, 2878 {Py_tp_traverse, match_traverse}, 2879 {Py_tp_clear, match_clear}, 2880 2881 /* As mapping. 2882 * 2883 * Match objects do not support length or assignment, but do support 2884 * __getitem__. 
2885 */ 2886 {Py_mp_subscript, match_getitem}, 2887 2888 {0, NULL}, 2889}; 2890 2891static PyType_Spec match_spec = { 2892 .name = "re.Match", 2893 .basicsize = sizeof(MatchObject), 2894 .itemsize = sizeof(Py_ssize_t), 2895 .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE | 2896 Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC), 2897 .slots = match_slots, 2898}; 2899 2900static PyMethodDef scanner_methods[] = { 2901 _SRE_SRE_SCANNER_MATCH_METHODDEF 2902 _SRE_SRE_SCANNER_SEARCH_METHODDEF 2903 {NULL, NULL} 2904}; 2905 2906#define SCAN_OFF(x) offsetof(ScannerObject, x) 2907static PyMemberDef scanner_members[] = { 2908 {"pattern", T_OBJECT, SCAN_OFF(pattern), READONLY}, 2909 {NULL} /* Sentinel */ 2910}; 2911 2912static PyType_Slot scanner_slots[] = { 2913 {Py_tp_dealloc, scanner_dealloc}, 2914 {Py_tp_methods, scanner_methods}, 2915 {Py_tp_members, scanner_members}, 2916 {Py_tp_traverse, scanner_traverse}, 2917 {Py_tp_clear, scanner_clear}, 2918 {0, NULL}, 2919}; 2920 2921static PyType_Spec scanner_spec = { 2922 .name = "_" SRE_MODULE ".SRE_Scanner", 2923 .basicsize = sizeof(ScannerObject), 2924 .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE | 2925 Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC), 2926 .slots = scanner_slots, 2927}; 2928 2929static PyMethodDef _functions[] = { 2930 _SRE_COMPILE_METHODDEF 2931 _SRE_GETCODESIZE_METHODDEF 2932 _SRE_ASCII_ISCASED_METHODDEF 2933 _SRE_UNICODE_ISCASED_METHODDEF 2934 _SRE_ASCII_TOLOWER_METHODDEF 2935 _SRE_UNICODE_TOLOWER_METHODDEF 2936 {NULL, NULL} 2937}; 2938 2939static int 2940sre_traverse(PyObject *module, visitproc visit, void *arg) 2941{ 2942 _sremodulestate *state = get_sre_module_state(module); 2943 2944 Py_VISIT(state->Pattern_Type); 2945 Py_VISIT(state->Match_Type); 2946 Py_VISIT(state->Scanner_Type); 2947 2948 return 0; 2949} 2950 2951static int 2952sre_clear(PyObject *module) 2953{ 2954 _sremodulestate *state = get_sre_module_state(module); 2955 2956 Py_CLEAR(state->Pattern_Type); 2957 
Py_CLEAR(state->Match_Type); 2958 Py_CLEAR(state->Scanner_Type); 2959 2960 return 0; 2961} 2962 2963static void 2964sre_free(void *module) 2965{ 2966 sre_clear((PyObject *)module); 2967} 2968 2969#define CREATE_TYPE(m, type, spec) \ 2970do { \ 2971 type = (PyTypeObject *)PyType_FromModuleAndSpec(m, spec, NULL); \ 2972 if (type == NULL) { \ 2973 goto error; \ 2974 } \ 2975} while (0) 2976 2977#define ADD_ULONG_CONSTANT(module, name, value) \ 2978 do { \ 2979 PyObject *o = PyLong_FromUnsignedLong(value); \ 2980 if (!o) \ 2981 goto error; \ 2982 int res = PyModule_AddObjectRef(module, name, o); \ 2983 Py_DECREF(o); \ 2984 if (res < 0) { \ 2985 goto error; \ 2986 } \ 2987} while (0) 2988 2989static int 2990sre_exec(PyObject *m) 2991{ 2992 _sremodulestate *state; 2993 2994 /* Create heap types */ 2995 state = get_sre_module_state(m); 2996 CREATE_TYPE(m, state->Pattern_Type, &pattern_spec); 2997 CREATE_TYPE(m, state->Match_Type, &match_spec); 2998 CREATE_TYPE(m, state->Scanner_Type, &scanner_spec); 2999 3000 if (PyModule_AddIntConstant(m, "MAGIC", SRE_MAGIC) < 0) { 3001 goto error; 3002 } 3003 3004 if (PyModule_AddIntConstant(m, "CODESIZE", sizeof(SRE_CODE)) < 0) { 3005 goto error; 3006 } 3007 3008 ADD_ULONG_CONSTANT(m, "MAXREPEAT", SRE_MAXREPEAT); 3009 ADD_ULONG_CONSTANT(m, "MAXGROUPS", SRE_MAXGROUPS); 3010 3011 if (PyModule_AddStringConstant(m, "copyright", copyright) < 0) { 3012 goto error; 3013 } 3014 3015 return 0; 3016 3017error: 3018 return -1; 3019} 3020 3021static PyModuleDef_Slot sre_slots[] = { 3022 {Py_mod_exec, sre_exec}, 3023 {0, NULL}, 3024}; 3025 3026static struct PyModuleDef sremodule = { 3027 .m_base = PyModuleDef_HEAD_INIT, 3028 .m_name = "_" SRE_MODULE, 3029 .m_size = sizeof(_sremodulestate), 3030 .m_methods = _functions, 3031 .m_slots = sre_slots, 3032 .m_traverse = sre_traverse, 3033 .m_free = sre_free, 3034 .m_clear = sre_clear, 3035}; 3036 3037PyMODINIT_FUNC 3038PyInit__sre(void) 3039{ 3040 return PyModuleDef_Init(&sremodule); 3041} 3042 3043/* 
vim:ts=4:sw=4:et 3044*/ 3045