1#ifndef Py_INTERNAL_CODE_H 2#define Py_INTERNAL_CODE_H 3#ifdef __cplusplus 4extern "C" { 5#endif 6 7/* PEP 659 8 * Specialization and quickening structs and helper functions 9 */ 10 11 12// Inline caches. If you change the number of cache entries for an instruction, 13// you must *also* update the number of cache entries in Lib/opcode.py and bump 14// the magic number in Lib/importlib/_bootstrap_external.py! 15 16#define CACHE_ENTRIES(cache) (sizeof(cache)/sizeof(_Py_CODEUNIT)) 17 18typedef struct { 19 _Py_CODEUNIT counter; 20 _Py_CODEUNIT index; 21 _Py_CODEUNIT module_keys_version[2]; 22 _Py_CODEUNIT builtin_keys_version; 23} _PyLoadGlobalCache; 24 25#define INLINE_CACHE_ENTRIES_LOAD_GLOBAL CACHE_ENTRIES(_PyLoadGlobalCache) 26 27typedef struct { 28 _Py_CODEUNIT counter; 29} _PyBinaryOpCache; 30 31#define INLINE_CACHE_ENTRIES_BINARY_OP CACHE_ENTRIES(_PyBinaryOpCache) 32 33typedef struct { 34 _Py_CODEUNIT counter; 35} _PyUnpackSequenceCache; 36 37#define INLINE_CACHE_ENTRIES_UNPACK_SEQUENCE \ 38 CACHE_ENTRIES(_PyUnpackSequenceCache) 39 40typedef struct { 41 _Py_CODEUNIT counter; 42 _Py_CODEUNIT mask; 43} _PyCompareOpCache; 44 45#define INLINE_CACHE_ENTRIES_COMPARE_OP CACHE_ENTRIES(_PyCompareOpCache) 46 47typedef struct { 48 _Py_CODEUNIT counter; 49 _Py_CODEUNIT type_version[2]; 50 _Py_CODEUNIT func_version; 51} _PyBinarySubscrCache; 52 53#define INLINE_CACHE_ENTRIES_BINARY_SUBSCR CACHE_ENTRIES(_PyBinarySubscrCache) 54 55typedef struct { 56 _Py_CODEUNIT counter; 57 _Py_CODEUNIT version[2]; 58 _Py_CODEUNIT index; 59} _PyAttrCache; 60 61#define INLINE_CACHE_ENTRIES_LOAD_ATTR CACHE_ENTRIES(_PyAttrCache) 62 63#define INLINE_CACHE_ENTRIES_STORE_ATTR CACHE_ENTRIES(_PyAttrCache) 64 65typedef struct { 66 _Py_CODEUNIT counter; 67 _Py_CODEUNIT type_version[2]; 68 _Py_CODEUNIT dict_offset; 69 _Py_CODEUNIT keys_version[2]; 70 _Py_CODEUNIT descr[4]; 71} _PyLoadMethodCache; 72 73#define INLINE_CACHE_ENTRIES_LOAD_METHOD CACHE_ENTRIES(_PyLoadMethodCache) 74 75typedef struct { 76 _Py_CODEUNIT counter; 77 _Py_CODEUNIT func_version[2]; 78 _Py_CODEUNIT min_args; 79} _PyCallCache; 80 81#define INLINE_CACHE_ENTRIES_CALL CACHE_ENTRIES(_PyCallCache) 82 83typedef struct { 84 _Py_CODEUNIT counter; 85} _PyPrecallCache; 86 87#define INLINE_CACHE_ENTRIES_PRECALL CACHE_ENTRIES(_PyPrecallCache) 88 89typedef struct { 90 _Py_CODEUNIT counter; 91} _PyStoreSubscrCache; 92 93#define INLINE_CACHE_ENTRIES_STORE_SUBSCR CACHE_ENTRIES(_PyStoreSubscrCache) 94 95#define QUICKENING_WARMUP_DELAY 8 96 97/* We want to compare to zero for efficiency, so we offset values accordingly */ 98#define QUICKENING_INITIAL_WARMUP_VALUE (-QUICKENING_WARMUP_DELAY) 99 100void _PyCode_Quicken(PyCodeObject *code); 101 102static inline void 103_PyCode_Warmup(PyCodeObject *code) 104{ 105 if (code->co_warmup != 0) { 106 code->co_warmup++; 107 if (code->co_warmup == 0) { 108 _PyCode_Quicken(code); 109 } 110 } 111} 112 113extern uint8_t _PyOpcode_Adaptive[256]; 114 115extern Py_ssize_t _Py_QuickenedCount; 116 117// Borrowed references to common callables: 118struct callable_cache { 119 PyObject *isinstance; 120 PyObject *len; 121 PyObject *list_append; 122}; 123 124/* "Locals plus" for a code object is the set of locals + cell vars + 125 * free vars. This relates to variable names as well as offsets into 126 * the "fast locals" storage array of execution frames. The compiler 127 * builds the list of names, their offsets, and the corresponding 128 * kind of local. 129 * 130 * Those kinds represent the source of the initial value and the 131 * variable's scope (as related to closures). A "local" is an 132 * argument or other variable defined in the current scope. A "free" 133 * variable is one that is defined in an outer scope and comes from 134 * the function's closure. A "cell" variable is a local that escapes 135 * into an inner function as part of a closure, and thus must be 136 * wrapped in a cell. Any "local" can also be a "cell", but the 137 * "free" kind is mutually exclusive with both. 138 */ 139 140// Note that these all fit within a byte, as do combinations. 141// Later, we will use the smaller numbers to differentiate the different 142// kinds of locals (e.g. pos-only arg, varkwargs, local-only). 143#define CO_FAST_LOCAL 0x20 144#define CO_FAST_CELL 0x40 145#define CO_FAST_FREE 0x80 146 147typedef unsigned char _PyLocals_Kind; 148 149static inline _PyLocals_Kind 150_PyLocals_GetKind(PyObject *kinds, int i) 151{ 152 assert(PyBytes_Check(kinds)); 153 assert(0 <= i && i < PyBytes_GET_SIZE(kinds)); 154 char *ptr = PyBytes_AS_STRING(kinds); 155 return (_PyLocals_Kind)(ptr[i]); 156} 157 158static inline void 159_PyLocals_SetKind(PyObject *kinds, int i, _PyLocals_Kind kind) 160{ 161 assert(PyBytes_Check(kinds)); 162 assert(0 <= i && i < PyBytes_GET_SIZE(kinds)); 163 char *ptr = PyBytes_AS_STRING(kinds); 164 ptr[i] = (char) kind; 165} 166 167 168struct _PyCodeConstructor { 169 /* metadata */ 170 PyObject *filename; 171 PyObject *name; 172 PyObject *qualname; 173 int flags; 174 175 /* the code */ 176 PyObject *code; 177 int firstlineno; 178 PyObject *linetable; 179 180 /* used by the code */ 181 PyObject *consts; 182 PyObject *names; 183 184 /* mapping frame offsets to information */ 185 PyObject *localsplusnames; // Tuple of strings 186 PyObject *localspluskinds; // Bytes object, one byte per variable 187 188 /* args (within varnames) */ 189 int argcount; 190 int posonlyargcount; 191 // XXX Replace argcount with posorkwargcount (argcount - posonlyargcount). 192 int kwonlyargcount; 193 194 /* needed to create the frame */ 195 int stacksize; 196 197 /* used by the eval loop */ 198 PyObject *exceptiontable; 199}; 200 201// Using an "arguments struct" like this is helpful for maintainability 202// in a case such as this with many parameters. It does bear a risk: 203// if the struct changes and callers are not updated properly then the 204// compiler will not catch problems (like a missing argument). This can 205// cause hard-to-debug problems. The risk is mitigated by the use of 206// check_code() in codeobject.c. However, we may decide to switch 207// back to a regular function signature. Regardless, this approach 208// wouldn't be appropriate if this weren't a strictly internal API. 209// (See the comments in https://github.com/python/cpython/pull/26258.) 210PyAPI_FUNC(int) _PyCode_Validate(struct _PyCodeConstructor *); 211PyAPI_FUNC(PyCodeObject *) _PyCode_New(struct _PyCodeConstructor *); 212 213 214/* Private API */ 215 216/* Getters for internal PyCodeObject data. */ 217extern PyObject* _PyCode_GetVarnames(PyCodeObject *); 218extern PyObject* _PyCode_GetCellvars(PyCodeObject *); 219extern PyObject* _PyCode_GetFreevars(PyCodeObject *); 220extern PyObject* _PyCode_GetCode(PyCodeObject *); 221 222/** API for initializing the line number tables. */ 223extern int _PyCode_InitAddressRange(PyCodeObject* co, PyCodeAddressRange *bounds); 224 225/** Out of process API for initializing the location table. */ 226extern void _PyLineTable_InitAddressRange( 227 const char *linetable, 228 Py_ssize_t length, 229 int firstlineno, 230 PyCodeAddressRange *range); 231 232/** API for traversing the line number table. */ 233extern int _PyLineTable_NextAddressRange(PyCodeAddressRange *range); 234extern int _PyLineTable_PreviousAddressRange(PyCodeAddressRange *range); 235 236/* Specialization functions */ 237 238extern int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, 239 PyObject *name); 240extern int _Py_Specialize_StoreAttr(PyObject *owner, _Py_CODEUNIT *instr, 241 PyObject *name); 242extern int _Py_Specialize_LoadGlobal(PyObject *globals, PyObject *builtins, _Py_CODEUNIT *instr, PyObject *name); 243extern int _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, 244 PyObject *name); 245extern int _Py_Specialize_BinarySubscr(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr); 246extern int _Py_Specialize_StoreSubscr(PyObject *container, PyObject *sub, _Py_CODEUNIT *instr); 247extern int _Py_Specialize_Call(PyObject *callable, _Py_CODEUNIT *instr, 248 int nargs, PyObject *kwnames); 249extern int _Py_Specialize_Precall(PyObject *callable, _Py_CODEUNIT *instr, 250 int nargs, PyObject *kwnames, int oparg); 251extern void _Py_Specialize_BinaryOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr, 252 int oparg, PyObject **locals); 253extern void _Py_Specialize_CompareOp(PyObject *lhs, PyObject *rhs, 254 _Py_CODEUNIT *instr, int oparg); 255extern void _Py_Specialize_UnpackSequence(PyObject *seq, _Py_CODEUNIT *instr, 256 int oparg); 257 258/* Deallocator function for static codeobjects used in deepfreeze.py */ 259extern void _PyStaticCode_Dealloc(PyCodeObject *co); 260/* Function to intern strings of codeobjects */ 261extern int _PyStaticCode_InternStrings(PyCodeObject *co); 262 263#ifdef Py_STATS 264 265#define SPECIALIZATION_FAILURE_KINDS 30 266 267typedef struct _specialization_stats { 268 uint64_t success; 269 uint64_t failure; 270 uint64_t hit; 271 uint64_t deferred; 272 uint64_t miss; 273 uint64_t deopt; 274 uint64_t failure_kinds[SPECIALIZATION_FAILURE_KINDS]; 275} SpecializationStats; 276 277typedef struct _opcode_stats { 278 SpecializationStats specialization; 279 uint64_t execution_count; 280 uint64_t pair_count[256]; 281} OpcodeStats; 282 283typedef struct _call_stats { 284 uint64_t inlined_py_calls; 285 uint64_t pyeval_calls; 286 uint64_t frames_pushed; 287 uint64_t frame_objects_created; 288} CallStats; 289 290typedef struct _object_stats { 291 uint64_t allocations; 292 uint64_t allocations512; 293 uint64_t allocations4k; 294 uint64_t allocations_big; 295 uint64_t frees; 296 uint64_t to_freelist; 297 uint64_t from_freelist; 298 uint64_t new_values; 299 uint64_t dict_materialized_on_request; 300 uint64_t dict_materialized_new_key; 301 uint64_t dict_materialized_too_big; 302 uint64_t dict_materialized_str_subclass; 303} ObjectStats; 304 305typedef struct _stats { 306 OpcodeStats opcode_stats[256]; 307 CallStats call_stats; 308 ObjectStats object_stats; 309} PyStats; 310 311extern PyStats _py_stats; 312 313#define STAT_INC(opname, name) _py_stats.opcode_stats[opname].specialization.name++ 314#define STAT_DEC(opname, name) _py_stats.opcode_stats[opname].specialization.name-- 315#define OPCODE_EXE_INC(opname) _py_stats.opcode_stats[opname].execution_count++ 316#define CALL_STAT_INC(name) _py_stats.call_stats.name++ 317#define OBJECT_STAT_INC(name) _py_stats.object_stats.name++ 318#define OBJECT_STAT_INC_COND(name, cond) \ 319 do { if (cond) _py_stats.object_stats.name++; } while (0) 320 321extern void _Py_PrintSpecializationStats(int to_file); 322 323// Used by the _opcode extension which is built as a shared library 324PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void); 325 326#else 327#define STAT_INC(opname, name) ((void)0) 328#define STAT_DEC(opname, name) ((void)0) 329#define OPCODE_EXE_INC(opname) ((void)0) 330#define CALL_STAT_INC(name) ((void)0) 331#define OBJECT_STAT_INC(name) ((void)0) 332#define OBJECT_STAT_INC_COND(name, cond) ((void)0) 333#endif // !Py_STATS 334 335// Cache values are only valid in memory, so use native endianness. 336#ifdef WORDS_BIGENDIAN 337 338static inline void 339write_u32(uint16_t *p, uint32_t val) 340{ 341 p[0] = (uint16_t)(val >> 16); 342 p[1] = (uint16_t)(val >> 0); 343} 344 345static inline void 346write_u64(uint16_t *p, uint64_t val) 347{ 348 p[0] = (uint16_t)(val >> 48); 349 p[1] = (uint16_t)(val >> 32); 350 p[2] = (uint16_t)(val >> 16); 351 p[3] = (uint16_t)(val >> 0); 352} 353 354static inline uint32_t 355read_u32(uint16_t *p) 356{ 357 uint32_t val = 0; 358 val |= (uint32_t)p[0] << 16; 359 val |= (uint32_t)p[1] << 0; 360 return val; 361} 362 363static inline uint64_t 364read_u64(uint16_t *p) 365{ 366 uint64_t val = 0; 367 val |= (uint64_t)p[0] << 48; 368 val |= (uint64_t)p[1] << 32; 369 val |= (uint64_t)p[2] << 16; 370 val |= (uint64_t)p[3] << 0; 371 return val; 372} 373 374#else 375 376static inline void 377write_u32(uint16_t *p, uint32_t val) 378{ 379 p[0] = (uint16_t)(val >> 0); 380 p[1] = (uint16_t)(val >> 16); 381} 382 383static inline void 384write_u64(uint16_t *p, uint64_t val) 385{ 386 p[0] = (uint16_t)(val >> 0); 387 p[1] = (uint16_t)(val >> 16); 388 p[2] = (uint16_t)(val >> 32); 389 p[3] = (uint16_t)(val >> 48); 390} 391 392static inline uint32_t 393read_u32(uint16_t *p) 394{ 395 uint32_t val = 0; 396 val |= (uint32_t)p[0] << 0; 397 val |= (uint32_t)p[1] << 16; 398 return val; 399} 400 401static inline uint64_t 402read_u64(uint16_t *p) 403{ 404 uint64_t val = 0; 405 val |= (uint64_t)p[0] << 0; 406 val |= (uint64_t)p[1] << 16; 407 val |= (uint64_t)p[2] << 32; 408 val |= (uint64_t)p[3] << 48; 409 return val; 410} 411 412#endif 413 414static inline void 415write_obj(uint16_t *p, PyObject *obj) 416{ 417 uintptr_t val = (uintptr_t)obj; 418#if SIZEOF_VOID_P == 8 419 write_u64(p, val); 420#elif SIZEOF_VOID_P == 4 421 write_u32(p, val); 422#else 423 #error "SIZEOF_VOID_P must be 4 or 8" 424#endif 425} 426 427static inline PyObject * 428read_obj(uint16_t *p) 429{ 430 uintptr_t val; 431#if SIZEOF_VOID_P == 8 432 val = read_u64(p); 433#elif SIZEOF_VOID_P == 4 434 val = read_u32(p); 435#else 436 #error "SIZEOF_VOID_P must be 4 or 8" 437#endif 438 return (PyObject *)val; 439} 440 441/* See Objects/exception_handling_notes.txt for details. 442 */ 443static inline unsigned char * 444parse_varint(unsigned char *p, int *result) { 445 int val = p[0] & 63; 446 while (p[0] & 64) { 447 p++; 448 val = (val << 6) | (p[0] & 63); 449 } 450 *result = val; 451 return p+1; 452} 453 454static inline int 455write_varint(uint8_t *ptr, unsigned int val) 456{ 457 int written = 1; 458 while (val >= 64) { 459 *ptr++ = 64 | (val & 63); 460 val >>= 6; 461 written++; 462 } 463 *ptr = val; 464 return written; 465} 466 467static inline int 468write_signed_varint(uint8_t *ptr, int val) 469{ 470 if (val < 0) { 471 val = ((-val)<<1) | 1; 472 } 473 else { 474 val = val << 1; 475 } 476 return write_varint(ptr, val); 477} 478 479static inline int 480write_location_entry_start(uint8_t *ptr, int code, int length) 481{ 482 assert((code & 15) == code); 483 *ptr = 128 | (code << 3) | (length - 1); 484 return 1; 485} 486 487 488/** Counters 489 * The first 16-bit value in each inline cache is a counter. 490 * When counting misses, the counter is treated as a simple unsigned value. 491 * 492 * When counting executions until the next specialization attempt, 493 * exponential backoff is used to reduce the number of specialization failures. 494 * The high 12 bits store the counter, the low 4 bits store the backoff exponent. 495 * On a specialization failure, the backoff exponent is incremented and the 496 * counter set to (2**backoff - 1). 497 * Backoff == 6 -> starting counter == 63, backoff == 10 -> starting counter == 1023. 498 */ 499 500/* With a 16-bit counter, we have 12 bits for the counter value, and 4 bits for the backoff */ 501#define ADAPTIVE_BACKOFF_BITS 4 502/* The initial counter value is 31 == 2**ADAPTIVE_BACKOFF_START - 1 */ 503#define ADAPTIVE_BACKOFF_START 5 504 505#define MAX_BACKOFF_VALUE (16 - ADAPTIVE_BACKOFF_BITS) 506 507 508static inline uint16_t 509adaptive_counter_bits(int value, int backoff) { 510 return (value << ADAPTIVE_BACKOFF_BITS) | 511 (backoff & ((1<<ADAPTIVE_BACKOFF_BITS)-1)); 512} 513 514static inline uint16_t 515adaptive_counter_start(void) { 516 unsigned int value = (1 << ADAPTIVE_BACKOFF_START) - 1; 517 return adaptive_counter_bits(value, ADAPTIVE_BACKOFF_START); 518} 519 520static inline uint16_t 521adaptive_counter_backoff(uint16_t counter) { 522 unsigned int backoff = counter & ((1<<ADAPTIVE_BACKOFF_BITS)-1); 523 backoff++; 524 if (backoff > MAX_BACKOFF_VALUE) { 525 backoff = MAX_BACKOFF_VALUE; 526 } 527 unsigned int value = (1 << backoff) - 1; 528 return adaptive_counter_bits(value, backoff); 529} 530 531 532/* Line array cache for tracing */ 533 534extern int _PyCode_CreateLineArray(PyCodeObject *co); 535 536static inline int 537_PyCode_InitLineArray(PyCodeObject *co) 538{ 539 if (co->_co_linearray) { 540 return 0; 541 } 542 return _PyCode_CreateLineArray(co); 543} 544 545static inline int 546_PyCode_LineNumberFromArray(PyCodeObject *co, int index) 547{ 548 assert(co->_co_linearray != NULL); 549 assert(index >= 0); 550 assert(index < Py_SIZE(co)); 551 if (co->_co_linearray_entry_size == 2) { 552 return ((int16_t *)co->_co_linearray)[index]; 553 } 554 else { 555 assert(co->_co_linearray_entry_size == 4); 556 return ((int32_t *)co->_co_linearray)[index]; 557 } 558} 559 560 561#ifdef __cplusplus 562} 563#endif 564#endif /* !Py_INTERNAL_CODE_H */ 565