#ifndef Py_INTERNAL_CODE_H
#define Py_INTERNAL_CODE_H
#ifdef __cplusplus
extern "C" {
#endif

/* PEP 659
 * Specialization and quickening structs and helper functions
 */


// Inline caches. If you change the number of cache entries for an instruction,
// you must *also* update the number of cache entries in Lib/opcode.py and bump
// the magic number in Lib/importlib/_bootstrap_external.py!

#define CACHE_ENTRIES(cache) (sizeof(cache)/sizeof(_Py_CODEUNIT))

typedef struct {
    _Py_CODEUNIT counter;
    _Py_CODEUNIT index;
    _Py_CODEUNIT module_keys_version[2];
    _Py_CODEUNIT builtin_keys_version;
} _PyLoadGlobalCache;

#define INLINE_CACHE_ENTRIES_LOAD_GLOBAL CACHE_ENTRIES(_PyLoadGlobalCache)
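/* Worked example: _PyLoadGlobalCache above is five _Py_CODEUNITs wide
 * (counter, index, two units of module_keys_version, and
 * builtin_keys_version), so
 *
 *     INLINE_CACHE_ENTRIES_LOAD_GLOBAL
 *         == sizeof(_PyLoadGlobalCache) / sizeof(_Py_CODEUNIT) == 5
 *
 * meaning every LOAD_GLOBAL is followed by five CACHE units in the
 * bytecode, and the entry count in Lib/opcode.py must agree with it.
 */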

typedef struct {
    _Py_CODEUNIT counter;
} _PyBinaryOpCache;

#define INLINE_CACHE_ENTRIES_BINARY_OP CACHE_ENTRIES(_PyBinaryOpCache)

typedef struct {
    _Py_CODEUNIT counter;
} _PyUnpackSequenceCache;

#define INLINE_CACHE_ENTRIES_UNPACK_SEQUENCE \
    CACHE_ENTRIES(_PyUnpackSequenceCache)

typedef struct {
    _Py_CODEUNIT counter;
    _Py_CODEUNIT mask;
} _PyCompareOpCache;

#define INLINE_CACHE_ENTRIES_COMPARE_OP CACHE_ENTRIES(_PyCompareOpCache)

typedef struct {
    _Py_CODEUNIT counter;
    _Py_CODEUNIT type_version[2];
    _Py_CODEUNIT func_version;
} _PyBinarySubscrCache;

#define INLINE_CACHE_ENTRIES_BINARY_SUBSCR CACHE_ENTRIES(_PyBinarySubscrCache)

typedef struct {
    _Py_CODEUNIT counter;
    _Py_CODEUNIT version[2];
    _Py_CODEUNIT index;
} _PyAttrCache;

#define INLINE_CACHE_ENTRIES_LOAD_ATTR CACHE_ENTRIES(_PyAttrCache)

#define INLINE_CACHE_ENTRIES_STORE_ATTR CACHE_ENTRIES(_PyAttrCache)

typedef struct {
    _Py_CODEUNIT counter;
    _Py_CODEUNIT type_version[2];
    _Py_CODEUNIT dict_offset;
    _Py_CODEUNIT keys_version[2];
    _Py_CODEUNIT descr[4];
} _PyLoadMethodCache;

#define INLINE_CACHE_ENTRIES_LOAD_METHOD CACHE_ENTRIES(_PyLoadMethodCache)

typedef struct {
    _Py_CODEUNIT counter;
    _Py_CODEUNIT func_version[2];
    _Py_CODEUNIT min_args;
} _PyCallCache;

#define INLINE_CACHE_ENTRIES_CALL CACHE_ENTRIES(_PyCallCache)

typedef struct {
    _Py_CODEUNIT counter;
} _PyPrecallCache;

#define INLINE_CACHE_ENTRIES_PRECALL CACHE_ENTRIES(_PyPrecallCache)

typedef struct {
    _Py_CODEUNIT counter;
} _PyStoreSubscrCache;

#define INLINE_CACHE_ENTRIES_STORE_SUBSCR CACHE_ENTRIES(_PyStoreSubscrCache)

#define QUICKENING_WARMUP_DELAY 8

/* We want to compare to zero for efficiency, so we offset values accordingly. */
#define QUICKENING_INITIAL_WARMUP_VALUE (-QUICKENING_WARMUP_DELAY)

void _PyCode_Quicken(PyCodeObject *code);

static inline void
_PyCode_Warmup(PyCodeObject *code)
{
    if (code->co_warmup != 0) {
        code->co_warmup++;
        if (code->co_warmup == 0) {
            _PyCode_Quicken(code);
        }
    }
}
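
/* Illustrative trace: co_warmup starts at QUICKENING_INITIAL_WARMUP_VALUE
 * (-8), so with the default delay the code object is quickened on its
 * eighth warmup, exactly when the counter reaches zero:
 *
 *     warmup 1: co_warmup -8 -> -7
 *     ...
 *     warmup 8: co_warmup -1 ->  0   => _PyCode_Quicken(code)
 *
 * After that co_warmup stays 0 and the cheap `!= 0` check skips the rest.
 */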

extern uint8_t _PyOpcode_Adaptive[256];

extern Py_ssize_t _Py_QuickenedCount;

// Borrowed references to common callables:
struct callable_cache {
    PyObject *isinstance;
    PyObject *len;
    PyObject *list_append;
};
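
/* Hedged sketch of how the cache is expected to be consulted: a call site
 * is specialized to a fast builtin path only when the callable is
 * pointer-equal to the cached object, e.g.
 *
 *     if (callable == interp->callable_cache.len) {
 *         // specialize to the fast len() path
 *     }
 *
 * (interp being the current PyInterpreterState; the references are
 * borrowed, so no refcounting happens here.)
 */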

/* "Locals plus" for a code object is the set of locals + cell vars +
 * free vars. This relates to variable names as well as offsets into
 * the "fast locals" storage array of execution frames. The compiler
 * builds the list of names, their offsets, and the corresponding
 * kind of local.
 *
 * Those kinds represent the source of the initial value and the
 * variable's scope (as related to closures). A "local" is an
 * argument or other variable defined in the current scope. A "free"
 * variable is one that is defined in an outer scope and comes from
 * the function's closure. A "cell" variable is a local that escapes
 * into an inner function as part of a closure, and thus must be
 * wrapped in a cell. Any "local" can also be a "cell", but the
 * "free" kind is mutually exclusive with both.
 */

// Note that these all fit within a byte, as do combinations.
// Later, we will use the smaller numbers to differentiate the different
// kinds of locals (e.g. pos-only arg, varkwargs, local-only).
#define CO_FAST_LOCAL 0x20
#define CO_FAST_CELL 0x40
#define CO_FAST_FREE 0x80
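
/* For example, an argument captured by a nested function is marked
 * (CO_FAST_LOCAL | CO_FAST_CELL) == 0x60, while a variable supplied by
 * the enclosing closure is plain CO_FAST_FREE; CO_FAST_FREE never
 * combines with the other two kinds.
 */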

typedef unsigned char _PyLocals_Kind;

static inline _PyLocals_Kind
_PyLocals_GetKind(PyObject *kinds, int i)
{
    assert(PyBytes_Check(kinds));
    assert(0 <= i && i < PyBytes_GET_SIZE(kinds));
    char *ptr = PyBytes_AS_STRING(kinds);
    return (_PyLocals_Kind)(ptr[i]);
}

static inline void
_PyLocals_SetKind(PyObject *kinds, int i, _PyLocals_Kind kind)
{
    assert(PyBytes_Check(kinds));
    assert(0 <= i && i < PyBytes_GET_SIZE(kinds));
    char *ptr = PyBytes_AS_STRING(kinds);
    ptr[i] = (char) kind;
}
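
/* Hypothetical usage sketch (the indices and kinds here are made up; the
 * real bytes object lives on the code object as co_localspluskinds):
 *
 *     PyObject *kinds = code->co_localspluskinds;
 *     _PyLocals_SetKind(kinds, 0, CO_FAST_LOCAL);                 // plain arg
 *     _PyLocals_SetKind(kinds, 1, CO_FAST_LOCAL | CO_FAST_CELL);  // captured
 *     if (_PyLocals_GetKind(kinds, 1) & CO_FAST_CELL) {
 *         // offset 1 must be wrapped in a cell during frame setup
 *     }
 */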


struct _PyCodeConstructor {
    /* metadata */
    PyObject *filename;
    PyObject *name;
    PyObject *qualname;
    int flags;

    /* the code */
    PyObject *code;
    int firstlineno;
    PyObject *linetable;

    /* used by the code */
    PyObject *consts;
    PyObject *names;

    /* mapping frame offsets to information */
    PyObject *localsplusnames;  // Tuple of strings
    PyObject *localspluskinds;  // Bytes object, one byte per variable

    /* args (within varnames) */
    int argcount;
    int posonlyargcount;
    // XXX Replace argcount with posorkwargcount (argcount - posonlyargcount).
    int kwonlyargcount;

    /* needed to create the frame */
    int stacksize;

    /* used by the eval loop */
    PyObject *exceptiontable;
};

// Using an "arguments struct" like this is helpful for maintainability
// in a case such as this with many parameters. It does bear a risk:
// if the struct changes and callers are not updated properly then the
// compiler will not catch problems (like a missing argument). This can
// cause hard-to-debug problems. The risk is mitigated by the use of
// check_code() in codeobject.c. However, we may decide to switch
// back to a regular function signature. Regardless, this approach
// wouldn't be appropriate if this weren't a strictly internal API.
// (See the comments in https://github.com/python/cpython/pull/26258.)
PyAPI_FUNC(int) _PyCode_Validate(struct _PyCodeConstructor *);
PyAPI_FUNC(PyCodeObject *) _PyCode_New(struct _PyCodeConstructor *);
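
/* Hedged sketch of the intended call pattern (all field values here are
 * placeholders). Designated initializers zero any fields a caller leaves
 * out, which is what makes the arguments-struct approach workable:
 *
 *     struct _PyCodeConstructor con = {
 *         .filename = filename,
 *         .name = name,
 *         .qualname = qualname,
 *         .code = code,
 *         .consts = consts,
 *         .names = names,
 *         .localsplusnames = localsplusnames,
 *         .localspluskinds = localspluskinds,
 *         .stacksize = stacksize,
 *         .exceptiontable = exceptiontable,
 *     };
 *     if (_PyCode_Validate(&con) < 0) {
 *         return NULL;
 *     }
 *     PyCodeObject *co = _PyCode_New(&con);
 */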


/* Private API */

/* Getters for internal PyCodeObject data. */
extern PyObject* _PyCode_GetVarnames(PyCodeObject *);
extern PyObject* _PyCode_GetCellvars(PyCodeObject *);
extern PyObject* _PyCode_GetFreevars(PyCodeObject *);
extern PyObject* _PyCode_GetCode(PyCodeObject *);

/** API for initializing the line number tables. */
extern int _PyCode_InitAddressRange(PyCodeObject* co, PyCodeAddressRange *bounds);

/** Out-of-process API for initializing the location table. */
extern void _PyLineTable_InitAddressRange(
    const char *linetable,
    Py_ssize_t length,
    int firstlineno,
    PyCodeAddressRange *range);

/** API for traversing the line number table. */
extern int _PyLineTable_NextAddressRange(PyCodeAddressRange *range);
extern int _PyLineTable_PreviousAddressRange(PyCodeAddressRange *range);
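
/* Illustrative traversal (a sketch, not a prescribed pattern): each call
 * to _PyLineTable_NextAddressRange advances `bounds` to the next run of
 * instructions that map to a single source line.
 *
 *     PyCodeAddressRange bounds;
 *     _PyCode_InitAddressRange(co, &bounds);
 *     while (_PyLineTable_NextAddressRange(&bounds)) {
 *         // inspect the current range here
 *     }
 */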

/* Specialization functions */

extern int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr,
                                   PyObject *name);
extern int _Py_Specialize_StoreAttr(PyObject *owner, _Py_CODEUNIT *instr,
                                    PyObject *name);
extern int _Py_Specialize_LoadGlobal(PyObject *globals, PyObject *builtins,
                                     _Py_CODEUNIT *instr, PyObject *name);
extern int _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr,
                                     PyObject *name);
extern int _Py_Specialize_BinarySubscr(PyObject *sub, PyObject *container,
                                       _Py_CODEUNIT *instr);
extern int _Py_Specialize_StoreSubscr(PyObject *container, PyObject *sub,
                                      _Py_CODEUNIT *instr);
extern int _Py_Specialize_Call(PyObject *callable, _Py_CODEUNIT *instr,
                               int nargs, PyObject *kwnames);
extern int _Py_Specialize_Precall(PyObject *callable, _Py_CODEUNIT *instr,
                                  int nargs, PyObject *kwnames, int oparg);
extern void _Py_Specialize_BinaryOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr,
                                    int oparg, PyObject **locals);
extern void _Py_Specialize_CompareOp(PyObject *lhs, PyObject *rhs,
                                     _Py_CODEUNIT *instr, int oparg);
extern void _Py_Specialize_UnpackSequence(PyObject *seq, _Py_CODEUNIT *instr,
                                          int oparg);

/* Deallocator function for static code objects used in deepfreeze.py. */
extern void _PyStaticCode_Dealloc(PyCodeObject *co);
/* Function to intern the strings of code objects. */
extern int _PyStaticCode_InternStrings(PyCodeObject *co);

#ifdef Py_STATS

#define SPECIALIZATION_FAILURE_KINDS 30

typedef struct _specialization_stats {
    uint64_t success;
    uint64_t failure;
    uint64_t hit;
    uint64_t deferred;
    uint64_t miss;
    uint64_t deopt;
    uint64_t failure_kinds[SPECIALIZATION_FAILURE_KINDS];
} SpecializationStats;

typedef struct _opcode_stats {
    SpecializationStats specialization;
    uint64_t execution_count;
    uint64_t pair_count[256];
} OpcodeStats;

typedef struct _call_stats {
    uint64_t inlined_py_calls;
    uint64_t pyeval_calls;
    uint64_t frames_pushed;
    uint64_t frame_objects_created;
} CallStats;

typedef struct _object_stats {
    uint64_t allocations;
    uint64_t allocations512;
    uint64_t allocations4k;
    uint64_t allocations_big;
    uint64_t frees;
    uint64_t to_freelist;
    uint64_t from_freelist;
    uint64_t new_values;
    uint64_t dict_materialized_on_request;
    uint64_t dict_materialized_new_key;
    uint64_t dict_materialized_too_big;
    uint64_t dict_materialized_str_subclass;
} ObjectStats;

typedef struct _stats {
    OpcodeStats opcode_stats[256];
    CallStats call_stats;
    ObjectStats object_stats;
} PyStats;

extern PyStats _py_stats;

#define STAT_INC(opname, name) _py_stats.opcode_stats[opname].specialization.name++
#define STAT_DEC(opname, name) _py_stats.opcode_stats[opname].specialization.name--
#define OPCODE_EXE_INC(opname) _py_stats.opcode_stats[opname].execution_count++
#define CALL_STAT_INC(name) _py_stats.call_stats.name++
#define OBJECT_STAT_INC(name) _py_stats.object_stats.name++
#define OBJECT_STAT_INC_COND(name, cond) \
    do { if (cond) _py_stats.object_stats.name++; } while (0)
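
/* Usage sketch (opname is an opcode constant, name a field of the stats
 * structs above; the condition is a made-up example):
 *
 *     STAT_INC(LOAD_ATTR, hit);
 *     CALL_STAT_INC(inlined_py_calls);
 *     OBJECT_STAT_INC_COND(to_freelist, freelist != NULL);
 *
 * When Py_STATS is not defined, all of these compile to ((void)0) below.
 */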

extern void _Py_PrintSpecializationStats(int to_file);

// Used by the _opcode extension, which is built as a shared library.
PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void);

#else
#define STAT_INC(opname, name) ((void)0)
#define STAT_DEC(opname, name) ((void)0)
#define OPCODE_EXE_INC(opname) ((void)0)
#define CALL_STAT_INC(name) ((void)0)
#define OBJECT_STAT_INC(name) ((void)0)
#define OBJECT_STAT_INC_COND(name, cond) ((void)0)
#endif  // !Py_STATS

// Cache values are only valid in memory, so use native endianness.
#ifdef WORDS_BIGENDIAN

static inline void
write_u32(uint16_t *p, uint32_t val)
{
    p[0] = (uint16_t)(val >> 16);
    p[1] = (uint16_t)(val >> 0);
}

static inline void
write_u64(uint16_t *p, uint64_t val)
{
    p[0] = (uint16_t)(val >> 48);
    p[1] = (uint16_t)(val >> 32);
    p[2] = (uint16_t)(val >> 16);
    p[3] = (uint16_t)(val >> 0);
}

static inline uint32_t
read_u32(uint16_t *p)
{
    uint32_t val = 0;
    val |= (uint32_t)p[0] << 16;
    val |= (uint32_t)p[1] << 0;
    return val;
}

static inline uint64_t
read_u64(uint16_t *p)
{
    uint64_t val = 0;
    val |= (uint64_t)p[0] << 48;
    val |= (uint64_t)p[1] << 32;
    val |= (uint64_t)p[2] << 16;
    val |= (uint64_t)p[3] << 0;
    return val;
}

#else

static inline void
write_u32(uint16_t *p, uint32_t val)
{
    p[0] = (uint16_t)(val >> 0);
    p[1] = (uint16_t)(val >> 16);
}

static inline void
write_u64(uint16_t *p, uint64_t val)
{
    p[0] = (uint16_t)(val >> 0);
    p[1] = (uint16_t)(val >> 16);
    p[2] = (uint16_t)(val >> 32);
    p[3] = (uint16_t)(val >> 48);
}

static inline uint32_t
read_u32(uint16_t *p)
{
    uint32_t val = 0;
    val |= (uint32_t)p[0] << 0;
    val |= (uint32_t)p[1] << 16;
    return val;
}

static inline uint64_t
read_u64(uint16_t *p)
{
    uint64_t val = 0;
    val |= (uint64_t)p[0] << 0;
    val |= (uint64_t)p[1] << 16;
    val |= (uint64_t)p[2] << 32;
    val |= (uint64_t)p[3] << 48;
    return val;
}

#endif
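
/* These helpers exist because inline cache entries are 16-bit
 * _Py_CODEUNITs, so 32- and 64-bit values have to be spread across
 * consecutive entries. A hedged round-trip sketch (tp_version_tag is
 * just one example of a 32-bit value commonly cached this way):
 *
 *     uint16_t cache[2];
 *     write_u32(cache, type->tp_version_tag);
 *     assert(read_u32(cache) == type->tp_version_tag);
 *
 * The two variants above order the halves so the in-memory layout matches
 * a native 32/64-bit load; since these values never leave memory, no
 * byte-swapping is ever needed.
 */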

static inline void
write_obj(uint16_t *p, PyObject *obj)
{
    uintptr_t val = (uintptr_t)obj;
#if SIZEOF_VOID_P == 8
    write_u64(p, val);
#elif SIZEOF_VOID_P == 4
    write_u32(p, val);
#else
    #error "SIZEOF_VOID_P must be 4 or 8"
#endif
}

static inline PyObject *
read_obj(uint16_t *p)
{
    uintptr_t val;
#if SIZEOF_VOID_P == 8
    val = read_u64(p);
#elif SIZEOF_VOID_P == 4
    val = read_u32(p);
#else
    #error "SIZEOF_VOID_P must be 4 or 8"
#endif
    return (PyObject *)val;
}

/* See Objects/exception_handling_notes.txt for details. */
static inline unsigned char *
parse_varint(unsigned char *p, int *result) {
    int val = p[0] & 63;
    while (p[0] & 64) {
        p++;
        val = (val << 6) | (p[0] & 63);
    }
    *result = val;
    return p+1;
}

static inline int
write_varint(uint8_t *ptr, unsigned int val)
{
    int written = 1;
    while (val >= 64) {
        *ptr++ = 64 | (val & 63);
        val >>= 6;
        written++;
    }
    *ptr = (uint8_t)val;
    return written;
}

static inline int
write_signed_varint(uint8_t *ptr, int val)
{
    if (val < 0) {
        val = ((-val)<<1) | 1;
    }
    else {
        val = val << 1;
    }
    return write_varint(ptr, val);
}
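
/* Worked example (six payload bits per byte; bit 0x40 flags continuation).
 * write_varint stores the *least* significant chunk first:
 *
 *     uint8_t buf[2];
 *     write_varint(buf, 100);   // buf[0] = 64 | (100 & 63) = 100 (continue)
 *                               // buf[1] = 100 >> 6        = 1   (stop)
 *     write_signed_varint(buf, -3);   // sign in bit 0: ((3 << 1) | 1) == 7
 *
 * Note that parse_varint above decodes the exception table, whose varints
 * are stored most-significant-chunk first (see
 * Objects/exception_handling_notes.txt), while the write_* helpers emit
 * the location table's least-significant-first form; despite the similar
 * names, the two are not inverses of each other.
 */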

static inline int
write_location_entry_start(uint8_t *ptr, int code, int length)
{
    assert((code & 15) == code);
    *ptr = 128 | (code << 3) | (length - 1);
    return 1;
}
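
/* Entry-start byte layout: bit 7 marks the first byte of a location
 * entry, bits 3-6 hold the code, and bits 0-2 hold (length - 1). For
 * instance, code == 10 with length == 2 produces
 * 128 | (10 << 3) | 1 == 209 (0xD1).
 */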


/** Counters
 * The first 16-bit value in each inline cache is a counter.
 * When counting misses, the counter is treated as a simple unsigned value.
 *
 * When counting executions until the next specialization attempt,
 * exponential backoff is used to reduce the number of specialization failures.
 * The high 12 bits store the counter, the low 4 bits store the backoff exponent.
 * On a specialization failure, the backoff exponent is incremented and the
 * counter set to (2**backoff - 1).
 * Backoff == 6 -> starting counter == 63; backoff == 10 -> starting counter == 1023.
 */

/* With a 16-bit counter, we have 12 bits for the counter value and 4 bits for the backoff. */
#define ADAPTIVE_BACKOFF_BITS 4
/* The initial counter value is 31 == 2**ADAPTIVE_BACKOFF_START - 1. */
#define ADAPTIVE_BACKOFF_START 5

#define MAX_BACKOFF_VALUE (16 - ADAPTIVE_BACKOFF_BITS)


static inline uint16_t
adaptive_counter_bits(int value, int backoff) {
    return (value << ADAPTIVE_BACKOFF_BITS) |
           (backoff & ((1<<ADAPTIVE_BACKOFF_BITS)-1));
}

static inline uint16_t
adaptive_counter_start(void) {
    unsigned int value = (1 << ADAPTIVE_BACKOFF_START) - 1;
    return adaptive_counter_bits(value, ADAPTIVE_BACKOFF_START);
}

static inline uint16_t
adaptive_counter_backoff(uint16_t counter) {
    unsigned int backoff = counter & ((1<<ADAPTIVE_BACKOFF_BITS)-1);
    backoff++;
    if (backoff > MAX_BACKOFF_VALUE) {
        backoff = MAX_BACKOFF_VALUE;
    }
    unsigned int value = (1 << backoff) - 1;
    return adaptive_counter_bits(value, backoff);
}
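
/* Worked example of the encoding (counter in the high 12 bits, backoff
 * exponent in the low 4):
 *
 *     adaptive_counter_start()        == (31 << 4) | 5 == 0x1F5
 *     adaptive_counter_backoff(0x1F5) == (63 << 4) | 6 == 0x3F6
 *
 * So after a failed specialization the instruction waits 63 executions
 * before retrying, then 127, and so on, saturating at
 * MAX_BACKOFF_VALUE == 12 (a counter of 4095).
 */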


/* Line array cache for tracing */

extern int _PyCode_CreateLineArray(PyCodeObject *co);

static inline int
_PyCode_InitLineArray(PyCodeObject *co)
{
    if (co->_co_linearray) {
        return 0;
    }
    return _PyCode_CreateLineArray(co);
}

static inline int
_PyCode_LineNumberFromArray(PyCodeObject *co, int index)
{
    assert(co->_co_linearray != NULL);
    assert(index >= 0);
    assert(index < Py_SIZE(co));
    if (co->_co_linearray_entry_size == 2) {
        return ((int16_t *)co->_co_linearray)[index];
    }
    else {
        assert(co->_co_linearray_entry_size == 4);
        return ((int32_t *)co->_co_linearray)[index];
    }
}
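
/* Typical use when tracing (a sketch; this assumes _PyCode_CreateLineArray
 * reports failure with a negative return, so the init result is checked
 * before indexing):
 *
 *     if (_PyCode_InitLineArray(co) < 0) {
 *         return -1;   // could not build the array
 *     }
 *     int line = _PyCode_LineNumberFromArray(co, i);
 *
 * Entries are 16 or 32 bits wide depending on the line numbers the code
 * object contains, hence the _co_linearray_entry_size dispatch above.
 */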


#ifdef __cplusplus
}
#endif
#endif /* !Py_INTERNAL_CODE_H */