1 #ifndef Py_INTERNAL_CODE_H
2 #define Py_INTERNAL_CODE_H
3 #ifdef __cplusplus
4 extern "C" {
5 #endif
6 
7 /* PEP 659
8  * Specialization and quickening structs and helper functions
9  */
10 
11 
// Inline caches. If you change the number of cache entries for an instruction,
// you must *also* update the number of cache entries in Lib/opcode.py and bump
// the magic number in Lib/importlib/_bootstrap_external.py!

// Size of a cache struct, measured in _Py_CODEUNIT (16-bit) slots.
#define CACHE_ENTRIES(cache) (sizeof(cache)/sizeof(_Py_CODEUNIT))

// Inline cache for LOAD_GLOBAL.  In every cache struct below, the leading
// `counter` field is the adaptive counter described in the "Counters"
// comment further down in this file.
typedef struct {
    _Py_CODEUNIT counter;
    _Py_CODEUNIT index;
    _Py_CODEUNIT module_keys_version[2];
    _Py_CODEUNIT builtin_keys_version;
} _PyLoadGlobalCache;

#define INLINE_CACHE_ENTRIES_LOAD_GLOBAL CACHE_ENTRIES(_PyLoadGlobalCache)

// Inline cache for BINARY_OP (counter only).
typedef struct {
    _Py_CODEUNIT counter;
} _PyBinaryOpCache;

#define INLINE_CACHE_ENTRIES_BINARY_OP CACHE_ENTRIES(_PyBinaryOpCache)

// Inline cache for UNPACK_SEQUENCE (counter only).
typedef struct {
    _Py_CODEUNIT counter;
} _PyUnpackSequenceCache;

#define INLINE_CACHE_ENTRIES_UNPACK_SEQUENCE \
    CACHE_ENTRIES(_PyUnpackSequenceCache)

// Inline cache for COMPARE_OP.
typedef struct {
    _Py_CODEUNIT counter;
    _Py_CODEUNIT mask;
} _PyCompareOpCache;

#define INLINE_CACHE_ENTRIES_COMPARE_OP CACHE_ENTRIES(_PyCompareOpCache)

// Inline cache for BINARY_SUBSCR.
typedef struct {
    _Py_CODEUNIT counter;
    _Py_CODEUNIT type_version[2];
    _Py_CODEUNIT func_version;
} _PyBinarySubscrCache;

#define INLINE_CACHE_ENTRIES_BINARY_SUBSCR CACHE_ENTRIES(_PyBinarySubscrCache)

// Inline cache shared by LOAD_ATTR and STORE_ATTR (see the two macros below).
typedef struct {
    _Py_CODEUNIT counter;
    _Py_CODEUNIT version[2];
    _Py_CODEUNIT index;
} _PyAttrCache;

#define INLINE_CACHE_ENTRIES_LOAD_ATTR CACHE_ENTRIES(_PyAttrCache)

#define INLINE_CACHE_ENTRIES_STORE_ATTR CACHE_ENTRIES(_PyAttrCache)

// Inline cache for LOAD_METHOD.
typedef struct {
    _Py_CODEUNIT counter;
    _Py_CODEUNIT type_version[2];
    _Py_CODEUNIT dict_offset;
    _Py_CODEUNIT keys_version[2];
    _Py_CODEUNIT descr[4];   // 4 x 16 bits = 64 bits: room for a pointer
                             // written via write_obj() below.
} _PyLoadMethodCache;

#define INLINE_CACHE_ENTRIES_LOAD_METHOD CACHE_ENTRIES(_PyLoadMethodCache)

// Inline cache for CALL.
typedef struct {
    _Py_CODEUNIT counter;
    _Py_CODEUNIT func_version[2];
    _Py_CODEUNIT min_args;
} _PyCallCache;

#define INLINE_CACHE_ENTRIES_CALL CACHE_ENTRIES(_PyCallCache)

// Inline cache for PRECALL (counter only).
typedef struct {
    _Py_CODEUNIT counter;
} _PyPrecallCache;

#define INLINE_CACHE_ENTRIES_PRECALL CACHE_ENTRIES(_PyPrecallCache)

// Inline cache for STORE_SUBSCR (counter only).
typedef struct {
    _Py_CODEUNIT counter;
} _PyStoreSubscrCache;

#define INLINE_CACHE_ENTRIES_STORE_SUBSCR CACHE_ENTRIES(_PyStoreSubscrCache)
94 
// Magnitude of the warmup countdown: number of warmup events before a code
// object is quickened (see _PyCode_Warmup below).
#define QUICKENING_WARMUP_DELAY 8

/* We want to compare to zero for efficiency, so we offset values accordingly */
#define QUICKENING_INITIAL_WARMUP_VALUE (-QUICKENING_WARMUP_DELAY)

/* Quicken the bytecode of `code` (PEP 659).  Invoked from _PyCode_Warmup()
   below once the warmup counter reaches zero. */
void _PyCode_Quicken(PyCodeObject *code);
101 
102 static inline void
_PyCode_Warmup(PyCodeObject *code)103 _PyCode_Warmup(PyCodeObject *code)
104 {
105     if (code->co_warmup != 0) {
106         code->co_warmup++;
107         if (code->co_warmup == 0) {
108             _PyCode_Quicken(code);
109         }
110     }
111 }
112 
// Per-opcode table used by the specializing interpreter.
// NOTE(review): presumably maps opcodes to their adaptive variants —
// confirm against specialize.c.
extern uint8_t _PyOpcode_Adaptive[256];

// Global count related to quickened code objects.
// NOTE(review): exact semantics defined in specialize.c — confirm there.
extern Py_ssize_t _Py_QuickenedCount;

// Borrowed references to common callables:
struct callable_cache {
    PyObject *isinstance;
    PyObject *len;
    PyObject *list_append;
};
123 
124 /* "Locals plus" for a code object is the set of locals + cell vars +
125  * free vars.  This relates to variable names as well as offsets into
126  * the "fast locals" storage array of execution frames.  The compiler
127  * builds the list of names, their offsets, and the corresponding
128  * kind of local.
129  *
130  * Those kinds represent the source of the initial value and the
131  * variable's scope (as related to closures).  A "local" is an
132  * argument or other variable defined in the current scope.  A "free"
133  * variable is one that is defined in an outer scope and comes from
134  * the function's closure.  A "cell" variable is a local that escapes
135  * into an inner function as part of a closure, and thus must be
136  * wrapped in a cell.  Any "local" can also be a "cell", but the
137  * "free" kind is mutually exclusive with both.
138  */
139 
// Note that these all fit within a byte, as do combinations.
// Later, we will use the smaller numbers to differentiate the different
// kinds of locals (e.g. pos-only arg, varkwargs, local-only).
#define CO_FAST_LOCAL   0x20
#define CO_FAST_CELL    0x40
#define CO_FAST_FREE    0x80

// One kind byte per "fast locals" slot, stored in a bytes object
// (see _PyLocals_GetKind()/_PyLocals_SetKind() below and the
// localspluskinds field of _PyCodeConstructor).
typedef unsigned char _PyLocals_Kind;
148 
149 static inline _PyLocals_Kind
_PyLocals_GetKind(PyObject *kinds, int i)150 _PyLocals_GetKind(PyObject *kinds, int i)
151 {
152     assert(PyBytes_Check(kinds));
153     assert(0 <= i && i < PyBytes_GET_SIZE(kinds));
154     char *ptr = PyBytes_AS_STRING(kinds);
155     return (_PyLocals_Kind)(ptr[i]);
156 }
157 
158 static inline void
_PyLocals_SetKind(PyObject *kinds, int i, _PyLocals_Kind kind)159 _PyLocals_SetKind(PyObject *kinds, int i, _PyLocals_Kind kind)
160 {
161     assert(PyBytes_Check(kinds));
162     assert(0 <= i && i < PyBytes_GET_SIZE(kinds));
163     char *ptr = PyBytes_AS_STRING(kinds);
164     ptr[i] = (char) kind;
165 }
166 
167 
/* All the state needed to create a code object.  Filled in by callers and
 * consumed by _PyCode_Validate() and _PyCode_New() (declared below). */
struct _PyCodeConstructor {
    /* metadata */
    PyObject *filename;
    PyObject *name;
    PyObject *qualname;
    int flags;

    /* the code */
    PyObject *code;
    int firstlineno;
    PyObject *linetable;

    /* used by the code */
    PyObject *consts;
    PyObject *names;

    /* mapping frame offsets to information */
    PyObject *localsplusnames;  // Tuple of strings
    PyObject *localspluskinds;  // Bytes object, one byte per variable
                                // (see _PyLocals_Kind above)

    /* args (within varnames) */
    int argcount;
    int posonlyargcount;
    // XXX Replace argcount with posorkwargcount (argcount - posonlyargcount).
    int kwonlyargcount;

    /* needed to create the frame */
    int stacksize;

    /* used by the eval loop */
    PyObject *exceptiontable;
};
200 
// Using an "arguments struct" like this is helpful for maintainability
// in a case such as this with many parameters.  It does bear a risk:
// if the struct changes and callers are not updated properly then the
// compiler will not catch problems (like a missing argument).  This can
// cause hard-to-debug problems.  The risk is mitigated by the use of
// check_code() in codeobject.c.  However, we may decide to switch
// back to a regular function signature.  Regardless, this approach
// wouldn't be appropriate if this weren't a strictly internal API.
// (See the comments in https://github.com/python/cpython/pull/26258.)
PyAPI_FUNC(int) _PyCode_Validate(struct _PyCodeConstructor *);
PyAPI_FUNC(PyCodeObject *) _PyCode_New(struct _PyCodeConstructor *);


/* Private API */

/* Getters for internal PyCodeObject data.
 * NOTE(review): reference-ownership of the returned objects (new vs.
 * borrowed) is determined in codeobject.c — confirm before use. */
extern PyObject* _PyCode_GetVarnames(PyCodeObject *);
extern PyObject* _PyCode_GetCellvars(PyCodeObject *);
extern PyObject* _PyCode_GetFreevars(PyCodeObject *);
extern PyObject* _PyCode_GetCode(PyCodeObject *);

/** API for initializing the line number tables. */
extern int _PyCode_InitAddressRange(PyCodeObject* co, PyCodeAddressRange *bounds);

/** Out of process API for initializing the location table. */
extern void _PyLineTable_InitAddressRange(
    const char *linetable,
    Py_ssize_t length,
    int firstlineno,
    PyCodeAddressRange *range);

/** API for traversing the line number table. */
extern int _PyLineTable_NextAddressRange(PyCodeAddressRange *range);
extern int _PyLineTable_PreviousAddressRange(PyCodeAddressRange *range);
235 
/* Specialization functions */

/* Each _Py_Specialize_* function attempts to specialize the adaptive
 * instruction at `instr` based on the concrete run-time operand(s) passed
 * in (PEP 659).  NOTE(review): the meaning of the int return values is
 * defined in specialize.c — confirm there before relying on it. */
extern int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr,
                                   PyObject *name);
extern int _Py_Specialize_StoreAttr(PyObject *owner, _Py_CODEUNIT *instr,
                                    PyObject *name);
extern int _Py_Specialize_LoadGlobal(PyObject *globals, PyObject *builtins, _Py_CODEUNIT *instr, PyObject *name);
extern int _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr,
                                     PyObject *name);
extern int _Py_Specialize_BinarySubscr(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr);
extern int _Py_Specialize_StoreSubscr(PyObject *container, PyObject *sub, _Py_CODEUNIT *instr);
extern int _Py_Specialize_Call(PyObject *callable, _Py_CODEUNIT *instr,
                               int nargs, PyObject *kwnames);
extern int _Py_Specialize_Precall(PyObject *callable, _Py_CODEUNIT *instr,
                                  int nargs, PyObject *kwnames, int oparg);
extern void _Py_Specialize_BinaryOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr,
                                    int oparg, PyObject **locals);
extern void _Py_Specialize_CompareOp(PyObject *lhs, PyObject *rhs,
                                     _Py_CODEUNIT *instr, int oparg);
extern void _Py_Specialize_UnpackSequence(PyObject *seq, _Py_CODEUNIT *instr,
                                          int oparg);

/* Deallocator function for static codeobjects used in deepfreeze.py */
extern void _PyStaticCode_Dealloc(PyCodeObject *co);
/* Function to intern strings of codeobjects */
extern int _PyStaticCode_InternStrings(PyCodeObject *co);
262 
#ifdef Py_STATS

// Number of distinct specialization-failure reasons tracked per opcode.
#define SPECIALIZATION_FAILURE_KINDS 30

// Per-opcode specialization counters (updated via STAT_INC/STAT_DEC below).
typedef struct _specialization_stats {
    uint64_t success;
    uint64_t failure;
    uint64_t hit;
    uint64_t deferred;
    uint64_t miss;
    uint64_t deopt;
    uint64_t failure_kinds[SPECIALIZATION_FAILURE_KINDS];
} SpecializationStats;

// Execution statistics for a single opcode.
// NOTE(review): pair_count presumably counts opcode pairs (this opcode
// followed by another) — confirm against the code that updates it.
typedef struct _opcode_stats {
    SpecializationStats specialization;
    uint64_t execution_count;
    uint64_t pair_count[256];
} OpcodeStats;

// Call-dispatch counters (updated via CALL_STAT_INC below).
typedef struct _call_stats {
    uint64_t inlined_py_calls;
    uint64_t pyeval_calls;
    uint64_t frames_pushed;
    uint64_t frame_objects_created;
} CallStats;

// Object allocation/free and dict-materialization counters
// (updated via OBJECT_STAT_INC/OBJECT_STAT_INC_COND below).
typedef struct _object_stats {
    uint64_t allocations;
    uint64_t allocations512;
    uint64_t allocations4k;
    uint64_t allocations_big;
    uint64_t frees;
    uint64_t to_freelist;
    uint64_t from_freelist;
    uint64_t new_values;
    uint64_t dict_materialized_on_request;
    uint64_t dict_materialized_new_key;
    uint64_t dict_materialized_too_big;
    uint64_t dict_materialized_str_subclass;
} ObjectStats;

// Top-level container for all interpreter statistics.
typedef struct _stats {
    OpcodeStats opcode_stats[256];
    CallStats call_stats;
    ObjectStats object_stats;
} PyStats;

extern PyStats _py_stats;

// The macros below update _py_stats when Py_STATS is defined; the #else
// branch compiles them all to no-ops.
#define STAT_INC(opname, name) _py_stats.opcode_stats[opname].specialization.name++
#define STAT_DEC(opname, name) _py_stats.opcode_stats[opname].specialization.name--
#define OPCODE_EXE_INC(opname) _py_stats.opcode_stats[opname].execution_count++
#define CALL_STAT_INC(name) _py_stats.call_stats.name++
#define OBJECT_STAT_INC(name) _py_stats.object_stats.name++
#define OBJECT_STAT_INC_COND(name, cond) \
    do { if (cond) _py_stats.object_stats.name++; } while (0)

extern void _Py_PrintSpecializationStats(int to_file);

// Used by the _opcode extension which is built as a shared library
PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void);

#else
#define STAT_INC(opname, name) ((void)0)
#define STAT_DEC(opname, name) ((void)0)
#define OPCODE_EXE_INC(opname) ((void)0)
#define CALL_STAT_INC(name) ((void)0)
#define OBJECT_STAT_INC(name) ((void)0)
#define OBJECT_STAT_INC_COND(name, cond) ((void)0)
#endif  // !Py_STATS
334 
335 // Cache values are only valid in memory, so use native endianness.
336 #ifdef WORDS_BIGENDIAN
337 
static inline void
write_u32(uint16_t *p, uint32_t val)
{
    /* Big-endian build: store the 16-bit halves most-significant first,
       so the cache entry has native byte order. */
    for (int i = 1; i >= 0; i--) {
        p[1 - i] = (uint16_t)(val >> (16 * i));
    }
}
344 
static inline void
write_u64(uint16_t *p, uint64_t val)
{
    /* Big-endian build: four 16-bit chunks, most-significant first. */
    for (int i = 0; i < 4; i++) {
        p[i] = (uint16_t)(val >> (48 - 16 * i));
    }
}
353 
static inline uint32_t
read_u32(uint16_t *p)
{
    /* Big-endian build: the first 16-bit half is the most significant. */
    return ((uint32_t)p[0] << 16) | (uint32_t)p[1];
}
362 
static inline uint64_t
read_u64(uint16_t *p)
{
    /* Big-endian build: fold the chunks in, most-significant first. */
    uint64_t val = 0;
    for (int i = 0; i < 4; i++) {
        val = (val << 16) | p[i];
    }
    return val;
}
373 
374 #else
375 
static inline void
write_u32(uint16_t *p, uint32_t val)
{
    /* Little-endian build: store the least-significant 16-bit half first,
       so the cache entry has native byte order. */
    for (int i = 0; i < 2; i++) {
        p[i] = (uint16_t)(val >> (16 * i));
    }
}
382 
static inline void
write_u64(uint16_t *p, uint64_t val)
{
    /* Little-endian build: four 16-bit chunks, least-significant first. */
    for (int i = 0; i < 4; i++) {
        p[i] = (uint16_t)(val >> (16 * i));
    }
}
391 
static inline uint32_t
read_u32(uint16_t *p)
{
    /* Little-endian build: the first 16-bit half is the least significant. */
    return (uint32_t)p[0] | ((uint32_t)p[1] << 16);
}
400 
static inline uint64_t
read_u64(uint16_t *p)
{
    /* Little-endian build: fold the chunks in from the most-significant
       end down, so p[0] lands in the low 16 bits. */
    uint64_t val = 0;
    for (int i = 3; i >= 0; i--) {
        val = (val << 16) | p[i];
    }
    return val;
}
411 
412 #endif
413 
414 static inline void
write_obj(uint16_t *p, PyObject *obj)415 write_obj(uint16_t *p, PyObject *obj)
416 {
417     uintptr_t val = (uintptr_t)obj;
418 #if SIZEOF_VOID_P == 8
419     write_u64(p, val);
420 #elif SIZEOF_VOID_P == 4
421     write_u32(p, val);
422 #else
423     #error "SIZEOF_VOID_P must be 4 or 8"
424 #endif
425 }
426 
427 static inline PyObject *
read_obj(uint16_t *p)428 read_obj(uint16_t *p)
429 {
430     uintptr_t val;
431 #if SIZEOF_VOID_P == 8
432     val = read_u64(p);
433 #elif SIZEOF_VOID_P == 4
434     val = read_u32(p);
435 #else
436     #error "SIZEOF_VOID_P must be 4 or 8"
437 #endif
438     return (PyObject *)val;
439 }
440 
441 /* See Objects/exception_handling_notes.txt for details.
442  */
static inline unsigned char *
parse_varint(unsigned char *p, int *result) {
    /* Decode a varint: each byte carries 6 payload bits (low bits), and
       bit 6 (value 64) marks that another byte follows.  Chunks arrive
       most-significant first.  Stores the value in *result and returns a
       pointer just past the last byte consumed. */
    int value = *p & 63;
    while (*p & 64) {
        p++;
        value = (value << 6) | (*p & 63);
    }
    *result = value;
    return p + 1;
}
453 
static inline int
write_varint(uint8_t *ptr, unsigned int val)
{
    /* Encode a varint: 6 payload bits per byte, least-significant chunk
       first, with bit 6 (value 64) set on every byte except the last.
       Returns the number of bytes written. */
    int nbytes = 1;
    for (; val >= 64; val >>= 6) {
        *ptr++ = (uint8_t)(64 | (val & 63));
        nbytes++;
    }
    *ptr = (uint8_t)val;
    return nbytes;
}
466 
static inline int
write_signed_varint(uint8_t *ptr, int val)
{
    /* Zig-zag-style encoding: the sign goes in the low bit and the
       magnitude is shifted up, then written as an unsigned varint. */
    unsigned int encoded = (val < 0)
        ? (((unsigned int)(-val) << 1) | 1u)
        : ((unsigned int)val << 1);
    return write_varint(ptr, encoded);
}
478 
static inline int
write_location_entry_start(uint8_t *ptr, int code, int length)
{
    /* First byte of a location-table entry: bit 7 set, the 4-bit entry
       code in bits 3-6, and (length - 1) in bits 0-2.  Returns the number
       of bytes written (always 1). */
    assert((code & 15) == code);
    *ptr = (uint8_t)(128 | (code << 3) | (length - 1));
    return 1;
}
486 
487 
488 /** Counters
489  * The first 16-bit value in each inline cache is a counter.
490  * When counting misses, the counter is treated as a simple unsigned value.
491  *
492  * When counting executions until the next specialization attempt,
493  * exponential backoff is used to reduce the number of specialization failures.
494  * The high 12 bits store the counter, the low 4 bits store the backoff exponent.
495  * On a specialization failure, the backoff exponent is incremented and the
496  * counter set to (2**backoff - 1).
497  * Backoff == 6 -> starting counter == 63, backoff == 10 -> starting counter == 1023.
498  */
499 
/* With a 16-bit counter, we have 12 bits for the counter value, and 4 bits for the backoff */
#define ADAPTIVE_BACKOFF_BITS 4
/* The initial counter value is 31 == 2**ADAPTIVE_BACKOFF_START - 1 */
#define ADAPTIVE_BACKOFF_START 5

#define MAX_BACKOFF_VALUE (16 - ADAPTIVE_BACKOFF_BITS)


static inline uint16_t
adaptive_counter_bits(int value, int backoff) {
    /* Pack a counter value (high 12 bits) together with its backoff
       exponent (low 4 bits) into one 16-bit cache entry. */
    int exponent_mask = (1 << ADAPTIVE_BACKOFF_BITS) - 1;
    return (uint16_t)((value << ADAPTIVE_BACKOFF_BITS) | (backoff & exponent_mask));
}
513 
514 static inline uint16_t
adaptive_counter_start(void)515 adaptive_counter_start(void) {
516     unsigned int value = (1 << ADAPTIVE_BACKOFF_START) - 1;
517     return adaptive_counter_bits(value, ADAPTIVE_BACKOFF_START);
518 }
519 
520 static inline uint16_t
adaptive_counter_backoff(uint16_t counter)521 adaptive_counter_backoff(uint16_t counter) {
522     unsigned int backoff = counter & ((1<<ADAPTIVE_BACKOFF_BITS)-1);
523     backoff++;
524     if (backoff > MAX_BACKOFF_VALUE) {
525         backoff = MAX_BACKOFF_VALUE;
526     }
527     unsigned int value = (1 << backoff) - 1;
528     return adaptive_counter_bits(value, backoff);
529 }
530 
531 
532 /* Line array cache for tracing */
533 
534 extern int _PyCode_CreateLineArray(PyCodeObject *co);
535 
536 static inline int
_PyCode_InitLineArray(PyCodeObject *co)537 _PyCode_InitLineArray(PyCodeObject *co)
538 {
539     if (co->_co_linearray) {
540         return 0;
541     }
542     return _PyCode_CreateLineArray(co);
543 }
544 
545 static inline int
_PyCode_LineNumberFromArray(PyCodeObject *co, int index)546 _PyCode_LineNumberFromArray(PyCodeObject *co, int index)
547 {
548     assert(co->_co_linearray != NULL);
549     assert(index >= 0);
550     assert(index < Py_SIZE(co));
551     if (co->_co_linearray_entry_size == 2) {
552         return ((int16_t *)co->_co_linearray)[index];
553     }
554     else {
555         assert(co->_co_linearray_entry_size == 4);
556         return ((int32_t *)co->_co_linearray)[index];
557     }
558 }
559 
560 
561 #ifdef __cplusplus
562 }
563 #endif
564 #endif /* !Py_INTERNAL_CODE_H */
565