#ifndef Py_INTERNAL_CODE_H
#define Py_INTERNAL_CODE_H
#ifdef __cplusplus
extern "C" {
#endif

/* PEP 659
 * Specialization and quickening structs and helper functions
 */


// Inline caches. If you change the number of cache entries for an instruction,
// you must *also* update the number of cache entries in Lib/opcode.py and bump
// the magic number in Lib/importlib/_bootstrap_external.py!

#define CACHE_ENTRIES(cache) (sizeof(cache)/sizeof(_Py_CODEUNIT))

typedef struct {
    _Py_CODEUNIT counter;
    _Py_CODEUNIT index;
    _Py_CODEUNIT module_keys_version[2];
    _Py_CODEUNIT builtin_keys_version;
} _PyLoadGlobalCache;

#define INLINE_CACHE_ENTRIES_LOAD_GLOBAL CACHE_ENTRIES(_PyLoadGlobalCache)
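
// For example, INLINE_CACHE_ENTRIES_LOAD_GLOBAL evaluates to 5:
// _PyLoadGlobalCache holds five 16-bit code units (counter, index, the
// two-unit module_keys_version, and builtin_keys_version), so five cache
// entries follow each LOAD_GLOBAL instruction in the bytecode.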

typedef struct {
    _Py_CODEUNIT counter;
} _PyBinaryOpCache;

#define INLINE_CACHE_ENTRIES_BINARY_OP CACHE_ENTRIES(_PyBinaryOpCache)

typedef struct {
    _Py_CODEUNIT counter;
} _PyUnpackSequenceCache;

#define INLINE_CACHE_ENTRIES_UNPACK_SEQUENCE \
    CACHE_ENTRIES(_PyUnpackSequenceCache)

typedef struct {
    _Py_CODEUNIT counter;
    _Py_CODEUNIT mask;
} _PyCompareOpCache;

#define INLINE_CACHE_ENTRIES_COMPARE_OP CACHE_ENTRIES(_PyCompareOpCache)

typedef struct {
    _Py_CODEUNIT counter;
    _Py_CODEUNIT type_version[2];
    _Py_CODEUNIT func_version;
} _PyBinarySubscrCache;

#define INLINE_CACHE_ENTRIES_BINARY_SUBSCR CACHE_ENTRIES(_PyBinarySubscrCache)

typedef struct {
    _Py_CODEUNIT counter;
    _Py_CODEUNIT version[2];
    _Py_CODEUNIT index;
} _PyAttrCache;

#define INLINE_CACHE_ENTRIES_LOAD_ATTR CACHE_ENTRIES(_PyAttrCache)

#define INLINE_CACHE_ENTRIES_STORE_ATTR CACHE_ENTRIES(_PyAttrCache)

typedef struct {
    _Py_CODEUNIT counter;
    _Py_CODEUNIT type_version[2];
    _Py_CODEUNIT dict_offset;
    _Py_CODEUNIT keys_version[2];
    _Py_CODEUNIT descr[4];
} _PyLoadMethodCache;

#define INLINE_CACHE_ENTRIES_LOAD_METHOD CACHE_ENTRIES(_PyLoadMethodCache)

typedef struct {
    _Py_CODEUNIT counter;
    _Py_CODEUNIT func_version[2];
    _Py_CODEUNIT min_args;
} _PyCallCache;

#define INLINE_CACHE_ENTRIES_CALL CACHE_ENTRIES(_PyCallCache)

typedef struct {
    _Py_CODEUNIT counter;
} _PyPrecallCache;

#define INLINE_CACHE_ENTRIES_PRECALL CACHE_ENTRIES(_PyPrecallCache)

typedef struct {
    _Py_CODEUNIT counter;
} _PyStoreSubscrCache;

#define INLINE_CACHE_ENTRIES_STORE_SUBSCR CACHE_ENTRIES(_PyStoreSubscrCache)

#define QUICKENING_WARMUP_DELAY 8

/* We want to compare to zero for efficiency, so we offset values accordingly */
#define QUICKENING_INITIAL_WARMUP_VALUE (-QUICKENING_WARMUP_DELAY)

void _PyCode_Quicken(PyCodeObject *code);

static inline void
_PyCode_Warmup(PyCodeObject *code)
{
    if (code->co_warmup != 0) {
        code->co_warmup++;
        if (code->co_warmup == 0) {
            _PyCode_Quicken(code);
        }
    }
}
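
// Worked example (assuming new code objects start with co_warmup set to
// QUICKENING_INITIAL_WARMUP_VALUE, i.e. -8): the eighth call to
// _PyCode_Warmup() increments co_warmup to zero and triggers
// _PyCode_Quicken().  From then on co_warmup stays zero, so the outer
// check above makes every later call a no-op.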

extern uint8_t _PyOpcode_Adaptive[256];

extern Py_ssize_t _Py_QuickenedCount;

// Borrowed references to common callables:
struct callable_cache {
    PyObject *isinstance;
    PyObject *len;
    PyObject *list_append;
};

/* "Locals plus" for a code object is the set of locals + cell vars +
 * free vars.  This relates to variable names as well as offsets into
 * the "fast locals" storage array of execution frames.  The compiler
 * builds the list of names, their offsets, and the corresponding
 * kind of local.
 *
 * Those kinds represent the source of the initial value and the
 * variable's scope (as related to closures).  A "local" is an
 * argument or other variable defined in the current scope.  A "free"
 * variable is one that is defined in an outer scope and comes from
 * the function's closure.  A "cell" variable is a local that escapes
 * into an inner function as part of a closure, and thus must be
 * wrapped in a cell.  Any "local" can also be a "cell", but the
 * "free" kind is mutually exclusive with both.
 */

// Note that these all fit within a byte, as do combinations.
// Later, we will use the smaller numbers to differentiate the different
// kinds of locals (e.g. pos-only arg, varkwargs, local-only).
#define CO_FAST_LOCAL   0x20
#define CO_FAST_CELL    0x40
#define CO_FAST_FREE    0x80
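
// Illustrative example (a sketch; the compiler assigns the actual
// kinds): given
//
//     def f(a):
//         def g():
//             return a
//         return g
//
// 'a' is an argument that escapes into g's closure, so in f its kind is
// CO_FAST_LOCAL | CO_FAST_CELL, while 'g' is plain CO_FAST_LOCAL.
// Inside g, 'a' has kind CO_FAST_FREE.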

typedef unsigned char _PyLocals_Kind;

static inline _PyLocals_Kind
_PyLocals_GetKind(PyObject *kinds, int i)
{
    assert(PyBytes_Check(kinds));
    assert(0 <= i && i < PyBytes_GET_SIZE(kinds));
    char *ptr = PyBytes_AS_STRING(kinds);
    return (_PyLocals_Kind)(ptr[i]);
}

static inline void
_PyLocals_SetKind(PyObject *kinds, int i, _PyLocals_Kind kind)
{
    assert(PyBytes_Check(kinds));
    assert(0 <= i && i < PyBytes_GET_SIZE(kinds));
    char *ptr = PyBytes_AS_STRING(kinds);
    ptr[i] = (char) kind;
}
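
// Typical use (an illustrative sketch, not copied from the runtime):
// deciding whether fast-local slot i needs to be wrapped in a cell when
// a frame is initialized:
//
//     _PyLocals_Kind kind = _PyLocals_GetKind(co->co_localspluskinds, i);
//     if (kind & CO_FAST_CELL) {
//         /* allocate a cell object for this slot */
//     }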


struct _PyCodeConstructor {
    /* metadata */
    PyObject *filename;
    PyObject *name;
    PyObject *qualname;
    int flags;

    /* the code */
    PyObject *code;
    int firstlineno;
    PyObject *linetable;

    /* used by the code */
    PyObject *consts;
    PyObject *names;

    /* mapping frame offsets to information */
    PyObject *localsplusnames;  // Tuple of strings
    PyObject *localspluskinds;  // Bytes object, one byte per variable

    /* args (within varnames) */
    int argcount;
    int posonlyargcount;
    // XXX Replace argcount with posorkwargcount (argcount - posonlyargcount).
    int kwonlyargcount;

    /* needed to create the frame */
    int stacksize;

    /* used by the eval loop */
    PyObject *exceptiontable;
};

// Using an "arguments struct" like this is helpful for maintainability
// in a case like this one, with so many parameters.  It does bear a risk:
// if the struct changes and callers are not updated properly, the
// compiler will not catch mistakes (like a missing argument), which can
// cause hard-to-debug problems.  The risk is mitigated by the use of
// check_code() in codeobject.c.  However, we may decide to switch
// back to a regular function signature.  Regardless, this approach
// wouldn't be appropriate if this weren't a strictly internal API.
// (See the comments in https://github.com/python/cpython/pull/26258.)
PyAPI_FUNC(int) _PyCode_Validate(struct _PyCodeConstructor *);
PyAPI_FUNC(PyCodeObject *) _PyCode_New(struct _PyCodeConstructor *);
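
// A minimal sketch of the intended call pattern (the field values here
// are placeholders, not a real construction site):
//
//     struct _PyCodeConstructor con = {
//         .filename = filename,
//         .name = name,
//         .qualname = qualname,
//         .code = code,
//         .consts = consts,
//         .names = names,
//         // ... remaining fields ...
//     };
//     if (_PyCode_Validate(&con) < 0) {
//         return NULL;
//     }
//     PyCodeObject *co = _PyCode_New(&con);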


/* Private API */

/* Getters for internal PyCodeObject data. */
extern PyObject* _PyCode_GetVarnames(PyCodeObject *);
extern PyObject* _PyCode_GetCellvars(PyCodeObject *);
extern PyObject* _PyCode_GetFreevars(PyCodeObject *);
extern PyObject* _PyCode_GetCode(PyCodeObject *);

/** API for initializing the line number tables. */
extern int _PyCode_InitAddressRange(PyCodeObject* co, PyCodeAddressRange *bounds);

/** Out-of-process API for initializing the location table. */
extern void _PyLineTable_InitAddressRange(
    const char *linetable,
    Py_ssize_t length,
    int firstlineno,
    PyCodeAddressRange *range);

/** API for traversing the line number table. */
extern int _PyLineTable_NextAddressRange(PyCodeAddressRange *range);
extern int _PyLineTable_PreviousAddressRange(PyCodeAddressRange *range);
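
// Typical traversal (an illustrative sketch; it assumes the ar_start,
// ar_end, and ar_line fields of PyCodeAddressRange):
//
//     PyCodeAddressRange range;
//     _PyCode_InitAddressRange(co, &range);
//     while (_PyLineTable_NextAddressRange(&range)) {
//         /* instructions in [range.ar_start, range.ar_end) belong to
//            line range.ar_line (-1 if there is no line number) */
//     }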

/* Specialization functions */

extern int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr,
                                   PyObject *name);
extern int _Py_Specialize_StoreAttr(PyObject *owner, _Py_CODEUNIT *instr,
                                    PyObject *name);
extern int _Py_Specialize_LoadGlobal(PyObject *globals, PyObject *builtins,
                                     _Py_CODEUNIT *instr, PyObject *name);
extern int _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr,
                                     PyObject *name);
extern int _Py_Specialize_BinarySubscr(PyObject *sub, PyObject *container,
                                       _Py_CODEUNIT *instr);
extern int _Py_Specialize_StoreSubscr(PyObject *container, PyObject *sub,
                                      _Py_CODEUNIT *instr);
extern int _Py_Specialize_Call(PyObject *callable, _Py_CODEUNIT *instr,
                               int nargs, PyObject *kwnames);
extern int _Py_Specialize_Precall(PyObject *callable, _Py_CODEUNIT *instr,
                                  int nargs, PyObject *kwnames, int oparg);
extern void _Py_Specialize_BinaryOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr,
                                    int oparg, PyObject **locals);
extern void _Py_Specialize_CompareOp(PyObject *lhs, PyObject *rhs,
                                     _Py_CODEUNIT *instr, int oparg);
extern void _Py_Specialize_UnpackSequence(PyObject *seq, _Py_CODEUNIT *instr,
                                          int oparg);
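
// Illustrative call site, simplified from the adaptive instructions in
// the interpreter loop (not a verbatim excerpt): when an adaptive
// instruction's counter expires, the matching specializer is called on
// the current operands and the instruction is rewritten in place:
//
//     if (_Py_Specialize_LoadAttr(owner, next_instr, name) < 0) {
//         goto error;
//     }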

/* Deallocator function for static codeobjects used in deepfreeze.py */
extern void _PyStaticCode_Dealloc(PyCodeObject *co);
/* Function to intern strings of codeobjects */
extern int _PyStaticCode_InternStrings(PyCodeObject *co);

#ifdef Py_STATS

#define SPECIALIZATION_FAILURE_KINDS 30

typedef struct _specialization_stats {
    uint64_t success;
    uint64_t failure;
    uint64_t hit;
    uint64_t deferred;
    uint64_t miss;
    uint64_t deopt;
    uint64_t failure_kinds[SPECIALIZATION_FAILURE_KINDS];
} SpecializationStats;

typedef struct _opcode_stats {
    SpecializationStats specialization;
    uint64_t execution_count;
    uint64_t pair_count[256];
} OpcodeStats;

typedef struct _call_stats {
    uint64_t inlined_py_calls;
    uint64_t pyeval_calls;
    uint64_t frames_pushed;
    uint64_t frame_objects_created;
} CallStats;

typedef struct _object_stats {
    uint64_t allocations;
    uint64_t allocations512;
    uint64_t allocations4k;
    uint64_t allocations_big;
    uint64_t frees;
    uint64_t to_freelist;
    uint64_t from_freelist;
    uint64_t new_values;
    uint64_t dict_materialized_on_request;
    uint64_t dict_materialized_new_key;
    uint64_t dict_materialized_too_big;
    uint64_t dict_materialized_str_subclass;
} ObjectStats;

typedef struct _stats {
    OpcodeStats opcode_stats[256];
    CallStats call_stats;
    ObjectStats object_stats;
} PyStats;

extern PyStats _py_stats;

#define STAT_INC(opname, name) _py_stats.opcode_stats[opname].specialization.name++
#define STAT_DEC(opname, name) _py_stats.opcode_stats[opname].specialization.name--
#define OPCODE_EXE_INC(opname) _py_stats.opcode_stats[opname].execution_count++
#define CALL_STAT_INC(name) _py_stats.call_stats.name++
#define OBJECT_STAT_INC(name) _py_stats.object_stats.name++
#define OBJECT_STAT_INC_COND(name, cond) \
    do { if (cond) _py_stats.object_stats.name++; } while (0)
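
// Typical use in the interpreter loop (a hedged illustration, not a
// verbatim excerpt from ceval.c):
//
//     STAT_INC(LOAD_ATTR, hit);
//     CALL_STAT_INC(inlined_py_calls);
//     OBJECT_STAT_INC_COND(new_values, dict == NULL);
//
// When Py_STATS is not defined, the fallback macros below compile all
// of these to no-ops.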

extern void _Py_PrintSpecializationStats(int to_file);

// Used by the _opcode extension, which is built as a shared library.
PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void);

#else
#define STAT_INC(opname, name) ((void)0)
#define STAT_DEC(opname, name) ((void)0)
#define OPCODE_EXE_INC(opname) ((void)0)
#define CALL_STAT_INC(name) ((void)0)
#define OBJECT_STAT_INC(name) ((void)0)
#define OBJECT_STAT_INC_COND(name, cond) ((void)0)
#endif  // !Py_STATS

// Cache values are only valid in memory, so use native endianness.
#ifdef WORDS_BIGENDIAN

static inline void
write_u32(uint16_t *p, uint32_t val)
{
    p[0] = (uint16_t)(val >> 16);
    p[1] = (uint16_t)(val >>  0);
}

static inline void
write_u64(uint16_t *p, uint64_t val)
{
    p[0] = (uint16_t)(val >> 48);
    p[1] = (uint16_t)(val >> 32);
    p[2] = (uint16_t)(val >> 16);
    p[3] = (uint16_t)(val >>  0);
}

static inline uint32_t
read_u32(uint16_t *p)
{
    uint32_t val = 0;
    val |= (uint32_t)p[0] << 16;
    val |= (uint32_t)p[1] <<  0;
    return val;
}

static inline uint64_t
read_u64(uint16_t *p)
{
    uint64_t val = 0;
    val |= (uint64_t)p[0] << 48;
    val |= (uint64_t)p[1] << 32;
    val |= (uint64_t)p[2] << 16;
    val |= (uint64_t)p[3] <<  0;
    return val;
}

#else

static inline void
write_u32(uint16_t *p, uint32_t val)
{
    p[0] = (uint16_t)(val >>  0);
    p[1] = (uint16_t)(val >> 16);
}

static inline void
write_u64(uint16_t *p, uint64_t val)
{
    p[0] = (uint16_t)(val >>  0);
    p[1] = (uint16_t)(val >> 16);
    p[2] = (uint16_t)(val >> 32);
    p[3] = (uint16_t)(val >> 48);
}

static inline uint32_t
read_u32(uint16_t *p)
{
    uint32_t val = 0;
    val |= (uint32_t)p[0] <<  0;
    val |= (uint32_t)p[1] << 16;
    return val;
}

static inline uint64_t
read_u64(uint16_t *p)
{
    uint64_t val = 0;
    val |= (uint64_t)p[0] <<  0;
    val |= (uint64_t)p[1] << 16;
    val |= (uint64_t)p[2] << 32;
    val |= (uint64_t)p[3] << 48;
    return val;
}

#endif

static inline void
write_obj(uint16_t *p, PyObject *obj)
{
    uintptr_t val = (uintptr_t)obj;
#if SIZEOF_VOID_P == 8
    write_u64(p, val);
#elif SIZEOF_VOID_P == 4
    write_u32(p, val);
#else
    #error "SIZEOF_VOID_P must be 4 or 8"
#endif
}

static inline PyObject *
read_obj(uint16_t *p)
{
    uintptr_t val;
#if SIZEOF_VOID_P == 8
    val = read_u64(p);
#elif SIZEOF_VOID_P == 4
    val = read_u32(p);
#else
    #error "SIZEOF_VOID_P must be 4 or 8"
#endif
    return (PyObject *)val;
}
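
// Example use (an illustrative sketch): a specializing instruction
// records a type's version tag across two 16-bit cache entries, and the
// specialized instruction re-checks it on every execution:
//
//     write_u32(cache->type_version, type->tp_version_tag);
//     ...
//     if (read_u32(cache->type_version) != type->tp_version_tag) {
//         /* deoptimize */
//     }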

/* See Objects/exception_handling_notes.txt for details. */
static inline unsigned char *
parse_varint(unsigned char *p, int *result) {
    int val = p[0] & 63;
    while (p[0] & 64) {
        p++;
        val = (val << 6) | (p[0] & 63);
    }
    *result = val;
    return p+1;
}
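
// The exception table stores each varint with the *most* significant
// 6-bit group first and bit 64 as a continuation flag.  Worked example:
// the bytes {67, 8} decode as val = 67 & 63 = 3; 67 has the
// continuation bit set, so val = (3 << 6) | (8 & 63) = 200.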

static inline int
write_varint(uint8_t *ptr, unsigned int val)
{
    int written = 1;
    while (val >= 64) {
        *ptr++ = 64 | (val & 63);
        val >>= 6;
        written++;
    }
    *ptr = val;
    return written;
}

static inline int
write_signed_varint(uint8_t *ptr, int val)
{
    if (val < 0) {
        val = ((-val)<<1) | 1;
    }
    else {
        val = val << 1;
    }
    return write_varint(ptr, val);
}
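
// Note that write_varint() emits the *least* significant 6-bit group
// first; this is the location table encoding, not the exception table
// encoding parsed above.  Worked example: write_varint(ptr, 200) writes
// the bytes {64 | 8, 3} = {72, 3} and returns 2.  write_signed_varint()
// folds the sign into the low bit first, so -5 is encoded as
// write_varint(ptr, 11).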

static inline int
write_location_entry_start(uint8_t *ptr, int code, int length)
{
    assert((code & 15) == code);
    *ptr = 128 | (code << 3) | (length - 1);
    return 1;
}
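
// Worked example: write_location_entry_start(ptr, 1, 2) stores
// 128 | (1 << 3) | (2 - 1) == 137, i.e. a location entry with code 1
// covering two code units.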


/** Counters
 * The first 16-bit value in each inline cache is a counter.
 * When counting misses, the counter is treated as a simple unsigned value.
 *
 * When counting executions until the next specialization attempt,
 * exponential backoff is used to reduce the number of specialization failures.
 * The high 12 bits store the counter, the low 4 bits store the backoff exponent.
 * On a specialization failure, the backoff exponent is incremented and the
 * counter set to (2**backoff - 1).
 * Backoff == 6 -> starting counter == 63, backoff == 10 -> starting counter == 1023.
 */

/* With a 16-bit counter, we have 12 bits for the counter value, and 4 bits for the backoff */
#define ADAPTIVE_BACKOFF_BITS 4
/* The initial counter value is 31 == 2**ADAPTIVE_BACKOFF_START - 1 */
#define ADAPTIVE_BACKOFF_START 5

#define MAX_BACKOFF_VALUE (16 - ADAPTIVE_BACKOFF_BITS)


static inline uint16_t
adaptive_counter_bits(int value, int backoff) {
    return (value << ADAPTIVE_BACKOFF_BITS) |
           (backoff & ((1<<ADAPTIVE_BACKOFF_BITS)-1));
}

static inline uint16_t
adaptive_counter_start(void) {
    unsigned int value = (1 << ADAPTIVE_BACKOFF_START) - 1;
    return adaptive_counter_bits(value, ADAPTIVE_BACKOFF_START);
}

static inline uint16_t
adaptive_counter_backoff(uint16_t counter) {
    unsigned int backoff = counter & ((1<<ADAPTIVE_BACKOFF_BITS)-1);
    backoff++;
    if (backoff > MAX_BACKOFF_VALUE) {
        backoff = MAX_BACKOFF_VALUE;
    }
    unsigned int value = (1 << backoff) - 1;
    return adaptive_counter_bits(value, backoff);
}
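
// Worked example: adaptive_counter_start() returns (31 << 4) | 5 == 501.
// If specialization then fails, adaptive_counter_backoff(501) extracts
// backoff 5, bumps it to 6, and returns (63 << 4) | 6 == 1014, roughly
// doubling the number of executions before the next attempt.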


/* Line array cache for tracing */

extern int _PyCode_CreateLineArray(PyCodeObject *co);

static inline int
_PyCode_InitLineArray(PyCodeObject *co)
{
    if (co->_co_linearray) {
        return 0;
    }
    return _PyCode_CreateLineArray(co);
}

static inline int
_PyCode_LineNumberFromArray(PyCodeObject *co, int index)
{
    assert(co->_co_linearray != NULL);
    assert(index >= 0);
    assert(index < Py_SIZE(co));
    if (co->_co_linearray_entry_size == 2) {
        return ((int16_t *)co->_co_linearray)[index];
    }
    else {
        assert(co->_co_linearray_entry_size == 4);
        return ((int32_t *)co->_co_linearray)[index];
    }
}
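
// Typical use when tracing (an illustrative sketch): ensure the array
// exists, then map an instruction index to a line number:
//
//     if (_PyCode_InitLineArray(co) < 0) {
//         return -1;
//     }
//     int line = _PyCode_LineNumberFromArray(co, i);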


#ifdef __cplusplus
}
#endif
#endif /* !Py_INTERNAL_CODE_H */