/*
 * Frame object layout.  The object header comes first, followed by the
 * link to the previous frame, a pointer to the frame data, tracing state,
 * and trailing storage for the frame data itself.
 */
struct _frame {
    PyObject_HEAD
    PyFrameObject *f_back;                /* Previous frame, or NULL */
    struct _PyInterpreterFrame *f_frame;  /* Points to the frame data */
    PyObject *f_trace;                    /* Trace function */
    int f_lineno;                         /* Current line number; only valid if non-zero */
    char f_trace_lines;                   /* Emit per-line trace events? */
    char f_trace_opcodes;                 /* Emit per-opcode trace events? */
    char f_fast_as_locals;                /* Have this frame's fast locals been converted to a dict? */
    /* Storage for the frame data, used if this frame object owns the frame */
    PyObject *_f_frame_data[1];
};
typedefstruct _PyInterpreterFrame { PyObject *f_executable; /* Strong reference */ struct _PyInterpreterFrame *previous; PyObject *f_funcobj; /* Strong reference. Only valid if not on C stack */ PyObject *f_globals; /* Borrowed reference. Only valid if not on C stack */ PyObject *f_builtins; /* Borrowed reference. Only valid if not on C stack */ PyObject *f_locals; /* Strong reference, may be NULL. Only valid if not on C stack */ PyFrameObject *frame_obj; /* Strong reference, may be NULL. Only valid if not on C stack */ // NOTE: This is not necessarily the last instruction started in the given // frame. Rather, it is the code unit *prior to* the *next* instruction. For // example, it may be an inline CACHE entry, an instruction we just jumped // over, or (in the case of a newly-created frame) a totally invalid value: _Py_CODEUNIT *prev_instr; int stacktop; /* Offset of TOS from localsplus */ /* The return_offset determines where a `RETURN` should go in the caller, * relative to `prev_instr`. * It is only meaningful to the callee, * so it needs to be set in any CALL (to a Python function) * or SEND (to a coroutine or generator). * If there is no callee, then it is meaningless. */ uint16_t return_offset; char owner; /* Locals and stack */ PyObject *localsplus[1]; } _PyInterpreterFrame;
/* Start of the template of the assembly trampoline. */
extern void *_Py_trampoline_func_start;
/* End of the template of the assembly trampoline. */
extern void *_Py_trampoline_func_end;
/*
 * Bookkeeping record for one memory arena that holds trampoline code.
 * Arenas are chained through `prev`.
 */
struct code_arena_st {
    char *start_addr;            /* Start of the memory arena */
    char *current_addr;          /* Address of the current trampoline within the arena */
    size_t size;                 /* Size of the memory arena */
    size_t size_left;            /* Remaining size of the memory arena */
    size_t code_size;            /* Size of the code of every trampoline in the arena */
    struct code_arena_st *prev;  /* Pointer to the previous arena, or NULL if this is the first arena */
};
/*
 * Frame evaluator that runs a frame through a per-code-object trampoline.
 *
 * The trampoline is looked up in the code object's extra slot at
 * `extra_code_index`.  If none is stored yet, one is obtained from
 * compile_trampoline(), reported through trampoline_api.write_state(), and
 * saved in that slot.
 *
 * Returns the result of the trampoline call.  Returns the result of
 * _PyEval_EvalFrameDefault() directly when perf_status is
 * PERF_STATUS_FAILED or PERF_STATUS_NO_INIT, or when compile_trampoline()
 * returns NULL.
 */
static PyObject *
py_trampoline_evaluator(PyThreadState *ts, _PyInterpreterFrame *frame,
                        int throw)
{
    if (perf_status == PERF_STATUS_FAILED ||
        perf_status == PERF_STATUS_NO_INIT) {
        /* Trampolines are unavailable: use the default evaluator. */
        return _PyEval_EvalFrameDefault(ts, frame, throw);
    }

    PyCodeObject *co = _PyFrame_GetCode(frame);
    py_trampoline trampoline = NULL;
    assert(extra_code_index != -1);
    int lookup_status = _PyCode_GetExtra((PyObject *)co, extra_code_index,
                                         (void **)&trampoline);
    if (lookup_status == 0 && trampoline != NULL) {
        /* A trampoline is already stored for this code object. */
        return trampoline(ts, frame, throw, _PyEval_EvalFrameDefault);
    }

    /* First time this code object is seen: compile a trampoline for it. */
    trampoline = compile_trampoline();
    if (trampoline == NULL) {
        /* Something failed, fall back to the default evaluator. */
        return _PyEval_EvalFrameDefault(ts, frame, throw);
    }
    trampoline_api.write_state(trampoline_api.state, trampoline,
                               perf_code_arena->code_size, co);
    _PyCode_SetExtra((PyObject *)co, extra_code_index, (void *)trampoline);

    assert(trampoline != NULL);
    return trampoline(ts, frame, throw, _PyEval_EvalFrameDefault);
}
staticint new_code_arena(void) { // non-trivial programs typically need 64 to 256 kiB. size_t mem_size = 4096 * 16; assert(mem_size % sysconf(_SC_PAGESIZE) == 0); char *memory = mmap(NULL, // address mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, // fd (not used here) 0); // offset (not used here) if (!memory) { PyErr_SetFromErrno(PyExc_OSError); _PyErr_WriteUnraisableMsg( "Failed to create new mmap for perf trampoline", NULL); perf_status = PERF_STATUS_FAILED; return-1; } void *start = &_Py_trampoline_func_start; void *end = &_Py_trampoline_func_end; size_t code_size = end - start;
size_t n_copies = mem_size / code_size; for (size_t i = 0; i < n_copies; i++) { memcpy(memory + i * code_size, start, code_size * sizeof(char)); } // Some systems may prevent us from creating executable code on the fly. int res = mprotect(memory, mem_size, PROT_READ | PROT_EXEC); if (res == -1) { PyErr_SetFromErrno(PyExc_OSError); munmap(memory, mem_size); _PyErr_WriteUnraisableMsg( "Failed to set mmap for perf trampoline to PROT_READ | PROT_EXEC", NULL); return-1; }
invalidate_icache(memory, memory + mem_size);
code_arena_t *new_arena = PyMem_RawCalloc(1, sizeof(code_arena_t)); if (new_arena == NULL) { PyErr_NoMemory(); munmap(memory, mem_size); _PyErr_WriteUnraisableMsg("Failed to allocate new code arena struct", NULL); return-1; }
7f0caf8aa70c b py::_path_abspath:<frozen importlib._bootstrap_external>
7f0caf8aa717 b py::_path_isabs:<frozen importlib._bootstrap_external>
7f0caf8aa722 b py::FileFinder._fill_cache:<frozen importlib._bootstrap_external>
7f0caf8aa72d b py::execusercustomize:<frozen site>
7f0caf8aa738 b py::_read_directory:<frozen zipimport>
7f0caf8aa743 b py::FileLoader.__init__:<frozen importlib._bootstrap_external>
7f0caf8aa74e b py::<module>:/home/manjusaka/Documents/projects/cpython/demo.py
7f0caf8aa759 b py::baz:/home/manjusaka/Documents/projects/cpython/demo.py
7f0caf8aa764 b py::bar:/home/manjusaka/Documents/projects/cpython/demo.py
7f0caf8aa76f b py::foo:/home/manjusaka/Documents/projects/cpython/demo.py
这样做的好处有很多：我们可以直接利用 Linux 自身的 perf 生态来完成许多基础工作（比如生成火焰图）。