From 5d7f3024567d8eab28154289e900df853b1fa67a Mon Sep 17 00:00:00 2001 From: cocolato Date: Sun, 8 Feb 2026 21:00:33 +0800 Subject: [PATCH] Eliminate redundant refcounting from BINARY_SLICE --- Include/internal/pycore_opcode_metadata.h | 4 +- Include/internal/pycore_uop_ids.h | 2 +- Include/internal/pycore_uop_metadata.h | 8 ++-- Lib/test/test_capi/test_opt.py | 17 ++++++++ Modules/_testinternalcapi/test_cases.c.h | 53 ++++++++++++++++------- Python/bytecodes.c | 20 +++++---- Python/executor_cases.c.h | 48 +++++++++++--------- Python/generated_cases.c.h | 53 ++++++++++++++++------- Python/optimizer_bytecodes.c | 5 ++- Python/optimizer_cases.c.h | 17 +++++++- 10 files changed, 158 insertions(+), 69 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 98d9c2b51a7834..41c0a03f9e3158 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1109,7 +1109,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [BINARY_OP_SUBSCR_USTR_INT] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG | HAS_EXIT_FLAG }, [BINARY_OP_SUBTRACT_FLOAT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG }, [BINARY_OP_SUBTRACT_INT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG }, - [BINARY_SLICE] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [BINARY_SLICE] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [BUILD_INTERPOLATION] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [BUILD_LIST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [BUILD_MAP] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, @@ -1362,7 +1362,7 @@ _PyOpcode_macro_expansion[256] = { [BINARY_OP_SUBSCR_USTR_INT] = { .nuops = 5, .uops = { { _GUARD_TOS_INT, OPARG_SIMPLE, 0 }, { _GUARD_NOS_UNICODE, OPARG_SIMPLE, 0 }, { _BINARY_OP_SUBSCR_USTR_INT, OPARG_SIMPLE, 5 }, { _POP_TOP_INT, OPARG_SIMPLE, 5 }, { _POP_TOP_UNICODE, OPARG_SIMPLE, 5 } } }, [BINARY_OP_SUBTRACT_FLOAT] = { .nuops = 5, .uops = { { _GUARD_TOS_FLOAT, OPARG_SIMPLE, 0 }, { _GUARD_NOS_FLOAT, OPARG_SIMPLE, 0 }, { _BINARY_OP_SUBTRACT_FLOAT, OPARG_SIMPLE, 5 }, { _POP_TOP_FLOAT, OPARG_SIMPLE, 5 }, { _POP_TOP_FLOAT, OPARG_SIMPLE, 5 } } }, [BINARY_OP_SUBTRACT_INT] = { .nuops = 5, .uops = { { _GUARD_TOS_INT, OPARG_SIMPLE, 0 }, { _GUARD_NOS_INT, OPARG_SIMPLE, 0 }, { _BINARY_OP_SUBTRACT_INT, OPARG_SIMPLE, 5 }, { _POP_TOP_INT, OPARG_SIMPLE, 5 }, { _POP_TOP_INT, OPARG_SIMPLE, 5 } } }, - [BINARY_SLICE] = { .nuops = 1, .uops = { { _BINARY_SLICE, OPARG_SIMPLE, 0 } } }, + [BINARY_SLICE] = { .nuops = 4, .uops = { { _BINARY_SLICE, OPARG_SIMPLE, 0 }, { _POP_TOP, OPARG_SIMPLE, 0 }, { _POP_TOP, OPARG_SIMPLE, 0 }, { _POP_TOP, OPARG_SIMPLE, 0 } } }, [BUILD_INTERPOLATION] = { .nuops = 1, .uops = { { _BUILD_INTERPOLATION, OPARG_SIMPLE, 0 } } }, [BUILD_LIST] = { .nuops = 1, .uops = { { _BUILD_LIST, OPARG_SIMPLE, 0 } } }, [BUILD_MAP] = { .nuops = 1, .uops = { { _BUILD_MAP, OPARG_SIMPLE, 0 } } }, diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index f9313621756b45..5b5d94dfd41095 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -426,7 +426,7 @@ extern "C" { #define _BINARY_OP_SUBTRACT_INT_r03 623 #define _BINARY_OP_SUBTRACT_INT_r13 624 #define _BINARY_OP_SUBTRACT_INT_r23 625 -#define _BINARY_SLICE_r31 626 +#define _BINARY_SLICE_r33 626 #define _BUILD_INTERPOLATION_r01 627 #define _BUILD_LIST_r01 628 #define _BUILD_MAP_r01 629 diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 0835ee5c9499d1..968537c51d6b7c 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -116,7 +116,7 @@ const uint32_t _PyUop_Flags[MAX_UOP_ID+1] = { [_BINARY_OP_INPLACE_ADD_UNICODE] = HAS_LOCAL_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_GUARD_BINARY_OP_EXTEND] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, [_BINARY_OP_EXTEND] = HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, - [_BINARY_SLICE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_BINARY_SLICE] = HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, [_STORE_SLICE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_BINARY_OP_SUBSCR_LIST_INT] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, [_BINARY_OP_SUBSCR_LIST_SLICE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, @@ -1133,7 +1133,7 @@ const _PyUopCachingInfo _PyUop_Caching[MAX_UOP_ID+1] = { { -1, -1, -1 }, { -1, -1, -1 }, { -1, -1, -1 }, - { 1, 3, _BINARY_SLICE_r31 }, + { 3, 3, _BINARY_SLICE_r33 }, }, }, [_STORE_SLICE] = { @@ -3699,7 +3699,7 @@ const uint16_t _PyUop_Uncached[MAX_UOP_REGS_ID+1] = { [_BINARY_OP_INPLACE_ADD_UNICODE_r21] = _BINARY_OP_INPLACE_ADD_UNICODE, [_GUARD_BINARY_OP_EXTEND_r22] = _GUARD_BINARY_OP_EXTEND, [_BINARY_OP_EXTEND_r23] = _BINARY_OP_EXTEND, - [_BINARY_SLICE_r31] = _BINARY_SLICE, + [_BINARY_SLICE_r33] = _BINARY_SLICE, [_STORE_SLICE_r30] = _STORE_SLICE, [_BINARY_OP_SUBSCR_LIST_INT_r23] = _BINARY_OP_SUBSCR_LIST_INT, [_BINARY_OP_SUBSCR_LIST_SLICE_r21] = _BINARY_OP_SUBSCR_LIST_SLICE, @@ -4315,7 +4315,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_REGS_ID+1] = { [_BINARY_OP_SUBTRACT_INT_r13] = "_BINARY_OP_SUBTRACT_INT_r13", [_BINARY_OP_SUBTRACT_INT_r23] = "_BINARY_OP_SUBTRACT_INT_r23", [_BINARY_SLICE] = "_BINARY_SLICE", - [_BINARY_SLICE_r31] = "_BINARY_SLICE_r31", + [_BINARY_SLICE_r33] = "_BINARY_SLICE_r33", [_BUILD_INTERPOLATION] = "_BUILD_INTERPOLATION", [_BUILD_INTERPOLATION_r01] = "_BUILD_INTERPOLATION_r01", [_BUILD_LIST] = "_BUILD_LIST", diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 43b268b0206a46..50731e4eca9472 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -2098,6 +2098,23 @@ def testfunc(n): self.assertIn("_BINARY_OP_SUBSCR_DICT", uops) self.assertLessEqual(count_ops(ex, "_POP_TOP"), 2) + def test_binary_slice(self): + def testfunc(n): + x = 0 + s = "hello world" + a, b = 1, 4 + for _ in range(n): + v = s[a:b] + x += len(v) + return x + + res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) + self.assertEqual(res, TIER2_THRESHOLD * 3) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + self.assertIn("_BINARY_SLICE", uops) + self.assertGreaterEqual(count_ops(ex, "_POP_TOP"), 3) + def test_contains_op(self): def testfunc(n): x = 0 diff --git a/Modules/_testinternalcapi/test_cases.c.h b/Modules/_testinternalcapi/test_cases.c.h index dda3bc53dc5e0d..e49eeb51c8ecc1 100644 --- a/Modules/_testinternalcapi/test_cases.c.h +++ b/Modules/_testinternalcapi/test_cases.c.h @@ -1377,6 +1377,10 @@ _PyStackRef start; _PyStackRef stop; _PyStackRef res; + _PyStackRef c; + _PyStackRef sta; + _PyStackRef sto; + _PyStackRef value; // _SPECIALIZE_BINARY_SLICE { #if ENABLE_SPECIALIZATION @@ -1388,36 +1392,55 @@ stop = stack_pointer[-1]; start = stack_pointer[-2]; container = stack_pointer[-3]; - _PyFrame_SetStackPointer(frame, stack_pointer); - PyObject *slice = _PyBuildSlice_ConsumeRefs(PyStackRef_AsPyObjectSteal(start), - PyStackRef_AsPyObjectSteal(stop)); - stack_pointer = _PyFrame_GetStackPointer(frame); + PyObject *slice = PySlice_New(PyStackRef_AsPyObjectBorrow(start), + PyStackRef_AsPyObjectBorrow(stop), + NULL); PyObject *res_o; if (slice == NULL) { res_o = NULL; } else { - stack_pointer += -2; - ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); res_o = PyObject_GetItem(PyStackRef_AsPyObjectBorrow(container), slice); Py_DECREF(slice); stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += 2; } - stack_pointer += -3; - ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); - _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_CLOSE(container); - stack_pointer = _PyFrame_GetStackPointer(frame); if (res_o == NULL) { JUMP_TO_LABEL(error); } res = PyStackRef_FromPyObjectSteal(res_o); + c = container; + sta = start; + sto = stop; + } + // _POP_TOP + { + value = sto; + stack_pointer[-3] = res; + stack_pointer[-2] = c; + stack_pointer[-1] = sta; + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_XCLOSE(value); + stack_pointer = _PyFrame_GetStackPointer(frame); + } + // _POP_TOP + { + value = sta; + stack_pointer += -1; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_XCLOSE(value); + stack_pointer = _PyFrame_GetStackPointer(frame); + } + // _POP_TOP + { + value = c; + stack_pointer += -1; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_XCLOSE(value); + stack_pointer = _PyFrame_GetStackPointer(frame); } - stack_pointer[0] = res; - stack_pointer += 1; - ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } diff --git a/Python/bytecodes.c b/Python/bytecodes.c index bd22599aef725d..869b885d914cb6 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -862,12 +862,11 @@ dummy_func( #endif /* ENABLE_SPECIALIZATION */ } - op(_BINARY_SLICE, (container, start, stop -- res)) { - PyObject *slice = _PyBuildSlice_ConsumeRefs(PyStackRef_AsPyObjectSteal(start), - PyStackRef_AsPyObjectSteal(stop)); + op(_BINARY_SLICE, (container, start, stop -- res, c, sta, sto)) { + PyObject *slice = PySlice_New(PyStackRef_AsPyObjectBorrow(start), + PyStackRef_AsPyObjectBorrow(stop), + NULL); PyObject *res_o; - // Can't use ERROR_IF() here, because we haven't - // DECREF'ed container yet, and we still own slice. if (slice == NULL) { res_o = NULL; } @@ -875,12 +874,17 @@ dummy_func( res_o = PyObject_GetItem(PyStackRef_AsPyObjectBorrow(container), slice); Py_DECREF(slice); } - PyStackRef_CLOSE(container); - ERROR_IF(res_o == NULL); + if (res_o == NULL) { + ERROR_NO_POP(); + } res = PyStackRef_FromPyObjectSteal(res_o); + c = container; + sta = start; + sto = stop; + INPUTS_DEAD(); } - macro(BINARY_SLICE) = _SPECIALIZE_BINARY_SLICE + _BINARY_SLICE; + macro(BINARY_SLICE) = _SPECIALIZE_BINARY_SLICE + _BINARY_SLICE + POP_TOP + POP_TOP + POP_TOP; specializing op(_SPECIALIZE_STORE_SLICE, (v, container, start, stop -- v, container, start, stop)) { // Placeholder until we implement STORE_SLICE specialization diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index f8de66cbce3a9f..e0a4af2a5cf7f4 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -5185,55 +5185,61 @@ break; } - case _BINARY_SLICE_r31: { + case _BINARY_SLICE_r33: { CHECK_CURRENT_CACHED_VALUES(3); assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE()); _PyStackRef stop; _PyStackRef start; _PyStackRef container; _PyStackRef res; + _PyStackRef c; + _PyStackRef sta; + _PyStackRef sto; _PyStackRef _stack_item_0 = _tos_cache0; _PyStackRef _stack_item_1 = _tos_cache1; _PyStackRef _stack_item_2 = _tos_cache2; stop = _stack_item_2; start = _stack_item_1; container = _stack_item_0; - stack_pointer[0] = container; - stack_pointer[1] = start; - stack_pointer[2] = stop; - stack_pointer += 3; - ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); - _PyFrame_SetStackPointer(frame, stack_pointer); - PyObject *slice = _PyBuildSlice_ConsumeRefs(PyStackRef_AsPyObjectSteal(start), - PyStackRef_AsPyObjectSteal(stop)); - stack_pointer = _PyFrame_GetStackPointer(frame); + PyObject *slice = PySlice_New(PyStackRef_AsPyObjectBorrow(start), + PyStackRef_AsPyObjectBorrow(stop), + NULL); PyObject *res_o; if (slice == NULL) { res_o = NULL; } else { - stack_pointer += -2; + stack_pointer[0] = container; + stack_pointer[1] = start; + stack_pointer[2] = stop; + stack_pointer += 3; ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); res_o = PyObject_GetItem(PyStackRef_AsPyObjectBorrow(container), slice); Py_DECREF(slice); stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += 2; + stack_pointer += -3; } - stack_pointer += -3; - ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); - _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_CLOSE(container); - stack_pointer = _PyFrame_GetStackPointer(frame); if (res_o == NULL) { + stack_pointer[0] = container; + stack_pointer[1] = start; + stack_pointer[2] = stop; + stack_pointer += 3; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); SET_CURRENT_CACHED_VALUES(0); JUMP_TO_ERROR(); } res = PyStackRef_FromPyObjectSteal(res_o); - _tos_cache0 = res; - _tos_cache1 = PyStackRef_ZERO_BITS; - _tos_cache2 = PyStackRef_ZERO_BITS; - SET_CURRENT_CACHED_VALUES(1); + c = container; + sta = start; + sto = stop; + _tos_cache2 = sto; + _tos_cache1 = sta; + _tos_cache0 = c; + SET_CURRENT_CACHED_VALUES(3); + stack_pointer[0] = res; + stack_pointer += 1; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE()); break; } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 4cc9d9e03a545d..ad382bd8bf3dda 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -1377,6 +1377,10 @@ _PyStackRef start; _PyStackRef stop; _PyStackRef res; + _PyStackRef c; + _PyStackRef sta; + _PyStackRef sto; + _PyStackRef value; // _SPECIALIZE_BINARY_SLICE { #if ENABLE_SPECIALIZATION @@ -1388,36 +1392,55 @@ stop = stack_pointer[-1]; start = stack_pointer[-2]; container = stack_pointer[-3]; - _PyFrame_SetStackPointer(frame, stack_pointer); - PyObject *slice = _PyBuildSlice_ConsumeRefs(PyStackRef_AsPyObjectSteal(start), - PyStackRef_AsPyObjectSteal(stop)); - stack_pointer = _PyFrame_GetStackPointer(frame); + PyObject *slice = PySlice_New(PyStackRef_AsPyObjectBorrow(start), + PyStackRef_AsPyObjectBorrow(stop), + NULL); PyObject *res_o; if (slice == NULL) { res_o = NULL; } else { - stack_pointer += -2; - ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); res_o = PyObject_GetItem(PyStackRef_AsPyObjectBorrow(container), slice); Py_DECREF(slice); stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += 2; } - stack_pointer += -3; - ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); - _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_CLOSE(container); - stack_pointer = _PyFrame_GetStackPointer(frame); if (res_o == NULL) { JUMP_TO_LABEL(error); } res = PyStackRef_FromPyObjectSteal(res_o); + c = container; + sta = start; + sto = stop; + } + // _POP_TOP + { + value = sto; + stack_pointer[-3] = res; + stack_pointer[-2] = c; + stack_pointer[-1] = sta; + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_XCLOSE(value); + stack_pointer = _PyFrame_GetStackPointer(frame); + } + // _POP_TOP + { + value = sta; + stack_pointer += -1; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_XCLOSE(value); + stack_pointer = _PyFrame_GetStackPointer(frame); + } + // _POP_TOP + { + value = c; + stack_pointer += -1; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_XCLOSE(value); + stack_pointer = _PyFrame_GetStackPointer(frame); } - stack_pointer[0] = res; - stack_pointer += 1; - ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 0863d5dd8f8df7..16e9134b1ea7f9 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -1491,7 +1491,7 @@ dummy_func(void) { sym_set_const(callable, list_append); } - op(_BINARY_SLICE, (container, start, stop -- res)) { + op(_BINARY_SLICE, (container, start, stop -- res, c, sta, sto)) { // Slicing a string/list/tuple always returns the same type. PyTypeObject *type = sym_get_type(container); if (type == &PyUnicode_Type || @@ -1503,6 +1503,9 @@ dummy_func(void) { else { res = sym_new_not_null(ctx); } + c = container; + sta = start; + sto = stop; } op(_GUARD_GLOBALS_VERSION, (version/1 --)) { diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 9a51d2fa366661..8ab45f6b3cce48 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -903,8 +903,15 @@ } case _BINARY_SLICE: { + JitOptRef stop; + JitOptRef start; JitOptRef container; JitOptRef res; + JitOptRef c; + JitOptRef sta; + JitOptRef sto; + stop = stack_pointer[-1]; + start = stack_pointer[-2]; container = stack_pointer[-3]; PyTypeObject *type = sym_get_type(container); if (type == &PyUnicode_Type || @@ -916,9 +923,15 @@ else { res = sym_new_not_null(ctx); } - CHECK_STACK_BOUNDS(-2); + c = container; + sta = start; + sto = stop; + CHECK_STACK_BOUNDS(1); stack_pointer[-3] = res; - stack_pointer += -2; + stack_pointer[-2] = c; + stack_pointer[-1] = sta; + stack_pointer[0] = sto; + stack_pointer += 1; ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; }