Skip to content

Commit ded8366

Browse files
committed
gh-144438: Fix false sharing between QSBR and tlbc_index
Align the QSBR thread state array to a 64-byte cache line boundary and, in the free-threaded build, add 64 bytes of padding at the end of _PyThreadStateImpl. Depending on heap layout, the QSBR array could end up sharing a cache line with a thread's tlbc_index, causing QSBR quiescent state updates to contend with reads of tlbc_index in RESUME_CHECK. Whether this false sharing occurs depends on earlier allocations during interpreter init, so it can appear or disappear with seemingly unrelated changes. Either change alone is sufficient to fix the specific issue, but both are worthwhile to avoid similar problems in the future.
1 parent 172135c commit ded8366

File tree

4 files changed

+23
-7
lines changed

4 files changed

+23
-7
lines changed

Include/internal/pycore_qsbr.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,8 +83,9 @@ struct _qsbr_shared {
8383
// Minimum observed read sequence of all QSBR thread states
8484
uint64_t rd_seq;
8585

86-
// Array of QSBR thread states.
86+
// Array of QSBR thread states (aligned to 64 bytes).
8787
struct _qsbr_pad *array;
88+
void *array_raw; // raw allocation pointer (for free)
8889
Py_ssize_t size;
8990

9091
// Freelist of unused _qsbr_thread_states (protected by mutex)

Include/internal/pycore_tstate.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,12 @@ typedef struct _PyThreadStateImpl {
102102
#if _Py_TIER2
103103
struct _PyJitTracerState *jit_tracer_state;
104104
#endif
105+
106+
#ifdef Py_GIL_DISABLED
107+
// gh-144438: Add padding to ensure that the fields above don't share a
108+
// cache line with other allocations.
109+
char __padding[64];
110+
#endif
105111
} _PyThreadStateImpl;
106112

107113
#ifdef __cplusplus
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Align the QSBR thread state array to a 64-byte cache line boundary to
2+
avoid false sharing in the free-threaded build.

Python/qsbr.c

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -85,22 +85,28 @@ grow_thread_array(struct _qsbr_shared *shared)
8585
new_size = MIN_ARRAY_SIZE;
8686
}
8787

88-
struct _qsbr_pad *array = PyMem_RawCalloc(new_size, sizeof(*array));
89-
if (array == NULL) {
88+
// Overallocate by 63 bytes so we can align to a 64-byte boundary.
89+
// This avoids potential false sharing between the first entry and other
90+
// allocations.
91+
size_t alloc_size = (size_t)new_size * sizeof(struct _qsbr_pad) + 63;
92+
void *raw = PyMem_RawCalloc(1, alloc_size);
93+
if (raw == NULL) {
9094
return -1;
9195
}
96+
struct _qsbr_pad *array = (struct _qsbr_pad *)(((uintptr_t)raw + 63) & ~(uintptr_t)63);
9297

93-
struct _qsbr_pad *old = shared->array;
94-
if (old != NULL) {
98+
void *old_raw = shared->array_raw;
99+
if (shared->array != NULL) {
95100
memcpy(array, shared->array, shared->size * sizeof(*array));
96101
}
97102

98103
shared->array = array;
104+
shared->array_raw = raw;
99105
shared->size = new_size;
100106
shared->freelist = NULL;
101107
initialize_new_array(shared);
102108

103-
PyMem_RawFree(old);
109+
PyMem_RawFree(old_raw);
104110
return 0;
105111
}
106112

@@ -257,8 +263,9 @@ void
257263
_Py_qsbr_fini(PyInterpreterState *interp)
258264
{
259265
struct _qsbr_shared *shared = &interp->qsbr;
260-
PyMem_RawFree(shared->array);
266+
PyMem_RawFree(shared->array_raw);
261267
shared->array = NULL;
268+
shared->array_raw = NULL;
262269
shared->size = 0;
263270
shared->freelist = NULL;
264271
}

0 commit comments

Comments
 (0)