Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 25 additions & 2 deletions Include/internal/pycore_lock.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ extern "C" {
# error "this header requires Py_BUILD_CORE define"
#endif

#if defined(MS_WINDOWS)
# include <intrin.h> // _mm_pause(), __yield()
#elif defined(HAVE_SCHED_H)
# include <sched.h> // sched_yield()
#endif

// _Py_UNLOCKED is defined as 0 and _Py_LOCKED as 1 in Include/cpython/pylock.h.
// NOTE(review): the values below (2, 4) look like further bit flags meant to
// be combined with those lock-state values -- confirm against their users.
#define _Py_HAS_PARKED 2
#define _Py_ONCE_INITIALIZED 4
Expand Down Expand Up @@ -70,8 +74,27 @@ PyMutex_LockFlags(PyMutex *m, _PyLockFlags flags)
// error messages) otherwise returns 0.
extern int _PyMutex_TryUnlock(PyMutex *m);

// Yield the processor to other threads (e.g., sched_yield).
extern void _Py_yield(void);
// Lightweight CPU pause hint for spin-wait loops (e.g., x86 PAUSE, AArch64 WFE).
// Falls back to sched_yield() on platforms without a known pause instruction.
static inline void
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I made it static inline because the function call overhead is more expensive than a single instruction.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function is only used in lock.c, why move it to header?

Copy link
Member Author

@corona10 corona10 Feb 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah yeah, we can move back to lock.c

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

umm no

_Py_yield();

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see, I was looking at older checkout of main branch. Making it static inline looks fine although I think LTO would have inlined it anyways.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think LTO would have inlined it anyways.

I think that same way, but just follow our old convention :)

_Py_yield(void)
{
    // Emit the cheapest spin-wait hint available for the target CPU.
    // The GCC/Clang inline-asm branches are tested first, then the MSVC
    // intrinsics from <intrin.h>. Every asm statement carries a "memory"
    // clobber so that all architectures get the same compiler barrier
    // around the hint.
#if defined(__x86_64__) || defined(__i386__)
    __asm__ volatile ("pause" ::: "memory");
#elif defined(__aarch64__)
    // NOTE(review): WFE may wait for an event instead of pausing briefly;
    // confirm that this is the intended behavior for a spin-wait hint.
    __asm__ volatile ("wfe" ::: "memory");
#elif defined(__arm__) && __ARM_ARCH >= 7
    __asm__ volatile ("yield" ::: "memory");
#elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__)
    // NOTE(review): presumably the PowerPC spin-loop hint encoding --
    // confirm against the Power ISA.
    __asm__ volatile ("or 27,27,27" ::: "memory");
#elif defined(_M_X64) || defined(_M_IX86)
    _mm_pause();
#elif defined(_M_ARM64) || defined(_M_ARM)
    __yield();
#elif defined(HAVE_SCHED_H)
    // No known pause instruction: yield the processor to other threads.
    sched_yield();
#endif
    // If none of the branches above applies, this function is a no-op.
}


// PyEvent is a one-time event notification
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Improve _Py_yield to use a lightweight CPU instruction. Patch by Donghee
Na.
18 changes: 0 additions & 18 deletions Python/lock.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,6 @@
#include "pycore_time.h" // _PyTime_Add()
#include "pycore_stats.h" // FT_STAT_MUTEX_SLEEP_INC()

#ifdef MS_WINDOWS
# ifndef WIN32_LEAN_AND_MEAN
# define WIN32_LEAN_AND_MEAN
# endif
# include <windows.h> // SwitchToThread()
#elif defined(HAVE_SCHED_H)
# include <sched.h> // sched_yield()
#endif

// If a thread waits on a lock for longer than TIME_TO_BE_FAIR_NS (1 ms), then
// the unlocking thread directly hands off ownership of the lock. This avoids
Expand All @@ -40,16 +32,6 @@ struct mutex_entry {
int handed_off;
};

// Yield the processor to other threads: SwitchToThread() on Windows,
// sched_yield() where <sched.h> is available. When neither macro is
// defined the function does nothing.
void
_Py_yield(void)
{
#if defined(MS_WINDOWS)
    (void)SwitchToThread();
#elif defined(HAVE_SCHED_H)
    (void)sched_yield();
#endif
}

PyLockStatus
_PyMutex_LockTimed(PyMutex *m, PyTime_t timeout, _PyLockFlags flags)
{
Expand Down
Loading