Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions conftest.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

import os

import pytest

from cuda.pathfinder import get_cuda_path_or_home


def pytest_collection_modifyitems(config, items): # noqa: ARG001
cuda_home = os.environ.get("CUDA_HOME")
cuda_home = get_cuda_path_or_home()
for item in items:
nodeid = item.nodeid.replace("\\", "/")

Expand Down
2 changes: 1 addition & 1 deletion cuda_bindings/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ To run these tests:

Cython tests are located in `tests/cython` and need to be built. These builds have the same CUDA Toolkit header requirements as [Installing from Source](https://nvidia.github.io/cuda-python/cuda-bindings/latest/install.html#requirements) where the major.minor version must match `cuda.bindings`. To build them:

1. Setup environment variable `CUDA_HOME` with the path to the CUDA Toolkit installation.
1. Set up environment variable `CUDA_PATH` (or `CUDA_HOME`) with the path to the CUDA Toolkit installation. Note: If both are set, `CUDA_PATH` takes precedence.
2. Run `build_tests` script located in `tests/cython` appropriate to your platform. This will both cythonize the tests and build them.

To run these tests:
Expand Down
25 changes: 13 additions & 12 deletions cuda_bindings/build_hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,14 @@


@functools.cache
def _get_cuda_paths() -> list[str]:
CUDA_HOME = os.environ.get("CUDA_HOME", os.environ.get("CUDA_PATH", None))
if not CUDA_HOME:
raise RuntimeError("Environment variable CUDA_HOME or CUDA_PATH is not set")
CUDA_HOME = CUDA_HOME.split(os.pathsep)
print("CUDA paths:", CUDA_HOME)
return CUDA_HOME
def _get_cuda_path() -> str:
from cuda.pathfinder import get_cuda_path_or_home

cuda_path = get_cuda_path_or_home()
if not cuda_path:
raise RuntimeError("Environment variable CUDA_PATH or CUDA_HOME is not set")
print("CUDA path:", cuda_path)
return cuda_path


# -----------------------------------------------------------------------
Expand Down Expand Up @@ -133,8 +134,8 @@ def _fetch_header_paths(required_headers, include_path_list):
if missing_headers:
error_message = "Couldn't find required headers: "
error_message += ", ".join(missing_headers)
cuda_paths = _get_cuda_paths()
raise RuntimeError(f'{error_message}\nIs CUDA_HOME setup correctly? (CUDA_HOME="{cuda_paths}")')
cuda_path = _get_cuda_path()
raise RuntimeError(f'{error_message}\nIs CUDA_PATH setup correctly? (CUDA_PATH="{cuda_path}")')

return header_dict

Expand Down Expand Up @@ -291,7 +292,7 @@ def _build_cuda_bindings(strip=False):

global _extensions

cuda_paths = _get_cuda_paths()
cuda_path = _get_cuda_path()

if os.environ.get("PARALLEL_LEVEL") is not None:
warn(
Expand All @@ -307,7 +308,7 @@ def _build_cuda_bindings(strip=False):
compile_for_coverage = bool(int(os.environ.get("CUDA_PYTHON_COVERAGE", "0")))

# Parse CUDA headers
include_path_list = [os.path.join(path, "include") for path in cuda_paths]
include_path_list = [os.path.join(cuda_path, "include")]
header_dict = _fetch_header_paths(_REQUIRED_HEADERS, include_path_list)
found_types, found_functions, found_values, found_struct, struct_list = _parse_headers(
header_dict, include_path_list, parser_caching
Expand Down Expand Up @@ -347,7 +348,7 @@ def _build_cuda_bindings(strip=False):
] + include_path_list
library_dirs = [sysconfig.get_path("platlib"), os.path.join(os.sys.prefix, "lib")]
cudalib_subdirs = [r"lib\x64"] if sys.platform == "win32" else ["lib64", "lib"]
library_dirs.extend(os.path.join(prefix, subdir) for prefix in cuda_paths for subdir in cudalib_subdirs)
library_dirs.extend(os.path.join(cuda_path, subdir) for subdir in cudalib_subdirs)

extra_compile_args = []
extra_link_args = []
Expand Down
9 changes: 8 additions & 1 deletion cuda_bindings/docs/source/environment_variables.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,14 @@ Runtime Environment Variables
Build-Time Environment Variables
--------------------------------

- ``CUDA_HOME`` or ``CUDA_PATH``: Specifies the location of the CUDA Toolkit.
- ``CUDA_PATH`` or ``CUDA_HOME``: Specifies the location of the CUDA Toolkit. If both are set, ``CUDA_PATH`` takes precedence.

.. note::
The ``CUDA_PATH`` > ``CUDA_HOME`` priority is determined by ``cuda-pathfinder``.
Earlier versions of ``cuda-pathfinder`` (before 1.5.0) used the opposite order
(``CUDA_HOME`` > ``CUDA_PATH``). See the
`cuda-pathfinder 1.5.0 release notes <https://nvidia.github.io/cuda-python/cuda-pathfinder/latest/release/1.5.0-notes.html>`_
for details and migration guidance.

- ``CUDA_PYTHON_PARSER_CACHING`` : bool, toggles the caching of parsed header files during the cuda-bindings build process. If caching is enabled (``CUDA_PYTHON_PARSER_CACHING`` is True), the cache path is set to ./cache_<library_name>, where <library_name> is derived from the cuda toolkit libraries used to build cuda-bindings.

Expand Down
4 changes: 2 additions & 2 deletions cuda_bindings/docs/source/install.rst
Original file line number Diff line number Diff line change
Expand Up @@ -87,11 +87,11 @@ Requirements

[^2]: The CUDA Runtime static library (``libcudart_static.a`` on Linux, ``cudart_static.lib`` on Windows) is part of the CUDA Toolkit. If using conda packages, it is contained in the ``cuda-cudart-static`` package.

Source builds require that the provided CUDA headers are of the same major.minor version as the ``cuda.bindings`` you're trying to build. Despite this requirement, note that the minor version compatibility is still maintained. Use the ``CUDA_HOME`` (or ``CUDA_PATH``) environment variable to specify the location of your headers. For example, if your headers are located in ``/usr/local/cuda/include``, then you should set ``CUDA_HOME`` with:
Source builds require that the provided CUDA headers are of the same major.minor version as the ``cuda.bindings`` you're trying to build. Despite this requirement, note that the minor version compatibility is still maintained. Use the ``CUDA_PATH`` (or ``CUDA_HOME``) environment variable to specify the location of your headers. If both are set, ``CUDA_PATH`` takes precedence. For example, if your headers are located in ``/usr/local/cuda/include``, then you should set ``CUDA_PATH`` with:

.. code-block:: console

$ export CUDA_HOME=/usr/local/cuda
$ export CUDA_PATH=/usr/local/cuda

See `Environment Variables <environment_variables.rst>`_ for a description of other build-time environment variables.

Expand Down
1 change: 1 addition & 0 deletions cuda_bindings/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ requires = [
"setuptools_scm[simple]>=8",
"cython>=3.2,<3.3",
"pyclibrary>=0.1.7",
"cuda-pathfinder",
]
build-backend = "build_hooks"
backend-path = ["."]
Expand Down
2 changes: 1 addition & 1 deletion cuda_core/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ Alternatively, from the repository root you can use a simple script:

Cython tests are located in `tests/cython` and need to be built. These builds have the same CUDA Toolkit header requirements as [those of cuda.bindings](https://nvidia.github.io/cuda-python/cuda-bindings/latest/install.html#requirements) where the major.minor version must match `cuda.bindings`. To build them:

1. Set up environment variable `CUDA_HOME` with the path to the CUDA Toolkit installation.
1. Set up environment variable `CUDA_PATH` (or `CUDA_HOME`) with the path to the CUDA Toolkit installation. Note: If both are set, `CUDA_PATH` takes precedence.
2. Run `build_tests` script located in `tests/cython` appropriate to your platform. This will both cythonize the tests and build them.

To run these tests:
Expand Down
40 changes: 20 additions & 20 deletions cuda_core/build_hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,13 @@


@functools.cache
def _get_cuda_paths() -> list[str]:
cuda_path = os.environ.get("CUDA_PATH", os.environ.get("CUDA_HOME", None))
def _get_cuda_path() -> str:
from cuda.pathfinder import get_cuda_path_or_home

cuda_path = get_cuda_path_or_home()
if not cuda_path:
raise RuntimeError("Environment variable CUDA_PATH or CUDA_HOME is not set")
cuda_path = cuda_path.split(os.pathsep)
print("CUDA paths:", cuda_path)
print("CUDA path:", cuda_path)
return cuda_path


Expand All @@ -60,21 +61,20 @@ def _determine_cuda_major_version() -> str:
return cuda_major

# Derive from the CUDA headers (the authoritative source for what we compile against).
cuda_path = _get_cuda_paths()
for root in cuda_path:
cuda_h = os.path.join(root, "include", "cuda.h")
try:
with open(cuda_h, encoding="utf-8") as f:
for line in f:
m = re.match(r"^#\s*define\s+CUDA_VERSION\s+(\d+)\s*$", line)
if m:
v = int(m.group(1))
# CUDA_VERSION is e.g. 12020 for 12.2.
cuda_major = str(v // 1000)
print("CUDA MAJOR VERSION:", cuda_major)
return cuda_major
except OSError:
continue
cuda_path = _get_cuda_path()
cuda_h = os.path.join(cuda_path, "include", "cuda.h")
try:
with open(cuda_h, encoding="utf-8") as f:
for line in f:
m = re.match(r"^#\s*define\s+CUDA_VERSION\s+(\d+)\s*$", line)
if m:
v = int(m.group(1))
# CUDA_VERSION is e.g. 12020 for 12.2.
cuda_major = str(v // 1000)
print("CUDA MAJOR VERSION:", cuda_major)
return cuda_major
except OSError:
pass

# CUDA_PATH or CUDA_HOME is required for the build, so we should not reach here
# in normal circumstances. Raise an error to make the issue clear.
Expand Down Expand Up @@ -132,7 +132,7 @@ def get_sources(mod_name):

return sources

all_include_dirs = [os.path.join(root, "include") for root in _get_cuda_paths()]
all_include_dirs = [os.path.join(_get_cuda_path(), "include")]
extra_compile_args = []
if COMPILE_FOR_COVERAGE:
# CYTHON_TRACE_NOGIL indicates to trace nogil functions. It is not
Expand Down
5 changes: 3 additions & 2 deletions cuda_core/examples/thread_block_cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
ProgramOptions,
launch,
)
from cuda.pathfinder import get_cuda_path_or_home

# print cluster info using a kernel and store results in pinned memory
code = r"""
Expand Down Expand Up @@ -65,9 +66,9 @@ def main():
print("This example requires NumPy 2.2.5 or later", file=sys.stderr)
sys.exit(1)

cuda_path = os.environ.get("CUDA_PATH", os.environ.get("CUDA_HOME"))
cuda_path = get_cuda_path_or_home()
if cuda_path is None:
print("this example requires a valid CUDA_PATH environment variable set", file=sys.stderr)
print("This example requires CUDA_PATH or CUDA_HOME to point to a CUDA toolkit.", file=sys.stderr)
sys.exit(1)
cuda_include = os.path.join(cuda_path, "include")
if not os.path.isdir(cuda_include):
Expand Down
3 changes: 2 additions & 1 deletion cuda_core/examples/tma_tensor_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
StridedMemoryView,
launch,
)
from cuda.pathfinder import get_cuda_path_or_home

# ---------------------------------------------------------------------------
# CUDA kernel that uses TMA to load a 1-D tile into shared memory, then
Expand Down Expand Up @@ -103,7 +104,7 @@


def _get_cccl_include_paths():
cuda_path = os.environ.get("CUDA_PATH", os.environ.get("CUDA_HOME"))
cuda_path = get_cuda_path_or_home()
if cuda_path is None:
print("This example requires CUDA_PATH or CUDA_HOME to point to a CUDA toolkit.", file=sys.stderr)
sys.exit(1)
Expand Down
3 changes: 2 additions & 1 deletion cuda_core/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
requires = [
"setuptools>=80",
"setuptools-scm[simple]>=8",
"Cython>=3.2,<3.3"
"Cython>=3.2,<3.3",
"cuda-pathfinder"
]
build-backend = "build_hooks"
backend-path = ["."]
Expand Down
4 changes: 3 additions & 1 deletion cuda_core/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@

import pytest

from cuda.pathfinder import get_cuda_path_or_home

try:
from cuda.bindings import driver
except ImportError:
Expand Down Expand Up @@ -253,6 +255,6 @@ def test_something(memory_resource_factory):


skipif_need_cuda_headers = pytest.mark.skipif(
not os.path.isdir(os.path.join(os.environ.get("CUDA_PATH", ""), "include")),
get_cuda_path_or_home() is None,
reason="need CUDA header",
)
5 changes: 3 additions & 2 deletions cuda_core/tests/helpers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

import functools
import os
from typing import Union

from cuda.core._utils.cuda_utils import handle_return
from cuda.pathfinder import get_cuda_path_or_home
from cuda_python_test_helpers import *

CUDA_PATH = os.environ.get("CUDA_PATH")
CUDA_PATH = get_cuda_path_or_home()
CUDA_INCLUDE_PATH = None
CCCL_INCLUDE_PATHS = None
if CUDA_PATH is not None:
Expand Down
11 changes: 8 additions & 3 deletions cuda_core/tests/test_build_hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@

import pytest

from cuda.pathfinder import get_cuda_path_or_home

# build_hooks.py imports Cython and setuptools at the top level, so skip if not available
pytest.importorskip("Cython")
pytest.importorskip("setuptools")
Expand Down Expand Up @@ -66,8 +68,9 @@ def _check_version_detection(
cuda_h = include_dir / "cuda.h"
cuda_h.write_text(f"#define CUDA_VERSION {cuda_version}\n")

build_hooks._get_cuda_paths.cache_clear()
build_hooks._get_cuda_path.cache_clear()
build_hooks._determine_cuda_major_version.cache_clear()
get_cuda_path_or_home.cache_clear()

mock_env = {
k: v
Expand All @@ -90,8 +93,9 @@ class TestGetCudaMajorVersion:
@pytest.mark.parametrize("version", ["11", "12", "13", "14"])
def test_env_var_override(self, version):
"""CUDA_CORE_BUILD_MAJOR env var override works with various versions."""
build_hooks._get_cuda_paths.cache_clear()
build_hooks._get_cuda_path.cache_clear()
build_hooks._determine_cuda_major_version.cache_clear()
get_cuda_path_or_home.cache_clear()
with mock.patch.dict(os.environ, {"CUDA_CORE_BUILD_MAJOR": version}, clear=False):
result = build_hooks._determine_cuda_major_version()
assert result == version
Expand Down Expand Up @@ -123,8 +127,9 @@ def test_env_var_takes_priority_over_headers(self):

def test_missing_cuda_path_raises_error(self):
"""RuntimeError is raised when CUDA_PATH/CUDA_HOME not set and no env var override."""
build_hooks._get_cuda_paths.cache_clear()
build_hooks._get_cuda_path.cache_clear()
build_hooks._determine_cuda_major_version.cache_clear()
get_cuda_path_or_home.cache_clear()
with (
mock.patch.dict(os.environ, {}, clear=True),
pytest.raises(RuntimeError, match="CUDA_PATH or CUDA_HOME"),
Expand Down
1 change: 1 addition & 0 deletions cuda_pathfinder/cuda/pathfinder/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
from cuda.pathfinder._static_libs.find_static_lib import (
locate_static_lib as locate_static_lib,
)
from cuda.pathfinder._utils.env_vars import get_cuda_path_or_home as get_cuda_path_or_home

from cuda.pathfinder._version import __version__ # isort: skip

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import shutil

from cuda.pathfinder._binaries import supported_nvidia_binaries
from cuda.pathfinder._utils.env_vars import get_cuda_home_or_path
from cuda.pathfinder._utils.env_vars import get_cuda_path_or_home
from cuda.pathfinder._utils.find_sub_dirs import find_sub_dirs_all_sitepackages
from cuda.pathfinder._utils.platform_aware import IS_WINDOWS

Expand Down Expand Up @@ -97,7 +97,7 @@ def find_nvidia_binary_utility(utility_name: str) -> str | None:
dirs.append(os.path.join(conda_prefix, "bin"))

# 3. Search in CUDA Toolkit (CUDA_PATH, then CUDA_HOME)
if (cuda_home := get_cuda_home_or_path()) is not None:
if (cuda_home := get_cuda_path_or_home()) is not None:
if IS_WINDOWS:
dirs.append(os.path.join(cuda_home, "bin", "x64"))
dirs.append(os.path.join(cuda_home, "bin", "x86_64"))
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

from __future__ import annotations
Expand Down Expand Up @@ -50,7 +50,7 @@

# Driver libraries: shipped with the NVIDIA display driver, always on the
# system linker path. These skip all CTK search steps (site-packages,
# conda, CUDA_HOME, canary) and go straight to system search.
# conda, CUDA_PATH, canary) and go straight to system search.
_DRIVER_ONLY_LIBNAMES = frozenset(name for name, desc in LIB_DESCRIPTORS.items() if desc.packaged_with == "driver")


Expand All @@ -60,7 +60,7 @@ def _load_driver_lib_no_cache(desc: LibDescriptor) -> LoadedDL:
Driver libs (libcuda, libnvidia-ml) are part of the display driver, not
the CUDA Toolkit. They are expected to be discoverable via the platform's
native loader mechanisms, so the full CTK search cascade (site-packages,
conda, CUDA_HOME, canary) is unnecessary.
conda, CUDA_PATH, canary) is unnecessary.
"""
loaded = LOADER.check_if_already_loaded_from_elsewhere(desc, False)
if loaded is not None:
Expand Down Expand Up @@ -246,7 +246,7 @@ def load_nvidia_dynamic_lib(libname: str) -> LoadedDL:

4. **Environment variables**

- If set, use ``CUDA_HOME`` or ``CUDA_PATH`` (in that order).
- If set, use ``CUDA_PATH`` or ``CUDA_HOME`` (in that order).
On Windows, this is the typical way system-installed CTK DLLs are
located. Note that the NVIDIA CTK installer automatically
adds ``CUDA_PATH`` to the system-wide environment.
Expand All @@ -269,7 +269,7 @@ def load_nvidia_dynamic_lib(libname: str) -> LoadedDL:
0. Already loaded in the current process
1. OS default mechanisms (``dlopen`` / ``LoadLibraryExW``)

The CTK-specific steps (site-packages, conda, ``CUDA_HOME``, canary
The CTK-specific steps (site-packages, conda, ``CUDA_PATH``, canary
probe) are skipped entirely.

Notes:
Expand Down
Loading