NVIDIA · rwgk · Mar 20, 2026 · Mar 20, 2026 · Mar 20, 2026 · Mar 20, 2026
diff --git a/conftest.py b/conftest.py
@@ -1,13 +1,14 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 
-import os
 
 import pytest
 
+from cuda.pathfinder import get_cuda_path_or_home
+
 
 def pytest_collection_modifyitems(config, items):  # noqa: ARG001
-    cuda_home = os.environ.get("CUDA_HOME")
+    cuda_home = get_cuda_path_or_home()
     for item in items:
         nodeid = item.nodeid.replace("\\", "/")
 

diff --git a/cuda_bindings/README.md b/cuda_bindings/README.md
@@ -33,7 +33,7 @@ To run these tests:
 
 Cython tests are located in `tests/cython` and need to be built. These builds have the same CUDA Toolkit header requirements as [Installing from Source](https://nvidia.github.io/cuda-python/cuda-bindings/latest/install.html#requirements) where the major.minor version must match `cuda.bindings`. To build them:
 
-1. Setup environment variable `CUDA_HOME` with the path to the CUDA Toolkit installation.
+1. Set up environment variable `CUDA_PATH` (or `CUDA_HOME`) with the path to the CUDA Toolkit installation. Note: If both are set, `CUDA_PATH` takes precedence.
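-2. Run `build_tests` script located in `test/cython` appropriate to your platform. This will both cythonize the tests and build them.
+2. Run `build_tests` script located in `tests/cython` appropriate to your platform. This will both cythonize the tests and build them.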
 
 To run these tests:

diff --git a/cuda_bindings/build_hooks.py b/cuda_bindings/build_hooks.py
@@ -34,13 +34,14 @@
 
 
 @functools.cache
-def _get_cuda_paths() -> list[str]:
-    CUDA_HOME = os.environ.get("CUDA_HOME", os.environ.get("CUDA_PATH", None))
-    if not CUDA_HOME:
-        raise RuntimeError("Environment variable CUDA_HOME or CUDA_PATH is not set")
-    CUDA_HOME = CUDA_HOME.split(os.pathsep)
-    print("CUDA paths:", CUDA_HOME)
-    return CUDA_HOME
+def _get_cuda_path() -> str:
+    from cuda.pathfinder import get_cuda_path_or_home
+
+    cuda_path = get_cuda_path_or_home()
+    if not cuda_path:
+        raise RuntimeError("Environment variable CUDA_PATH or CUDA_HOME is not set")
+    print("CUDA path:", cuda_path)
+    return cuda_path
 
 
 # -----------------------------------------------------------------------
@@ -133,8 +134,8 @@ def _fetch_header_paths(required_headers, include_path_list):
     if missing_headers:
         error_message = "Couldn't find required headers: "
         error_message += ", ".join(missing_headers)
-        cuda_paths = _get_cuda_paths()
-        raise RuntimeError(f'{error_message}\nIs CUDA_HOME setup correctly? (CUDA_HOME="{cuda_paths}")')
+        cuda_path = _get_cuda_path()
+        raise RuntimeError(f'{error_message}\nIs CUDA_PATH setup correctly? (CUDA_PATH="{cuda_path}")')
 
     return header_dict
 
@@ -291,7 +292,7 @@ def _build_cuda_bindings(strip=False):
 
     global _extensions
 
-    cuda_paths = _get_cuda_paths()
+    cuda_path = _get_cuda_path()
 
     if os.environ.get("PARALLEL_LEVEL") is not None:
         warn(
@@ -307,7 +308,7 @@ def _build_cuda_bindings(strip=False):
     compile_for_coverage = bool(int(os.environ.get("CUDA_PYTHON_COVERAGE", "0")))
 
     # Parse CUDA headers
-    include_path_list = [os.path.join(path, "include") for path in cuda_paths]
+    include_path_list = [os.path.join(cuda_path, "include")]
     header_dict = _fetch_header_paths(_REQUIRED_HEADERS, include_path_list)
     found_types, found_functions, found_values, found_struct, struct_list = _parse_headers(
         header_dict, include_path_list, parser_caching
@@ -347,7 +348,7 @@ def _build_cuda_bindings(strip=False):
     ] + include_path_list
     library_dirs = [sysconfig.get_path("platlib"), os.path.join(os.sys.prefix, "lib")]
     cudalib_subdirs = [r"lib\x64"] if sys.platform == "win32" else ["lib64", "lib"]
-    library_dirs.extend(os.path.join(prefix, subdir) for prefix in cuda_paths for subdir in cudalib_subdirs)
+    library_dirs.extend(os.path.join(cuda_path, subdir) for subdir in cudalib_subdirs)
 
     extra_compile_args = []
     extra_link_args = []

diff --git a/cuda_bindings/docs/source/environment_variables.rst b/cuda_bindings/docs/source/environment_variables.rst
@@ -15,7 +15,14 @@ Runtime Environment Variables
 Build-Time Environment Variables
 --------------------------------
 
-- ``CUDA_HOME`` or ``CUDA_PATH``: Specifies the location of the CUDA Toolkit.
+- ``CUDA_PATH`` or ``CUDA_HOME``: Specifies the location of the CUDA Toolkit. If both are set, ``CUDA_PATH`` takes precedence.
+
+  .. note::
+     The ``CUDA_PATH`` > ``CUDA_HOME`` priority is determined by ``cuda-pathfinder``.
+     Earlier versions of ``cuda-pathfinder`` (before 1.5.0) used the opposite order
+     (``CUDA_HOME`` > ``CUDA_PATH``). See the
+     `cuda-pathfinder 1.5.0 release notes <https://nvidia.github.io/cuda-python/cuda-pathfinder/latest/release/1.5.0-notes.html>`_
+     for details and migration guidance.
 
 - ``CUDA_PYTHON_PARSER_CACHING`` : bool, toggles the caching of parsed header files during the cuda-bindings build process. If caching is enabled (``CUDA_PYTHON_PARSER_CACHING`` is True), the cache path is set to ./cache_<library_name>, where <library_name> is derived from the cuda toolkit libraries used to build cuda-bindings.
 

diff --git a/cuda_bindings/docs/source/install.rst b/cuda_bindings/docs/source/install.rst
@@ -87,11 +87,11 @@ Requirements
 
 [^2]: The CUDA Runtime static library (``libcudart_static.a`` on Linux, ``cudart_static.lib`` on Windows) is part of the CUDA Toolkit. If using conda packages, it is contained in the ``cuda-cudart-static`` package.
 
-Source builds require that the provided CUDA headers are of the same major.minor version as the ``cuda.bindings`` you're trying to build. Despite this requirement, note that the minor version compatibility is still maintained. Use the ``CUDA_HOME`` (or ``CUDA_PATH``) environment variable to specify the location of your headers. For example, if your headers are located in ``/usr/local/cuda/include``, then you should set ``CUDA_HOME`` with:
+Source builds require that the provided CUDA headers are of the same major.minor version as the ``cuda.bindings`` you're trying to build. Despite this requirement, note that the minor version compatibility is still maintained. Use the ``CUDA_PATH`` (or ``CUDA_HOME``) environment variable to specify the location of your headers. If both are set, ``CUDA_PATH`` takes precedence. For example, if your headers are located in ``/usr/local/cuda/include``, then you should set ``CUDA_PATH`` with:
 
 .. code-block:: console
 
-   $ export CUDA_HOME=/usr/local/cuda
+   $ export CUDA_PATH=/usr/local/cuda
 
 See `Environment Variables <environment_variables.rst>`_ for a description of other build-time environment variables.
 

diff --git a/cuda_bindings/pyproject.toml b/cuda_bindings/pyproject.toml
@@ -6,6 +6,7 @@ requires = [
     "setuptools_scm[simple]>=8",
     "cython>=3.2,<3.3",
     "pyclibrary>=0.1.7",
+    "cuda-pathfinder>=1.5",  # build_hooks.py imports get_cuda_path_or_home; CUDA_PATH-first order starts with 1.5.0
 ]
 build-backend = "build_hooks"
 backend-path = ["."]

diff --git a/cuda_core/README.md b/cuda_core/README.md
@@ -26,7 +26,7 @@ Alternatively, from the repository root you can use a simple script:
 
 Cython tests are located in `tests/cython` and need to be built. These builds have the same CUDA Toolkit header requirements as [those of cuda.bindings](https://nvidia.github.io/cuda-python/cuda-bindings/latest/install.html#requirements) where the major.minor version must match `cuda.bindings`. To build them:
 
-1. Set up environment variable `CUDA_HOME` with the path to the CUDA Toolkit installation.
+1. Set up environment variable `CUDA_PATH` (or `CUDA_HOME`) with the path to the CUDA Toolkit installation. Note: If both are set, `CUDA_PATH` takes precedence.
 2. Run `build_tests` script located in `tests/cython` appropriate to your platform. This will both cythonize the tests and build them.
 
 To run these tests:

diff --git a/cuda_core/build_hooks.py b/cuda_core/build_hooks.py
@@ -29,12 +29,13 @@
 
 
 @functools.cache
-def _get_cuda_paths() -> list[str]:
-    cuda_path = os.environ.get("CUDA_PATH", os.environ.get("CUDA_HOME", None))
+def _get_cuda_path() -> str:
+    from cuda.pathfinder import get_cuda_path_or_home
+
+    cuda_path = get_cuda_path_or_home()
     if not cuda_path:
         raise RuntimeError("Environment variable CUDA_PATH or CUDA_HOME is not set")
-    cuda_path = cuda_path.split(os.pathsep)
-    print("CUDA paths:", cuda_path)
+    print("CUDA path:", cuda_path)
     return cuda_path
 
 
@@ -60,21 +61,20 @@ def _determine_cuda_major_version() -> str:
         return cuda_major
 
     # Derive from the CUDA headers (the authoritative source for what we compile against).
-    cuda_path = _get_cuda_paths()
-    for root in cuda_path:
-        cuda_h = os.path.join(root, "include", "cuda.h")
-        try:
-            with open(cuda_h, encoding="utf-8") as f:
-                for line in f:
-                    m = re.match(r"^#\s*define\s+CUDA_VERSION\s+(\d+)\s*$", line)
-                    if m:
-                        v = int(m.group(1))
-                        # CUDA_VERSION is e.g. 12020 for 12.2.
-                        cuda_major = str(v // 1000)
-                        print("CUDA MAJOR VERSION:", cuda_major)
-                        return cuda_major
-        except OSError:
-            continue
+    cuda_path = _get_cuda_path()
+    cuda_h = os.path.join(cuda_path, "include", "cuda.h")
+    try:
+        with open(cuda_h, encoding="utf-8") as f:
+            for line in f:
+                m = re.match(r"^#\s*define\s+CUDA_VERSION\s+(\d+)\s*$", line)
+                if m:
+                    v = int(m.group(1))
+                    # CUDA_VERSION is e.g. 12020 for 12.2.
+                    cuda_major = str(v // 1000)
+                    print("CUDA MAJOR VERSION:", cuda_major)
+                    return cuda_major
+    except OSError:
+        pass
 
     # CUDA_PATH or CUDA_HOME is required for the build, so we should not reach here
     # in normal circumstances. Raise an error to make the issue clear.
@@ -132,7 +132,7 @@ def get_sources(mod_name):
 
         return sources
 
-    all_include_dirs = [os.path.join(root, "include") for root in _get_cuda_paths()]
+    all_include_dirs = [os.path.join(_get_cuda_path(), "include")]
     extra_compile_args = []
     if COMPILE_FOR_COVERAGE:
         # CYTHON_TRACE_NOGIL indicates to trace nogil functions.  It is not

diff --git a/cuda_core/examples/thread_block_cluster.py b/cuda_core/examples/thread_block_cluster.py
@@ -23,6 +23,7 @@
     ProgramOptions,
     launch,
 )
+from cuda.pathfinder import get_cuda_path_or_home
 
 # print cluster info using a kernel and store results in pinned memory
 code = r"""
@@ -65,9 +66,9 @@ def main():
         print("This example requires NumPy 2.2.5 or later", file=sys.stderr)
         sys.exit(1)
 
-    cuda_path = os.environ.get("CUDA_PATH", os.environ.get("CUDA_HOME"))
+    cuda_path = get_cuda_path_or_home()
     if cuda_path is None:
-        print("this example requires a valid CUDA_PATH environment variable set", file=sys.stderr)
+        print("This example requires CUDA_PATH or CUDA_HOME to point to a CUDA toolkit.", file=sys.stderr)
         sys.exit(1)
     cuda_include = os.path.join(cuda_path, "include")
     if not os.path.isdir(cuda_include):

diff --git a/cuda_core/examples/tma_tensor_map.py b/cuda_core/examples/tma_tensor_map.py
@@ -36,6 +36,7 @@
     StridedMemoryView,
     launch,
 )
+from cuda.pathfinder import get_cuda_path_or_home
 
 # ---------------------------------------------------------------------------
 # CUDA kernel that uses TMA to load a 1-D tile into shared memory, then
@@ -103,7 +104,7 @@
 
 
 def _get_cccl_include_paths():
-    cuda_path = os.environ.get("CUDA_PATH", os.environ.get("CUDA_HOME"))
+    cuda_path = get_cuda_path_or_home()
     if cuda_path is None:
         print("This example requires CUDA_PATH or CUDA_HOME to point to a CUDA toolkit.", file=sys.stderr)
         sys.exit(1)

diff --git a/cuda_core/pyproject.toml b/cuda_core/pyproject.toml
@@ -6,7 +6,8 @@
 requires = [
     "setuptools>=80",
     "setuptools-scm[simple]>=8",
-    "Cython>=3.2,<3.3"
+    "Cython>=3.2,<3.3",
+    "cuda-pathfinder>=1.5"  # build_hooks.py imports get_cuda_path_or_home; CUDA_PATH-first order starts with 1.5.0
 ]
 build-backend = "build_hooks"
 backend-path = ["."]

diff --git a/cuda_core/tests/conftest.py b/cuda_core/tests/conftest.py
@@ -9,6 +9,8 @@
 
 import pytest
 
+from cuda.pathfinder import get_cuda_path_or_home
+
 try:
     from cuda.bindings import driver
 except ImportError:
@@ -253,6 +255,8 @@ def test_something(memory_resource_factory):
 
 
+# Skip unless the resolved toolkit root (CUDA_PATH, else CUDA_HOME) actually contains an
+# include/ directory; an environment variable being set does not guarantee headers exist.
 skipif_need_cuda_headers = pytest.mark.skipif(
-    not os.path.isdir(os.path.join(os.environ.get("CUDA_PATH", ""), "include")),
+    not os.path.isdir(os.path.join(get_cuda_path_or_home() or "", "include")),
     reason="need CUDA header",
 )
diff --git a/cuda_core/tests/helpers/__init__.py b/cuda_core/tests/helpers/__init__.py
@@ -1,14 +1,15 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 
 import functools
 import os
 from typing import Union
 
 from cuda.core._utils.cuda_utils import handle_return
+from cuda.pathfinder import get_cuda_path_or_home
 from cuda_python_test_helpers import *
 
-CUDA_PATH = os.environ.get("CUDA_PATH")
+CUDA_PATH = get_cuda_path_or_home()
 CUDA_INCLUDE_PATH = None
 CCCL_INCLUDE_PATHS = None
 if CUDA_PATH is not None:

diff --git a/cuda_core/tests/test_build_hooks.py b/cuda_core/tests/test_build_hooks.py
@@ -24,6 +24,8 @@
 
 import pytest
 
+from cuda.pathfinder import get_cuda_path_or_home
+
 # build_hooks.py imports Cython and setuptools at the top level, so skip if not available
 pytest.importorskip("Cython")
 pytest.importorskip("setuptools")
@@ -66,8 +68,9 @@ def _check_version_detection(
         cuda_h = include_dir / "cuda.h"
         cuda_h.write_text(f"#define CUDA_VERSION {cuda_version}\n")
 
-        build_hooks._get_cuda_paths.cache_clear()
+        build_hooks._get_cuda_path.cache_clear()
         build_hooks._determine_cuda_major_version.cache_clear()
+        get_cuda_path_or_home.cache_clear()
 
         mock_env = {
             k: v
@@ -90,8 +93,9 @@ class TestGetCudaMajorVersion:
     @pytest.mark.parametrize("version", ["11", "12", "13", "14"])
     def test_env_var_override(self, version):
         """CUDA_CORE_BUILD_MAJOR env var override works with various versions."""
-        build_hooks._get_cuda_paths.cache_clear()
+        build_hooks._get_cuda_path.cache_clear()
         build_hooks._determine_cuda_major_version.cache_clear()
+        get_cuda_path_or_home.cache_clear()
         with mock.patch.dict(os.environ, {"CUDA_CORE_BUILD_MAJOR": version}, clear=False):
             result = build_hooks._determine_cuda_major_version()
             assert result == version
@@ -123,8 +127,9 @@ def test_env_var_takes_priority_over_headers(self):
 
     def test_missing_cuda_path_raises_error(self):
         """RuntimeError is raised when CUDA_PATH/CUDA_HOME not set and no env var override."""
-        build_hooks._get_cuda_paths.cache_clear()
+        build_hooks._get_cuda_path.cache_clear()
         build_hooks._determine_cuda_major_version.cache_clear()
+        get_cuda_path_or_home.cache_clear()
         with (
             mock.patch.dict(os.environ, {}, clear=True),
             pytest.raises(RuntimeError, match="CUDA_PATH or CUDA_HOME"),

diff --git a/cuda_pathfinder/cuda/pathfinder/__init__.py b/cuda_pathfinder/cuda/pathfinder/__init__.py
@@ -59,6 +59,7 @@
 from cuda.pathfinder._static_libs.find_static_lib import (
     locate_static_lib as locate_static_lib,
 )
+from cuda.pathfinder._utils.env_vars import get_cuda_path_or_home as get_cuda_path_or_home
 
 from cuda.pathfinder._version import __version__  # isort: skip
 

diff --git a/cuda_pathfinder/cuda/pathfinder/_binaries/find_nvidia_binary_utility.py b/cuda_pathfinder/cuda/pathfinder/_binaries/find_nvidia_binary_utility.py
@@ -6,7 +6,7 @@
 import shutil
 
 from cuda.pathfinder._binaries import supported_nvidia_binaries
-from cuda.pathfinder._utils.env_vars import get_cuda_home_or_path
+from cuda.pathfinder._utils.env_vars import get_cuda_path_or_home
 from cuda.pathfinder._utils.find_sub_dirs import find_sub_dirs_all_sitepackages
 from cuda.pathfinder._utils.platform_aware import IS_WINDOWS
 
@@ -97,7 +97,7 @@ def find_nvidia_binary_utility(utility_name: str) -> str | None:
             dirs.append(os.path.join(conda_prefix, "bin"))
 
-    # 3. Search in CUDA Toolkit (CUDA_HOME/CUDA_PATH)
+    # 3. Search in CUDA Toolkit (CUDA_PATH/CUDA_HOME)
-    if (cuda_home := get_cuda_home_or_path()) is not None:
+    if (cuda_home := get_cuda_path_or_home()) is not None:
         if IS_WINDOWS:
             dirs.append(os.path.join(cuda_home, "bin", "x64"))
             dirs.append(os.path.join(cuda_home, "bin", "x86_64"))

diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_nvidia_dynamic_lib.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_nvidia_dynamic_lib.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 
 from __future__ import annotations
@@ -50,7 +50,7 @@
 
 # Driver libraries: shipped with the NVIDIA display driver, always on the
 # system linker path.  These skip all CTK search steps (site-packages,
-# conda, CUDA_HOME, canary) and go straight to system search.
+# conda, CUDA_PATH, canary) and go straight to system search.
 _DRIVER_ONLY_LIBNAMES = frozenset(name for name, desc in LIB_DESCRIPTORS.items() if desc.packaged_with == "driver")
 
 
@@ -60,7 +60,7 @@ def _load_driver_lib_no_cache(desc: LibDescriptor) -> LoadedDL:
     Driver libs (libcuda, libnvidia-ml) are part of the display driver, not
     the CUDA Toolkit. They are expected to be discoverable via the platform's
     native loader mechanisms, so the full CTK search cascade (site-packages,
-    conda, CUDA_HOME, canary) is unnecessary.
+    conda, CUDA_PATH, canary) is unnecessary.
     """
     loaded = LOADER.check_if_already_loaded_from_elsewhere(desc, False)
     if loaded is not None:
@@ -246,7 +246,7 @@ def load_nvidia_dynamic_lib(libname: str) -> LoadedDL:
 
         4. **Environment variables**
 
-           - If set, use ``CUDA_HOME`` or ``CUDA_PATH`` (in that order).
+           - If set, use ``CUDA_PATH`` or ``CUDA_HOME`` (in that order).
              On Windows, this is the typical way system-installed CTK DLLs are
              located. Note that the NVIDIA CTK installer automatically
              adds ``CUDA_PATH`` to the system-wide environment.
@@ -269,7 +269,7 @@ def load_nvidia_dynamic_lib(libname: str) -> LoadedDL:
         0. Already loaded in the current process
         1. OS default mechanisms (``dlopen`` / ``LoadLibraryExW``)
 
-        The CTK-specific steps (site-packages, conda, ``CUDA_HOME``, canary
+        The CTK-specific steps (site-packages, conda, ``CUDA_PATH``, canary
         probe) are skipped entirely.
 
     Notes: