NVIDIA · mdboom · Mar 20, 2026 · Mar 20, 2026 · Mar 20, 2026 · Mar 20, 2026
diff --git a/.github/workflows/test-wheel-linux.yml b/.github/workflows/test-wheel-linux.yml
@@ -96,12 +96,15 @@ jobs:
         uses: nv-gha-runners/setup-proxy-cache@main
         continue-on-error: true
 
+      - name: Install uv
+        uses: astral-sh/setup-uv@v7
+
       - name: Install dependencies
         uses: ./.github/actions/install_unix_deps
         continue-on-error: false
         with:
           # for artifact fetching, graphics libs
-          dependencies: "jq wget libgl1 libegl1"
+          dependencies: "jq wget libgl1 libegl1 g++"
           dependent_exes: "jq wget"
 
       - name: Set environment variables

diff --git a/cuda_core/examples/cuda_graphs.py b/cuda_core/examples/cuda_graphs.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # SPDX-License-Identifier: Apache-2.0
 
@@ -10,6 +10,10 @@
 #
 # ################################################################################
 
+# /// script
+# dependencies = ["cuda_bindings", "cuda_core", "cupy-cuda13x"]
+# ///
+
 import sys
 import time
 
@@ -121,6 +125,9 @@ def main():
         end_time = time.time()
 
         graph_execution_time = end_time - start_time
+        if graph_execution_time == 0.0:
+            print("Graph execution time is too fast to measure accurately.")
+            graph_execution_time = 1e-9  # Assign a small value to avoid division by zero in speedup calculation
         print(f"Graph execution time: {graph_execution_time:.6f} seconds")
 
         # Verify results

diff --git a/cuda_core/examples/gl_interop_plasma.py b/cuda_core/examples/gl_interop_plasma.py
@@ -53,9 +53,10 @@
 # effect popular in the demoscene). The window title shows the current FPS.
 # Close the window or press Escape to exit.
 #
-# Requirements
-# ============
-#   pip install pyglet
+
+# /// script
+# dependencies = ["cuda_bindings", "cuda_core>0.6.0", "pyglet"]
+# ///
 
 import ctypes
 import sys

diff --git a/cuda_core/examples/jit_lto_fractal.py b/cuda_core/examples/jit_lto_fractal.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # SPDX-License-Identifier: Apache-2.0
 
@@ -12,6 +12,10 @@
 #
 # ################################################################################
 
+# /// script
+# dependencies = ["cuda_bindings", "cuda_core", "cupy-cuda13x"]
+# ///
+
 import argparse
 import sys
 

diff --git a/cuda_core/examples/memory_ops.py b/cuda_core/examples/memory_ops.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # SPDX-License-Identifier: Apache-2.0
 
@@ -10,6 +10,10 @@
 #
 # ################################################################################
 
+# /// script
+# dependencies = ["cuda_bindings", "cuda_core", "cupy-cuda13x"]
+# ///
+
 import sys
 
 import cupy as cp

diff --git a/cuda_core/examples/pytorch_example.py b/cuda_core/examples/pytorch_example.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # SPDX-License-Identifier: Apache-2.0
 
@@ -9,6 +9,10 @@
 #
 # ################################################################################
 
+# /// script
+# dependencies = ["cuda_bindings", "cuda_core", "torch"]
+# ///
+
 import sys
 
 import torch

diff --git a/cuda_core/examples/saxpy.py b/cuda_core/examples/saxpy.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # SPDX-License-Identifier: Apache-2.0
 
@@ -10,6 +10,10 @@
 #
 # ################################################################################
 
+# /// script
+# dependencies = ["cuda_bindings", "cuda_core", "cupy-cuda13x"]
+# ///
+
 import sys
 
 import cupy as cp

diff --git a/cuda_core/examples/show_device_properties.py b/cuda_core/examples/show_device_properties.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # SPDX-License-Identifier: Apache-2.0
 
@@ -9,6 +9,10 @@
 #
 # ################################################################################
 
+# /// script
+# dependencies = ["cuda_bindings", "cuda_core"]
+# ///
+
 import sys
 
 from cuda.core import Device, system

diff --git a/cuda_core/examples/simple_multi_gpu_example.py b/cuda_core/examples/simple_multi_gpu_example.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # SPDX-License-Identifier: Apache-2.0
 
@@ -9,6 +9,10 @@
 #
 # ################################################################################
 
+# /// script
+# dependencies = ["cuda_bindings", "cuda_core", "cupy-cuda13x"]
+# ///
+
 import sys
 
 import cupy as cp

diff --git a/cuda_core/examples/strided_memory_view_cpu.py b/cuda_core/examples/strided_memory_view_cpu.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # SPDX-License-Identifier: Apache-2.0
 
@@ -9,6 +9,10 @@
 #
 # ################################################################################
 
+# /// script
+# dependencies = ["cuda_bindings", "cuda_core", "cffi", "setuptools"]
+# ///
+
 import importlib
 import string
 import sys

diff --git a/cuda_core/examples/strided_memory_view_gpu.py b/cuda_core/examples/strided_memory_view_gpu.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # SPDX-License-Identifier: Apache-2.0
 
@@ -9,6 +9,10 @@
 #
 # ################################################################################
 
+# /// script
+# dependencies = ["cuda_bindings", "cuda_core", "cupy-cuda13x"]
+# ///
+
 import string
 import sys
 

diff --git a/cuda_core/examples/thread_block_cluster.py b/cuda_core/examples/thread_block_cluster.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # SPDX-License-Identifier: Apache-2.0
 
@@ -10,6 +10,10 @@
 #
 # ################################################################################
 
+# /// script
+# dependencies = ["cuda_bindings", "cuda_core"]
+# ///
+
 import os
 import sys
 

diff --git a/cuda_core/examples/tma_tensor_map.py b/cuda_core/examples/tma_tensor_map.py
@@ -22,6 +22,10 @@
 #
 # ################################################################################
 
+# /// script
+# dependencies = ["cuda_bindings", "cuda_core>0.6.0", "cupy-cuda13x"]
+# ///
+
 import os
 import sys
 

diff --git a/cuda_core/examples/vector_add.py b/cuda_core/examples/vector_add.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # SPDX-License-Identifier: Apache-2.0
 
@@ -9,6 +9,10 @@
 #
 # ################################################################################
 
+# /// script
+# dependencies = ["cuda_bindings", "cuda_core", "cupy-cuda13x"]
+# ///
+
 import cupy as cp
 
 from cuda.core import Device, LaunchConfig, Program, ProgramOptions, launch

diff --git a/cuda_core/pyproject.toml b/cuda_core/pyproject.toml
@@ -56,7 +56,7 @@ cu12 = ["cuda-bindings[all]==12.*"]
 cu13 = ["cuda-bindings[all]==13.*"]
 
 [dependency-groups]
-test = ["cython>=3.2,<3.3", "setuptools", "pytest>=6.2.4", "pytest-randomly", "pytest-repeat", "pytest-rerunfailures"]
+test = ["cython>=3.2,<3.3", "setuptools", "pytest>=6.2.4", "pytest-randomly", "pytest-repeat", "pytest-rerunfailures", "cffi"]
 ml-dtypes = ["ml-dtypes>=0.5.4,<0.6.0"]
 test-cu12 = [ {include-group = "ml-dtypes" }, {include-group = "test" }, "cupy-cuda12x; python_version < '3.14'", "cuda-toolkit[cudart]==12.*"]  # runtime headers needed by CuPy
 test-cu13 = [ {include-group = "ml-dtypes" }, {include-group = "test" }, "cupy-cuda13x; python_version < '3.14'", "cuda-toolkit[cudart]==13.*"]  # runtime headers needed by CuPy

diff --git a/cuda_core/tests/example_tests/test_basic_examples.py b/cuda_core/tests/example_tests/test_basic_examples.py
@@ -1,24 +1,130 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 
 # If we have subcategories of examples in the future, this file can be split along those lines
 
 import glob
 import os
+import platform
+import re
+import subprocess
+import sys
 
 import pytest
 
-from cuda.core import Device
+from cuda.core import Device, system
+
+# Each example in cuda_core/examples is tested in two ways:
+#
+# 1) Directly running the example in the same environment as the test suite.
+#    This gives access to the current development version of cuda_core.
+# 2) Running the example in a subprocess with "uv run" to verify that the PEP
+#    723 metadata works correctly and that the example can be run in isolation from
+#    the test suite.
+
+
+def has_compute_capability_9_or_higher() -> bool:
+    return Device().compute_capability >= (9, 0)
+
+
+def has_multiple_devices() -> bool:
+    return system.get_num_devices() >= 2
+
+
+def has_display() -> bool:
+    # We assume that we don't want to open any windows during testing,
+    # so we always return False
+    return False
+
+
+def is_not_windows() -> bool:
+    return sys.platform != "win32"
+
+
+def is_x86_64() -> bool:
+    return platform.machine() == "x86_64"  # NOTE(review): Windows reports "AMD64", so this is False there; confirm
+
+
+def uv_installed() -> bool:
+    """Return True when ``uv --version`` runs successfully; its output is captured, not printed."""
+    try:
+        return subprocess.run(["uv", "--version"], capture_output=True, check=False).returncode == 0  # noqa: S607
+    except OSError:  # uv is missing from PATH (FileNotFoundError) or cannot be executed
+        return False
+
+
+PACKAGE_REQUIREMENTS = {
+    "cuda_graphs.py": ["cupy"],
+    "jit_lto_fractal.py": ["cupy"],
+    "memory_ops.py": ["cupy"],
+    "pytorch_example.py": ["torch"],
+    "saxpy.py": ["cupy"],
+    "simple_multi_gpu_example.py": ["cupy"],
+    "strided_memory_view_cpu.py": ["cffi"],
+    "strided_memory_view_gpu.py": ["cupy"],
+    "tma_tensor_map.py": ["cupy"],
+    "vector_add.py": ["cupy"],
+}
+
+
+SYSTEM_REQUIREMENTS = {
+    "gl_interop_plasma.py": has_display,
+    "pytorch_example.py": is_x86_64,  # PyTorch only provides CUDA support for x86_64
+    "simple_multi_gpu_example.py": has_multiple_devices,
+    "strided_memory_view_cpu.py": is_not_windows,
+    "thread_block_cluster.py": has_compute_capability_9_or_higher,
+}
 
-from .utils import run_example
 
 samples_path = os.path.join(os.path.dirname(__file__), "..", "..", "examples")
-sample_files = glob.glob(samples_path + "**/*.py", recursive=True)
+sample_files = sorted(os.path.basename(x) for x in glob.glob(os.path.join(samples_path, "*.py")))
 
 
 @pytest.mark.parametrize("example", sample_files)
 class TestExamples:
-    def test_example(self, example, deinit_cuda):
-        run_example(samples_path, example)
-        if Device().device_id != 0:
-            Device(0).set_current()
+    def test_example(self, example):
+        """Run ``example`` with the test suite's own interpreter; skip when a requirement is unmet."""
+        for package in PACKAGE_REQUIREMENTS.get(example, []):
+            try:
+                __import__(package)
+            except ImportError:
+                pytest.skip(f"Skipping {example} due to missing package requirement: {package}")
+
+        requirement_met = SYSTEM_REQUIREMENTS.get(example, lambda: True)
+        if not requirement_met():
+            pytest.skip(f"Skipping {example} due to unmet system requirement")
+
+        script = os.path.join(samples_path, example)
+        result = subprocess.run([sys.executable, script], capture_output=True)  # noqa: S603
+        if result.returncode != 0:
+            if result.stdout:
+                print(result.stdout.decode())
+            if result.stderr:
+                print(result.stderr.decode(), file=sys.stderr)
+            raise AssertionError(f"`{example}` failed ({result.returncode})")
+
+    @pytest.mark.skipif(not uv_installed(), reason="uv is required to test PEP 723 metadata installation")
+    def test_example_pep723(self, example):
+        """Run ``example`` via ``uv run`` to check that its PEP 723 metadata resolves and works."""
+        system_requirement = SYSTEM_REQUIREMENTS.get(example, lambda: True)
+        if not system_requirement():
+            pytest.skip(f"Skipping {example} due to unmet system requirement")
+
+        example_path = os.path.join(samples_path, example)
+
+        # Have uv use the same version of Python that is running the test suite,
+        # not because they have to match but to give Python version coverage in CI.
+        py_version = f"{sys.version_info.major}.{sys.version_info.minor}"
+
+        process = subprocess.run(["uv", "run", "--python", py_version, example_path], capture_output=True)  # noqa: S603, S607
+        if process.returncode != 0:
+            # This example requires a development version of cuda_core, so requirements can't be met.
+            # That's ok, it was tested in the other test, so we just skip it instead of failing.
+            if re.search(r"Because only cuda-(core|bindings)", process.stderr.decode()):
+                pytest.skip(f"Skipping {example} due to unmet PEP 723 requirement")
+
+            if process.stdout:
+                print(process.stdout.decode())
+            if process.stderr:
+                print(process.stderr.decode(), file=sys.stderr)
+            raise AssertionError(f"`uv run {example}` failed ({process.returncode})")