From e17027ad52054fb670fe5d7a4aa5b8c58b2a9f4e Mon Sep 17 00:00:00 2001 From: Andy Jost Date: Mon, 2 Feb 2026 15:15:40 -0800 Subject: [PATCH 1/3] Begin Cythonization of _program.py - Rename _program.py to _program.pyx - Convert Program to cdef class with _program.pxd declarations - Extract _MembersNeededForFinalize to module-level _ProgramMNFF (nested classes not allowed in cdef class) - Add __repr__ method to Program - Keep ProgramOptions as @dataclass (unchanged) - Keep weakref.finalize pattern for handle cleanup --- cuda_core/cuda/core/_program.pxd | 12 +++++ .../cuda/core/{_program.py => _program.pyx} | 46 ++++++++++--------- 2 files changed, 37 insertions(+), 21 deletions(-) create mode 100644 cuda_core/cuda/core/_program.pxd rename cuda_core/cuda/core/{_program.py => _program.pyx} (97%) diff --git a/cuda_core/cuda/core/_program.pxd b/cuda_core/cuda/core/_program.pxd new file mode 100644 index 0000000000..7dc89cf87d --- /dev/null +++ b/cuda_core/cuda/core/_program.pxd @@ -0,0 +1,12 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# +# SPDX-License-Identifier: Apache-2.0 + + +cdef class Program: + cdef: + object _mnff + str _backend + object _linker # Linker (not yet Cythonized) + object _options # ProgramOptions + object __weakref__ diff --git a/cuda_core/cuda/core/_program.py b/cuda_core/cuda/core/_program.pyx similarity index 97% rename from cuda_core/cuda/core/_program.py rename to cuda_core/cuda/core/_program.pyx index 1ef1aa51f5..45e5441cac 100644 --- a/cuda_core/cuda/core/_program.py +++ b/cuda_core/cuda/core/_program.pyx @@ -631,7 +631,27 @@ def __repr__(self): ProgramHandleT = Union["cuda.bindings.nvrtc.nvrtcProgram", LinkerHandleT] -class Program: +class _ProgramMNFF: + """Members needed for postrm release of program handles.""" + + __slots__ = "handle", "backend" + + def __init__(self, program_obj, handle, backend): + self.handle = handle + self.backend = backend + weakref.finalize(program_obj, self.close) + + def close(self): + if self.handle is not None: + if self.backend == "NVRTC": + handle_return(nvrtc.nvrtcDestroyProgram(self.handle)) + elif self.backend == "NVVM": + nvvm = _get_nvvm_module() + nvvm.destroy_program(self.handle) + self.handle = None + + +cdef class Program: """Represent a compilation machinery to process programs into :obj:`~_module.ObjectCode`. @@ -650,27 +670,8 @@ class Program: See :obj:`ProgramOptions` for more information. 
""" - class _MembersNeededForFinalize: - __slots__ = "handle", "backend" - - def __init__(self, program_obj, handle, backend): - self.handle = handle - self.backend = backend - weakref.finalize(program_obj, self.close) - - def close(self): - if self.handle is not None: - if self.backend == "NVRTC": - handle_return(nvrtc.nvrtcDestroyProgram(self.handle)) - elif self.backend == "NVVM": - nvvm = _get_nvvm_module() - nvvm.destroy_program(self.handle) - self.handle = None - - __slots__ = ("__weakref__", "_mnff", "_backend", "_linker", "_options") - def __init__(self, code, code_type, options: ProgramOptions = None): - self._mnff = Program._MembersNeededForFinalize(self, None, None) + self._mnff = _ProgramMNFF(self, None, None) self._options = options = check_or_create_options(ProgramOptions, options, "Program options") code_type = code_type.lower() @@ -858,3 +859,6 @@ def handle(self) -> ProgramHandleT: handle, call ``int(Program.handle)``. """ return self._mnff.handle + + def __repr__(self) -> str: + return f"" From 2f47e9ee08da9585be80636751dbf9c65d7bb314 Mon Sep 17 00:00:00 2001 From: Andy Jost Date: Mon, 2 Feb 2026 15:23:38 -0800 Subject: [PATCH 2/3] Extract Program helpers to module-level cdef functions - Move _translate_program_options to Program_translate_options (cdef) - Move _can_load_generated_ptx to Program_can_load_generated_ptx (cdef) - Remove unused TYPE_CHECKING import block - Follow _memory/_buffer.pyx helper function patterns --- cuda_core/cuda/core/_program.pyx | 65 +++++++++++++++++--------------- 1 file changed, 35 insertions(+), 30 deletions(-) diff --git a/cuda_core/cuda/core/_program.pyx b/cuda_core/cuda/core/_program.pyx index 45e5441cac..4d2eccd893 100644 --- a/cuda_core/cuda/core/_program.pyx +++ b/cuda_core/cuda/core/_program.pyx @@ -7,12 +7,9 @@ from __future__ import annotations import weakref from contextlib import contextmanager from dataclasses import dataclass -from typing import TYPE_CHECKING, Union +from typing import Union from 
warnings import warn -if TYPE_CHECKING: - import cuda.bindings - from cuda.core._device import Device from cuda.core._linker import Linker, LinkerHandleT, LinkerOptions from cuda.core._module import ObjectCode @@ -689,7 +686,7 @@ cdef class Program: elif code_type == "ptx": assert_type(code, str) self._linker = Linker( - ObjectCode._init(code.encode(), code_type), options=self._translate_program_options(options) + ObjectCode._init(code.encode(), code_type), options=Program_translate_options(options) ) self._backend = self._linker.backend @@ -711,36 +708,12 @@ cdef class Program: assert code_type not in supported_code_types, f"{code_type=}" raise RuntimeError(f"Unsupported {code_type=} ({supported_code_types=})") - def _translate_program_options(self, options: ProgramOptions) -> LinkerOptions: - return LinkerOptions( - name=options.name, - arch=options.arch, - max_register_count=options.max_register_count, - time=options.time, - link_time_optimization=options.link_time_optimization, - debug=options.debug, - lineinfo=options.lineinfo, - ftz=options.ftz, - prec_div=options.prec_div, - prec_sqrt=options.prec_sqrt, - fma=options.fma, - split_compile=options.split_compile, - ptxas_options=options.ptxas_options, - no_cache=options.no_cache, - ) - def close(self): """Destroy this program.""" if self._linker: self._linker.close() self._mnff.close() - @staticmethod - def _can_load_generated_ptx(): - driver_ver = handle_return(driver.cuDriverGetVersion()) - nvrtc_major, nvrtc_minor = handle_return(nvrtc.nvrtcVersion()) - return nvrtc_major * 1000 + nvrtc_minor * 10 <= driver_ver - def compile(self, target_type, name_expressions=(), logs=None): """Compile the program with a specific compilation type. 
@@ -768,7 +741,7 @@ cdef class Program: raise ValueError(f'Unsupported target_type="{target_type}" ({supported_target_types=})') if self._backend == "NVRTC": - if target_type == "ptx" and not self._can_load_generated_ptx(): + if target_type == "ptx" and not Program_can_load_generated_ptx(): warn( "The CUDA driver version is older than the backend version. " "The generated ptx will not be loadable by the current driver.", @@ -862,3 +835,35 @@ cdef class Program: def __repr__(self) -> str: return f"" + + +# ============================================================================= +# Helper functions +# ============================================================================= + + +cdef bint Program_can_load_generated_ptx(): + """Check if the driver can load PTX generated by the current NVRTC version.""" + driver_ver = handle_return(driver.cuDriverGetVersion()) + nvrtc_major, nvrtc_minor = handle_return(nvrtc.nvrtcVersion()) + return nvrtc_major * 1000 + nvrtc_minor * 10 <= driver_ver + + +cdef object Program_translate_options(object options): + """Translate ProgramOptions to LinkerOptions for PTX compilation.""" + return LinkerOptions( + name=options.name, + arch=options.arch, + max_register_count=options.max_register_count, + time=options.time, + link_time_optimization=options.link_time_optimization, + debug=options.debug, + lineinfo=options.lineinfo, + ftz=options.ftz, + prec_div=options.prec_div, + prec_sqrt=options.prec_sqrt, + fma=options.fma, + split_compile=options.split_compile, + ptxas_options=options.ptxas_options, + no_cache=options.no_cache, + ) From 85dbbb5918fa6c7422c71d581ecda59bca94adda Mon Sep 17 00:00:00 2001 From: Andy Jost Date: Mon, 2 Feb 2026 16:54:21 -0800 Subject: [PATCH 3/3] Complete Cythonization of _program.py - Reorganize file structure per developer guide (principal class first) - Add module docstring, __all__, type alias section - Factor long methods into cdef inline helpers - Add proper exception specs to cdef functions - Fix 
docstrings (use :class: refs, public paths) - Add type annotations to public methods - Inline _nvvm_exception_manager (single use) - Remove Union import, use | syntax - Add public Program.driver_can_load_nvrtc_ptx_output() API - Update tests to use new public API Closes #1082 --- cuda_core/cuda/core/_program.pxd | 2 +- cuda_core/cuda/core/_program.pyx | 1037 ++++++++++++++++-------------- cuda_core/tests/test_module.py | 4 +- 3 files changed, 544 insertions(+), 499 deletions(-) diff --git a/cuda_core/cuda/core/_program.pxd b/cuda_core/cuda/core/_program.pxd index 7dc89cf87d..444257f1e4 100644 --- a/cuda_core/cuda/core/_program.pxd +++ b/cuda_core/cuda/core/_program.pxd @@ -7,6 +7,6 @@ cdef class Program: cdef: object _mnff str _backend - object _linker # Linker (not yet Cythonized) + object _linker # Linker object _options # ProgramOptions object __weakref__ diff --git a/cuda_core/cuda/core/_program.pyx b/cuda_core/cuda/core/_program.pyx index 4d2eccd893..79a3cd4f7f 100644 --- a/cuda_core/cuda/core/_program.pyx +++ b/cuda_core/cuda/core/_program.pyx @@ -1,15 +1,19 @@ # SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # SPDX-License-Identifier: Apache-2.0 +"""Compilation machinery for CUDA programs. + +This module provides :class:`Program` for compiling source code into +:class:`~cuda.core.ObjectCode`, with :class:`ProgramOptions` for configuration. 
+""" from __future__ import annotations import weakref -from contextlib import contextmanager from dataclasses import dataclass -from typing import Union from warnings import warn +from cuda.bindings import driver, nvrtc from cuda.core._device import Device from cuda.core._linker import Linker, LinkerHandleT, LinkerOptions from cuda.core._module import ObjectCode @@ -18,115 +22,127 @@ from cuda.core._utils.cuda_utils import ( CUDAError, _handle_boolean_option, check_or_create_options, - driver, get_binding_version, handle_return, is_nested_sequence, is_sequence, - nvrtc, ) +__all__ = ["Program", "ProgramOptions"] -@contextmanager -def _nvvm_exception_manager(self): - """ - Taken from _linker.py - """ - try: - yield - except Exception as e: - error_log = "" - if hasattr(self, "_mnff"): - try: - nvvm = _get_nvvm_module() - logsize = nvvm.get_program_log_size(self._mnff.handle) - if logsize > 1: - log = bytearray(logsize) - nvvm.get_program_log(self._mnff.handle, log) - error_log = log.decode("utf-8", errors="backslashreplace") - except Exception: - error_log = "" - # Starting Python 3.11 we could also use Exception.add_note() for the same purpose, but - # unfortunately we are still supporting Python 3.10... - e.args = (e.args[0] + (f"\nNVVM program log: {error_log}" if error_log else ""), *e.args[1:]) - raise e +ProgramHandleT = nvrtc.nvrtcProgram | LinkerHandleT +"""Type alias for program handle types across different backends.""" -_nvvm_module = None -_nvvm_import_attempted = False +# ============================================================================= +# Principal Class +# ============================================================================= -def _get_nvvm_module(): - """ - Handles the import of NVVM module with version and availability checks. - NVVM bindings were added in cuda-bindings 12.9.0, so we need to handle cases where: - 1. cuda.bindings is not new enough (< 12.9.0) - 2. 
libnvvm is not found in the Python environment +cdef class Program: + """Represent a compilation machinery to process programs into + :class:`~cuda.core.ObjectCode`. - Returns: - The nvvm module if available and working + This object provides a unified interface to multiple underlying + compiler libraries. Compilation support is enabled for a wide + range of code types and compilation types. - Raises: - RuntimeError: If NVVM is not available due to version or library issues + Parameters + ---------- + code : str | bytes | bytearray + The source code to compile. For C++ and PTX, must be a string. + For NVVM IR, can be str, bytes, or bytearray. + code_type : str + The type of source code. Must be one of ``"c++"``, ``"ptx"``, or ``"nvvm"``. + options : :class:`ProgramOptions`, optional + Options to customize the compilation process. """ - global _nvvm_module, _nvvm_import_attempted - if _nvvm_import_attempted: - if _nvvm_module is None: - raise RuntimeError("NVVM module is not available (previous import attempt failed)") - return _nvvm_module + def __init__(self, code: str | bytes | bytearray, code_type: str, options: ProgramOptions | None = None): + Program_init(self, code, code_type, options) - _nvvm_import_attempted = True + def close(self): + """Destroy this program.""" + if self._linker: + self._linker.close() + self._mnff.close() - try: - version = get_binding_version() - if version < (12, 9): - raise RuntimeError( - f"NVVM bindings require cuda-bindings >= 12.9.0, but found {version[0]}.{version[1]}.x. " - "Please update cuda-bindings to use NVVM features." - ) + def compile( + self, target_type: str, name_expressions: tuple | list = (), logs = None + ) -> ObjectCode: + """Compile the program to the specified target type. - from cuda.bindings import nvvm - from cuda.bindings._internal.nvvm import _inspect_function_pointer + Parameters + ---------- + target_type : str + The compilation target. Must be one of ``"ptx"``, ``"cubin"``, or ``"ltoir"``. 
+ name_expressions : tuple | list, optional + Sequence of name expressions to make accessible in the compiled code. + Used for template instantiation and similar cases. + logs : object, optional + Object with a ``write`` method to receive compilation logs. - if _inspect_function_pointer("__nvvmCreateProgram") == 0: - raise RuntimeError("NVVM library (libnvvm) is not available in this Python environment. ") + Returns + ------- + :class:`~cuda.core.ObjectCode` + The compiled object code. + """ + return Program_compile(self, target_type, name_expressions, logs) - _nvvm_module = nvvm - return _nvvm_module + @property + def backend(self) -> str: + """Return this Program instance's underlying backend.""" + return self._backend - except RuntimeError as e: - _nvvm_module = None - raise e + @property + def handle(self) -> ProgramHandleT: + """Return the underlying handle object. + .. note:: -def _process_define_macro_inner(formatted_options, macro): - if isinstance(macro, str): - formatted_options.append(f"--define-macro={macro}") - return True - if isinstance(macro, tuple): - if len(macro) != 2 or any(not isinstance(val, str) for val in macro): - raise RuntimeError(f"Expected define_macro tuple[str, str], got {macro}") - formatted_options.append(f"--define-macro={macro[0]}={macro[1]}") - return True - return False + The type of the returned object depends on the backend. + .. caution:: -def _process_define_macro(formatted_options, macro): - union_type = "Union[str, tuple[str, str]]" - if _process_define_macro_inner(formatted_options, macro): - return - if is_nested_sequence(macro): - for seq_macro in macro: - if not _process_define_macro_inner(formatted_options, seq_macro): - raise RuntimeError(f"Expected define_macro {union_type}, got {seq_macro}") - return - raise RuntimeError(f"Expected define_macro {union_type}, list[{union_type}], got {macro}") + This handle is a Python object. To get the memory address of the underlying C + handle, call ``int(Program.handle)``. 
+ """ + return self._mnff.handle + + @staticmethod + def driver_can_load_nvrtc_ptx_output() -> bool: + """Check if the CUDA driver can load PTX generated by NVRTC. + + NVRTC generates PTX targeting a specific CUDA version. If the installed + driver is older than the NVRTC version, it may not be able to load the + generated PTX. + + Returns + ------- + bool + True if the driver version is new enough to load PTX generated + by the current NVRTC version, False otherwise. + + Examples + -------- + >>> if Program.driver_can_load_nvrtc_ptx_output(): + ... obj = program.compile("ptx") + ... kernel = obj.get_kernel("my_kernel") + """ + return _can_load_generated_ptx() + + def __repr__(self) -> str: + return f"" + + +# ============================================================================= +# Other Public Classes +# ============================================================================= @dataclass class ProgramOptions: - """Customizable options for configuring `Program`. + """Customizable options for configuring :class:`Program`. Attributes ---------- @@ -149,7 +165,7 @@ class ProgramOptions: Generate line-number information. Default: False device_code_optimize : bool, optional - Enable device code optimization. When specified along with ‘-G’, enables limited debug information generation + Enable device code optimization. When specified along with '-G', enables limited debug information generation for optimized device code. 
Default: None ptxas_options : Union[str, list[str]], optional @@ -351,238 +367,10 @@ class ProgramOptions: self.arch = f"sm_{Device().arch}" def _prepare_nvrtc_options(self) -> list[bytes]: - # Build NVRTC-specific options - options = [f"-arch={self.arch}"] - if self.relocatable_device_code is not None: - options.append(f"--relocatable-device-code={_handle_boolean_option(self.relocatable_device_code)}") - if self.extensible_whole_program is not None and self.extensible_whole_program: - options.append("--extensible-whole-program") - if self.debug is not None and self.debug: - options.append("--device-debug") - if self.lineinfo is not None and self.lineinfo: - options.append("--generate-line-info") - if self.device_code_optimize is not None and self.device_code_optimize: - options.append("--dopt=on") - if self.ptxas_options is not None: - opt_name = "--ptxas-options" - if isinstance(self.ptxas_options, str): - options.append(f"{opt_name}={self.ptxas_options}") - elif is_sequence(self.ptxas_options): - for opt_value in self.ptxas_options: - options.append(f"{opt_name}={opt_value}") - if self.max_register_count is not None: - options.append(f"--maxrregcount={self.max_register_count}") - if self.ftz is not None: - options.append(f"--ftz={_handle_boolean_option(self.ftz)}") - if self.prec_sqrt is not None: - options.append(f"--prec-sqrt={_handle_boolean_option(self.prec_sqrt)}") - if self.prec_div is not None: - options.append(f"--prec-div={_handle_boolean_option(self.prec_div)}") - if self.fma is not None: - options.append(f"--fmad={_handle_boolean_option(self.fma)}") - if self.use_fast_math is not None and self.use_fast_math: - options.append("--use_fast_math") - if self.extra_device_vectorization is not None and self.extra_device_vectorization: - options.append("--extra-device-vectorization") - if self.link_time_optimization is not None and self.link_time_optimization: - options.append("--dlink-time-opt") - if self.gen_opt_lto is not None and self.gen_opt_lto: - 
options.append("--gen-opt-lto") - if self.define_macro is not None: - _process_define_macro(options, self.define_macro) - if self.undefine_macro is not None: - if isinstance(self.undefine_macro, str): - options.append(f"--undefine-macro={self.undefine_macro}") - elif is_sequence(self.undefine_macro): - for macro in self.undefine_macro: - options.append(f"--undefine-macro={macro}") - if self.include_path is not None: - if isinstance(self.include_path, str): - options.append(f"--include-path={self.include_path}") - elif is_sequence(self.include_path): - for path in self.include_path: - options.append(f"--include-path={path}") - if self.pre_include is not None: - if isinstance(self.pre_include, str): - options.append(f"--pre-include={self.pre_include}") - elif is_sequence(self.pre_include): - for header in self.pre_include: - options.append(f"--pre-include={header}") - if self.no_source_include is not None and self.no_source_include: - options.append("--no-source-include") - if self.std is not None: - options.append(f"--std={self.std}") - if self.builtin_move_forward is not None: - options.append(f"--builtin-move-forward={_handle_boolean_option(self.builtin_move_forward)}") - if self.builtin_initializer_list is not None: - options.append(f"--builtin-initializer-list={_handle_boolean_option(self.builtin_initializer_list)}") - if self.disable_warnings is not None and self.disable_warnings: - options.append("--disable-warnings") - if self.restrict is not None and self.restrict: - options.append("--restrict") - if self.device_as_default_execution_space is not None and self.device_as_default_execution_space: - options.append("--device-as-default-execution-space") - if self.device_int128 is not None and self.device_int128: - options.append("--device-int128") - if self.device_float128 is not None and self.device_float128: - options.append("--device-float128") - if self.optimization_info is not None: - options.append(f"--optimization-info={self.optimization_info}") - if 
self.no_display_error_number is not None and self.no_display_error_number: - options.append("--no-display-error-number") - if self.diag_error is not None: - if isinstance(self.diag_error, int): - options.append(f"--diag-error={self.diag_error}") - elif is_sequence(self.diag_error): - for error in self.diag_error: - options.append(f"--diag-error={error}") - if self.diag_suppress is not None: - if isinstance(self.diag_suppress, int): - options.append(f"--diag-suppress={self.diag_suppress}") - elif is_sequence(self.diag_suppress): - for suppress in self.diag_suppress: - options.append(f"--diag-suppress={suppress}") - if self.diag_warn is not None: - if isinstance(self.diag_warn, int): - options.append(f"--diag-warn={self.diag_warn}") - elif is_sequence(self.diag_warn): - for warn in self.diag_warn: - options.append(f"--diag-warn={warn}") - if self.brief_diagnostics is not None: - options.append(f"--brief-diagnostics={_handle_boolean_option(self.brief_diagnostics)}") - if self.time is not None: - options.append(f"--time={self.time}") - if self.split_compile is not None: - options.append(f"--split-compile={self.split_compile}") - if self.fdevice_syntax_only is not None and self.fdevice_syntax_only: - options.append("--fdevice-syntax-only") - if self.minimal is not None and self.minimal: - options.append("--minimal") - if self.no_cache is not None and self.no_cache: - options.append("--no-cache") - if self.fdevice_time_trace is not None: - options.append(f"--fdevice-time-trace={self.fdevice_time_trace}") - if self.frandom_seed is not None: - options.append(f"--frandom-seed={self.frandom_seed}") - if self.ofast_compile is not None: - options.append(f"--Ofast-compile={self.ofast_compile}") - # PCH options (CUDA 12.8+) - if self.pch is not None and self.pch: - options.append("--pch") - if self.create_pch is not None: - options.append(f"--create-pch={self.create_pch}") - if self.use_pch is not None: - options.append(f"--use-pch={self.use_pch}") - if self.pch_dir is not None: 
- options.append(f"--pch-dir={self.pch_dir}") - if self.pch_verbose is not None: - options.append(f"--pch-verbose={_handle_boolean_option(self.pch_verbose)}") - if self.pch_messages is not None: - options.append(f"--pch-messages={_handle_boolean_option(self.pch_messages)}") - if self.instantiate_templates_in_pch is not None: - options.append( - f"--instantiate-templates-in-pch={_handle_boolean_option(self.instantiate_templates_in_pch)}" - ) - if self.numba_debug: - options.append("--numba-debug") - return [o.encode() for o in options] + return _prepare_nvrtc_options_impl(self) def _prepare_nvvm_options(self, as_bytes: bool = True) -> list[bytes] | list[str]: - options = [] - - # Options supported by NVVM - assert self.arch is not None - arch = self.arch - if arch.startswith("sm_"): - arch = f"compute_{arch[3:]}" - options.append(f"-arch={arch}") - if self.debug is not None and self.debug: - options.append("-g") - if self.device_code_optimize is False: - options.append("-opt=0") - elif self.device_code_optimize is True: - options.append("-opt=3") - # NVVM uses 0/1 instead of true/false for boolean options - if self.ftz is not None: - options.append(f"-ftz={'1' if self.ftz else '0'}") - if self.prec_sqrt is not None: - options.append(f"-prec-sqrt={'1' if self.prec_sqrt else '0'}") - if self.prec_div is not None: - options.append(f"-prec-div={'1' if self.prec_div else '0'}") - if self.fma is not None: - options.append(f"-fma={'1' if self.fma else '0'}") - - # Check for unsupported options and raise error if they are set - unsupported = [] - if self.relocatable_device_code is not None: - unsupported.append("relocatable_device_code") - if self.extensible_whole_program is not None and self.extensible_whole_program: - unsupported.append("extensible_whole_program") - if self.lineinfo is not None and self.lineinfo: - unsupported.append("lineinfo") - if self.ptxas_options is not None: - unsupported.append("ptxas_options") - if self.max_register_count is not None: - 
unsupported.append("max_register_count") - if self.use_fast_math is not None and self.use_fast_math: - unsupported.append("use_fast_math") - if self.extra_device_vectorization is not None and self.extra_device_vectorization: - unsupported.append("extra_device_vectorization") - if self.gen_opt_lto is not None and self.gen_opt_lto: - unsupported.append("gen_opt_lto") - if self.define_macro is not None: - unsupported.append("define_macro") - if self.undefine_macro is not None: - unsupported.append("undefine_macro") - if self.include_path is not None: - unsupported.append("include_path") - if self.pre_include is not None: - unsupported.append("pre_include") - if self.no_source_include is not None and self.no_source_include: - unsupported.append("no_source_include") - if self.std is not None: - unsupported.append("std") - if self.builtin_move_forward is not None: - unsupported.append("builtin_move_forward") - if self.builtin_initializer_list is not None: - unsupported.append("builtin_initializer_list") - if self.disable_warnings is not None and self.disable_warnings: - unsupported.append("disable_warnings") - if self.restrict is not None and self.restrict: - unsupported.append("restrict") - if self.device_as_default_execution_space is not None and self.device_as_default_execution_space: - unsupported.append("device_as_default_execution_space") - if self.device_int128 is not None and self.device_int128: - unsupported.append("device_int128") - if self.optimization_info is not None: - unsupported.append("optimization_info") - if self.no_display_error_number is not None and self.no_display_error_number: - unsupported.append("no_display_error_number") - if self.diag_error is not None: - unsupported.append("diag_error") - if self.diag_suppress is not None: - unsupported.append("diag_suppress") - if self.diag_warn is not None: - unsupported.append("diag_warn") - if self.brief_diagnostics is not None: - unsupported.append("brief_diagnostics") - if self.time is not None: - 
unsupported.append("time") - if self.split_compile is not None: - unsupported.append("split_compile") - if self.fdevice_syntax_only is not None and self.fdevice_syntax_only: - unsupported.append("fdevice_syntax_only") - if self.minimal is not None and self.minimal: - unsupported.append("minimal") - if self.numba_debug is not None and self.numba_debug: - unsupported.append("numba_debug") - if unsupported: - raise CUDAError(f"The following options are not supported by NVVM backend: {', '.join(unsupported)}") - - if as_bytes: - return [o.encode() for o in options] - else: - return options + return _prepare_nvvm_options_impl(self, as_bytes) def as_bytes(self, backend: str) -> list[bytes]: """Convert program options to bytes format for the specified backend. @@ -625,7 +413,13 @@ class ProgramOptions: return f"ProgramOptions(name={self.name!r}, arch={self.arch!r})" -ProgramHandleT = Union["cuda.bindings.nvrtc.nvrtcProgram", LinkerHandleT] +# ============================================================================= +# Private Classes and Helper Functions +# ============================================================================= + +# Module-level state for NVVM lazy loading +cdef object _nvvm_module = None +cdef bint _nvvm_import_attempted = False class _ProgramMNFF: @@ -648,208 +442,73 @@ class _ProgramMNFF: self.handle = None -cdef class Program: - """Represent a compilation machinery to process programs into - :obj:`~_module.ObjectCode`. - - This object provides a unified interface to multiple underlying - compiler libraries. Compilation support is enabled for a wide - range of code types and compilation types. - - Parameters - ---------- - code : Any - String of the CUDA Runtime Compilation program. - code_type : Any - String of the code type. Currently ``"ptx"``, ``"c++"``, and ``"nvvm"`` are supported. - options : ProgramOptions, optional - A ProgramOptions object to customize the compilation process. - See :obj:`ProgramOptions` for more information. 
- """ - - def __init__(self, code, code_type, options: ProgramOptions = None): - self._mnff = _ProgramMNFF(self, None, None) - - self._options = options = check_or_create_options(ProgramOptions, options, "Program options") - code_type = code_type.lower() - - if code_type == "c++": - assert_type(code, str) - # TODO: support pre-loaded headers & include names - # TODO: allow tuples once NVIDIA/cuda-python#72 is resolved - - self._mnff.handle = handle_return(nvrtc.nvrtcCreateProgram(code.encode(), options._name, 0, [], [])) - self._mnff.backend = "NVRTC" - self._backend = "NVRTC" - self._linker = None - - elif code_type == "ptx": - assert_type(code, str) - self._linker = Linker( - ObjectCode._init(code.encode(), code_type), options=Program_translate_options(options) - ) - self._backend = self._linker.backend - - elif code_type == "nvvm": - if isinstance(code, str): - code = code.encode("utf-8") - elif not isinstance(code, (bytes, bytearray)): - raise TypeError("NVVM IR code must be provided as str, bytes, or bytearray") - - nvvm = _get_nvvm_module() - self._mnff.handle = nvvm.create_program() - self._mnff.backend = "NVVM" - nvvm.add_module_to_program(self._mnff.handle, code, len(code), options._name.decode()) - self._backend = "NVVM" - self._linker = None - - else: - supported_code_types = ("c++", "ptx", "nvvm") - assert code_type not in supported_code_types, f"{code_type=}" - raise RuntimeError(f"Unsupported {code_type=} ({supported_code_types=})") - - def close(self): - """Destroy this program.""" - if self._linker: - self._linker.close() - self._mnff.close() - - def compile(self, target_type, name_expressions=(), logs=None): - """Compile the program with a specific compilation type. +def _get_nvvm_module(): + """Get the NVVM module, importing it lazily with availability checks.""" + global _nvvm_module, _nvvm_import_attempted - Parameters - ---------- - target_type : Any - String of the targeted compilation type. - Supported options are "ptx", "cubin" and "ltoir". 
- name_expressions : Union[list, tuple], optional - List of explicit name expressions to become accessible. - (Default to no expressions) - logs : Any, optional - Object with a write method to receive the logs generated - from compilation. - (Default to no logs) + if _nvvm_import_attempted: + if _nvvm_module is None: + raise RuntimeError("NVVM module is not available (previous import attempt failed)") + return _nvvm_module - Returns - ------- - :obj:`~_module.ObjectCode` - Newly created code object. + _nvvm_import_attempted = True - """ - supported_target_types = ("ptx", "cubin", "ltoir") - if target_type not in supported_target_types: - raise ValueError(f'Unsupported target_type="{target_type}" ({supported_target_types=})') - - if self._backend == "NVRTC": - if target_type == "ptx" and not Program_can_load_generated_ptx(): - warn( - "The CUDA driver version is older than the backend version. " - "The generated ptx will not be loadable by the current driver.", - stacklevel=1, - category=RuntimeWarning, - ) - if name_expressions: - for n in name_expressions: - handle_return( - nvrtc.nvrtcAddNameExpression(self._mnff.handle, n.encode()), - handle=self._mnff.handle, - ) - options = self._options.as_bytes("nvrtc") - handle_return( - nvrtc.nvrtcCompileProgram(self._mnff.handle, len(options), options), - handle=self._mnff.handle, + try: + version = get_binding_version() + if version < (12, 9): + raise RuntimeError( + f"NVVM bindings require cuda-bindings >= 12.9.0, but found {version[0]}.{version[1]}.x. " + "Please update cuda-bindings to use NVVM features." 
) - size_func = getattr(nvrtc, f"nvrtcGet{target_type.upper()}Size") - comp_func = getattr(nvrtc, f"nvrtcGet{target_type.upper()}") - size = handle_return(size_func(self._mnff.handle), handle=self._mnff.handle) - data = b" " * size - handle_return(comp_func(self._mnff.handle, data), handle=self._mnff.handle) - - symbol_mapping = {} - if name_expressions: - for n in name_expressions: - symbol_mapping[n] = handle_return( - nvrtc.nvrtcGetLoweredName(self._mnff.handle, n.encode()), handle=self._mnff.handle - ) - - if logs is not None: - logsize = handle_return(nvrtc.nvrtcGetProgramLogSize(self._mnff.handle), handle=self._mnff.handle) - if logsize > 1: - log = b" " * logsize - handle_return(nvrtc.nvrtcGetProgramLog(self._mnff.handle, log), handle=self._mnff.handle) - logs.write(log.decode("utf-8", errors="backslashreplace")) - - return ObjectCode._init(data, target_type, symbol_mapping=symbol_mapping, name=self._options.name) - - elif self._backend == "NVVM": - if target_type not in ("ptx", "ltoir"): - raise ValueError(f'NVVM backend only supports target_type="ptx", "ltoir", got "{target_type}"') - - # TODO: flip to True when NVIDIA/cuda-python#1354 is resolved and CUDA 12 is dropped - nvvm_options = self._options._prepare_nvvm_options(as_bytes=False) - if target_type == "ltoir" and "-gen-lto" not in nvvm_options: - nvvm_options.append("-gen-lto") - nvvm = _get_nvvm_module() - with _nvvm_exception_manager(self): - nvvm.verify_program(self._mnff.handle, len(nvvm_options), nvvm_options) - nvvm.compile_program(self._mnff.handle, len(nvvm_options), nvvm_options) - - size = nvvm.get_compiled_result_size(self._mnff.handle) - data = bytearray(size) - nvvm.get_compiled_result(self._mnff.handle, data) - - if logs is not None: - logsize = nvvm.get_program_log_size(self._mnff.handle) - if logsize > 1: - log = bytearray(logsize) - nvvm.get_program_log(self._mnff.handle, log) - logs.write(log.decode("utf-8", errors="backslashreplace")) - - return ObjectCode._init(data, target_type, 
name=self._options.name) - - supported_backends = ("nvJitLink", "driver") - if self._backend not in supported_backends: - raise ValueError(f'Unsupported backend="{self._backend}" ({supported_backends=})') - return self._linker.link(target_type) - - @property - def backend(self) -> str: - """Return this Program instance's underlying backend.""" - return self._backend - - @property - def handle(self) -> ProgramHandleT: - """Return the underlying handle object. + from cuda.bindings import nvvm + from cuda.bindings._internal.nvvm import _inspect_function_pointer - .. note:: + if _inspect_function_pointer("__nvvmCreateProgram") == 0: + raise RuntimeError("NVVM library (libnvvm) is not available in this Python environment. ") - The type of the returned object depends on the backend. + _nvvm_module = nvvm + return _nvvm_module - .. caution:: + except RuntimeError as e: + _nvvm_module = None + raise e - This handle is a Python object. To get the memory address of the underlying C - handle, call ``int(Program.handle)``. - """ - return self._mnff.handle - def __repr__(self) -> str: - return f"" +cdef inline bint _process_define_macro_inner(list options, object macro) except? 
-1: + """Process a single define macro, returning True if successful.""" + if isinstance(macro, str): + options.append(f"--define-macro={macro}") + return True + if isinstance(macro, tuple): + if len(macro) != 2 or any(not isinstance(val, str) for val in macro): + raise RuntimeError(f"Expected define_macro tuple[str, str], got {macro}") + options.append(f"--define-macro={macro[0]}={macro[1]}") + return True + return False -# ============================================================================= -# Helper functions -# ============================================================================= +cdef inline void _process_define_macro(list options, object macro) except *: + """Process define_macro option which can be str, tuple, or list thereof.""" + union_type = "Union[str, tuple[str, str]]" + if _process_define_macro_inner(options, macro): + return + if is_nested_sequence(macro): + for seq_macro in macro: + if not _process_define_macro_inner(options, seq_macro): + raise RuntimeError(f"Expected define_macro {union_type}, got {seq_macro}") + return + raise RuntimeError(f"Expected define_macro {union_type}, list[{union_type}], got {macro}") -cdef bint Program_can_load_generated_ptx(): +cdef inline bint _can_load_generated_ptx() except? 
-1: """Check if the driver can load PTX generated by the current NVRTC version.""" driver_ver = handle_return(driver.cuDriverGetVersion()) nvrtc_major, nvrtc_minor = handle_return(nvrtc.nvrtcVersion()) return nvrtc_major * 1000 + nvrtc_minor * 10 <= driver_ver -cdef object Program_translate_options(object options): +cdef inline object _translate_program_options(object options): """Translate ProgramOptions to LinkerOptions for PTX compilation.""" return LinkerOptions( name=options.name, @@ -867,3 +526,389 @@ cdef object Program_translate_options(object options): ptxas_options=options.ptxas_options, no_cache=options.no_cache, ) + + +cdef inline int Program_init(Program self, object code, str code_type, object options) except -1: + """Initialize a Program instance.""" + self._mnff = _ProgramMNFF(self, None, None) + self._options = options = check_or_create_options(ProgramOptions, options, "Program options") + code_type = code_type.lower() + + if code_type == "c++": + assert_type(code, str) + # TODO: support pre-loaded headers & include names + # TODO: allow tuples once NVIDIA/cuda-python#72 is resolved + self._mnff.handle = handle_return(nvrtc.nvrtcCreateProgram(code.encode(), options._name, 0, [], [])) + self._mnff.backend = "NVRTC" + self._backend = "NVRTC" + self._linker = None + + elif code_type == "ptx": + assert_type(code, str) + self._linker = Linker( + ObjectCode._init(code.encode(), code_type), options=_translate_program_options(options) + ) + self._backend = self._linker.backend + + elif code_type == "nvvm": + if isinstance(code, str): + code = code.encode("utf-8") + elif not isinstance(code, (bytes, bytearray)): + raise TypeError("NVVM IR code must be provided as str, bytes, or bytearray") + + nvvm = _get_nvvm_module() + self._mnff.handle = nvvm.create_program() + self._mnff.backend = "NVVM" + nvvm.add_module_to_program(self._mnff.handle, code, len(code), options._name.decode()) + self._backend = "NVVM" + self._linker = None + + else: + supported_code_types 
= ("c++", "ptx", "nvvm") + assert code_type not in supported_code_types, f"{code_type=}" + raise RuntimeError(f"Unsupported {code_type=} ({supported_code_types=})") + + return 0 + + +cdef object Program_compile_nvrtc(Program self, str target_type, object name_expressions, object logs): + """Compile using NVRTC backend.""" + if target_type == "ptx" and not _can_load_generated_ptx(): + warn( + "The CUDA driver version is older than the backend version. " + "The generated ptx will not be loadable by the current driver.", + stacklevel=2, + category=RuntimeWarning, + ) + + if name_expressions: + for n in name_expressions: + handle_return( + nvrtc.nvrtcAddNameExpression(self._mnff.handle, n.encode()), + handle=self._mnff.handle, + ) + + options = self._options.as_bytes("nvrtc") + handle_return( + nvrtc.nvrtcCompileProgram(self._mnff.handle, len(options), options), + handle=self._mnff.handle, + ) + + size_func = getattr(nvrtc, f"nvrtcGet{target_type.upper()}Size") + comp_func = getattr(nvrtc, f"nvrtcGet{target_type.upper()}") + size = handle_return(size_func(self._mnff.handle), handle=self._mnff.handle) + data = b" " * size + handle_return(comp_func(self._mnff.handle, data), handle=self._mnff.handle) + + symbol_mapping = {} + if name_expressions: + for n in name_expressions: + symbol_mapping[n] = handle_return( + nvrtc.nvrtcGetLoweredName(self._mnff.handle, n.encode()), handle=self._mnff.handle + ) + + if logs is not None: + logsize = handle_return(nvrtc.nvrtcGetProgramLogSize(self._mnff.handle), handle=self._mnff.handle) + if logsize > 1: + log = b" " * logsize + handle_return(nvrtc.nvrtcGetProgramLog(self._mnff.handle, log), handle=self._mnff.handle) + logs.write(log.decode("utf-8", errors="backslashreplace")) + + return ObjectCode._init(data, target_type, symbol_mapping=symbol_mapping, name=self._options.name) + + +cdef object Program_compile_nvvm(Program self, str target_type, object logs): + """Compile using NVVM backend.""" + if target_type not in ("ptx", "ltoir"): 
+ raise ValueError(f'NVVM backend only supports target_type="ptx", "ltoir", got "{target_type}"') + + # TODO: flip to True when NVIDIA/cuda-python#1354 is resolved and CUDA 12 is dropped + nvvm_options = self._options._prepare_nvvm_options(as_bytes=False) + if target_type == "ltoir" and "-gen-lto" not in nvvm_options: + nvvm_options.append("-gen-lto") + + nvvm = _get_nvvm_module() + try: + nvvm.verify_program(self._mnff.handle, len(nvvm_options), nvvm_options) + nvvm.compile_program(self._mnff.handle, len(nvvm_options), nvvm_options) + except Exception as e: + # Capture NVVM program log on error + error_log = "" + try: + logsize = nvvm.get_program_log_size(self._mnff.handle) + if logsize > 1: + log = bytearray(logsize) + nvvm.get_program_log(self._mnff.handle, log) + error_log = log.decode("utf-8", errors="backslashreplace") + except Exception: + pass + e.args = (e.args[0] + (f"\nNVVM program log: {error_log}" if error_log else ""), *e.args[1:]) + raise + + size = nvvm.get_compiled_result_size(self._mnff.handle) + data = bytearray(size) + nvvm.get_compiled_result(self._mnff.handle, data) + + if logs is not None: + logsize = nvvm.get_program_log_size(self._mnff.handle) + if logsize > 1: + log = bytearray(logsize) + nvvm.get_program_log(self._mnff.handle, log) + logs.write(log.decode("utf-8", errors="backslashreplace")) + + return ObjectCode._init(data, target_type, name=self._options.name) + + +cdef object Program_compile(Program self, str target_type, object name_expressions, object logs): + """Compile the program to the specified target type.""" + supported_target_types = ("ptx", "cubin", "ltoir") + if target_type not in supported_target_types: + raise ValueError(f'Unsupported target_type="{target_type}" ({supported_target_types=})') + + if self._backend == "NVRTC": + return Program_compile_nvrtc(self, target_type, name_expressions, logs) + elif self._backend == "NVVM": + return Program_compile_nvvm(self, target_type, logs) + + # Linker backend (PTX code type) + 
supported_backends = ("nvJitLink", "driver") + if self._backend not in supported_backends: + raise ValueError(f'Unsupported backend="{self._backend}" ({supported_backends=})') + return self._linker.link(target_type) + + +cdef inline list _prepare_nvrtc_options_impl(object opts): + """Build NVRTC-specific compiler options.""" + options = [f"-arch={opts.arch}"] + if opts.relocatable_device_code is not None: + options.append(f"--relocatable-device-code={_handle_boolean_option(opts.relocatable_device_code)}") + if opts.extensible_whole_program is not None and opts.extensible_whole_program: + options.append("--extensible-whole-program") + if opts.debug is not None and opts.debug: + options.append("--device-debug") + if opts.lineinfo is not None and opts.lineinfo: + options.append("--generate-line-info") + if opts.device_code_optimize is not None and opts.device_code_optimize: + options.append("--dopt=on") + if opts.ptxas_options is not None: + opt_name = "--ptxas-options" + if isinstance(opts.ptxas_options, str): + options.append(f"{opt_name}={opts.ptxas_options}") + elif is_sequence(opts.ptxas_options): + for opt_value in opts.ptxas_options: + options.append(f"{opt_name}={opt_value}") + if opts.max_register_count is not None: + options.append(f"--maxrregcount={opts.max_register_count}") + if opts.ftz is not None: + options.append(f"--ftz={_handle_boolean_option(opts.ftz)}") + if opts.prec_sqrt is not None: + options.append(f"--prec-sqrt={_handle_boolean_option(opts.prec_sqrt)}") + if opts.prec_div is not None: + options.append(f"--prec-div={_handle_boolean_option(opts.prec_div)}") + if opts.fma is not None: + options.append(f"--fmad={_handle_boolean_option(opts.fma)}") + if opts.use_fast_math is not None and opts.use_fast_math: + options.append("--use_fast_math") + if opts.extra_device_vectorization is not None and opts.extra_device_vectorization: + options.append("--extra-device-vectorization") + if opts.link_time_optimization is not None and 
opts.link_time_optimization: + options.append("--dlink-time-opt") + if opts.gen_opt_lto is not None and opts.gen_opt_lto: + options.append("--gen-opt-lto") + if opts.define_macro is not None: + _process_define_macro(options, opts.define_macro) + if opts.undefine_macro is not None: + if isinstance(opts.undefine_macro, str): + options.append(f"--undefine-macro={opts.undefine_macro}") + elif is_sequence(opts.undefine_macro): + for macro in opts.undefine_macro: + options.append(f"--undefine-macro={macro}") + if opts.include_path is not None: + if isinstance(opts.include_path, str): + options.append(f"--include-path={opts.include_path}") + elif is_sequence(opts.include_path): + for path in opts.include_path: + options.append(f"--include-path={path}") + if opts.pre_include is not None: + if isinstance(opts.pre_include, str): + options.append(f"--pre-include={opts.pre_include}") + elif is_sequence(opts.pre_include): + for header in opts.pre_include: + options.append(f"--pre-include={header}") + if opts.no_source_include is not None and opts.no_source_include: + options.append("--no-source-include") + if opts.std is not None: + options.append(f"--std={opts.std}") + if opts.builtin_move_forward is not None: + options.append(f"--builtin-move-forward={_handle_boolean_option(opts.builtin_move_forward)}") + if opts.builtin_initializer_list is not None: + options.append(f"--builtin-initializer-list={_handle_boolean_option(opts.builtin_initializer_list)}") + if opts.disable_warnings is not None and opts.disable_warnings: + options.append("--disable-warnings") + if opts.restrict is not None and opts.restrict: + options.append("--restrict") + if opts.device_as_default_execution_space is not None and opts.device_as_default_execution_space: + options.append("--device-as-default-execution-space") + if opts.device_int128 is not None and opts.device_int128: + options.append("--device-int128") + if opts.device_float128 is not None and opts.device_float128: + 
options.append("--device-float128") + if opts.optimization_info is not None: + options.append(f"--optimization-info={opts.optimization_info}") + if opts.no_display_error_number is not None and opts.no_display_error_number: + options.append("--no-display-error-number") + if opts.diag_error is not None: + if isinstance(opts.diag_error, int): + options.append(f"--diag-error={opts.diag_error}") + elif is_sequence(opts.diag_error): + for error in opts.diag_error: + options.append(f"--diag-error={error}") + if opts.diag_suppress is not None: + if isinstance(opts.diag_suppress, int): + options.append(f"--diag-suppress={opts.diag_suppress}") + elif is_sequence(opts.diag_suppress): + for suppress in opts.diag_suppress: + options.append(f"--diag-suppress={suppress}") + if opts.diag_warn is not None: + if isinstance(opts.diag_warn, int): + options.append(f"--diag-warn={opts.diag_warn}") + elif is_sequence(opts.diag_warn): + for w in opts.diag_warn: + options.append(f"--diag-warn={w}") + if opts.brief_diagnostics is not None: + options.append(f"--brief-diagnostics={_handle_boolean_option(opts.brief_diagnostics)}") + if opts.time is not None: + options.append(f"--time={opts.time}") + if opts.split_compile is not None: + options.append(f"--split-compile={opts.split_compile}") + if opts.fdevice_syntax_only is not None and opts.fdevice_syntax_only: + options.append("--fdevice-syntax-only") + if opts.minimal is not None and opts.minimal: + options.append("--minimal") + if opts.no_cache is not None and opts.no_cache: + options.append("--no-cache") + if opts.fdevice_time_trace is not None: + options.append(f"--fdevice-time-trace={opts.fdevice_time_trace}") + if opts.frandom_seed is not None: + options.append(f"--frandom-seed={opts.frandom_seed}") + if opts.ofast_compile is not None: + options.append(f"--Ofast-compile={opts.ofast_compile}") + # PCH options (CUDA 12.8+) + if opts.pch is not None and opts.pch: + options.append("--pch") + if opts.create_pch is not None: + 
options.append(f"--create-pch={opts.create_pch}") + if opts.use_pch is not None: + options.append(f"--use-pch={opts.use_pch}") + if opts.pch_dir is not None: + options.append(f"--pch-dir={opts.pch_dir}") + if opts.pch_verbose is not None: + options.append(f"--pch-verbose={_handle_boolean_option(opts.pch_verbose)}") + if opts.pch_messages is not None: + options.append(f"--pch-messages={_handle_boolean_option(opts.pch_messages)}") + if opts.instantiate_templates_in_pch is not None: + options.append( + f"--instantiate-templates-in-pch={_handle_boolean_option(opts.instantiate_templates_in_pch)}" + ) + if opts.numba_debug: + options.append("--numba-debug") + return [o.encode() for o in options] + + +cdef inline object _prepare_nvvm_options_impl(object opts, bint as_bytes): + """Build NVVM-specific compiler options.""" + options = [] + + # Options supported by NVVM + assert opts.arch is not None + arch = opts.arch + if arch.startswith("sm_"): + arch = f"compute_{arch[3:]}" + options.append(f"-arch={arch}") + if opts.debug is not None and opts.debug: + options.append("-g") + if opts.device_code_optimize is False: + options.append("-opt=0") + elif opts.device_code_optimize is True: + options.append("-opt=3") + # NVVM uses 0/1 instead of true/false for boolean options + if opts.ftz is not None: + options.append(f"-ftz={'1' if opts.ftz else '0'}") + if opts.prec_sqrt is not None: + options.append(f"-prec-sqrt={'1' if opts.prec_sqrt else '0'}") + if opts.prec_div is not None: + options.append(f"-prec-div={'1' if opts.prec_div else '0'}") + if opts.fma is not None: + options.append(f"-fma={'1' if opts.fma else '0'}") + + # Check for unsupported options and raise error if they are set + unsupported = [] + if opts.relocatable_device_code is not None: + unsupported.append("relocatable_device_code") + if opts.extensible_whole_program is not None and opts.extensible_whole_program: + unsupported.append("extensible_whole_program") + if opts.lineinfo is not None and opts.lineinfo: + 
unsupported.append("lineinfo") + if opts.ptxas_options is not None: + unsupported.append("ptxas_options") + if opts.max_register_count is not None: + unsupported.append("max_register_count") + if opts.use_fast_math is not None and opts.use_fast_math: + unsupported.append("use_fast_math") + if opts.extra_device_vectorization is not None and opts.extra_device_vectorization: + unsupported.append("extra_device_vectorization") + if opts.gen_opt_lto is not None and opts.gen_opt_lto: + unsupported.append("gen_opt_lto") + if opts.define_macro is not None: + unsupported.append("define_macro") + if opts.undefine_macro is not None: + unsupported.append("undefine_macro") + if opts.include_path is not None: + unsupported.append("include_path") + if opts.pre_include is not None: + unsupported.append("pre_include") + if opts.no_source_include is not None and opts.no_source_include: + unsupported.append("no_source_include") + if opts.std is not None: + unsupported.append("std") + if opts.builtin_move_forward is not None: + unsupported.append("builtin_move_forward") + if opts.builtin_initializer_list is not None: + unsupported.append("builtin_initializer_list") + if opts.disable_warnings is not None and opts.disable_warnings: + unsupported.append("disable_warnings") + if opts.restrict is not None and opts.restrict: + unsupported.append("restrict") + if opts.device_as_default_execution_space is not None and opts.device_as_default_execution_space: + unsupported.append("device_as_default_execution_space") + if opts.device_int128 is not None and opts.device_int128: + unsupported.append("device_int128") + if opts.optimization_info is not None: + unsupported.append("optimization_info") + if opts.no_display_error_number is not None and opts.no_display_error_number: + unsupported.append("no_display_error_number") + if opts.diag_error is not None: + unsupported.append("diag_error") + if opts.diag_suppress is not None: + unsupported.append("diag_suppress") + if opts.diag_warn is not None: + 
unsupported.append("diag_warn") + if opts.brief_diagnostics is not None: + unsupported.append("brief_diagnostics") + if opts.time is not None: + unsupported.append("time") + if opts.split_compile is not None: + unsupported.append("split_compile") + if opts.fdevice_syntax_only is not None and opts.fdevice_syntax_only: + unsupported.append("fdevice_syntax_only") + if opts.minimal is not None and opts.minimal: + unsupported.append("minimal") + if opts.numba_debug is not None and opts.numba_debug: + unsupported.append("numba_debug") + if unsupported: + raise CUDAError(f"The following options are not supported by NVVM backend: {', '.join(unsupported)}") + + if as_bytes: + return [o.encode() for o in options] + else: + return options diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py index 72591b54d5..b53f14c45c 100644 --- a/cuda_core/tests/test_module.py +++ b/cuda_core/tests/test_module.py @@ -146,7 +146,7 @@ def test_object_code_load_ptx(get_saxpy_kernel_ptx): sym_map = mod.symbol_mapping mod_obj = ObjectCode.from_ptx(ptx, symbol_mapping=sym_map) assert mod.code == ptx - if not Program._can_load_generated_ptx(): + if not Program.driver_can_load_nvrtc_ptx_output(): pytest.skip("PTX version too new for current driver") mod_obj.get_kernel("saxpy") # force loading @@ -160,7 +160,7 @@ def test_object_code_load_ptx_from_file(get_saxpy_kernel_ptx, tmp_path): mod_obj = ObjectCode.from_ptx(str(ptx_file), symbol_mapping=sym_map) assert mod_obj.code == str(ptx_file) assert mod_obj.code_type == "ptx" - if not Program._can_load_generated_ptx(): + if not Program.driver_can_load_nvrtc_ptx_output(): pytest.skip("PTX version too new for current driver") mod_obj.get_kernel("saxpy") # force loading