Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .ci/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ Platform is auto-detected (via `nvidia-smi`/`ixsmi`/`mx-smi`/`mthreads-gmi`/`cnm
| `--stage` | Run only the specified stage |
| `--image-tag` | Override image tag |
| `--gpu-id` | Override GPU device IDs (nvidia via `--gpus`, others via `CUDA_VISIBLE_DEVICES`) |
| `--test` | Override pytest test path (e.g., `tests/test_gemm.py::test_gemm`) |
| `--test` | Replace stage command entirely (e.g., `pytest tests/test_add.py -v`) |
| `--results-dir` | Host directory mounted to `/workspace/results` inside the container |
| `--local` | Mount current directory (read-only) instead of cloning from git |
| `--dry-run` | Print docker command without executing |
Expand Down Expand Up @@ -195,7 +195,7 @@ Proxy vars are forwarded from the host. Test results are written to `--results-d
| MetaX | `--privileged` | `none` | `maca-pytorch:3.2.1.4-...` | `mx-smi` |
| Moore | `--privileged` | `none` | `vllm_musa:20251112_hygon` | `mthreads-gmi` |
| Cambricon | `--privileged` | `mlu` | `cambricon/pytorch:v1.25.3` | `cnmon` |
| Ascend | TODO | — | `ascend-pytorch:24.0.0` | |
| Ascend | `--privileged` + device mounts | `npu` | `ascend-pytorch:24.0.RC3-A2-2.1.0` | `npu-smi` |

`gpu_style` controls the Docker device injection mechanism: `nvidia` uses `--gpus`, `none` uses `CUDA_VISIBLE_DEVICES` (or skips injection for Moore), `mlu` uses `MLU_VISIBLE_DEVICES`.

Expand Down
72 changes: 72 additions & 0 deletions .ci/ci_resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
GPU_STYLE_NVIDIA = "nvidia"
GPU_STYLE_NONE = "none"
GPU_STYLE_MLU = "mlu"
GPU_STYLE_NPU = "npu"


@dataclass
Expand Down Expand Up @@ -44,6 +45,7 @@ class ResourcePool:
"metax": "mx-smi",
"moore": "mthreads-gmi",
"cambricon": "cnmon",
"ascend": "npu-smi",
}

def __init__(self, platform, utilization_threshold=10):
Expand Down Expand Up @@ -72,6 +74,9 @@ def detect_gpus(self) -> list[GpuInfo]:
if self._platform == "cambricon":
return self._detect_gpus_cambricon()

if self._platform == "ascend":
return self._detect_gpus_ascend()

tool = self.GPU_QUERY_TOOLS.get(self._platform)

if not tool:
Expand Down Expand Up @@ -325,6 +330,73 @@ def _detect_gpus_cambricon(self) -> list[GpuInfo]:

return sorted(gpus, key=operator.attrgetter("index"))

def _detect_gpus_ascend(self) -> list[GpuInfo]:
"""Parse npu-smi info output for Huawei Ascend NPUs.

Output format (pipe-delimited table, two rows per NPU):
| 0 910B4 | OK | 86.5 41 ...
| 0 | 0000:C1:00.0 | 0 0 / 0 2789 / 32768 |
Row 1: index, name, health, power, temp, hugepages.
Row 2: chip_id, bus_id, aicore_util, memory_usage, hbm_usage.
"""
try:
result = subprocess.run(
["npu-smi", "info"],
capture_output=True,
text=True,
timeout=10,
)
except (FileNotFoundError, subprocess.TimeoutExpired):
return []

if result.returncode != 0:
return []

gpus = []
lines = result.stdout.splitlines()
i = 0

while i < len(lines):
line = lines[i]
# Match row 1: "| {index} {name} ..."
m1 = re.match(r"^\|\s+(\d+)\s+", line)

if m1 and i + 1 < len(lines):
try:
npu_index = int(m1.group(1))
aicore_m = re.match(
r"^\|\s+\d+\s+\|\s+[\da-f:.]+\s+\|\s*([\d.]+)\s", lines[i + 1]
)

util_pct = float(aicore_m.group(1)) if aicore_m else 0.0

# Parse HBM usage from row 2: "{used} / {total}".
hbm_m = re.search(r"([\d.]+)\s*/\s*([\d.]+)", lines[i + 1])

if hbm_m:
used_mb = float(hbm_m.group(1))
total_mb = float(hbm_m.group(2))
else:
used_mb, total_mb = 0.0, 0.0

gpus.append(
GpuInfo(
index=npu_index,
memory_used_mb=used_mb,
memory_total_mb=total_mb,
utilization_pct=util_pct,
)
)
except (ValueError, AttributeError):
pass

i += 2
continue

i += 1

return sorted(gpus, key=operator.attrgetter("index"))

def detect_system_resources(self) -> SystemResources:
"""Read system memory from /proc/meminfo and CPU count."""
total_mb = 0.0
Expand Down
32 changes: 28 additions & 4 deletions .ci/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -137,10 +137,34 @@ platforms:
- name: test
run: pytest tests/test_gemm.py -n 4 -v --tb=short --junitxml=/workspace/results/test-results.xml

ascend: # TODO: Ascend image is not ready yet
ascend:
image:
dockerfile: .ci/images/ascend/
build_args:
BASE_IMAGE: ascendhub.huawei.com/public-ascendhub/ascend-pytorch:24.0.0
private_sdk:
source_env: PRIVATE_SDK_URL
BASE_IMAGE: quay.io/ascend/vllm-ascend:v0.18.0rc1-openeuler
PIP_INDEX_URL: https://pypi.org/simple
docker_args:
- "--runtime=runc"
- "--privileged"
- "--device=/dev/davinci0"
- "--device=/dev/davinci_manager"
- "--device=/dev/devmm_svm"
- "--device=/dev/hisi_hdc"
volumes:
- /usr/local/Ascend/driver:/usr/local/Ascend/driver:ro
- /usr/local/dcmi:/usr/local/dcmi:ro
- /usr/local/bin/npu-smi:/usr/local/bin/npu-smi:ro
env:
ASCEND_HOME_PATH: /usr/local/Ascend/ascend-toolkit/latest
setup: pip install .[dev] --no-build-isolation
jobs:
npu:
resources:
gpu_ids: "0"
gpu_style: npu
memory: 32GB
shm_size: 16g
timeout: 3600
stages:
- name: test
run: pytest tests/ -n 1 -k npu -v --tb=short --junitxml=/workspace/results/test-results.xml
34 changes: 13 additions & 21 deletions .ci/images/ascend/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
ARG BASE_IMAGE
FROM ${BASE_IMAGE}

ENV DEBIAN_FRONTEND=noninteractive
USER root

ARG HTTP_PROXY
ARG HTTPS_PROXY
Expand All @@ -10,30 +10,22 @@ ARG http_proxy
ARG https_proxy
ARG no_proxy

RUN apt-get update && \
apt-get install -y --no-install-recommends \
git \
cmake \
ninja-build \
coreutils \
curl \
libclang-dev \
&& rm -rf /var/lib/apt/lists/*

ARG PRIVATE_SDK_URL
RUN if [ -n "$PRIVATE_SDK_URL" ]; then \
curl -fSL "$PRIVATE_SDK_URL" -o /tmp/sdk.run && \
chmod +x /tmp/sdk.run && /tmp/sdk.run --quiet && \
rm /tmp/sdk.run; \
fi

RUN pip install --no-cache-dir \
ARG PIP_INDEX_URL
RUN pip install --no-cache-dir --progress-bar off \
${PIP_INDEX_URL:+--index-url "$PIP_INDEX_URL"} \
libclang \
ninja \
scikit-build-core \
pybind11 \
libclang \
pytest \
pytest-cov \
pytest-xdist \
pyyaml
ruff==0.15.7

# Pin pre-installed torch to prevent pip from replacing it.
RUN pip show torch >/dev/null 2>&1 && \
echo "torch==$(pip show torch | grep '^Version:' | awk '{print $2}')" > /etc/pip-constraints.txt || \
touch /etc/pip-constraints.txt
ENV PIP_CONSTRAINT=/etc/pip-constraints.txt

WORKDIR /workspace
45 changes: 10 additions & 35 deletions .ci/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,47 +13,19 @@
GPU_STYLE_NVIDIA,
GPU_STYLE_NONE,
GPU_STYLE_MLU,
GPU_STYLE_NPU,
ResourcePool,
detect_platform,
)
from utils import get_git_commit, load_config

# Flags that consume the next token as their value (e.g. -n 4, -k expr).
_PYTEST_VALUE_FLAGS = {"-n", "-k", "-m", "-p", "--tb", "--junitxml", "--rootdir"}
def apply_test_override(run_cmd, test_cmd):
"""Replace a stage command with *test_cmd*.


def apply_test_override(run_cmd, test_path):
"""Replace positional test path(s) in a pytest stage command.

For example: ``pytest tests/ -n 4 ...`` becomes
``pytest tests/test_gemm.py -n 4 ...`` when ``test_path`` is
``tests/test_gemm.py``.
``--test`` always replaces the entire stage command regardless of whether
the original is pytest or something else.
"""
parts = shlex.split(run_cmd)

if not parts or parts[0] != "pytest":
return run_cmd

result = ["pytest", test_path]
skip_next = False

for p in parts[1:]:
if skip_next:
result.append(p)
skip_next = False
continue

if p.startswith("-"):
result.append(p)
if p in _PYTEST_VALUE_FLAGS:
skip_next = True
continue

# Skip existing test paths; the override is already in result[1].
if not ("/" in p or p.endswith(".py") or "::" in p):
result.append(p)

return shlex.join(result)
return test_cmd


def build_results_dir(base, platform, stages, commit):
Expand Down Expand Up @@ -212,6 +184,9 @@ def build_docker_args(
# For Cambricon MLU platforms that use --privileged,
# control visible devices via MLU_VISIBLE_DEVICES.
args.extend(["-e", f"MLU_VISIBLE_DEVICES={gpu_id}"])
elif gpu_style == GPU_STYLE_NPU and gpu_id and gpu_id != "all":
# Ascend: control visible NPU via ASCEND_VISIBLE_DEVICES.
args.extend(["-e", f"ASCEND_VISIBLE_DEVICES={gpu_id}"])

memory = resources.get("memory")

Expand Down Expand Up @@ -315,7 +290,7 @@ def main():
parser.add_argument(
"--test",
type=str,
help='Override pytest test path, e.g. "tests/test_gemm.py" or "tests/test_gemm.py::test_gemm"',
help='Replace stage command with this (e.g. "pytest tests/test_add.py -v")',
)
parser.add_argument(
"--local",
Expand Down
1 change: 1 addition & 0 deletions .ci/tests/test_resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ def test_detect_system_resources(monkeypatch, tmp_path):
"MemAvailable: 20000000 kB\n"
)


_real_open = open

def fake_open(path, **kw):
Expand Down
33 changes: 33 additions & 0 deletions .ci/tests/test_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,3 +296,36 @@ def test_build_results_dir_under_base():
stages = [{"name": "test", "run": "pytest"}]
d = run.build_results_dir("/tmp/my-results", "ascend", stages, "def5678")
assert d.parent == Path("/tmp/my-results")


# ---------------------------------------------------------------------------
# Tests for `apply_test_override`.
# ---------------------------------------------------------------------------


def test_apply_test_override_replaces_pytest_command():
    # The override discards the original pytest invocation wholesale.
    original = "pytest tests/ -v"
    override = "pytest tests/test_add.py"
    assert run.apply_test_override(original, override) == override


def test_apply_test_override_replaces_non_pytest_command():
    # Non-pytest stage commands are replaced just the same.
    result = run.apply_test_override("ruff check .", "python docs/repro.py")
    assert result == "python docs/repro.py"


def test_apply_test_override_replaces_empty_command():
    # An empty stage command still yields the override verbatim.
    replaced = run.apply_test_override("", "bash script.sh")
    assert replaced == "bash script.sh"


def test_apply_test_override_preserves_user_flags():
    # Flags the user supplies in the override survive untouched.
    override = "pytest tests/test_gemm.py -n 1 -v --tb=short"
    assert run.apply_test_override("pytest tests/ -n 4", override) == override


def test_apply_test_override_with_shell_command():
    # Compound shell commands pass through without reinterpretation.
    shell_cmd = "cd /tmp && python repro.py"
    result = run.apply_test_override("pytest tests/", shell_cmd)
    assert result == shell_cmd
22 changes: 22 additions & 0 deletions scripts/generate_wrappers.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,21 +103,39 @@ def _find_optional_tensor_params(op_name):
return set(re.findall(r"std::optional<Tensor>\s+(\w+)", source))


def _find_vector_tensor_params(op_name):
    """Collect parameter names typed ``std::vector<Tensor>`` in the base header.

    Reads ``{op_name}.h`` under ``_BASE_DIR`` and returns the matching
    parameter names as a set.
    """
    import re

    header_text = (_BASE_DIR / f"{op_name}.h").read_text()
    pattern = re.compile(r"std::vector<Tensor>\s+(\w+)")
    return {param for param in pattern.findall(header_text)}


def _generate_pybind11(operator):
optional_tensor_params = _find_optional_tensor_params(operator.name)
vector_tensor_params = _find_vector_tensor_params(operator.name)

def _is_optional_tensor(arg):
if arg.spelling in optional_tensor_params:
return True
return "std::optional" in arg.type.spelling and "Tensor" in arg.type.spelling

def _is_vector_tensor(arg):
if arg.spelling in vector_tensor_params:
return True
return "std::vector" in arg.type.spelling and "Tensor" in arg.type.spelling

def _generate_params(node):
parts = []
for arg in node.get_arguments():
if arg.spelling == "stream":
continue
if _is_optional_tensor(arg):
parts.append(f"std::optional<py::object> {arg.spelling}")
elif _is_vector_tensor(arg):
parts.append(f"std::vector<py::object> {arg.spelling}")
else:
param = (
arg.type.spelling
Expand All @@ -136,6 +154,10 @@ def _generate_arguments(node):
args.append(
f"OptionalTensorFromPybind11Handle({arg.spelling})"
)
elif _is_vector_tensor(arg):
args.append(
f"VectorTensorFromPybind11Handle({arg.spelling})"
)
elif "Tensor" in arg.type.spelling:
args.append(f"TensorFromPybind11Handle({arg.spelling})")
else:
Expand Down
Loading