PaddlePaddle · Aidenwu0209 · Apr 9, 2026 · Apr 9, 2026 · Apr 9, 2026
diff --git a/pytest.ini b/pytest.ini
@@ -2,3 +2,9 @@
 ; print user output; clear warning
 addopts = -v -s -p no:warnings
 timeout = 100
+; Pin the default collection root to reduce collection differences between start directories: when no path is passed, discovery falls back to tests/.
+; NOTE(review): pytest documents testpaths as used when pytest is run from the rootdir without path arguments - confirm other start directories behave as intended.
+testpaths = tests
+; Add tests to the import path so test-helper imports such as `from apibase import APIBase` resolve reliably,
+; removing import differences between a full run, a single-file run and `python -m pytest`. NOTE(review): the pythonpath option needs pytest >= 7.0 - confirm the CI version.
+pythonpath = tests
diff --git a/scripts/unittest_check.sh b/scripts/unittest_check.sh
@@ -14,7 +14,54 @@
 
 set +x
 
-cd /workspace/$1/PaConvert/
+# Enter the repo root explicitly instead of relying on the caller's working directory.
+# Every relative path used later then resolves against the same workspace, including:
+# 1. the requirements.txt passed to pip
+# 2. the ./tests collection root passed to pytest
+# 3. the temp-file directory that, per the original note, tests/apibase.py derives from os.getcwd()
+# "$1" is quoted so a workspace name with spaces or glob characters stays one path; abort if cd fails.
+cd "/workspace/$1/PaConvert/" || {
+    echo "[unittest-cpu] Failed to enter repo root: /workspace/$1/PaConvert/" >&2
+    exit 1
+}
+
+# Fail-fast preflight checks:
+# 1. requirements.txt is needed to install the test dependencies
+# 2. tests is the unittest entry point
+# If either path is missing, the workspace or cwd is already wrong; report on stderr and stop
+# instead of letting the real problem surface later as noisy pip/pytest errors.
+test -f requirements.txt || {
+    echo "[unittest-cpu] requirements.txt not found under repo root" >&2
+    exit 1
+}
+
+test -d tests || {
+    echo "[unittest-cpu] tests directory not found under repo root" >&2
+    exit 1
+}
+
+# These files mutate process-level defaults and are more stable when run in
+# their own pytest process after the main suite.
+# They change process-wide state such as the default device / dtype / tensor type / thread count / printoptions.
+# The approach has two steps:
+# 1. the main suite skips them via --ignore and exercises only the ordinary tests
+# 2. each listed file is then run in a pytest process of its own
+# The tests are not dropped; the process boundary is meant to contain their side effects and reduce
+# "fails on the first full run, passes on rerun" results caused by state pollution.
+ISOLATED_TEST_FILES=(
+  ./tests/test_set_default_device.py
+  ./tests/test_set_default_dtype.py
+  ./tests/test_set_default_tensor_type.py
+  ./tests/test_set_num_threads.py
+  ./tests/test_set_printoptions.py
+)
+
+# Turn the list above into the --ignore arguments pytest expects, so each path is written only once here.
+# NOTE(review): scripts/unittest_check_gpu.sh carries its own copy of this list; the two must be kept in sync by hand.
+IGNORE_ARGS=()
+for test_file in "${ISOLATED_TEST_FILES[@]}"; do
+    IGNORE_ARGS+=("--ignore=${test_file}")
+done
 
 echo '************************************************************************************************************'
 echo "Insalling latest release cpu version torch"
@@ -47,10 +94,44 @@ python -m pip install -r requirements.txt
 echo '************************************************************************************************************'
 echo "Checking code cpu unit test by pytest ..."
 python -m pip install pytest-timeout
-python -m pytest -v -s -p no:warnings ./tests;check_error=$?
-if [ ${check_error} != 0 ];then
-    echo "Rerun cpu unit test check." 
-    python -m pytest -v -s -p no:warnings --lf ./tests; check_error=$?
+# Put . and tests first on PYTHONPATH so `python -m pytest` can import tests/apibase.py (an explicit backup for
+# `pythonpath = tests` in pytest.ini); a PYTHONPATH inherited from the caller is appended instead of being dropped.
+# The main suite ignores the stateful files so the ordinary tests are judged on their own.
+TEST_PYTHONPATH=".:tests${PYTHONPATH:+:${PYTHONPATH}}"
+PYTHONPATH="${TEST_PYTHONPATH}" python -m pytest -v -s -p no:warnings "${IGNORE_ARGS[@]}" ./tests
+first_run_error=$?
+
+# The first run alone decides success or failure.
+# On failure, one --lf rerun is kept purely for diagnostic logs (e.g. to judge whether the failure is
+# order-dependent); `|| true` keeps its result from overriding the first-run exit code, so real failures stay red.
+if [ "${first_run_error}" -ne 0 ]; then
+    echo "[unittest-cpu] Diagnostic rerun of failed tests. This does not change the final result."
+    PYTHONPATH="${TEST_PYTHONPATH}" python -m pytest -v -s -p no:warnings "${IGNORE_ARGS[@]}" --lf ./tests || true
+fi
+
+# Run the isolated files one at a time, each in a fresh pytest process:
+# 1. global state left behind by one file cannot leak into the next
+# 2. a failing file gets one extra diagnostic rerun so the logs are easier to read
+# 3. isolated_error only records whether any isolated file failed; it is combined with the main suite result below
+isolated_error=0
+for test_file in "${ISOLATED_TEST_FILES[@]}"; do
+    echo "[unittest-cpu] Running isolated test file: ${test_file}"
+    PYTHONPATH="${TEST_PYTHONPATH}" python -m pytest -v -s -p no:warnings "${test_file}"
+    file_error=$?
+
+    if [ "${file_error}" -ne 0 ]; then
+        echo "[unittest-cpu] Diagnostic rerun for isolated file: ${test_file}. This does not change the final result."
+        PYTHONPATH="${TEST_PYTHONPATH}" python -m pytest -v -s -p no:warnings "${test_file}" || true
+        isolated_error=1
+    fi
+done
+
+# The final exit code is the union of the main suite and the isolated files:
+# - a failure on either side fails the whole unittest job
+# - a passing diagnostic rerun never hides the first-run result
+check_error=0
+if [ "${first_run_error}" -ne 0 ] || [ "${isolated_error}" -ne 0 ]; then
+    check_error=1
 fi
 
 echo '************************************************************************************************************'

diff --git a/scripts/unittest_check_gpu.sh b/scripts/unittest_check_gpu.sh
@@ -14,6 +14,53 @@
 
 set +x
 
+# The GPU script previously did not pin cwd the way the CPU script does, so it depended on where the caller ran it.
+# Switching to the repo root makes requirements.txt, ./tests and apibase's temp-file paths resolve against one
+# directory; "$1" is quoted to stay a single path, and a bad workspace or $1 fails immediately with a clear log.
+cd "/workspace/$1/PaConvert/" || {
+    echo "[unittest-gpu] Failed to enter repo root: /workspace/$1/PaConvert/" >&2
+    exit 1
+}
+
+# Logged early so the CI output shows whether the GPU job really entered the expected directory,
+# which helps when debugging "same script behaves differently depending on the start directory".
+echo "Current working directory: $(pwd)"
+
+# Same minimal preflight as the CPU script:
+# - requirements.txt decides whether dependency installation can run
+# - the tests directory decides whether pytest collection lands in the right place
+# If either is missing, the workspace does not meet the preconditions; report on stderr and stop.
+test -f requirements.txt || {
+    echo "[unittest-gpu] requirements.txt not found under repo root" >&2
+    exit 1
+}
+
+test -d tests || {
+    echo "[unittest-gpu] tests directory not found under repo root" >&2
+    exit 1
+}
+
+# These files mutate process-level defaults and are more stable when run in
+# their own pytest process after the main suite.
+# Because they change process-wide defaults, they are the high-risk set for order-dependent pollution in a full run.
+# They are handled the same way as in the CPU script:
+# 1. the main suite ignores them
+# 2. each file is then run on its own, in a separate process
+# Coverage is kept, and the impact of "ordinary" and "global-state" tests can be observed separately.
+ISOLATED_TEST_FILES=(
+  ./tests/test_set_default_device.py
+  ./tests/test_set_default_dtype.py
+  ./tests/test_set_default_tensor_type.py
+  ./tests/test_set_num_threads.py
+  ./tests/test_set_printoptions.py
+)
+
+# Build the pytest --ignore arguments from the list above. NOTE(review): the CPU script keeps its own copy of the list; sync both by hand.
+IGNORE_ARGS=()
+for test_file in "${ISOLATED_TEST_FILES[@]}"; do
+    IGNORE_ARGS+=("--ignore=${test_file}")
+done
+
 echo '************************************************************************************************************'
 echo "Insalling latest release gpu version torch"
 python -m pip uninstall -y torchaudio
@@ -46,11 +93,41 @@ python -m pip install -r requirements.txt
 
 echo '************************************************************************************************************'
 echo "Checking code gpu unit test by pytest ..."
-python -m pip install pytest-timeout pytest-xdist pytest-rerunfailures
-python -m pytest -v -s -p no:warnings -n 1 --reruns=3 ./tests; check_error=$?
-if [ ${check_error} != 0 ];then
-    echo "Rerun gpu unit test check." 
-    python -m pytest -v -s -p no:warnings -n 1 --lf ./tests; check_error=$?
+# pytest-xdist stays because this script already ran with -n 1;
+# pytest-rerunfailures / --reruns=3 is dropped so automatic retries cannot hide a first-run failure.
+python -m pip install pytest-timeout pytest-xdist
+# Put . and tests first on PYTHONPATH (matching `pythonpath = tests` in pytest.ini); an inherited PYTHONPATH is appended, not dropped.
+TEST_PYTHONPATH=".:tests${PYTHONPATH:+:${PYTHONPATH}}"
+PYTHONPATH="${TEST_PYTHONPATH}" python -m pytest -v -s -p no:warnings -n 1 "${IGNORE_ARGS[@]}" ./tests
+first_run_error=$?
+
+# The first run decides the outcome; after a failure one --lf rerun is kept for diagnosis only (to see whether
+# the failure depends on ordering or environment state), and `|| true` keeps it from changing the final exit code.
+if [ "${first_run_error}" -ne 0 ]; then
+    echo "[unittest-gpu] Diagnostic rerun of failed tests. This does not change the final result."
+    PYTHONPATH="${TEST_PYTHONPATH}" python -m pytest -v -s -p no:warnings -n 1 "${IGNORE_ARGS[@]}" --lf ./tests || true
+fi
+
+# Run the isolated files one by one so every stateful file gets a fresh pytest process.
+# Even if a file changes the default device / dtype, the side effect cannot leak into other test files.
+isolated_error=0
+for test_file in "${ISOLATED_TEST_FILES[@]}"; do
+    echo "[unittest-gpu] Running isolated test file: ${test_file}"
+    PYTHONPATH="${TEST_PYTHONPATH}" python -m pytest -v -s -p no:warnings -n 1 "${test_file}"
+    file_error=$?
+
+    if [ "${file_error}" -ne 0 ]; then
+        echo "[unittest-gpu] Diagnostic rerun for isolated file: ${test_file}. This does not change the final result."
+        PYTHONPATH="${TEST_PYTHONPATH}" python -m pytest -v -s -p no:warnings -n 1 "${test_file}" || true
+        isolated_error=1
+    fi
+done
+
+# The final exit code still reflects only the first-run results of the main suite and the isolated files.
+# A failure on either side must fail the job so CI exposes instability instead of hiding it.
+check_error=0
+if [ "${first_run_error}" -ne 0 ] || [ "${isolated_error}" -ne 0 ]; then
+    check_error=1
 fi
 
 echo '************************************************************************************************************'