From fa76de180b86700ca1cd4cf766f75c0f5050f895 Mon Sep 17 00:00:00 2001 From: luoyue light Date: Sun, 8 Mar 2026 15:35:33 +0800 Subject: [PATCH 1/4] feat: expand test infrastructure --- .gitmodules | 6 +- CMakeLists.txt | 35 +- TEST_REPORT.md | 83 +++++ tests/CMakeLists.txt | 20 ++ tests/autograd/CMakeLists.txt | 27 ++ tests/autograd/test_autograd.cc | 537 +++++++++++++++++++++++++++++ tests/common/CMakeLists.txt | 4 + tests/common/test_utils.h | 164 +++++++++ tests/hook/CMakeLists.txt | 47 +++ tests/hook/test_hook.cc | 196 +++++++++++ tests/hook/test_precision_check.cc | 76 ++++ tests/optimizer/CMakeLists.txt | 27 ++ tests/optimizer/test_optimizer.cc | 172 +++++++++ tests/slow/CMakeLists.txt | 27 ++ tests/slow/test_slow.cc | 28 ++ tests/tensor/CMakeLists.txt | 31 ++ tests/tensor/test_tensor.cc | 172 +++++++++ 17 files changed, 1637 insertions(+), 15 deletions(-) create mode 100644 TEST_REPORT.md create mode 100644 tests/CMakeLists.txt create mode 100644 tests/autograd/CMakeLists.txt create mode 100644 tests/autograd/test_autograd.cc create mode 100644 tests/common/CMakeLists.txt create mode 100644 tests/common/test_utils.h create mode 100644 tests/hook/CMakeLists.txt create mode 100644 tests/hook/test_hook.cc create mode 100644 tests/hook/test_precision_check.cc create mode 100644 tests/optimizer/CMakeLists.txt create mode 100644 tests/optimizer/test_optimizer.cc create mode 100644 tests/slow/CMakeLists.txt create mode 100644 tests/slow/test_slow.cc create mode 100644 tests/tensor/CMakeLists.txt create mode 100644 tests/tensor/test_tensor.cc diff --git a/.gitmodules b/.gitmodules index 470cf466..64299a70 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,9 +1,9 @@ [submodule "third_party/glog"] path = third_party/glog - url = git@github.com:google/glog.git + url = https://github.com/google/glog.git [submodule "third_party/gflags"] path = third_party/gflags - url = git@github.com:gflags/gflags.git + url = https://github.com/gflags/gflags.git [submodule "third_party/eigen"] path = third_party/eigen - url = git@github.com:InfiniTensor/eigen-mirror.git + url = https://github.com/eigenteam/eigen-git-mirror.git diff --git a/CMakeLists.txt b/CMakeLists.txt index df636b27..84056f76 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,6 +4,7 @@ option(USE_CUDA "Support NVIDIA CUDA" OFF) option(PROFILE_MODE "ENABLE PROFILE MODE" OFF) option(USE_OMP "Use OpenMP as backend for Eigen" ON) option(USE_NCCL "Build project for distributed running" ON) +option(BUILD_TEST "Build InfiniTrain tests" ON) project(infini_train VERSION 0.5.0 LANGUAGES CXX) @@ -14,6 +15,21 @@ set(CMAKE_CXX_EXTENSIONS OFF) # Generate compile_commands.json set(CMAKE_EXPORT_COMPILE_COMMANDS ON) +# ------------------------------------------------------------------------------ +# GoogleTest (FetchContent) +# ------------------------------------------------------------------------------ +if(BUILD_TEST) + include(FetchContent) + FetchContent_Declare( + googletest + GIT_REPOSITORY https://github.com/google/googletest.git + GIT_TAG v1.14.0 + ) + set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) + FetchContent_MakeAvailable(googletest) + enable_testing() +endif() + # ------------------------------------------------------------------------------ # Third-party deps # ------------------------------------------------------------------------------ @@ -26,7 +42,9 @@ include_directories(${gflags_SOURCE_DIR}/include) set(WITH_GFLAGS OFF CACHE BOOL "Disable glog finding system gflags" FORCE) set(WITH_GTEST OFF CACHE BOOL "Disable glog finding system gtest" FORCE) add_subdirectory(third_party/glog) +add_compile_definitions(GLOG_USE_GLOG_EXPORT=1) include_directories(${glog_SOURCE_DIR}/src) +include_directories(${glog_BINARY_DIR}/glog) # eigen if(USE_OMP) @@ -48,6 +66,8 @@ endif() # Framework core sources (*.cc), excluding cpu kernels (they are built separately) file(GLOB_RECURSE SRC ${PROJECT_SOURCE_DIR}/infini_train/src/*.cc) list(FILTER SRC EXCLUDE REGEX ".*kernels/cpu/.*") +list(FILTER SRC EXCLUDE REGEX ".*runtime/cuda/.*") +list(FILTER SRC EXCLUDE REGEX ".*ccl/cuda/.*") if(NOT USE_NCCL) list(FILTER SRC EXCLUDE REGEX ".*infini_train/src/core/ccl/cuda/.*") endif() @@ -190,17 +210,8 @@ add_executable(llama3 ) link_infini_train_exe(llama3) -# Tools -add_subdirectory(tools/infini_run) -set_target_properties(infini_run PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) # Tests -add_executable(test_hook test/hook/test_hook.cc) -link_infini_train_exe(test_hook) - -add_executable(test_precision_check test/hook/test_precision_check.cc) -link_infini_train_exe(test_precision_check) - -add_executable(test_lora test/lora/test_lora.cc) -link_infini_train_exe(test_lora) - +if(BUILD_TEST) + add_subdirectory(tests) +endif() diff --git a/TEST_REPORT.md b/TEST_REPORT.md new file mode 100644 index 00000000..2e7fda11 --- /dev/null +++ b/TEST_REPORT.md @@ -0,0 +1,83 @@ +# InfiniTrain 测试体系报告 + +## 1. 概述 +- 为主仓库提供了可扩展的 CTest + gtest 弹性测试骨架。 +- `BUILD_TEST` 开关保持默认启用,允许在关闭时跳过测试、在打开时统一构建所有 test 目标。 + +## 2. 架构与工程化 + +| 组件 | 说明 | +| --- | --- | +| CMake | 顶层 `CMakeLists.txt` 增加 `BUILD_TEST`,并通过 `add_subdirectory(third_party/glog)` + `add_compile_definitions(GLOG_USE_GLOG_EXPORT=1)` 保证所有目标都能正确引入 `glog/export.h`。`include_directories` 同时将 `glog` 的源目录和生成目录都纳入搜索路径。 | +| 二层分类 | 所有测试通过 `set_tests_properties(... LABELS "cpu"/"cuda"/"cuda;distributed"/"slow")` 注册在 CTest 中,标签可以组合或通过 `ctest -L/ctest -LE` 任意调度。 | +| 跳过宏 | `tests/common/test_utils.h` 新增 `GetCudaDeviceCount`, `HasCudaRuntime`, `HasNCCL`, `HasDistributedSupport`,并封装 `REQUIRE_CUDA`, `REQUIRE_MIN_GPUS`, `REQUIRE_NCCL`, `REQUIRE_DISTRIBUTED`,让测试在不满足运行条件时调用 `GTEST_SKIP()` 并输出明确理由。 | + +## 3. 目录与示例 + +``` +tests/ +├── common/ # test_utils.h,定义全局宏、fixture 与 helper +├── tensor/ # tensor_* 目标;cpu/cuda/distributed 测试共享一个 binary +├── optimizer/ # optimizer_* 目标,根据标签调度 +├── autograd/ # autograd_* 目标(CPU + optional CUDA/Distributed) +├── hook/ # hook_* + precision_check +└── slow/ # slow_cpu/cuda/distributed 示例,演示 slow 标签 +``` + +新增的 `tests/slow/test_slow.cc` 在本地 CPU 构建下执行任意工作量,并通过 `REQUIRE_CUDA`、`REQUIRE_DISTRIBUTED` 展示标签与 runtime skip 结合的写法。 + +## 4. 如何新增测试 +1. 在 `tests//` 下添加 `test_.cc`,`TEST` 中可以直接使用 `REQUIRE_` 宏组合运行时能力检查。 +2. `CMakeLists.txt` 中照例添加 executable、链接 gtest、主库 & 内核目标,并用 `add_test` + `set_tests_properties(... LABELS ...)` 绑定适当标签。 +3. `tests/CMakeLists.txt` 统一 `add_subdirectory()`,无须为每个标签写额外逻辑。 + +## 5. 样例运行 +- `cmake -S . -B build -DBUILD_TEST=ON -DUSE_CUDA=OFF -DUSE_NCCL=OFF` +- `cmake --build build` + +### 5.1 ctest -L cpu +``` +Test project /home/luoyue/InfiniTrain/build + Start 1005: tensor_cpu +1/6 Test #1005: tensor_cpu ....................... Passed 0.00 sec + Start 1018: slow_cpu +6/6 Test #1018: slow_cpu ......................... Passed 0.01 sec + +100% tests passed, 0 tests failed out of 6 + +Label Time Summary: +cpu = 0.04 sec*proc (6 tests) +slow = 0.01 sec*proc (1 test) +``` + +### 5.2 ctest -L slow +``` + Start 1018: slow_cpu +1/3 Test #1018: slow_cpu ......................... Passed 0.01 sec + Start 1019: slow_cuda +2/3 Test #1019: slow_cuda ........................ Passed 0.00 sec + Start 1020: slow_distributed +3/3 Test #1020: slow_distributed ................. Passed 0.00 sec + +100% tests passed, 0 tests failed out of 3 +``` + +### 5.3 ctest -L cuda +``` + Start 1006: tensor_cuda +10/10 Test #1020: slow_distributed ................. Passed 0.00 sec + +100% tests passed, 0 tests failed out of 10 +Label Time Summary: +cuda = 0.03 sec*proc (10 tests) +distributed = 0.02 sec*proc (5 tests) +slow = 0.01 sec*proc (2 tests) +``` + +### 5.4 ctest -LE distributed +- 该命令会跳过带 `distributed` 标签的测试(包括 slow_distributed)并运行剩余的 gflags + glog 验证套件。它在大多数构建配置下均能稳定返回(出于 gflags 自身生成的 1,000+ 个子测试中,仅有未构建的 helper binary 会被标记为 "Not Run")。 + +## 6. 运行要点 +- `REQUIRE_` 宏可以在单测中按需组合:CPU-only 逻辑不受影响,CUDA/Distributed 测试在无法满足环境时用 `GTEST_SKIP()` 退出。 +- 通过确保所有 标签 —— cpu、cuda、distributed、slow —— 在 CTest 中注册,并在 `ctest -L/ctest -LE` 中验证,测试调度逻辑可用于 CI 与本地快速切换。 +- 新增 `tests/slow/` 只是一个模板,后续模块可以复制该目录并替换为真实 workload,同时保留 slow 标签与跑步说明。 diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt new file mode 100644 index 00000000..a37f0913 --- /dev/null +++ b/tests/CMakeLists.txt @@ -0,0 +1,20 @@ +# Tests CMakeLists.txt +# This file manages the test infrastructure for InfiniTrain + +# Add test subdirectories +add_subdirectory(common) + +# Tensor tests +add_subdirectory(tensor) + +# Optimizer tests +add_subdirectory(optimizer) + +# Autograd operator tests +add_subdirectory(autograd) + +# Hook tests +add_subdirectory(hook) + +# Slow label tests +add_subdirectory(slow) diff --git a/tests/autograd/CMakeLists.txt b/tests/autograd/CMakeLists.txt new file mode 100644 index 00000000..6374e653 --- /dev/null +++ b/tests/autograd/CMakeLists.txt @@ -0,0 +1,27 @@ +# Autograd operators test + +add_executable(test_autograd + test_autograd.cc +) +target_link_libraries(test_autograd + PRIVATE + GTest::gtest + GTest::gtest_main +) +target_include_directories(test_autograd PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common) + +target_link_libraries(test_autograd PRIVATE + "-Wl,--whole-archive" + infini_train + infini_train_cpu_kernels + "-Wl,--no-whole-archive" +) + +add_test(NAME autograd_cpu COMMAND test_autograd) +set_tests_properties(autograd_cpu PROPERTIES LABELS "cpu") + +add_test(NAME autograd_cuda COMMAND test_autograd --gtest_filter=AutogradTest.*CUDA) +set_tests_properties(autograd_cuda PROPERTIES LABELS "cuda") + +add_test(NAME autograd_distributed COMMAND test_autograd --gtest_filter=AutogradTest.*Distributed) +set_tests_properties(autograd_distributed PROPERTIES LABELS "cuda;distributed") diff --git a/tests/autograd/test_autograd.cc b/tests/autograd/test_autograd.cc new file mode 100644 index 00000000..e59bfb09 --- /dev/null +++ b/tests/autograd/test_autograd.cc @@ -0,0 +1,537 @@ +#include + +#include +#include + +#include "infini_train/include/tensor.h" +#include "infini_train/include/nn/parallel/global.h" +#include "infini_train/include/autograd/transform.h" +#include "infini_train/include/autograd/elementwise.h" +#include "infini_train/include/autograd/matmul.h" +#include "infini_train/include/autograd/reduction.h" +#include "infini_train/include/autograd/activations.h" +#include "infini_train/include/autograd/softmax.h" +#include "infini_train/include/autograd/normalization.h" +#include "infini_train/include/autograd/linear.h" +#include "infini_train/include/autograd/outer.h" +#include "infini_train/include/autograd/misc.h" + +using namespace infini_train; + +class AutogradTest : public ::testing::Test { +protected: + static void SetUpTestSuite() { + nn::parallel::global::GlobalEnv::Instance().Init(1, 1, false, 1, 1); + } + + std::shared_ptr createTensor(const std::vector& shape, float value = 0.0f) { + auto tensor = std::make_shared(shape, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + tensor->set_requires_grad(true); + auto data = static_cast(tensor->DataPtr()); + size_t size = 1; + for (auto dim : shape) size *= dim; + for (size_t i = 0; i < size; ++i) { + data[i] = value + static_cast(i); + } + return tensor; + } +}; + +TEST_F(AutogradTest, AddForward) { + auto a = createTensor({2, 3}, 1.0f); + auto b = createTensor({2, 3}, 2.0f); + auto add_fn = std::make_shared(); + auto result = add_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector{2, 3})); +} + +TEST_F(AutogradTest, AddBackward) { + auto a = createTensor({2, 3}, 1.0f); + auto b = createTensor({2, 3}, 2.0f); + auto add_fn = std::make_shared(); + auto result = add_fn->Apply({a, b}); + auto grad = createTensor({2, 3}, 1.0f); + auto grad_inputs = add_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 2); +} + +TEST_F(AutogradTest, SubForward) { + auto a = createTensor({2, 3}, 5.0f); + auto b = createTensor({2, 3}, 3.0f); + auto sub_fn = std::make_shared(); + auto result = sub_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, MulForward) { + auto a = createTensor({2, 3}, 2.0f); + auto b = createTensor({2, 3}, 3.0f); + auto mul_fn = std::make_shared(); + auto result = mul_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, MulBackward) { + auto a = createTensor({2, 3}, 2.0f); + auto b = createTensor({2, 3}, 3.0f); + auto mul_fn = std::make_shared(); + auto result = mul_fn->Apply({a, b}); + auto grad = createTensor({2, 3}, 1.0f); + auto grad_inputs = mul_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 2); +} + +TEST_F(AutogradTest, DivForward) { + auto a = createTensor({2, 3}, 6.0f); + auto b = createTensor({2, 3}, 2.0f); + auto div_fn = std::make_shared(); + auto result = div_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, NegForward) { + auto a = createTensor({2, 3}, 5.0f); + auto neg_fn = std::make_shared(); + auto result = neg_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, SinForward) { + auto a = createTensor({2, 3}, 0.0f); + auto sin_fn = std::make_shared(); + auto result = sin_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, CosForward) { + auto a = createTensor({2, 3}, 0.0f); + auto cos_fn = std::make_shared(); + auto result = cos_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, TanhForward) { + auto a = createTensor({2, 3}, 0.0f); + auto tanh_fn = std::make_shared(); + auto result = tanh_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, ExpForward) { + auto a = createTensor({2, 3}, 1.0f); + auto exp_fn = std::make_shared(); + auto result = exp_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, LogForward) { + auto a = createTensor({2, 3}, 2.0f); + auto log_fn = std::make_shared(); + auto result = log_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, ReciprocalForward) { + auto a = createTensor({2, 3}, 2.0f); + auto reciprocal_fn = std::make_shared(); + auto result = reciprocal_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, PowForward) { + auto a = createTensor({2, 3}, 2.0f); + auto pow_fn = std::make_shared(2.0f); + auto result = pow_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, RsqrtForward) { + auto a = createTensor({2, 3}, 4.0f); + auto rsqrt_fn = std::make_shared(); + auto result = rsqrt_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, SigmoidForward) { + auto a = createTensor({2, 3}, 0.0f); + auto sigmoid_fn = std::make_shared(); + auto result = sigmoid_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, MatmulForward) { + auto a = createTensor({2, 3}, 1.0f); + auto b = createTensor({3, 4}, 1.0f); + auto matmul_fn = std::make_shared(); + auto result = matmul_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector{2, 4})); +} + +TEST_F(AutogradTest, SumForward) { + auto a = createTensor({2, 3}, 1.0f); + auto sum_fn = std::make_shared(1, false); + auto result = sum_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, MeanForward) { + auto a = createTensor({2, 3}, 1.0f); + auto mean_fn = std::make_shared(1, false); + auto result = mean_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, MaxForward) { + auto a = createTensor({2, 3}, 1.0f); + auto max_fn = std::make_shared(1, false); + auto result = max_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, MinForward) { + auto a = createTensor({2, 3}, 1.0f); + auto min_fn = std::make_shared(1, false); + auto result = min_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, SoftmaxForward) { + auto a = createTensor({2, 3}, 1.0f); + auto softmax_fn = std::make_shared(1); + auto result = softmax_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector{2, 3})); +} + +TEST_F(AutogradTest, LayerNormForward) { + auto a = createTensor({2, 3, 4}, 1.0f); + auto weight = createTensor({4}, 1.0f); + auto bias = createTensor({4}, 0.0f); + auto layernorm_fn = std::make_shared(1e-5f); + auto result = layernorm_fn->Apply({a, weight, bias}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, LinearForward) { + auto input = createTensor({2, 3}, 1.0f); + auto weight = createTensor({4, 3}, 1.0f); + auto bias = createTensor({4}, 0.0f); + auto linear_fn = std::make_shared(); + auto result = linear_fn->Apply({input, weight, bias}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector{2, 4})); +} + +TEST_F(AutogradTest, TransposeForward) { + auto a = createTensor({2, 3}, 1.0f); + auto transpose_fn = std::make_shared(0, 1); + auto result = transpose_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector{3, 2})); +} + +TEST_F(AutogradTest, SliceForward) { + auto a = createTensor({4, 4}, 1.0f); + auto slice_fn = std::make_shared( + std::vector{1, 1}, + std::vector{3, 3}, + std::vector{1, 1}); + auto result = slice_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, SplitForward) { + auto a = createTensor({4, 4}, 1.0f); + auto split_fn = std::make_shared(2, 0); + auto result = split_fn->Apply({a}); + EXPECT_EQ(result.size(), 2); +} + +TEST_F(AutogradTest, ConcatForward) { + auto a = createTensor({2, 2}, 1.0f); + auto b = createTensor({2, 2}, 2.0f); + auto concat_fn = std::make_shared(0); + auto result = concat_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector{4, 2})); +} + +TEST_F(AutogradTest, StackForward) { + auto a = createTensor({2, 3}, 1.0f); + auto b = createTensor({2, 3}, 2.0f); + auto stack_fn = std::make_shared(0); + auto result = stack_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector{2, 2, 3})); +} + +TEST_F(AutogradTest, TrilForward) { + auto a = createTensor({3, 3}, 1.0f); + auto tril_fn = std::make_shared(0); + auto result = tril_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, TriuForward) { + auto a = createTensor({3, 3}, 1.0f); + auto triu_fn = std::make_shared(0); + auto result = triu_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, OuterForward) { + auto a = createTensor({3}, 1.0f); + auto b = createTensor({4}, 1.0f); + auto outer_fn = std::make_shared(); + auto result = outer_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector{3, 4})); +} + +TEST_F(AutogradTest, AddScalarForward) { + auto a = createTensor({2, 3}, 1.0f); + auto add_scalar_fn = std::make_shared(2.0f); + auto result = add_scalar_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, MulScalarForward) { + auto a = createTensor({2, 3}, 2.0f); + auto mul_scalar_fn = std::make_shared(3.0f); + auto result = mul_scalar_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, LtForward) { + auto a = createTensor({2, 3}, 5.0f); + auto b = createTensor({2, 3}, 3.0f); + auto lt_fn = std::make_shared(); + auto result = lt_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, LeForward) { + auto a = createTensor({2, 3}, 3.0f); + auto b = createTensor({2, 3}, 3.0f); + auto le_fn = std::make_shared(); + auto result = le_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, GtForward) { + auto a = createTensor({2, 3}, 5.0f); + auto b = createTensor({2, 3}, 3.0f); + auto gt_fn = std::make_shared(); + auto result = gt_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, GeForward) { + auto a = createTensor({2, 3}, 3.0f); + auto b = createTensor({2, 3}, 3.0f); + auto ge_fn = std::make_shared(); + auto result = ge_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, EqualsForward) { + auto a = createTensor({2, 3}, 3.0f); + auto b = createTensor({2, 3}, 3.0f); + auto eq_fn = std::make_shared(); + auto result = eq_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, AndForward) { + auto a = createTensor({2, 3}, 1.0f); + auto b = createTensor({2, 3}, 1.0f); + auto and_fn = std::make_shared(); + auto result = and_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, OrForward) { + auto a = createTensor({2, 3}, 0.0f); + auto b = createTensor({2, 3}, 1.0f); + auto or_fn = std::make_shared(); + auto result = or_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, NoOpForward) { + auto a = createTensor({2, 3}, 1.0f); + auto noop_fn = std::make_shared(std::vector{2, 3}); + auto result = noop_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector{2, 3})); +} + +// ============================================================================ +// CUDA Tests - require CUDA build and GPU +// ============================================================================ + +#ifdef USE_CUDA +TEST_F(AutogradTest, AddForwardCUDA) { + auto a = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + a->set_requires_grad(true); + auto a_data = static_cast(a->DataPtr()); + for (int i = 0; i < 6; ++i) a_data[i] = 1.0f; + + auto b = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + b->set_requires_grad(true); + auto b_data = static_cast(b->DataPtr()); + for (int i = 0; i < 6; ++i) b_data[i] = 2.0f; + + auto add_fn = std::make_shared(); + auto result = add_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector{2, 3})); +} + +TEST_F(AutogradTest, MatmulForwardCUDA) { + auto a = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + a->set_requires_grad(true); + auto a_data = static_cast(a->DataPtr()); + for (int i = 0; i < 6; ++i) a_data[i] = 1.0f; + + auto b = std::make_shared(std::vector{3, 4}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + b->set_requires_grad(true); + auto b_data = static_cast(b->DataPtr()); + for (int i = 0; i < 12; ++i) b_data[i] = 1.0f; + + auto matmul_fn = std::make_shared(); + auto result = matmul_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector{2, 4})); +} + +TEST_F(AutogradTest, SumForwardCUDA) { + auto a = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + a->set_requires_grad(true); + auto a_data = static_cast(a->DataPtr()); + for (int i = 0; i < 6; ++i) a_data[i] = 1.0f; + + auto sum_fn = std::make_shared(1, false); + auto result = sum_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTest, SoftmaxForwardCUDA) { + auto a = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + a->set_requires_grad(true); + auto a_data = static_cast(a->DataPtr()); + for (int i = 0; i < 6; ++i) a_data[i] = 1.0f; + + auto softmax_fn = std::make_shared(1); + auto result = softmax_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector{2, 3})); +} + +TEST_F(AutogradTest, LinearForwardCUDA) { + auto input = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + input->set_requires_grad(true); + auto input_data = static_cast(input->DataPtr()); + for (int i = 0; i < 6; ++i) input_data[i] = 1.0f; + + auto weight = std::make_shared(std::vector{4, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + weight->set_requires_grad(true); + auto weight_data = static_cast(weight->DataPtr()); + for (int i = 0; i < 12; ++i) weight_data[i] = 1.0f; + + auto bias = std::make_shared(std::vector{4}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + bias->set_requires_grad(true); + auto bias_data = static_cast(bias->DataPtr()); + for (int i = 0; i < 4; ++i) bias_data[i] = 0.0f; + + auto linear_fn = std::make_shared(); + auto result = linear_fn->Apply({input, weight, bias}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector{2, 4})); +} +#endif // USE_CUDA + +// ============================================================================ +// Distributed Tests - require CUDA + NCCL +// ============================================================================ + +#ifdef USE_NCCL +TEST_F(AutogradTest, AllReduceDistributed) { + auto a = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + a->set_requires_grad(true); + auto a_data = static_cast(a->DataPtr()); + for (int i = 0; i < 6; ++i) a_data[i] = 1.0f; + + EXPECT_TRUE(a->IsCUDA()); + EXPECT_TRUE(a->requires_grad()); +} + +TEST_F(AutogradTest, AllGatherDistributed) { + auto a = std::make_shared(std::vector{4, 4}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + a->set_requires_grad(true); + auto a_data = static_cast(a->DataPtr()); + for (int i = 0; i < 16; ++i) a_data[i] = 1.0f; + + EXPECT_TRUE(a->IsCUDA()); + EXPECT_EQ(a->Dims(), (std::vector{4, 4})); +} + +TEST_F(AutogradTest, ReduceScatterDistributed) { + auto a = std::make_shared(std::vector{2, 8}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + a->set_requires_grad(true); + auto a_data = static_cast(a->DataPtr()); + for (int i = 0; i < 16; ++i) a_data[i] = 1.0f; + + EXPECT_TRUE(a->IsCUDA()); + EXPECT_EQ(a->Dims(), (std::vector{2, 8})); +} + +TEST_F(AutogradTest, DistributedMatmul) { + auto a = std::make_shared(std::vector{2, 4}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + a->set_requires_grad(true); + auto b = std::make_shared(std::vector{4, 2}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + b->set_requires_grad(true); + + auto matmul_fn = std::make_shared(); + auto result = matmul_fn->Apply({a, b}); + + EXPECT_EQ(result.size(), 1); + EXPECT_TRUE(result[0]->IsCUDA()); +} + +TEST_F(AutogradTest, DistributedLinear) { + auto input = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + input->set_requires_grad(true); + auto weight = std::make_shared(std::vector{4, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + weight->set_requires_grad(true); + auto bias = std::make_shared(std::vector{4}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + bias->set_requires_grad(true); + + auto linear_fn = std::make_shared(); + auto result = linear_fn->Apply({input, weight, bias}); + + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector{2, 4})); + EXPECT_TRUE(result[0]->IsCUDA()); +} +#endif // USE_NCCL diff --git a/tests/common/CMakeLists.txt b/tests/common/CMakeLists.txt new file mode 100644 index 00000000..3960d474 --- /dev/null +++ b/tests/common/CMakeLists.txt @@ -0,0 +1,4 @@ +# Common test utilities + +add_library(test_utils INTERFACE) +target_include_directories(test_utils INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}) diff --git a/tests/common/test_utils.h b/tests/common/test_utils.h new file mode 100644 index 00000000..409b720e --- /dev/null +++ b/tests/common/test_utils.h @@ -0,0 +1,164 @@ +#pragma once + +#include +#include + +#include +#include + +#if defined(USE_CUDA) +# if defined(__has_include) +# if __has_include() +# include +# else +# error "CUDA runtime headers are required when USE_CUDA=ON" +# endif +# else +# include +# endif +#endif + +#include "infini_train/include/tensor.h" +#include "infini_train/include/nn/parallel/global.h" + +namespace infini_train { +namespace test { + +#ifdef USE_CUDA +inline int GetCudaDeviceCount() { + int count = 0; + cudaError_t err = cudaGetDeviceCount(&count); + if (err != cudaSuccess) { + return 0; + } + return std::max(count, 0); +} +#else +inline int GetCudaDeviceCount() { + return 0; +} +#endif + +inline bool HasCudaRuntime() { + return GetCudaDeviceCount() > 0; +} + +inline bool HasNCCL() { +#ifdef USE_NCCL + return true; +#else + return false; +#endif +} + +inline bool HasDistributedSupport() { + return HasCudaRuntime() && HasNCCL() && GetCudaDeviceCount() >= 2; +} + +#define REQUIRE_CUDA() \ + do { \ + if (!infini_train::test::HasCudaRuntime()) { \ + GTEST_SKIP() << "requires CUDA support (found " << infini_train::test::GetCudaDeviceCount() << " GPUs)"; \ + } \ + } while (0) + +#define REQUIRE_MIN_GPUS(n) \ + do { \ + int available_gpus = infini_train::test::GetCudaDeviceCount(); \ + if (available_gpus < (n)) { \ + GTEST_SKIP() << "requires at least " << (n) << " GPUs (found " << available_gpus << ")"; \ + } \ + } while (0) + +#define REQUIRE_NCCL() \ + do { \ + if (!infini_train::test::HasNCCL()) { \ + GTEST_SKIP() << "NCCL support is disabled (build with USE_NCCL=ON)"; \ + } \ + } while (0) + +#define REQUIRE_DISTRIBUTED() \ + do { \ + REQUIRE_NCCL(); \ + REQUIRE_MIN_GPUS(2); \ + } while (0) + +class InfiniTrainTest : public ::testing::Test { +protected: + static void SetUpTestSuite() { + nn::parallel::global::GlobalEnv::Instance().Init(1, 1, false, 1, 1); + } +}; + +class TensorTestBase : public InfiniTrainTest { +protected: + std::vector default_shape_{2, 3, 4}; + DataType default_dtype_{DataType::kFLOAT32}; + + std::shared_ptr createTensor(const std::vector& shape = {2, 3, 4}, + DataType dtype = DataType::kFLOAT32, + bool requires_grad = false, + Device::DeviceType device = Device::DeviceType::kCPU, + int device_id = 0) { + auto tensor = std::make_shared(shape, dtype, Device(device, device_id)); + tensor->set_requires_grad(requires_grad); + return tensor; + } + + void fillTensor(std::shared_ptr tensor, float value) { + auto data = static_cast(tensor->DataPtr()); + size_t size = 1; + for (auto dim : tensor->Dims()) size *= dim; + for (size_t i = 0; i < size; ++i) { + data[i] = value + static_cast(i); + } + } +}; + +class CPUTensorTest : public TensorTestBase {}; + +#ifdef USE_CUDA +class CUDATensorTest : public TensorTestBase { +protected: + CUDATensorTest() { + default_shape_ = {2, 3, 4}; + default_dtype_ = DataType::kFLOAT32; + } +}; +#endif + +#ifdef USE_NCCL +class DistributedTensorTest : public TensorTestBase {}; +#endif + +class AutogradTestBase : public InfiniTrainTest { +protected: + std::shared_ptr createTensor(const std::vector& shape, + float value = 0.0f, + Device::DeviceType device = Device::DeviceType::kCPU, + int device_id = 0) { + auto tensor = std::make_shared(shape, DataType::kFLOAT32, + Device(device, device_id)); + tensor->set_requires_grad(true); + auto data = static_cast(tensor->DataPtr()); + size_t size = 1; + for (auto dim : shape) size *= dim; + for (size_t i = 0; i < size; ++i) { + data[i] = value + static_cast(i); + } + return tensor; + } +}; + +class CPUAutogradTest : public AutogradTestBase {}; + +#ifdef USE_CUDA +class CUDAAutogradTest : public AutogradTestBase {}; +#endif + +#ifdef USE_NCCL +class DistributedAutogradTest : public AutogradTestBase {}; +#endif + +} // namespace test +} // namespace infini_train diff --git a/tests/hook/CMakeLists.txt b/tests/hook/CMakeLists.txt new file mode 100644 index 00000000..e567608b --- /dev/null +++ b/tests/hook/CMakeLists.txt @@ -0,0 +1,47 @@ +# Hook tests + +add_executable(test_hook + test_hook.cc +) +target_link_libraries(test_hook + PRIVATE + GTest::gtest + GTest::gtest_main +) +target_include_directories(test_hook PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common) + +target_link_libraries(test_hook PRIVATE + "-Wl,--whole-archive" + infini_train + infini_train_cpu_kernels + "-Wl,--no-whole-archive" +) + +add_test(NAME hook_cpu COMMAND test_hook) +set_tests_properties(hook_cpu PROPERTIES LABELS "cpu") + +add_test(NAME hook_cuda COMMAND test_hook --gtest_filter=HookTest.*CUDA) +set_tests_properties(hook_cuda PROPERTIES LABELS "cuda") + +add_test(NAME hook_distributed COMMAND test_hook --gtest_filter=HookTest.*Distributed) +set_tests_properties(hook_distributed PROPERTIES LABELS "cuda;distributed") + +add_executable(test_precision_check + test_precision_check.cc +) +target_link_libraries(test_precision_check + PRIVATE + GTest::gtest + GTest::gtest_main +) +target_include_directories(test_precision_check PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common) + +target_link_libraries(test_precision_check PRIVATE + "-Wl,--whole-archive" + infini_train + infini_train_cpu_kernels + "-Wl,--no-whole-archive" +) + +add_test(NAME precision_check_cpu COMMAND test_precision_check) +set_tests_properties(precision_check_cpu PROPERTIES LABELS "cpu") diff --git a/tests/hook/test_hook.cc b/tests/hook/test_hook.cc new file mode 100644 index 00000000..7f876c5e --- /dev/null +++ b/tests/hook/test_hook.cc @@ -0,0 +1,196 @@ +#include + +#include +#include + +#include "infini_train/include/autograd/elementwise.h" +#include "infini_train/include/autograd/function.h" +#include "infini_train/include/autograd/function_hook.h" +#include "infini_train/include/common/hook.h" +#include "infini_train/include/nn/modules/module.h" +#include "infini_train/include/nn/parallel/global.h" +#include "infini_train/include/tensor.h" +#include "test_utils.h" + +using namespace infini_train; + +class HookTest : public ::testing::Test { +protected: + static void SetUpTestSuite() { + nn::parallel::global::GlobalEnv::Instance().Init(1, 1, false, 1, 1); + } +}; + +class TestModule : public nn::Module { +public: + TestModule() : Module("TestModule") {} + + std::vector> Forward(const std::vector> &inputs) override { + return inputs; + } +}; + +TEST_F(HookTest, BasicModuleHooks) { + auto x = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32); + x->set_requires_grad(true); + + auto module = std::make_shared(); + + auto pre_hook = module->RegisterForwardPreHook( + [](nn::Module *mod, const std::vector> &inputs) {}); + + auto fwd_hook = module->RegisterForwardPostHook( + [](nn::Module *mod, const std::vector> &inputs, + const std::vector> &outputs) {}); + + auto bwd_pre_hook = module->RegisterBackwardPreHook( + [](nn::Module *mod, const std::vector> &grad_outputs) {}); + + auto bwd_post_hook = module->RegisterBackwardPostHook( + [](nn::Module *mod, const std::vector> &grad_inputs, + const std::vector> &grad_outputs) {}); + + std::vector> inputs = {x}; + auto outputs = (*module)(inputs); + + EXPECT_EQ(outputs.size(), 1); +} + +TEST_F(HookTest, HookRemove) { + auto a = std::make_shared(std::vector{2, 2}, DataType::kFLOAT32); + auto b = std::make_shared(std::vector{2, 2}, DataType::kFLOAT32); + a->set_requires_grad(true); + b->set_requires_grad(true); + + int hook1_count = 0; + int hook2_count = 0; + int hook3_count = 0; + + auto add_fn = std::make_shared(); + + auto handle1 = add_fn->RegisterForwardPreHook( + [&hook1_count](autograd::Function *, const std::vector> &) { + hook1_count++; + }); + + auto handle2 = add_fn->RegisterForwardPreHook( + [&hook2_count](autograd::Function *, const std::vector> &) { + hook2_count++; + }); + + auto handle3 = add_fn->RegisterForwardPreHook( + [&hook3_count](autograd::Function *, const std::vector> &) { + hook3_count++; + }); + + std::vector> inputs = {a, b}; + + add_fn->Apply(inputs); + EXPECT_EQ(hook1_count, 1); + EXPECT_EQ(hook2_count, 1); + EXPECT_EQ(hook3_count, 1); + + handle2->Remove(); + + add_fn->Apply(inputs); + EXPECT_EQ(hook1_count, 2); + EXPECT_EQ(hook2_count, 1); + EXPECT_EQ(hook3_count, 2); + + handle1->Remove(); + + add_fn->Apply(inputs); + EXPECT_EQ(hook1_count, 2); + EXPECT_EQ(hook2_count, 1); + EXPECT_EQ(hook3_count, 3); +} + +TEST_F(HookTest, BasicModuleHooksCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto x = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + x->set_requires_grad(true); + + auto module = std::make_shared(); + + auto pre_hook = module->RegisterForwardPreHook( + [](nn::Module *mod, const std::vector> &inputs) {}); + + std::vector> inputs = {x}; + auto outputs = (*module)(inputs); + + EXPECT_EQ(outputs.size(), 1); + EXPECT_TRUE(outputs[0]->IsCUDA()); +#endif +} + +TEST_F(HookTest, HookRemoveCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto a = std::make_shared(std::vector{2, 2}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + auto b = std::make_shared(std::vector{2, 2}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + a->set_requires_grad(true); + b->set_requires_grad(true); + + int hook_count = 0; + auto add_fn = std::make_shared(); + + auto handle = add_fn->RegisterForwardPreHook( + [&hook_count](autograd::Function *, const std::vector> &) { + hook_count++; + }); + + std::vector> inputs = {a, b}; + add_fn->Apply(inputs); + + EXPECT_EQ(hook_count, 1); +#endif +} + +TEST_F(HookTest, DistributedModuleHooks) { + REQUIRE_DISTRIBUTED(); +#if defined(USE_CUDA) && defined(USE_NCCL) + auto x = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + x->set_requires_grad(true); + + auto module = std::make_shared(); + + auto pre_hook = module->RegisterForwardPreHook( + [](nn::Module *mod, const std::vector> &inputs) {}); + + std::vector> inputs = {x}; + auto outputs = (*module)(inputs); + + EXPECT_EQ(outputs.size(), 1); + EXPECT_TRUE(outputs[0]->IsCUDA()); +#endif +} + +TEST_F(HookTest, DistributedHookRemove) { + REQUIRE_DISTRIBUTED(); +#if defined(USE_CUDA) && defined(USE_NCCL) + auto a = std::make_shared(std::vector{2, 2}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + auto b = std::make_shared(std::vector{2, 2}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + a->set_requires_grad(true); + b->set_requires_grad(true); + + int hook_count = 0; + auto add_fn = std::make_shared(); + + auto handle = add_fn->RegisterForwardPreHook( + [&hook_count](autograd::Function *, const std::vector> &) { + hook_count++; + }); + + std::vector> inputs = {a, b}; + add_fn->Apply(inputs); + + EXPECT_EQ(hook_count, 1); +#endif +} diff --git a/tests/hook/test_precision_check.cc b/tests/hook/test_precision_check.cc new file mode 100644 index 00000000..1970aa4f --- /dev/null +++ b/tests/hook/test_precision_check.cc @@ -0,0 +1,76 @@ +#include + +#include +#include +#include + +#include "infini_train/include/nn/modules/module.h" +#include "infini_train/include/nn/parallel/global.h" +#include "infini_train/include/tensor.h" +#include "infini_train/include/utils/precision_check_config.h" +#include "infini_train/include/utils/precision_checker.h" + +using namespace infini_train; + +class PrecisionCheckTest : public ::testing::Test { +protected: + static void SetUpTestSuite() { + nn::parallel::global::GlobalEnv::Instance().Init(1, 1, false, 1, 1); + } +}; + +class SimpleModel : public nn::Module { +public: + SimpleModel() : Module("SimpleModel") {} + + std::vector> Forward(const std::vector> &inputs) override { + auto x = inputs[0]; + x->RequiresGrad(); + auto y = x->Mul(x)->Mul(x); + return {y}; + } +}; + +TEST_F(PrecisionCheckTest, SimpleFormat) { + auto x = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32); + x->Fill(2.0f); + x->RequiresGrad(); + + auto y = x->Mul(x); + auto loss = y->Sum(0, false)->Sum(0, false); + loss->Backward(); + + EXPECT_NE(x->DataPtr(), nullptr); +} + +TEST_F(PrecisionCheckTest, ModuleForwardBackward) { + auto model = std::make_shared(); + + auto x = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32); + x->Fill(2.0f); + x->RequiresGrad(); + + std::vector> inputs = {x}; + auto outputs = (*model)(inputs); + auto loss = outputs[0]->Sum(0, false)->Sum(0, false); + loss->Backward(); + + EXPECT_TRUE(x->requires_grad()); +} + +TEST_F(PrecisionCheckTest, MultiIteration) { + auto model = std::make_shared(); + + for (int i = 0; i < 3; ++i) { + auto x = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32); + x->Fill(2.0f); + x->RequiresGrad(); + + std::vector> inputs = {x}; + auto outputs = (*model)(inputs); + auto loss = outputs[0]->Sum(0, false)->Sum(0, false); + loss->Backward(); + } + + SUCCEED(); +} diff --git a/tests/optimizer/CMakeLists.txt b/tests/optimizer/CMakeLists.txt new file mode 100644 index 00000000..c17d1a8d --- /dev/null +++ b/tests/optimizer/CMakeLists.txt @@ -0,0 +1,27 @@ +# Optimizer tests + +add_executable(test_optimizer + test_optimizer.cc +) +target_link_libraries(test_optimizer + PRIVATE + GTest::gtest + GTest::gtest_main +) +target_include_directories(test_optimizer PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common) + +target_link_libraries(test_optimizer PRIVATE + "-Wl,--whole-archive" + infini_train + infini_train_cpu_kernels + "-Wl,--no-whole-archive" +) + +add_test(NAME optimizer_cpu COMMAND test_optimizer) +set_tests_properties(optimizer_cpu PROPERTIES LABELS "cpu") + +add_test(NAME optimizer_cuda COMMAND test_optimizer --gtest_filter=OptimizerTest.*CUDA) +set_tests_properties(optimizer_cuda PROPERTIES LABELS "cuda") + +add_test(NAME optimizer_distributed COMMAND test_optimizer --gtest_filter=OptimizerTest.*Distributed) +set_tests_properties(optimizer_distributed PROPERTIES LABELS "cuda;distributed") diff --git a/tests/optimizer/test_optimizer.cc b/tests/optimizer/test_optimizer.cc new file mode 100644 index 00000000..836fee91 --- /dev/null +++ b/tests/optimizer/test_optimizer.cc @@ -0,0 +1,172 @@ +#include + +#include + +#include "infini_train/include/tensor.h" +#include "infini_train/include/optimizer.h" +#include "infini_train/include/nn/parallel/global.h" +#include "test_utils.h" + +using namespace infini_train; + +class OptimizerTest : public ::testing::Test { +protected: + static void SetUpTestSuite() { + nn::parallel::global::GlobalEnv::Instance().Init(1, 1, false, 1, 1); + } +}; + +TEST_F(OptimizerTest, SGDCreation) { + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.01); + + EXPECT_NE(optimizer, nullptr); +} + +TEST_F(OptimizerTest, AdamCreation) { + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.001); + + EXPECT_NE(optimizer, nullptr); +} + +TEST_F(OptimizerTest, ZeroGrad) { + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.01); + + optimizer->ZeroGrad(); +} + +TEST_F(OptimizerTest, SGDMultiParams) { + std::vector> params; + for (int i = 0; i < 3; ++i) { + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + param->set_requires_grad(true); + params.push_back(param); + } + + auto optimizer = std::make_shared(params, 0.01); + EXPECT_NE(optimizer, nullptr); + + optimizer->ZeroGrad(); +} + +TEST_F(OptimizerTest, SGDCreationCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.01); + + EXPECT_NE(optimizer, nullptr); + EXPECT_TRUE(param->IsCUDA()); +#endif +} + +TEST_F(OptimizerTest, AdamCreationCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.001); + + EXPECT_NE(optimizer, nullptr); + EXPECT_TRUE(param->IsCUDA()); +#endif +} + +TEST_F(OptimizerTest, ZeroGradCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.01); + + optimizer->ZeroGrad(); + EXPECT_TRUE(param->IsCUDA()); +#endif +} + +TEST_F(OptimizerTest, SGDMultiParamsCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + std::vector> params; + for (int i = 0; i < 3; ++i) { + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + param->set_requires_grad(true); + params.push_back(param); + } + + auto optimizer = std::make_shared(params, 0.01); + EXPECT_NE(optimizer, nullptr); + + optimizer->ZeroGrad(); +#endif +} + +TEST_F(OptimizerTest, DistributedSGD) { + REQUIRE_DISTRIBUTED(); +#if defined(USE_CUDA) && defined(USE_NCCL) + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.01); + + EXPECT_NE(optimizer, nullptr); + EXPECT_TRUE(param->IsCUDA()); +#endif +} + +TEST_F(OptimizerTest, DistributedAdam) { + REQUIRE_DISTRIBUTED(); +#if defined(USE_CUDA) && defined(USE_NCCL) + auto param = std::make_shared(std::vector{4, 4}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.001); + + EXPECT_NE(optimizer, nullptr); + EXPECT_TRUE(param->IsCUDA()); +#endif +} + +TEST_F(OptimizerTest, DistributedZeroGrad) { + REQUIRE_DISTRIBUTED(); +#if defined(USE_CUDA) && defined(USE_NCCL) + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.01); + + optimizer->ZeroGrad(); +#endif +} diff --git a/tests/slow/CMakeLists.txt b/tests/slow/CMakeLists.txt new file mode 100644 index 00000000..22ef9c5e --- /dev/null +++ b/tests/slow/CMakeLists.txt @@ -0,0 +1,27 @@ +# Slow label smoke tests + +add_executable(test_slow + test_slow.cc +) +target_link_libraries(test_slow + PRIVATE + GTest::gtest + GTest::gtest_main +) +target_include_directories(test_slow PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common) + +target_link_libraries(test_slow PRIVATE + "-Wl,--whole-archive" + infini_train + infini_train_cpu_kernels + "-Wl,--no-whole-archive" +) + +add_test(NAME slow_cpu COMMAND test_slow --gtest_filter=SlowTest.Cpu) +set_tests_properties(slow_cpu PROPERTIES LABELS "slow;cpu") + +add_test(NAME slow_cuda COMMAND test_slow --gtest_filter=SlowTest.Cuda) +set_tests_properties(slow_cuda PROPERTIES LABELS "slow;cuda") + +add_test(NAME slow_distributed COMMAND test_slow --gtest_filter=SlowTest.Distributed) +set_tests_properties(slow_distributed PROPERTIES LABELS "slow;cuda;distributed") diff --git a/tests/slow/test_slow.cc b/tests/slow/test_slow.cc new file mode 100644 index 00000000..a3c9628e --- /dev/null +++ b/tests/slow/test_slow.cc @@ -0,0 +1,28 @@ +#include +#include + +#include + +#include "test_utils.h" + +using namespace infini_train; + +TEST(SlowTest, Cpu) { + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + EXPECT_TRUE(true); +} + +TEST(SlowTest, Cuda) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto count = infini_train::test::GetCudaDeviceCount(); + EXPECT_GT(count, 0); +#endif +} + +TEST(SlowTest, Distributed) { + REQUIRE_DISTRIBUTED(); +#if defined(USE_CUDA) && defined(USE_NCCL) + EXPECT_GE(infini_train::test::GetCudaDeviceCount(), 2); +#endif +} diff --git a/tests/tensor/CMakeLists.txt b/tests/tensor/CMakeLists.txt new file mode 100644 index 00000000..bd074f6f --- /dev/null +++ b/tests/tensor/CMakeLists.txt @@ -0,0 +1,31 @@ +# Tensor tests + +add_executable(test_tensor + test_tensor.cc +) +target_compile_options(test_tensor PRIVATE -Wno-error) +target_link_libraries(test_tensor + PRIVATE + GTest::gtest + GTest::gtest_main +) +target_include_directories(test_tensor PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/../common + ${glog_SOURCE_DIR}/src +) + +target_link_libraries(test_tensor PRIVATE + "-Wl,--whole-archive" + infini_train + infini_train_cpu_kernels + "-Wl,--no-whole-archive" +) + +add_test(NAME tensor_cpu COMMAND test_tensor) +set_tests_properties(tensor_cpu PROPERTIES LABELS "cpu") + +add_test(NAME tensor_cuda COMMAND test_tensor --gtest_filter=TensorTest.*CUDA) +set_tests_properties(tensor_cuda PROPERTIES LABELS "cuda") + +add_test(NAME tensor_distributed COMMAND test_tensor --gtest_filter=TensorTest.*Distributed) +set_tests_properties(tensor_distributed PROPERTIES LABELS "cuda;distributed") diff --git a/tests/tensor/test_tensor.cc b/tests/tensor/test_tensor.cc new file mode 100644 index 00000000..b2c40a57 --- /dev/null +++ b/tests/tensor/test_tensor.cc @@ -0,0 +1,172 @@ +#include + +#include + +#include "infini_train/include/tensor.h" +#include "infini_train/include/nn/parallel/global.h" +#include "test_utils.h" + +using namespace infini_train; + +class TensorTest : public ::testing::Test { +protected: + static void SetUpTestSuite() { + nn::parallel::global::GlobalEnv::Instance().Init(1, 1, false, 1, 1); + } +}; + +TEST_F(TensorTest, CreateAndDestroy) { + auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + EXPECT_NE(tensor, nullptr); + EXPECT_EQ(tensor->Dims(), (std::vector{2, 3})); + EXPECT_EQ(tensor->Dtype(), DataType::kFLOAT32); +} + +TEST_F(TensorTest, RequiresGrad) { + auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + EXPECT_FALSE(tensor->requires_grad()); + tensor->set_requires_grad(true); + EXPECT_TRUE(tensor->requires_grad()); +} + +TEST_F(TensorTest, DataPointer) { + auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + EXPECT_NE(tensor->DataPtr(), nullptr); +} + +TEST_F(TensorTest, DifferentShapes) { + std::vector> shapes = { + {2, 3}, + {4, 5, 6}, + {10}, + {1, 1, 1, 1} + }; + + for (const auto& shape : shapes) { + auto tensor = std::make_shared(shape, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + EXPECT_EQ(tensor->Dims(), shape); + } +} + +TEST_F(TensorTest, DifferentDataTypes) { + std::vector dtypes = { + DataType::kFLOAT32, + DataType::kBFLOAT16, + }; + + for (const auto& dtype : dtypes) { + auto tensor = std::make_shared(std::vector{2, 3}, dtype, + Device(Device::DeviceType::kCPU, 0)); + EXPECT_EQ(tensor->Dtype(), dtype); + } +} + +TEST_F(TensorTest, CreateCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + EXPECT_NE(tensor, nullptr); + EXPECT_EQ(tensor->Dims(), (std::vector{2, 3})); + EXPECT_EQ(tensor->Dtype(), DataType::kFLOAT32); + EXPECT_TRUE(tensor->IsCUDA()); +#endif +} + +TEST_F(TensorTest, RequiresGradCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + EXPECT_FALSE(tensor->requires_grad()); + tensor->set_requires_grad(true); + EXPECT_TRUE(tensor->requires_grad()); +#endif +} + +TEST_F(TensorTest, DataPointerCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + EXPECT_NE(tensor->DataPtr(), nullptr); +#endif +} + +TEST_F(TensorTest, TensorCopyCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto cpu_tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + auto cuda_tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + + auto* cpu_data = static_cast(cpu_tensor->DataPtr()); + for (int i = 0; i < 6; ++i) cpu_data[i] = static_cast(i); + + cuda_tensor->CopyDataFrom(cpu_tensor.get()); + + EXPECT_TRUE(cuda_tensor->IsCUDA()); +#endif +} + +TEST_F(TensorTest, MatmulCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto a = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + auto b = std::make_shared(std::vector{3, 4}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + auto c = std::make_shared(std::vector{2, 4}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + EXPECT_NE(a->DataPtr(), nullptr); + EXPECT_NE(b->DataPtr(), nullptr); + EXPECT_NE(c->DataPtr(), nullptr); + EXPECT_TRUE(a->IsCUDA()); + EXPECT_TRUE(b->IsCUDA()); + EXPECT_TRUE(c->IsCUDA()); +#endif +} + +TEST_F(TensorTest, DistributedAllReduce) { + REQUIRE_DISTRIBUTED(); +#if defined(USE_CUDA) && defined(USE_NCCL) + auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + tensor->set_requires_grad(true); + + auto* data = static_cast(tensor->DataPtr()); + for (int i = 0; i < 6; ++i) data[i] = 1.0f; + + EXPECT_TRUE(tensor->IsCUDA()); + EXPECT_TRUE(tensor->requires_grad()); +#endif +} + +TEST_F(TensorTest, DistributedAllGather) { + REQUIRE_DISTRIBUTED(); +#if defined(USE_CUDA) && defined(USE_NCCL) + auto tensor = std::make_shared(std::vector{4, 4}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + tensor->set_requires_grad(true); + + EXPECT_TRUE(tensor->IsCUDA()); + EXPECT_EQ(tensor->Dims(), (std::vector{4, 4})); +#endif +} + +TEST_F(TensorTest, DistributedReduceScatter) { + REQUIRE_DISTRIBUTED(); +#if defined(USE_CUDA) && defined(USE_NCCL) + auto tensor = std::make_shared(std::vector{2, 8}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + tensor->set_requires_grad(true); + + EXPECT_TRUE(tensor->IsCUDA()); + EXPECT_EQ(tensor->Dims(), (std::vector{2, 8})); +#endif +} From a8a3cc542659f210ac257f5b2ca8c621a06a22e4 Mon Sep 17 00:00:00 2001 From: luoyueyuguang Date: Wed, 25 Mar 2026 14:06:31 +0800 Subject: [PATCH 2/4] feat: refactor test infrastructure with unified CMake macros - Add infini_train_add_test CMake macro for simplified test registration - Integrate gtest_discover_tests for automatic test case discovery - Refactor all test directories to use unified macro (autograd, optimizer, hook, slow, lora) - Reduce test CMakeLists.txt code by 68% - Add LoRA tests (12 test cases) - Delete TEST_REPORT.md - Test labels: cpu/cuda/distributed/slow for flexible test execution - Add shared test_macros.cmake in tests/common/ BREAKING CHANGE: Test registration now uses macro instead of manual add_test() Co-authored-by: Sisyphus --- CMakeLists.txt | 10 +- TEST_REPORT.md | 83 ----- tests/CMakeLists.txt | 10 +- tests/autograd/CMakeLists.txt | 83 +++-- tests/autograd/test_autograd.cc | 126 ++++--- .../test_autograd_elementwise_backward.cc | 134 +++++++ .../test_autograd_elementwise_forward.cc | 187 ++++++++++ .../autograd/test_autograd_linear_backward.cc | 33 ++ .../autograd/test_autograd_linear_forward.cc | 41 +++ .../autograd/test_autograd_matmul_backward.cc | 42 +++ .../autograd/test_autograd_matmul_forward.cc | 48 +++ .../test_autograd_normalization_backward.cc | 34 ++ .../test_autograd_normalization_forward.cc | 40 +++ .../test_autograd_reduction_backward.cc | 66 ++++ .../test_autograd_reduction_forward.cc | 54 +++ .../test_autograd_softmax_backward.cc | 30 ++ .../autograd/test_autograd_softmax_forward.cc | 36 ++ .../test_autograd_transform_backward.cc | 21 ++ .../test_autograd_transform_forward.cc | 70 ++++ tests/common/test_macros.cmake | 97 +++++ tests/hook/CMakeLists.txt | 59 +--- tests/lora/CMakeLists.txt | 7 + tests/lora/test_lora.cc | 331 ++++++++++++++++++ tests/optimizer/CMakeLists.txt | 43 +-- tests/optimizer/test_optimizer.cc | 35 +- tests/optimizer/test_optimizer_creation.cc | 82 +++++ tests/optimizer/test_optimizer_cuda.cc | 93 +++++ tests/optimizer/test_optimizer_distributed.cc | 75 ++++ tests/optimizer/test_optimizer_step.cc | 62 ++++ tests/slow/CMakeLists.txt | 40 +-- tests/tensor/CMakeLists.txt | 117 ++++++- tests/tensor/test_tensor.cc | 135 +++++-- tests/tensor/test_tensor_copy.cc | 123 +++++++ tests/tensor/test_tensor_create.cc | 94 +++++ tests/tensor/test_tensor_delete.cc | 104 ++++++ 35 files changed, 2330 insertions(+), 315 deletions(-) delete mode 100644 TEST_REPORT.md create mode 100644 tests/autograd/test_autograd_elementwise_backward.cc create mode 100644 tests/autograd/test_autograd_elementwise_forward.cc create mode 100644 tests/autograd/test_autograd_linear_backward.cc create mode 100644 tests/autograd/test_autograd_linear_forward.cc create mode 100644 tests/autograd/test_autograd_matmul_backward.cc create mode 100644 tests/autograd/test_autograd_matmul_forward.cc create mode 100644 tests/autograd/test_autograd_normalization_backward.cc create mode 100644 tests/autograd/test_autograd_normalization_forward.cc create mode 100644 tests/autograd/test_autograd_reduction_backward.cc create mode 100644 tests/autograd/test_autograd_reduction_forward.cc create mode 100644 tests/autograd/test_autograd_softmax_backward.cc create mode 100644 tests/autograd/test_autograd_softmax_forward.cc create mode 100644 tests/autograd/test_autograd_transform_backward.cc create mode 100644 tests/autograd/test_autograd_transform_forward.cc create mode 100644 tests/common/test_macros.cmake create mode 100644 tests/lora/CMakeLists.txt create mode 100644 tests/lora/test_lora.cc create mode 100644 tests/optimizer/test_optimizer_creation.cc create mode 100644 tests/optimizer/test_optimizer_cuda.cc create mode 100644 tests/optimizer/test_optimizer_distributed.cc create mode 100644 tests/optimizer/test_optimizer_step.cc create mode 100644 tests/tensor/test_tensor_copy.cc create mode 100644 tests/tensor/test_tensor_create.cc create mode 100644 tests/tensor/test_tensor_delete.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index 84056f76..9cd2ff75 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -42,9 +42,9 @@ include_directories(${gflags_SOURCE_DIR}/include) set(WITH_GFLAGS OFF CACHE BOOL "Disable glog finding system gflags" FORCE) set(WITH_GTEST OFF CACHE BOOL "Disable glog finding system gtest" FORCE) add_subdirectory(third_party/glog) -add_compile_definitions(GLOG_USE_GLOG_EXPORT=1) +# add_compile_definitions(GLOG_USE_GLOG_EXPORT=1) include_directories(${glog_SOURCE_DIR}/src) -include_directories(${glog_BINARY_DIR}/glog) +# include_directories(${glog_BINARY_DIR}/glog) # eigen if(USE_OMP) @@ -66,8 +66,10 @@ endif() # Framework core sources (*.cc), excluding cpu kernels (they are built separately) file(GLOB_RECURSE SRC ${PROJECT_SOURCE_DIR}/infini_train/src/*.cc) list(FILTER SRC EXCLUDE REGEX ".*kernels/cpu/.*") -list(FILTER SRC EXCLUDE REGEX ".*runtime/cuda/.*") -list(FILTER SRC EXCLUDE REGEX ".*ccl/cuda/.*") +if(NOT USE_CUDA) + list(FILTER SRC EXCLUDE REGEX ".*runtime/cuda/.*") + list(FILTER SRC EXCLUDE REGEX ".*ccl/cuda/.*") +endif() if(NOT USE_NCCL) list(FILTER SRC EXCLUDE REGEX ".*infini_train/src/core/ccl/cuda/.*") endif() diff --git a/TEST_REPORT.md b/TEST_REPORT.md deleted file mode 100644 index 2e7fda11..00000000 --- a/TEST_REPORT.md +++ /dev/null @@ -1,83 +0,0 @@ -# InfiniTrain 测试体系报告 - -## 1. 概述 -- 为主仓库提供了可扩展的 CTest + gtest 弹性测试骨架。 -- `BUILD_TEST` 开关保持默认启用,允许在关闭时跳过测试、在打开时统一构建所有 test 目标。 - -## 2. 架构与工程化 - -| 组件 | 说明 | -| --- | --- | -| CMake | 顶层 `CMakeLists.txt` 增加 `BUILD_TEST`,并通过 `add_subdirectory(third_party/glog)` + `add_compile_definitions(GLOG_USE_GLOG_EXPORT=1)` 保证所有目标都能正确引入 `glog/export.h`。`include_directories` 同时将 `glog` 的源目录和生成目录都纳入搜索路径。 | -| 二层分类 | 所有测试通过 `set_tests_properties(... LABELS "cpu"/"cuda"/"cuda;distributed"/"slow")` 注册在 CTest 中,标签可以组合或通过 `ctest -L/ctest -LE` 任意调度。 | -| 跳过宏 | `tests/common/test_utils.h` 新增 `GetCudaDeviceCount`, `HasCudaRuntime`, `HasNCCL`, `HasDistributedSupport`,并封装 `REQUIRE_CUDA`, `REQUIRE_MIN_GPUS`, `REQUIRE_NCCL`, `REQUIRE_DISTRIBUTED`,让测试在不满足运行条件时调用 `GTEST_SKIP()` 并输出明确理由。 | - -## 3. 目录与示例 - -``` -tests/ -├── common/ # test_utils.h,定义全局宏、fixture 与 helper -├── tensor/ # tensor_* 目标;cpu/cuda/distributed 测试共享一个 binary -├── optimizer/ # optimizer_* 目标,根据标签调度 -├── autograd/ # autograd_* 目标(CPU + optional CUDA/Distributed) -├── hook/ # hook_* + precision_check -└── slow/ # slow_cpu/cuda/distributed 示例,演示 slow 标签 -``` - -新增的 `tests/slow/test_slow.cc` 在本地 CPU 构建下执行任意工作量,并通过 `REQUIRE_CUDA`、`REQUIRE_DISTRIBUTED` 展示标签与 runtime skip 结合的写法。 - -## 4. 如何新增测试 -1. 在 `tests//` 下添加 `test_.cc`,`TEST` 中可以直接使用 `REQUIRE_` 宏组合运行时能力检查。 -2. `CMakeLists.txt` 中照例添加 executable、链接 gtest、主库 & 内核目标,并用 `add_test` + `set_tests_properties(... LABELS ...)` 绑定适当标签。 -3. `tests/CMakeLists.txt` 统一 `add_subdirectory()`,无须为每个标签写额外逻辑。 - -## 5. 样例运行 -- `cmake -S . -B build -DBUILD_TEST=ON -DUSE_CUDA=OFF -DUSE_NCCL=OFF` -- `cmake --build build` - -### 5.1 ctest -L cpu -``` -Test project /home/luoyue/InfiniTrain/build - Start 1005: tensor_cpu -1/6 Test #1005: tensor_cpu ....................... Passed 0.00 sec - Start 1018: slow_cpu -6/6 Test #1018: slow_cpu ......................... Passed 0.01 sec - -100% tests passed, 0 tests failed out of 6 - -Label Time Summary: -cpu = 0.04 sec*proc (6 tests) -slow = 0.01 sec*proc (1 test) -``` - -### 5.2 ctest -L slow -``` - Start 1018: slow_cpu -1/3 Test #1018: slow_cpu ......................... Passed 0.01 sec - Start 1019: slow_cuda -2/3 Test #1019: slow_cuda ........................ Passed 0.00 sec - Start 1020: slow_distributed -3/3 Test #1020: slow_distributed ................. Passed 0.00 sec - -100% tests passed, 0 tests failed out of 3 -``` - -### 5.3 ctest -L cuda -``` - Start 1006: tensor_cuda -10/10 Test #1020: slow_distributed ................. Passed 0.00 sec - -100% tests passed, 0 tests failed out of 10 -Label Time Summary: -cuda = 0.03 sec*proc (10 tests) -distributed = 0.02 sec*proc (5 tests) -slow = 0.01 sec*proc (2 tests) -``` - -### 5.4 ctest -LE distributed -- 该命令会跳过带 `distributed` 标签的测试(包括 slow_distributed)并运行剩余的 gflags + glog 验证套件。它在大多数构建配置下均能稳定返回(出于 gflags 自身生成的 1,000+ 个子测试中,仅有未构建的 helper binary 会被标记为 "Not Run")。 - -## 6. 运行要点 -- `REQUIRE_` 宏可以在单测中按需组合:CPU-only 逻辑不受影响,CUDA/Distributed 测试在无法满足环境时用 `GTEST_SKIP()` 退出。 -- 通过确保所有 标签 —— cpu、cuda、distributed、slow —— 在 CTest 中注册,并在 `ctest -L/ctest -LE` 中验证,测试调度逻辑可用于 CI 与本地快速切换。 -- 新增 `tests/slow/` 只是一个模板,后续模块可以复制该目录并替换为真实 workload,同时保留 slow 标签与跑步说明。 diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index a37f0913..dae5f972 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -1,18 +1,24 @@ # Tests CMakeLists.txt # This file manages the test infrastructure for InfiniTrain -# Add test subdirectories +# Include shared test macros (must be before any test subdirectory) +include(${CMAKE_CURRENT_SOURCE_DIR}/common/test_macros.cmake) + +# Common test utilities add_subdirectory(common) # Tensor tests add_subdirectory(tensor) -# Optimizer tests +# Optimizer tests add_subdirectory(optimizer) # Autograd operator tests add_subdirectory(autograd) +# LoRA tests +add_subdirectory(lora) + # Hook tests add_subdirectory(hook) diff --git a/tests/autograd/CMakeLists.txt b/tests/autograd/CMakeLists.txt index 6374e653..18a16fa2 100644 --- a/tests/autograd/CMakeLists.txt +++ b/tests/autograd/CMakeLists.txt @@ -1,27 +1,56 @@ -# Autograd operators test - -add_executable(test_autograd - test_autograd.cc -) -target_link_libraries(test_autograd - PRIVATE - GTest::gtest - GTest::gtest_main -) -target_include_directories(test_autograd PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common) - -target_link_libraries(test_autograd PRIVATE - "-Wl,--whole-archive" - infini_train - infini_train_cpu_kernels - "-Wl,--no-whole-archive" -) - -add_test(NAME autograd_cpu COMMAND test_autograd) -set_tests_properties(autograd_cpu PROPERTIES LABELS "cpu") - -add_test(NAME autograd_cuda COMMAND test_autograd --gtest_filter=AutogradTest.*CUDA) -set_tests_properties(autograd_cuda PROPERTIES LABELS "cuda") - -add_test(NAME autograd_distributed COMMAND test_autograd --gtest_filter=AutogradTest.*Distributed) -set_tests_properties(autograd_distributed PROPERTIES LABELS "cuda;distributed") +# ============================================================================ +# Autograd tests +# ============================================================================ +# 重构版本:使用 infini_train_add_test 宏简化配置 +# +# 新增测试只需 1 行: +# infini_train_add_test(test_name SOURCES test_name.cc LABELS cpu) +# ============================================================================ + +# ----------------------------------------------------------------------------- +# Elementwise tests +# ----------------------------------------------------------------------------- +infini_train_add_test(test_autograd_elementwise_forward SOURCES test_autograd_elementwise_forward.cc LABELS cpu) +infini_train_add_test(test_autograd_elementwise_backward SOURCES test_autograd_elementwise_backward.cc LABELS cpu) + +# ----------------------------------------------------------------------------- +# Matmul tests +# ----------------------------------------------------------------------------- +infini_train_add_test(test_autograd_matmul_forward SOURCES test_autograd_matmul_forward.cc LABELS cpu) +infini_train_add_test(test_autograd_matmul_backward SOURCES test_autograd_matmul_backward.cc LABELS cpu) + +# ----------------------------------------------------------------------------- +# Reduction tests +# ----------------------------------------------------------------------------- +infini_train_add_test(test_autograd_reduction_forward SOURCES test_autograd_reduction_forward.cc LABELS cpu) +infini_train_add_test(test_autograd_reduction_backward SOURCES test_autograd_reduction_backward.cc LABELS cpu) + +# ----------------------------------------------------------------------------- +# Linear tests +# ----------------------------------------------------------------------------- +infini_train_add_test(test_autograd_linear_forward SOURCES test_autograd_linear_forward.cc LABELS cpu) +infini_train_add_test(test_autograd_linear_backward SOURCES test_autograd_linear_backward.cc LABELS cpu) + +# ----------------------------------------------------------------------------- +# Softmax tests +# ----------------------------------------------------------------------------- +infini_train_add_test(test_autograd_softmax_forward SOURCES test_autograd_softmax_forward.cc LABELS cpu) +infini_train_add_test(test_autograd_softmax_backward SOURCES test_autograd_softmax_backward.cc LABELS cpu) + +# ----------------------------------------------------------------------------- +# Transform tests +# ----------------------------------------------------------------------------- +infini_train_add_test(test_autograd_transform_forward SOURCES test_autograd_transform_forward.cc LABELS cpu) +infini_train_add_test(test_autograd_transform_backward SOURCES test_autograd_transform_backward.cc LABELS cpu) + +# ----------------------------------------------------------------------------- +# Normalization tests +# ----------------------------------------------------------------------------- +infini_train_add_test(test_autograd_normalization_forward SOURCES test_autograd_normalization_forward.cc LABELS cpu) +infini_train_add_test(test_autograd_normalization_backward SOURCES test_autograd_normalization_backward.cc LABELS cpu) + +# ----------------------------------------------------------------------------- +# Legacy combined tests +# 注意:使用 gtest_discover_tests,所有 TEST_F 都会被自动发现 +# ----------------------------------------------------------------------------- +infini_train_add_test(test_autograd_legacy SOURCES test_autograd.cc LABELS cpu cuda distributed) diff --git a/tests/autograd/test_autograd.cc b/tests/autograd/test_autograd.cc index e59bfb09..b46358da 100644 --- a/tests/autograd/test_autograd.cc +++ b/tests/autograd/test_autograd.cc @@ -18,7 +18,7 @@ using namespace infini_train; -class AutogradTest : public ::testing::Test { +class AutogradTestBase : public ::testing::Test { protected: static void SetUpTestSuite() { nn::parallel::global::GlobalEnv::Instance().Init(1, 1, false, 1, 1); @@ -38,7 +38,12 @@ class AutogradTest : public ::testing::Test { } }; -TEST_F(AutogradTest, AddForward) { +class AutogradForwardTest : public AutogradTestBase {}; +class AutogradBackwardTest : public AutogradTestBase {}; +class AutogradCudaTest : public AutogradTestBase {}; +class AutogradDistributedTest : public AutogradTestBase {}; + +TEST_F(AutogradForwardTest, AddForward) { auto a = createTensor({2, 3}, 1.0f); auto b = createTensor({2, 3}, 2.0f); auto add_fn = std::make_shared(); @@ -47,7 +52,7 @@ TEST_F(AutogradTest, AddForward) { EXPECT_EQ(result[0]->Dims(), (std::vector{2, 3})); } -TEST_F(AutogradTest, AddBackward) { +TEST_F(AutogradBackwardTest, AddBackward) { auto a = createTensor({2, 3}, 1.0f); auto b = createTensor({2, 3}, 2.0f); auto add_fn = std::make_shared(); @@ -57,7 +62,7 @@ TEST_F(AutogradTest, AddBackward) { EXPECT_EQ(grad_inputs.size(), 2); } -TEST_F(AutogradTest, SubForward) { +TEST_F(AutogradForwardTest, SubForward) { auto a = createTensor({2, 3}, 5.0f); auto b = createTensor({2, 3}, 3.0f); auto sub_fn = std::make_shared(); @@ -65,7 +70,7 @@ TEST_F(AutogradTest, SubForward) { EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, MulForward) { +TEST_F(AutogradForwardTest, MulForward) { auto a = createTensor({2, 3}, 2.0f); auto b = createTensor({2, 3}, 3.0f); auto mul_fn = std::make_shared(); @@ -73,7 +78,7 @@ TEST_F(AutogradTest, MulForward) { EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, MulBackward) { +TEST_F(AutogradBackwardTest, MulBackward) { auto a = createTensor({2, 3}, 2.0f); auto b = createTensor({2, 3}, 3.0f); auto mul_fn = std::make_shared(); @@ -83,7 +88,7 @@ TEST_F(AutogradTest, MulBackward) { EXPECT_EQ(grad_inputs.size(), 2); } -TEST_F(AutogradTest, DivForward) { +TEST_F(AutogradForwardTest, DivForward) { auto a = createTensor({2, 3}, 6.0f); auto b = createTensor({2, 3}, 2.0f); auto div_fn = std::make_shared(); @@ -91,77 +96,77 @@ TEST_F(AutogradTest, DivForward) { EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, NegForward) { +TEST_F(AutogradForwardTest, NegForward) { auto a = createTensor({2, 3}, 5.0f); auto neg_fn = std::make_shared(); auto result = neg_fn->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, SinForward) { +TEST_F(AutogradForwardTest, SinForward) { auto a = createTensor({2, 3}, 0.0f); auto sin_fn = std::make_shared(); auto result = sin_fn->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, CosForward) { +TEST_F(AutogradForwardTest, CosForward) { auto a = createTensor({2, 3}, 0.0f); auto cos_fn = std::make_shared(); auto result = cos_fn->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, TanhForward) { +TEST_F(AutogradForwardTest, TanhForward) { auto a = createTensor({2, 3}, 0.0f); auto tanh_fn = std::make_shared(); auto result = tanh_fn->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, ExpForward) { +TEST_F(AutogradForwardTest, ExpForward) { auto a = createTensor({2, 3}, 1.0f); auto exp_fn = std::make_shared(); auto result = exp_fn->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, LogForward) { +TEST_F(AutogradForwardTest, LogForward) { auto a = createTensor({2, 3}, 2.0f); auto log_fn = std::make_shared(); auto result = log_fn->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, ReciprocalForward) { +TEST_F(AutogradForwardTest, ReciprocalForward) { auto a = createTensor({2, 3}, 2.0f); auto reciprocal_fn = std::make_shared(); auto result = reciprocal_fn->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, PowForward) { +TEST_F(AutogradForwardTest, PowForward) { auto a = createTensor({2, 3}, 2.0f); auto pow_fn = std::make_shared(2.0f); auto result = pow_fn->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, RsqrtForward) { +TEST_F(AutogradForwardTest, RsqrtForward) { auto a = createTensor({2, 3}, 4.0f); auto rsqrt_fn = std::make_shared(); auto result = rsqrt_fn->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, SigmoidForward) { +TEST_F(AutogradForwardTest, SigmoidForward) { auto a = createTensor({2, 3}, 0.0f); auto sigmoid_fn = std::make_shared(); auto result = sigmoid_fn->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, MatmulForward) { +TEST_F(AutogradForwardTest, MatmulForward) { auto a = createTensor({2, 3}, 1.0f); auto b = createTensor({3, 4}, 1.0f); auto matmul_fn = std::make_shared(); @@ -170,35 +175,35 @@ TEST_F(AutogradTest, MatmulForward) { EXPECT_EQ(result[0]->Dims(), (std::vector{2, 4})); } -TEST_F(AutogradTest, SumForward) { +TEST_F(AutogradForwardTest, SumForward) { auto a = createTensor({2, 3}, 1.0f); auto sum_fn = std::make_shared(1, false); auto result = sum_fn->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, MeanForward) { +TEST_F(AutogradForwardTest, MeanForward) { auto a = createTensor({2, 3}, 1.0f); auto mean_fn = std::make_shared(1, false); auto result = mean_fn->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, MaxForward) { +TEST_F(AutogradForwardTest, MaxForward) { auto a = createTensor({2, 3}, 1.0f); auto max_fn = std::make_shared(1, false); auto result = max_fn->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, MinForward) { +TEST_F(AutogradForwardTest, MinForward) { auto a = createTensor({2, 3}, 1.0f); auto min_fn = std::make_shared(1, false); auto result = min_fn->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, SoftmaxForward) { +TEST_F(AutogradForwardTest, SoftmaxForward) { auto a = createTensor({2, 3}, 1.0f); auto softmax_fn = std::make_shared(1); auto result = softmax_fn->Apply({a}); @@ -206,7 +211,7 @@ TEST_F(AutogradTest, SoftmaxForward) { EXPECT_EQ(result[0]->Dims(), (std::vector{2, 3})); } -TEST_F(AutogradTest, LayerNormForward) { +TEST_F(AutogradForwardTest, LayerNormForward) { auto a = createTensor({2, 3, 4}, 1.0f); auto weight = createTensor({4}, 1.0f); auto bias = createTensor({4}, 0.0f); @@ -215,7 +220,7 @@ TEST_F(AutogradTest, LayerNormForward) { EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, LinearForward) { +TEST_F(AutogradForwardTest, LinearForward) { auto input = createTensor({2, 3}, 1.0f); auto weight = createTensor({4, 3}, 1.0f); auto bias = createTensor({4}, 0.0f); @@ -225,7 +230,7 @@ TEST_F(AutogradTest, LinearForward) { EXPECT_EQ(result[0]->Dims(), (std::vector{2, 4})); } -TEST_F(AutogradTest, TransposeForward) { +TEST_F(AutogradForwardTest, TransposeForward) { auto a = createTensor({2, 3}, 1.0f); auto transpose_fn = std::make_shared(0, 1); auto result = transpose_fn->Apply({a}); @@ -233,7 +238,7 @@ TEST_F(AutogradTest, TransposeForward) { EXPECT_EQ(result[0]->Dims(), (std::vector{3, 2})); } -TEST_F(AutogradTest, SliceForward) { +TEST_F(AutogradForwardTest, SliceForward) { auto a = createTensor({4, 4}, 1.0f); auto slice_fn = std::make_shared( std::vector{1, 1}, @@ -243,14 +248,14 @@ TEST_F(AutogradTest, SliceForward) { EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, SplitForward) { +TEST_F(AutogradForwardTest, SplitForward) { auto a = createTensor({4, 4}, 1.0f); auto split_fn = std::make_shared(2, 0); auto result = split_fn->Apply({a}); EXPECT_EQ(result.size(), 2); } -TEST_F(AutogradTest, ConcatForward) { +TEST_F(AutogradForwardTest, ConcatForward) { auto a = createTensor({2, 2}, 1.0f); auto b = createTensor({2, 2}, 2.0f); auto concat_fn = std::make_shared(0); @@ -259,7 +264,7 @@ TEST_F(AutogradTest, ConcatForward) { EXPECT_EQ(result[0]->Dims(), (std::vector{4, 2})); } -TEST_F(AutogradTest, StackForward) { +TEST_F(AutogradForwardTest, StackForward) { auto a = createTensor({2, 3}, 1.0f); auto b = createTensor({2, 3}, 2.0f); auto stack_fn = std::make_shared(0); @@ -268,21 +273,21 @@ TEST_F(AutogradTest, StackForward) { EXPECT_EQ(result[0]->Dims(), (std::vector{2, 2, 3})); } -TEST_F(AutogradTest, TrilForward) { +TEST_F(AutogradForwardTest, TrilForward) { auto a = createTensor({3, 3}, 1.0f); auto tril_fn = std::make_shared(0); auto result = tril_fn->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, TriuForward) { +TEST_F(AutogradForwardTest, TriuForward) { auto a = createTensor({3, 3}, 1.0f); auto triu_fn = std::make_shared(0); auto result = triu_fn->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, OuterForward) { +TEST_F(AutogradForwardTest, OuterForward) { auto a = createTensor({3}, 1.0f); auto b = createTensor({4}, 1.0f); auto outer_fn = std::make_shared(); @@ -291,21 +296,21 @@ TEST_F(AutogradTest, OuterForward) { EXPECT_EQ(result[0]->Dims(), (std::vector{3, 4})); } -TEST_F(AutogradTest, AddScalarForward) { +TEST_F(AutogradForwardTest, AddScalarForward) { auto a = createTensor({2, 3}, 1.0f); auto add_scalar_fn = std::make_shared(2.0f); auto result = add_scalar_fn->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, MulScalarForward) { +TEST_F(AutogradForwardTest, MulScalarForward) { auto a = createTensor({2, 3}, 2.0f); auto mul_scalar_fn = std::make_shared(3.0f); auto result = mul_scalar_fn->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, LtForward) { +TEST_F(AutogradForwardTest, LtForward) { auto a = createTensor({2, 3}, 5.0f); auto b = createTensor({2, 3}, 3.0f); auto lt_fn = std::make_shared(); @@ -313,7 +318,7 @@ TEST_F(AutogradTest, LtForward) { EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, LeForward) { +TEST_F(AutogradForwardTest, LeForward) { auto a = createTensor({2, 3}, 3.0f); auto b = createTensor({2, 3}, 3.0f); auto le_fn = std::make_shared(); @@ -321,7 +326,7 @@ TEST_F(AutogradTest, LeForward) { EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, GtForward) { +TEST_F(AutogradForwardTest, GtForward) { auto a = createTensor({2, 3}, 5.0f); auto b = createTensor({2, 3}, 3.0f); auto gt_fn = std::make_shared(); @@ -329,7 +334,7 @@ TEST_F(AutogradTest, GtForward) { EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, GeForward) { +TEST_F(AutogradForwardTest, GeForward) { auto a = createTensor({2, 3}, 3.0f); auto b = createTensor({2, 3}, 3.0f); auto ge_fn = std::make_shared(); @@ -337,7 +342,7 @@ TEST_F(AutogradTest, GeForward) { EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, EqualsForward) { +TEST_F(AutogradForwardTest, EqualsForward) { auto a = createTensor({2, 3}, 3.0f); auto b = createTensor({2, 3}, 3.0f); auto eq_fn = std::make_shared(); @@ -345,7 +350,7 @@ TEST_F(AutogradTest, EqualsForward) { EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, AndForward) { +TEST_F(AutogradForwardTest, AndForward) { auto a = createTensor({2, 3}, 1.0f); auto b = createTensor({2, 3}, 1.0f); auto and_fn = std::make_shared(); @@ -353,7 +358,7 @@ TEST_F(AutogradTest, AndForward) { EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, OrForward) { +TEST_F(AutogradForwardTest, OrForward) { auto a = createTensor({2, 3}, 0.0f); auto b = createTensor({2, 3}, 1.0f); auto or_fn = std::make_shared(); @@ -361,7 +366,7 @@ TEST_F(AutogradTest, OrForward) { EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, NoOpForward) { +TEST_F(AutogradForwardTest, NoOpForward) { auto a = createTensor({2, 3}, 1.0f); auto noop_fn = std::make_shared(std::vector{2, 3}); auto result = noop_fn->Apply({a}); @@ -374,7 +379,7 @@ TEST_F(AutogradTest, NoOpForward) { // ============================================================================ #ifdef USE_CUDA -TEST_F(AutogradTest, AddForwardCUDA) { +TEST_F(AutogradCudaTest, AddForwardCUDA) { auto a = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, Device(Device::DeviceType::kCUDA, 0)); a->set_requires_grad(true); @@ -393,7 +398,7 @@ TEST_F(AutogradTest, AddForwardCUDA) { EXPECT_EQ(result[0]->Dims(), (std::vector{2, 3})); } -TEST_F(AutogradTest, MatmulForwardCUDA) { +TEST_F(AutogradCudaTest, MatmulForwardCUDA) { auto a = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, Device(Device::DeviceType::kCUDA, 0)); a->set_requires_grad(true); @@ -412,7 +417,7 @@ TEST_F(AutogradTest, MatmulForwardCUDA) { EXPECT_EQ(result[0]->Dims(), (std::vector{2, 4})); } -TEST_F(AutogradTest, SumForwardCUDA) { +TEST_F(AutogradCudaTest, SumForwardCUDA) { auto a = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, Device(Device::DeviceType::kCUDA, 0)); a->set_requires_grad(true); @@ -424,7 +429,7 @@ TEST_F(AutogradTest, SumForwardCUDA) { EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTest, SoftmaxForwardCUDA) { +TEST_F(AutogradCudaTest, SoftmaxForwardCUDA) { auto a = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, Device(Device::DeviceType::kCUDA, 0)); a->set_requires_grad(true); @@ -437,7 +442,7 @@ TEST_F(AutogradTest, SoftmaxForwardCUDA) { EXPECT_EQ(result[0]->Dims(), (std::vector{2, 3})); } -TEST_F(AutogradTest, LinearForwardCUDA) { +TEST_F(AutogradCudaTest, LinearForwardCUDA) { auto input = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, Device(Device::DeviceType::kCUDA, 0)); input->set_requires_grad(true); @@ -468,7 +473,10 @@ TEST_F(AutogradTest, LinearForwardCUDA) { // ============================================================================ #ifdef USE_NCCL -TEST_F(AutogradTest, AllReduceDistributed) { +TEST_F(AutogradDistributedTest, AllReduceDistributed) { + REQUIRE_CUDA(); + REQUIRE_DISTRIBUTED(); + REQUIRE_NCCL(); auto a = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, Device(Device::DeviceType::kCUDA, 0)); a->set_requires_grad(true); @@ -479,7 +487,10 @@ TEST_F(AutogradTest, AllReduceDistributed) { EXPECT_TRUE(a->requires_grad()); } -TEST_F(AutogradTest, AllGatherDistributed) { +TEST_F(AutogradDistributedTest, AllGatherDistributed) { + REQUIRE_CUDA(); + REQUIRE_DISTRIBUTED(); + REQUIRE_NCCL(); auto a = std::make_shared(std::vector{4, 4}, DataType::kFLOAT32, Device(Device::DeviceType::kCUDA, 0)); a->set_requires_grad(true); @@ -490,7 +501,10 @@ TEST_F(AutogradTest, AllGatherDistributed) { EXPECT_EQ(a->Dims(), (std::vector{4, 4})); } -TEST_F(AutogradTest, ReduceScatterDistributed) { +TEST_F(AutogradDistributedTest, ReduceScatterDistributed) { + REQUIRE_CUDA(); + REQUIRE_DISTRIBUTED(); + REQUIRE_NCCL(); auto a = std::make_shared(std::vector{2, 8}, DataType::kFLOAT32, Device(Device::DeviceType::kCUDA, 0)); a->set_requires_grad(true); @@ -501,7 +515,10 @@ TEST_F(AutogradTest, ReduceScatterDistributed) { EXPECT_EQ(a->Dims(), (std::vector{2, 8})); } -TEST_F(AutogradTest, DistributedMatmul) { +TEST_F(AutogradDistributedTest, DistributedMatmul) { + REQUIRE_CUDA(); + REQUIRE_DISTRIBUTED(); + REQUIRE_NCCL(); auto a = std::make_shared(std::vector{2, 4}, DataType::kFLOAT32, Device(Device::DeviceType::kCUDA, 0)); a->set_requires_grad(true); @@ -516,7 +533,10 @@ TEST_F(AutogradTest, DistributedMatmul) { EXPECT_TRUE(result[0]->IsCUDA()); } -TEST_F(AutogradTest, DistributedLinear) { +TEST_F(AutogradDistributedTest, DistributedLinear) { + REQUIRE_CUDA(); + REQUIRE_DISTRIBUTED(); + REQUIRE_NCCL(); auto input = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, Device(Device::DeviceType::kCUDA, 0)); input->set_requires_grad(true); diff --git a/tests/autograd/test_autograd_elementwise_backward.cc b/tests/autograd/test_autograd_elementwise_backward.cc new file mode 100644 index 00000000..502a20e5 --- /dev/null +++ b/tests/autograd/test_autograd_elementwise_backward.cc @@ -0,0 +1,134 @@ +#include + +#include +#include + +#include "infini_train/include/tensor.h" +#include "infini_train/include/nn/parallel/global.h" +#include "infini_train/include/autograd/elementwise.h" +#include "test_utils.h" + +using namespace infini_train; + +class AutogradElementwiseBackwardTest : public infini_train::test::AutogradTestBase {}; + +TEST_F(AutogradElementwiseBackwardTest, AddBackward) { + auto a = createTensor({2, 3}, 1.0f); + auto b = createTensor({2, 3}, 2.0f); + auto add_fn = std::make_shared(); + auto result = add_fn->Apply({a, b}); + auto grad = createTensor({2, 3}, 1.0f); + auto grad_inputs = add_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 2); +} + +TEST_F(AutogradElementwiseBackwardTest, SubBackward) { + auto a = createTensor({2, 3}, 5.0f); + auto b = createTensor({2, 3}, 3.0f); + auto sub_fn = std::make_shared(); + auto result = sub_fn->Apply({a, b}); + auto grad = createTensor({2, 3}, 1.0f); + auto grad_inputs = sub_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 2); +} + +TEST_F(AutogradElementwiseBackwardTest, MulBackward) { + auto a = createTensor({2, 3}, 2.0f); + auto b = createTensor({2, 3}, 3.0f); + auto mul_fn = std::make_shared(); + auto result = mul_fn->Apply({a, b}); + auto grad = createTensor({2, 3}, 1.0f); + auto grad_inputs = mul_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 2); +} + +TEST_F(AutogradElementwiseBackwardTest, DivBackward) { + auto a = createTensor({2, 3}, 6.0f); + auto b = createTensor({2, 3}, 2.0f); + auto div_fn = std::make_shared(); + auto result = div_fn->Apply({a, b}); + auto grad = createTensor({2, 3}, 1.0f); + auto grad_inputs = div_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 2); +} + +TEST_F(AutogradElementwiseBackwardTest, NegBackward) { + auto a = createTensor({2, 3}, 5.0f); + auto neg_fn = std::make_shared(); + auto result = neg_fn->Apply({a}); + auto grad = createTensor({2, 3}, 1.0f); + auto grad_inputs = neg_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 1); +} + +TEST_F(AutogradElementwiseBackwardTest, SinBackward) { + auto a = createTensor({2, 3}, 0.0f); + auto sin_fn = std::make_shared(); + auto result = sin_fn->Apply({a}); + auto grad = createTensor({2, 3}, 1.0f); + auto grad_inputs = sin_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 1); +} + +TEST_F(AutogradElementwiseBackwardTest, CosBackward) { + auto a = createTensor({2, 3}, 0.0f); + auto cos_fn = std::make_shared(); + auto result = cos_fn->Apply({a}); + auto grad = createTensor({2, 3}, 1.0f); + auto grad_inputs = cos_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 1); +} + +TEST_F(AutogradElementwiseBackwardTest, TanhBackward) { + auto a = createTensor({2, 3}, 0.0f); + auto tanh_fn = std::make_shared(); + auto result = tanh_fn->Apply({a}); + auto grad = createTensor({2, 3}, 1.0f); + auto grad_inputs = tanh_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 1); +} + +TEST_F(AutogradElementwiseBackwardTest, ExpBackward) { + auto a = createTensor({2, 3}, 1.0f); + auto exp_fn = std::make_shared(); + auto result = exp_fn->Apply({a}); + auto grad = createTensor({2, 3}, 1.0f); + auto grad_inputs = exp_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 1); +} + +TEST_F(AutogradElementwiseBackwardTest, LogBackward) { + auto a = createTensor({2, 3}, 2.0f); + auto log_fn = std::make_shared(); + auto result = log_fn->Apply({a}); + auto grad = createTensor({2, 3}, 1.0f); + auto grad_inputs = log_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 1); +} + +TEST_F(AutogradElementwiseBackwardTest, ReciprocalBackward) { + auto a = createTensor({2, 3}, 2.0f); + auto reciprocal_fn = std::make_shared(); + auto result = reciprocal_fn->Apply({a}); + auto grad = createTensor({2, 3}, 1.0f); + auto grad_inputs = reciprocal_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 1); +} + +TEST_F(AutogradElementwiseBackwardTest, PowBackward) { + auto a = createTensor({2, 3}, 2.0f); + auto pow_fn = std::make_shared(2.0f); + auto result = pow_fn->Apply({a}); + auto grad = createTensor({2, 3}, 1.0f); + auto grad_inputs = pow_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 1); +} + +TEST_F(AutogradElementwiseBackwardTest, RsqrtBackward) { + auto a = createTensor({2, 3}, 4.0f); + auto rsqrt_fn = std::make_shared(); + auto result = rsqrt_fn->Apply({a}); + auto grad = createTensor({2, 3}, 1.0f); + auto grad_inputs = rsqrt_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 1); +} diff --git a/tests/autograd/test_autograd_elementwise_forward.cc b/tests/autograd/test_autograd_elementwise_forward.cc new file mode 100644 index 00000000..63b386b1 --- /dev/null +++ b/tests/autograd/test_autograd_elementwise_forward.cc @@ -0,0 +1,187 @@ +#include + +#include +#include + +#include "infini_train/include/tensor.h" +#include "infini_train/include/nn/parallel/global.h" +#include "infini_train/include/autograd/elementwise.h" +#include "infini_train/include/autograd/activations.h" +#include "test_utils.h" + +using namespace infini_train; + +class AutogradElementwiseForwardTest : public infini_train::test::AutogradTestBase {}; + +TEST_F(AutogradElementwiseForwardTest, AddForward) { + auto a = createTensor({2, 3}, 1.0f); + auto b = createTensor({2, 3}, 2.0f); + auto add_fn = std::make_shared(); + auto result = add_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector{2, 3})); +} + +TEST_F(AutogradElementwiseForwardTest, SubForward) { + auto a = createTensor({2, 3}, 5.0f); + auto b = createTensor({2, 3}, 3.0f); + auto sub_fn = std::make_shared(); + auto result = sub_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradElementwiseForwardTest, MulForward) { + auto a = createTensor({2, 3}, 2.0f); + auto b = createTensor({2, 3}, 3.0f); + auto mul_fn = std::make_shared(); + auto result = mul_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradElementwiseForwardTest, DivForward) { + auto a = createTensor({2, 3}, 6.0f); + auto b = createTensor({2, 3}, 2.0f); + auto div_fn = std::make_shared(); + auto result = div_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradElementwiseForwardTest, NegForward) { + auto a = createTensor({2, 3}, 5.0f); + auto neg_fn = std::make_shared(); + auto result = neg_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradElementwiseForwardTest, SinForward) { + auto a = createTensor({2, 3}, 0.0f); + auto sin_fn = std::make_shared(); + auto result = sin_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradElementwiseForwardTest, CosForward) { + auto a = createTensor({2, 3}, 0.0f); + auto cos_fn = std::make_shared(); + auto result = cos_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradElementwiseForwardTest, TanhForward) { + auto a = createTensor({2, 3}, 0.0f); + auto tanh_fn = std::make_shared(); + auto result = tanh_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradElementwiseForwardTest, ExpForward) { + auto a = createTensor({2, 3}, 1.0f); + auto exp_fn = std::make_shared(); + auto result = exp_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradElementwiseForwardTest, LogForward) { + auto a = createTensor({2, 3}, 2.0f); + auto log_fn = std::make_shared(); + auto result = log_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradElementwiseForwardTest, ReciprocalForward) { + auto a = createTensor({2, 3}, 2.0f); + auto reciprocal_fn = std::make_shared(); + auto result = reciprocal_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradElementwiseForwardTest, PowForward) { + auto a = createTensor({2, 3}, 2.0f); + auto pow_fn = std::make_shared(2.0f); + auto result = pow_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradElementwiseForwardTest, RsqrtForward) { + auto a = createTensor({2, 3}, 4.0f); + auto rsqrt_fn = std::make_shared(); + auto result = rsqrt_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradElementwiseForwardTest, SigmoidForward) { + auto a = createTensor({2, 3}, 0.0f); + auto sigmoid_fn = std::make_shared(); + auto result = sigmoid_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradElementwiseForwardTest, AddScalarForward) { + auto a = createTensor({2, 3}, 1.0f); + auto add_scalar_fn = std::make_shared(2.0f); + auto result = add_scalar_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradElementwiseForwardTest, MulScalarForward) { + auto a = createTensor({2, 3}, 2.0f); + auto mul_scalar_fn = std::make_shared(3.0f); + auto result = mul_scalar_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradElementwiseForwardTest, LtForward) { + auto a = createTensor({2, 3}, 5.0f); + auto b = createTensor({2, 3}, 3.0f); + auto lt_fn = std::make_shared(); + auto result = lt_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradElementwiseForwardTest, LeForward) { + auto a = createTensor({2, 3}, 3.0f); + auto b = createTensor({2, 3}, 3.0f); + auto le_fn = std::make_shared(); + auto result = le_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradElementwiseForwardTest, GtForward) { + auto a = createTensor({2, 3}, 5.0f); + auto b = createTensor({2, 3}, 3.0f); + auto gt_fn = std::make_shared(); + auto result = gt_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradElementwiseForwardTest, GeForward) { + auto a = createTensor({2, 3}, 3.0f); + auto b = createTensor({2, 3}, 3.0f); + auto ge_fn = std::make_shared(); + auto result = ge_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradElementwiseForwardTest, EqualsForward) { + auto a = createTensor({2, 3}, 3.0f); + auto b = createTensor({2, 3}, 3.0f); + auto eq_fn = std::make_shared(); + auto result = eq_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradElementwiseForwardTest, AndForward) { + auto a = createTensor({2, 3}, 1.0f); + auto b = createTensor({2, 3}, 1.0f); + auto and_fn = std::make_shared(); + auto result = and_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradElementwiseForwardTest, OrForward) { + auto a = createTensor({2, 3}, 0.0f); + auto b = createTensor({2, 3}, 1.0f); + auto or_fn = std::make_shared(); + auto result = or_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); +} diff --git a/tests/autograd/test_autograd_linear_backward.cc b/tests/autograd/test_autograd_linear_backward.cc new file mode 100644 index 00000000..069affc7 --- /dev/null +++ b/tests/autograd/test_autograd_linear_backward.cc @@ -0,0 +1,33 @@ +#include + +#include + +#include "infini_train/include/tensor.h" +#include "infini_train/include/nn/parallel/global.h" +#include "infini_train/include/autograd/linear.h" +#include "test_utils.h" + +using namespace infini_train; + +class AutogradLinearBackwardTest : public infini_train::test::AutogradTestBase {}; + +TEST_F(AutogradLinearBackwardTest, LinearBackward) { + auto input = createTensor({2, 3}, 1.0f); + auto weight = createTensor({4, 3}, 1.0f); + auto bias = createTensor({4}, 0.0f); + auto linear_fn = std::make_shared(); + auto result = linear_fn->Apply({input, weight, bias}); + auto grad = createTensor({2, 4}, 1.0f); + auto grad_inputs = linear_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 3); +} + +TEST_F(AutogradLinearBackwardTest, LinearBackwardNoBias) { + auto input = createTensor({2, 3}, 1.0f); + auto weight = createTensor({4, 3}, 1.0f); + auto linear_fn = std::make_shared(); + auto result = linear_fn->Apply({input, weight}); + auto grad = createTensor({2, 4}, 1.0f); + auto grad_inputs = linear_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 2); +} diff --git a/tests/autograd/test_autograd_linear_forward.cc b/tests/autograd/test_autograd_linear_forward.cc new file mode 100644 index 00000000..efd8d6eb --- /dev/null +++ b/tests/autograd/test_autograd_linear_forward.cc @@ -0,0 +1,41 @@ +#include + +#include + +#include "infini_train/include/tensor.h" +#include "infini_train/include/nn/parallel/global.h" +#include "infini_train/include/autograd/linear.h" +#include "test_utils.h" + +using namespace infini_train; + +class AutogradLinearForwardTest : public infini_train::test::AutogradTestBase {}; + +TEST_F(AutogradLinearForwardTest, LinearForward) { + auto input = createTensor({2, 3}, 1.0f); + auto weight = createTensor({4, 3}, 1.0f); + auto bias = createTensor({4}, 0.0f); + auto linear_fn = std::make_shared(); + auto result = linear_fn->Apply({input, weight, bias}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector{2, 4})); +} + +TEST_F(AutogradLinearForwardTest, LinearNoBias) { + auto input = createTensor({2, 3}, 1.0f); + auto weight = createTensor({4, 3}, 1.0f); + auto linear_fn = std::make_shared(); + auto result = linear_fn->Apply({input, weight}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector{2, 4})); +} + +TEST_F(AutogradLinearForwardTest, LinearBatch) { + auto input = createTensor({32, 128}, 1.0f); + auto weight = createTensor({64, 128}, 1.0f); + auto bias = createTensor({64}, 0.0f); + auto linear_fn = std::make_shared(); + auto result = linear_fn->Apply({input, weight, bias}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector{32, 64})); +} diff --git a/tests/autograd/test_autograd_matmul_backward.cc b/tests/autograd/test_autograd_matmul_backward.cc new file mode 100644 index 00000000..e9962f5d --- /dev/null +++ b/tests/autograd/test_autograd_matmul_backward.cc @@ -0,0 +1,42 @@ +#include + +#include + +#include "infini_train/include/tensor.h" +#include "infini_train/include/nn/parallel/global.h" +#include "infini_train/include/autograd/matmul.h" +#include "test_utils.h" + +using namespace infini_train; + +class AutogradMatmulBackwardTest : public infini_train::test::AutogradTestBase {}; + +TEST_F(AutogradMatmulBackwardTest, MatmulBackward) { + auto a = createTensor({2, 3}, 1.0f); + auto b = createTensor({3, 4}, 1.0f); + auto matmul_fn = std::make_shared(); + auto result = matmul_fn->Apply({a, b}); + auto grad = createTensor({2, 4}, 1.0f); + auto grad_inputs = matmul_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 2); +} + +TEST_F(AutogradMatmulBackwardTest, MatmulBackwardSquare) { + auto a = createTensor({3, 3}, 2.0f); + auto b = createTensor({3, 3}, 3.0f); + auto matmul_fn = std::make_shared(); + auto result = matmul_fn->Apply({a, b}); + auto grad = createTensor({3, 3}, 1.0f); + auto grad_inputs = matmul_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 2); +} + +TEST_F(AutogradMatmulBackwardTest, MatmulBackwardDifferentShapes) { + auto a = createTensor({3, 4}, 1.5f); + auto b = createTensor({4, 2}, 2.5f); + auto matmul_fn = std::make_shared(); + auto result = matmul_fn->Apply({a, b}); + auto grad = createTensor({3, 2}, 1.0f); + auto grad_inputs = matmul_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 2); +} diff --git a/tests/autograd/test_autograd_matmul_forward.cc b/tests/autograd/test_autograd_matmul_forward.cc new file mode 100644 index 00000000..87c93f08 --- /dev/null +++ b/tests/autograd/test_autograd_matmul_forward.cc @@ -0,0 +1,48 @@ +#include + +#include + +#include "infini_train/include/tensor.h" +#include "infini_train/include/nn/parallel/global.h" +#include "infini_train/include/autograd/matmul.h" +#include "test_utils.h" + +using namespace infini_train; + +class AutogradMatmulForwardTest : public infini_train::test::AutogradTestBase {}; + +TEST_F(AutogradMatmulForwardTest, MatmulForward) { + auto a = createTensor({2, 3}, 1.0f); + auto b = createTensor({3, 4}, 1.0f); + auto matmul_fn = std::make_shared(); + auto result = matmul_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector{2, 4})); +} + +TEST_F(AutogradMatmulForwardTest, MatmulDifferentShapes) { + auto a = createTensor({3, 4}, 1.0f); + auto b = createTensor({4, 2}, 1.0f); + auto matmul_fn = std::make_shared(); + auto result = matmul_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector{3, 2})); +} + +TEST_F(AutogradMatmulForwardTest, MatmulBatch) { + auto a = createTensor({2, 3, 4}, 1.0f); + auto b = createTensor({2, 4, 5}, 1.0f); + auto matmul_fn = std::make_shared(); + auto result = matmul_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector{2, 3, 5})); +} + +TEST_F(AutogradMatmulForwardTest, MatmulSquare) { + auto a = createTensor({3, 3}, 1.0f); + auto b = createTensor({3, 3}, 1.0f); + auto matmul_fn = std::make_shared(); + auto result = matmul_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector{3, 3})); +} diff --git a/tests/autograd/test_autograd_normalization_backward.cc b/tests/autograd/test_autograd_normalization_backward.cc new file mode 100644 index 00000000..6f97349e --- /dev/null +++ b/tests/autograd/test_autograd_normalization_backward.cc @@ -0,0 +1,34 @@ +#include + +#include + +#include "infini_train/include/tensor.h" +#include "infini_train/include/nn/parallel/global.h" +#include "infini_train/include/autograd/normalization.h" +#include "test_utils.h" + +using namespace infini_train; + +class AutogradNormalizationBackwardTest : public infini_train::test::AutogradTestBase {}; + +TEST_F(AutogradNormalizationBackwardTest, LayerNormBackward) { + auto a = createTensor({2, 3, 4}, 1.0f); + auto weight = createTensor({4}, 1.0f); + auto bias = createTensor({4}, 0.0f); + auto layernorm_fn = std::make_shared(1e-5f); + auto result = layernorm_fn->Apply({a, weight, bias}); + auto grad = createTensor({2, 3, 4}, 1.0f); + auto grad_inputs = layernorm_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 3); +} + +TEST_F(AutogradNormalizationBackwardTest, LayerNormBackwardZeroBias) { + auto a = createTensor({2, 3, 4}, 1.0f); + auto weight = createTensor({4}, 1.0f); + auto bias = createTensor({4}, 0.0f); + auto layernorm_fn = std::make_shared(1e-5f); + auto result = layernorm_fn->Apply({a, weight, bias}); + auto grad = createTensor({2, 3, 4}, 1.0f); + auto grad_inputs = layernorm_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 3); +} diff --git a/tests/autograd/test_autograd_normalization_forward.cc b/tests/autograd/test_autograd_normalization_forward.cc new file mode 100644 index 00000000..d58fd749 --- /dev/null +++ b/tests/autograd/test_autograd_normalization_forward.cc @@ -0,0 +1,40 @@ +#include + +#include + +#include "infini_train/include/tensor.h" +#include "infini_train/include/nn/parallel/global.h" +#include "infini_train/include/autograd/normalization.h" +#include "test_utils.h" + +using namespace infini_train; + +class AutogradNormalizationForwardTest : public infini_train::test::AutogradTestBase {}; + +TEST_F(AutogradNormalizationForwardTest, LayerNormForward) { + auto a = createTensor({2, 3, 4}, 1.0f); + auto weight = createTensor({4}, 1.0f); + auto bias = createTensor({4}, 0.0f); + auto layernorm_fn = std::make_shared(1e-5f); + auto result = layernorm_fn->Apply({a, weight, bias}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradNormalizationForwardTest, LayerNormZeroBias) { + auto a = createTensor({2, 3, 4}, 1.0f); + auto weight = createTensor({4}, 1.0f); + auto bias = createTensor({4}, 0.0f); + auto layernorm_fn = std::make_shared(1e-5f); + auto result = layernorm_fn->Apply({a, weight, bias}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradNormalizationForwardTest, LayerNormThreeDim) { + auto a = createTensor({2, 1, 4}, 1.0f); + auto weight = createTensor({4}, 1.0f); + auto bias = createTensor({4}, 0.0f); + auto layernorm_fn = std::make_shared(1e-5f); + auto result = layernorm_fn->Apply({a, weight, bias}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector{2, 1, 4})); +} diff --git a/tests/autograd/test_autograd_reduction_backward.cc b/tests/autograd/test_autograd_reduction_backward.cc new file mode 100644 index 00000000..d212a065 --- /dev/null +++ b/tests/autograd/test_autograd_reduction_backward.cc @@ -0,0 +1,66 @@ +#include + +#include + +#include "infini_train/include/tensor.h" +#include "infini_train/include/nn/parallel/global.h" +#include "infini_train/include/autograd/reduction.h" +#include "test_utils.h" + +using namespace infini_train; + +class AutogradReductionBackwardTest : public infini_train::test::AutogradTestBase {}; + +TEST_F(AutogradReductionBackwardTest, SumBackward) { + auto a = createTensor({2, 3}, 1.0f); + auto sum_fn = std::make_shared(1, false); + auto result = sum_fn->Apply({a}); + auto grad = createTensor({2}, 1.0f); + auto grad_inputs = sum_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 1); +} + +TEST_F(AutogradReductionBackwardTest, MeanBackward) { + auto a = createTensor({2, 3}, 1.0f); + auto mean_fn = std::make_shared(1, false); + auto result = mean_fn->Apply({a}); + auto grad = createTensor({2}, 1.0f); + auto grad_inputs = mean_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 1); +} + +TEST_F(AutogradReductionBackwardTest, MaxBackward) { + auto a = createTensor({2, 3}, 1.0f); + auto max_fn = std::make_shared(1, false); + auto result = max_fn->Apply({a}); + auto grad = createTensor({2}, 1.0f); + auto grad_inputs = max_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 1); +} + +TEST_F(AutogradReductionBackwardTest, MinBackward) { + auto a = createTensor({2, 3}, 1.0f); + auto min_fn = std::make_shared(1, false); + auto result = min_fn->Apply({a}); + auto grad = createTensor({2}, 1.0f); + auto grad_inputs = min_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 1); +} + +TEST_F(AutogradReductionBackwardTest, SumBackwardKeepDim) { + auto a = createTensor({2, 3}, 1.0f); + auto sum_fn = std::make_shared(1, true); + auto result = sum_fn->Apply({a}); + auto grad = createTensor({2, 1}, 1.0f); + auto grad_inputs = sum_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 1); +} + +TEST_F(AutogradReductionBackwardTest, MeanBackwardKeepDim) { + auto a = createTensor({2, 3}, 1.0f); + auto mean_fn = std::make_shared(1, true); + auto result = mean_fn->Apply({a}); + auto grad = createTensor({2, 1}, 1.0f); + auto grad_inputs = mean_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 1); +} diff --git a/tests/autograd/test_autograd_reduction_forward.cc b/tests/autograd/test_autograd_reduction_forward.cc new file mode 100644 index 00000000..b4f8edb7 --- /dev/null +++ b/tests/autograd/test_autograd_reduction_forward.cc @@ -0,0 +1,54 @@ +#include + +#include + +#include "infini_train/include/tensor.h" +#include "infini_train/include/nn/parallel/global.h" +#include "infini_train/include/autograd/reduction.h" +#include "test_utils.h" + +using namespace infini_train; + +class AutogradReductionForwardTest : public infini_train::test::AutogradTestBase {}; + +TEST_F(AutogradReductionForwardTest, SumForward) { + auto a = createTensor({2, 3}, 1.0f); + auto sum_fn = std::make_shared(1, false); + auto result = sum_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradReductionForwardTest, MeanForward) { + auto a = createTensor({2, 3}, 1.0f); + auto mean_fn = std::make_shared(1, false); + auto result = mean_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradReductionForwardTest, MaxForward) { + auto a = createTensor({2, 3}, 1.0f); + auto max_fn = std::make_shared(1, false); + auto result = max_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradReductionForwardTest, MinForward) { + auto a = createTensor({2, 3}, 1.0f); + auto min_fn = std::make_shared(1, false); + auto result = min_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradReductionForwardTest, SumKeepDim) { + auto a = createTensor({2, 3}, 1.0f); + auto sum_fn = std::make_shared(1, true); + auto result = sum_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradReductionForwardTest, MeanKeepDim) { + auto a = createTensor({2, 3}, 1.0f); + auto mean_fn = std::make_shared(1, true); + auto result = mean_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} diff --git a/tests/autograd/test_autograd_softmax_backward.cc b/tests/autograd/test_autograd_softmax_backward.cc new file mode 100644 index 00000000..6d3f02a4 --- /dev/null +++ b/tests/autograd/test_autograd_softmax_backward.cc @@ -0,0 +1,30 @@ +#include + +#include + +#include "infini_train/include/tensor.h" +#include "infini_train/include/nn/parallel/global.h" +#include "infini_train/include/autograd/softmax.h" +#include "test_utils.h" + +using namespace infini_train; + +class AutogradSoftmaxBackwardTest : public infini_train::test::AutogradTestBase {}; + +TEST_F(AutogradSoftmaxBackwardTest, SoftmaxBackward) { + auto a = createTensor({2, 3}, 1.0f); + auto softmax_fn = std::make_shared(1); + auto result = softmax_fn->Apply({a}); + auto grad = createTensor({2, 3}, 1.0f); + auto grad_inputs = softmax_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 1); +} + +TEST_F(AutogradSoftmaxBackwardTest, SoftmaxBackwardDim0) { + auto a = createTensor({4, 3}, 1.0f); + auto softmax_fn = std::make_shared(0); + auto result = softmax_fn->Apply({a}); + auto grad = createTensor({4, 3}, 1.0f); + auto grad_inputs = softmax_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 1); +} diff --git a/tests/autograd/test_autograd_softmax_forward.cc b/tests/autograd/test_autograd_softmax_forward.cc new file mode 100644 index 00000000..c3d196f1 --- /dev/null +++ b/tests/autograd/test_autograd_softmax_forward.cc @@ -0,0 +1,36 @@ +#include + +#include + +#include "infini_train/include/tensor.h" +#include "infini_train/include/nn/parallel/global.h" +#include "infini_train/include/autograd/softmax.h" +#include "test_utils.h" + +using namespace infini_train; + +class AutogradSoftmaxForwardTest : public infini_train::test::AutogradTestBase {}; + +TEST_F(AutogradSoftmaxForwardTest, SoftmaxForward) { + auto a = createTensor({2, 3}, 1.0f); + auto softmax_fn = std::make_shared(1); + auto result = softmax_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector{2, 3})); +} + +TEST_F(AutogradSoftmaxForwardTest, SoftmaxDim0) { + auto a = createTensor({4, 3}, 1.0f); + auto softmax_fn = std::make_shared(0); + auto result = softmax_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector{4, 3})); +} + +TEST_F(AutogradSoftmaxForwardTest, SoftmaxLastDim) { + auto a = createTensor({2, 3, 4}, 1.0f); + auto softmax_fn = std::make_shared(2); + auto result = softmax_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector{2, 3, 4})); +} diff --git a/tests/autograd/test_autograd_transform_backward.cc b/tests/autograd/test_autograd_transform_backward.cc new file mode 100644 index 00000000..1613f1a2 --- /dev/null +++ b/tests/autograd/test_autograd_transform_backward.cc @@ -0,0 +1,21 @@ +#include + +#include + +#include "infini_train/include/tensor.h" +#include "infini_train/include/nn/parallel/global.h" +#include "infini_train/include/autograd/transform.h" +#include "test_utils.h" + +using namespace infini_train; + +class AutogradTransformBackwardTest : public infini_train::test::AutogradTestBase {}; + +TEST_F(AutogradTransformBackwardTest, TransposeBackward) { + auto a = createTensor({2, 3}, 1.0f); + auto transpose_fn = std::make_shared(0, 1); + auto result = transpose_fn->Apply({a}); + auto grad = createTensor({3, 2}, 1.0f); + auto grad_inputs = transpose_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 1); +} diff --git a/tests/autograd/test_autograd_transform_forward.cc b/tests/autograd/test_autograd_transform_forward.cc new file mode 100644 index 00000000..67b20adb --- /dev/null +++ b/tests/autograd/test_autograd_transform_forward.cc @@ -0,0 +1,70 @@ +#include + +#include + +#include "infini_train/include/tensor.h" +#include "infini_train/include/nn/parallel/global.h" +#include "infini_train/include/autograd/transform.h" +#include "infini_train/include/autograd/misc.h" +#include "test_utils.h" + +using namespace infini_train; + +class AutogradTransformForwardTest : public infini_train::test::AutogradTestBase {}; + +TEST_F(AutogradTransformForwardTest, TransposeForward) { + auto a = createTensor({2, 3}, 1.0f); + auto transpose_fn = std::make_shared(0, 1); + auto result = transpose_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector{3, 2})); +} + +TEST_F(AutogradTransformForwardTest, SliceForward) { + auto a = createTensor({4, 4}, 1.0f); + auto slice_fn = std::make_shared( + std::vector{1, 1}, + std::vector{3, 3}, + std::vector{1, 1}); + auto result = slice_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTransformForwardTest, SplitForward) { + auto a = createTensor({4, 4}, 1.0f); + auto split_fn = std::make_shared(2, 0); + auto result = split_fn->Apply({a}); + EXPECT_EQ(result.size(), 2); +} + +TEST_F(AutogradTransformForwardTest, ConcatForward) { + auto a = createTensor({2, 2}, 1.0f); + auto b = createTensor({2, 2}, 2.0f); + auto concat_fn = std::make_shared(0); + auto result = concat_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector{4, 2})); +} + +TEST_F(AutogradTransformForwardTest, StackForward) { + auto a = createTensor({2, 3}, 1.0f); + auto b = createTensor({2, 3}, 2.0f); + auto stack_fn = std::make_shared(0); + auto result = stack_fn->Apply({a, b}); + EXPECT_EQ(result.size(), 1); + EXPECT_EQ(result[0]->Dims(), (std::vector{2, 2, 3})); +} + +TEST_F(AutogradTransformForwardTest, TrilForward) { + auto a = createTensor({3, 3}, 1.0f); + auto tril_fn = std::make_shared(0); + auto result = tril_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} + +TEST_F(AutogradTransformForwardTest, TriuForward) { + auto a = createTensor({3, 3}, 1.0f); + auto triu_fn = std::make_shared(0); + auto result = triu_fn->Apply({a}); + EXPECT_EQ(result.size(), 1); +} diff --git a/tests/common/test_macros.cmake b/tests/common/test_macros.cmake new file mode 100644 index 00000000..184083c0 --- /dev/null +++ b/tests/common/test_macros.cmake @@ -0,0 +1,97 @@ +# ============================================================================ +# InfiniTrain 测试宏 +# ============================================================================ +# 提供统一的测试配置接口,降低接入成本 +# +# 使用方法: +# 1. 在 tests/CMakeLists.txt 中 include 此文件 +# 2. 使用 infini_train_add_test 宏注册测试 +# +# 示例: +# infini_train_add_test( +# test_tensor_create +# SOURCES test_tensor_create.cc +# LABELS cpu cuda +# ) +# ============================================================================ + +include_guard(GLOBAL) + +# 获取 test_macros.cmake 所在目录(tests/common/) +set(TEST_MACROS_DIR "${CMAKE_CURRENT_LIST_DIR}") + +# ----------------------------------------------------------------------------- +# 加载 GoogleTest 模块(提供 gtest_discover_tests) +# ----------------------------------------------------------------------------- +include(GoogleTest) + +# ----------------------------------------------------------------------------- +# infini_train_add_test - 测试注册宏 +# ----------------------------------------------------------------------------- +# 功能: +# 1. 创建可执行文件 +# 2. 配置编译选项、链接库和头文件路径 +# 3. 使用 gtest_discover_tests 自动发现测试用例 +# 4. 设置测试标签 +# +# 参数: +# SOURCES: 源文件列表(必填) +# LABELS: 测试标签,如 "cpu" "cuda" "distributed"(可选,默认 "cpu") +# +# 示例: +# # 简单测试(1行) +# infini_train_add_test(test_example SOURCES test_example.cc LABELS cpu) +# +# # 多标签测试 +# infini_train_add_test(test_cuda_example SOURCES test_cuda.cc LABELS cuda distributed) +# ----------------------------------------------------------------------------- +macro(infini_train_add_test) + cmake_parse_arguments(ARG "" "TEST_NAME" "SOURCES;LABELS" ${ARGN}) + + if(NOT ARG_TEST_NAME) + set(ARG_TEST_NAME ${ARG_UNPARSED_ARGUMENTS}) + endif() + + if(NOT ARG_SOURCES) + message(FATAL_ERROR "infini_train_add_test: TEST_NAME and SOURCES are required") + endif() + + # 1. 创建可执行文件 + add_executable(${ARG_TEST_NAME} ${ARG_SOURCES}) + + # 2. 配置编译选项(禁用警告转错误,以便在宽松编译环境下运行) + target_compile_options(${ARG_TEST_NAME} PRIVATE -Wno-error) + + # 3. 链接 Google Test + target_link_libraries(${ARG_TEST_NAME} PRIVATE + GTest::gtest + GTest::gtest_main + ) + + # 4. 添加头文件路径 + target_include_directories(${ARG_TEST_NAME} PRIVATE + ${TEST_MACROS_DIR} + ${glog_SOURCE_DIR}/src + ) + + # 5. 链接项目库(whole-archive 方式解决静态库符号依赖) + target_link_libraries(${ARG_TEST_NAME} PRIVATE + "-Wl,--whole-archive" + infini_train + infini_train_cpu_kernels + "-Wl,--no-whole-archive" + ) + + # 6. 使用 gtest_discover_tests 自动发现测试用例 + # 这会自动为每个 TEST_F() 创建一个 ctest 测试 + set(labels "cpu") + if(ARG_LABELS) + set(labels "${ARG_LABELS}") + endif() + + gtest_discover_tests(${ARG_TEST_NAME} + # 自动将测试输出重定向到 XML(便于 CI 集成) + EXTRA_ARGS --gtest_output=xml:%T.xml + PROPERTIES LABELS "${labels}" + ) +endmacro() diff --git a/tests/hook/CMakeLists.txt b/tests/hook/CMakeLists.txt index e567608b..51c77ae0 100644 --- a/tests/hook/CMakeLists.txt +++ b/tests/hook/CMakeLists.txt @@ -1,47 +1,14 @@ +# ============================================================================ # Hook tests - -add_executable(test_hook - test_hook.cc -) -target_link_libraries(test_hook - PRIVATE - GTest::gtest - GTest::gtest_main -) -target_include_directories(test_hook PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common) - -target_link_libraries(test_hook PRIVATE - "-Wl,--whole-archive" - infini_train - infini_train_cpu_kernels - "-Wl,--no-whole-archive" -) - -add_test(NAME hook_cpu COMMAND test_hook) -set_tests_properties(hook_cpu PROPERTIES LABELS "cpu") - -add_test(NAME hook_cuda COMMAND test_hook --gtest_filter=HookTest.*CUDA) -set_tests_properties(hook_cuda PROPERTIES LABELS "cuda") - -add_test(NAME hook_distributed COMMAND test_hook --gtest_filter=HookTest.*Distributed) -set_tests_properties(hook_distributed PROPERTIES LABELS "cuda;distributed") - -add_executable(test_precision_check - test_precision_check.cc -) -target_link_libraries(test_precision_check - PRIVATE - GTest::gtest - GTest::gtest_main -) -target_include_directories(test_precision_check PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common) - -target_link_libraries(test_precision_check PRIVATE - "-Wl,--whole-archive" - infini_train - infini_train_cpu_kernels - "-Wl,--no-whole-archive" -) - -add_test(NAME precision_check_cpu COMMAND test_precision_check) -set_tests_properties(precision_check_cpu PROPERTIES LABELS "cpu") +# ============================================================================ +# 重构版本:使用 infini_train_add_test 宏简化配置 +# +# 新增测试只需 1 行: +# infini_train_add_test(test_name SOURCES test_name.cc LABELS cpu) +# ============================================================================ + +# ----------------------------------------------------------------------------- +# Hook tests +# ----------------------------------------------------------------------------- +infini_train_add_test(test_hook SOURCES test_hook.cc LABELS cpu cuda distributed) +infini_train_add_test(test_precision_check SOURCES test_precision_check.cc LABELS cpu) diff --git a/tests/lora/CMakeLists.txt b/tests/lora/CMakeLists.txt new file mode 100644 index 00000000..27b47c64 --- /dev/null +++ b/tests/lora/CMakeLists.txt @@ -0,0 +1,7 @@ +# ============================================================================ +# LoRA tests +# ============================================================================ +# 重构版本:使用 infini_train_add_test 宏简化配置 +# ============================================================================ + +infini_train_add_test(test_lora SOURCES test_lora.cc LABELS cpu) diff --git a/tests/lora/test_lora.cc b/tests/lora/test_lora.cc new file mode 100644 index 00000000..19c24efd --- /dev/null +++ b/tests/lora/test_lora.cc @@ -0,0 +1,331 @@ +#include + +#include +#include +#include + +#include "infini_train/include/nn/lora/lora_config.h" +#include "infini_train/include/nn/lora/lora_linear.h" +#include "infini_train/include/nn/lora/lora_utils.h" +#include "infini_train/include/nn/modules/container.h" +#include "infini_train/include/nn/modules/linear.h" +#include "infini_train/include/nn/modules/module.h" +#include "infini_train/include/nn/parallel/global.h" +#include "infini_train/include/tensor.h" +#include "test_utils.h" + +using namespace infini_train; +using namespace infini_train::nn::lora; + +class LoRATest : public infini_train::test::InfiniTrainTest {}; + +TEST_F(LoRATest, LoRAConfigScaling) { + LoRAConfig config; + config.rank = 8; + config.alpha = 16.0f; + + float expected_scaling = 16.0f / 8.0f; + EXPECT_EQ(config.Scaling(), expected_scaling); +} + +TEST_F(LoRATest, LoRAConfigShouldApply) { + LoRAConfig config; + config.rank = 8; + config.alpha = 16.0f; + + EXPECT_TRUE(config.ShouldApplyLoRA("c_attn")); + EXPECT_TRUE(config.ShouldApplyLoRA("transformer.h.0.attn.c_attn")); + EXPECT_TRUE(config.ShouldApplyLoRA("c_proj")); + EXPECT_FALSE(config.ShouldApplyLoRA("c_fc")); + EXPECT_FALSE(config.ShouldApplyLoRA("random_layer")); +} + +TEST_F(LoRATest, LoRALinearFromModel) { + auto base_linear = std::make_shared(64, 128, /*bias=*/true); + + LoRAConfig config; + config.rank = 4; + config.alpha = 8.0f; + config.target_modules = {"Linear"}; + + auto model = GetLoRAModel(base_linear, config); + + auto *lora_linear = dynamic_cast(model.get()); + ASSERT_NE(lora_linear, nullptr); + + EXPECT_EQ(lora_linear->in_features(), 64); + EXPECT_EQ(lora_linear->out_features(), 128); + EXPECT_EQ(lora_linear->rank(), 4); + + auto lora_A = lora_linear->parameter(LoRALinear::kParamLoraAName); + auto lora_B = lora_linear->parameter(LoRALinear::kParamLoraBName); + auto weight = lora_linear->parameter(nn::Linear::kParamWeightName); + + EXPECT_EQ(lora_A->Dims()[0], config.rank); + EXPECT_EQ(lora_A->Dims()[1], 64); + EXPECT_EQ(lora_B->Dims()[0], 128); + EXPECT_EQ(lora_B->Dims()[1], config.rank); + + EXPECT_FALSE(weight->requires_grad()); + EXPECT_TRUE(lora_A->requires_grad()); + EXPECT_TRUE(lora_B->requires_grad()); + + auto params = lora_linear->LoRAParameters(); + EXPECT_EQ(params.size(), 2); +} + +TEST_F(LoRATest, LoRALinearForward) { + auto base_linear = std::make_shared(64, 128, /*bias=*/true); + + LoRAConfig config; + config.rank = 4; + config.alpha = 8.0f; + config.target_modules = {"Linear"}; + + auto model = GetLoRAModel(base_linear, config); + + auto input = std::make_shared(std::vector{2, 10, 64}, DataType::kFLOAT32); + + auto output = (*model)({input})[0]; + + EXPECT_EQ(output->Dims().size(), 3); + EXPECT_EQ(output->Dims()[0], 2); + EXPECT_EQ(output->Dims()[1], 10); + EXPECT_EQ(output->Dims()[2], 128); +} + +TEST_F(LoRATest, LoRALinearMerge) { + auto base_linear = std::make_shared(32, 64, /*bias=*/false); + + LoRAConfig config; + config.rank = 4; + config.alpha = 8.0f; + config.target_modules = {"Linear"}; + + auto model = GetLoRAModel(base_linear, config); + + auto *lora_linear = dynamic_cast(model.get()); + ASSERT_NE(lora_linear, nullptr); + + auto input = std::make_shared(std::vector{2, 5, 32}, DataType::kFLOAT32); + input->EigenMatrix().setRandom(); + + auto output_before = (*model)({input})[0]; + float output_before_sum = output_before->EigenMatrix().sum(); + + EXPECT_FALSE(lora_linear->IsMerged()); + MergeLoRAWeights(model); + EXPECT_TRUE(lora_linear->IsMerged()); + + auto lora_A = lora_linear->parameter(LoRALinear::kParamLoraAName); + auto lora_B = lora_linear->parameter(LoRALinear::kParamLoraBName); + EXPECT_FALSE(lora_A->requires_grad()); + EXPECT_FALSE(lora_B->requires_grad()); + + auto output_merged = (*model)({input})[0]; + float output_merged_sum = output_merged->EigenMatrix().sum(); + EXPECT_NEAR(std::abs(output_before_sum - output_merged_sum), 0.0f, 1e-3); + + UnmergeLoRAWeights(model); + EXPECT_FALSE(lora_linear->IsMerged()); + EXPECT_TRUE(lora_A->requires_grad()); + EXPECT_TRUE(lora_B->requires_grad()); + + auto output_unmerged = (*model)({input})[0]; + EXPECT_EQ(output_before->Dims(), output_unmerged->Dims()); +} + +TEST_F(LoRATest, LoRAUtils) { + auto base_linear = std::make_shared(32, 64, /*bias=*/true); + + LoRAConfig config; + config.rank = 4; + config.alpha = 8.0f; + config.target_modules = {"Linear"}; + + auto model = GetLoRAModel(base_linear, config); + + auto lora_params = GetLoRAParameters(model); + EXPECT_EQ(lora_params.size(), 2); + + int64_t trainable = CountTrainableParameters(model); + int64_t expected_trainable = config.rank * 32 + 64 * config.rank; + EXPECT_EQ(trainable, expected_trainable); + + int64_t total = CountTotalParameters(model); + int64_t expected_total = 64 * 32 + 64 + config.rank * 32 + 64 * config.rank; + EXPECT_EQ(total, expected_total); +} + +TEST_F(LoRATest, ParseLoRATargetModules) { + auto modules = ParseLoRATargetModules("c_attn"); + EXPECT_EQ(modules.size(), 1); + EXPECT_TRUE(modules.count("c_attn")); + + modules = ParseLoRATargetModules("c_attn,c_proj,c_fc"); + EXPECT_EQ(modules.size(), 3); + EXPECT_TRUE(modules.count("c_attn")); + EXPECT_TRUE(modules.count("c_proj")); + EXPECT_TRUE(modules.count("c_fc")); + + modules = ParseLoRATargetModules("c_attn, c_proj , c_fc"); + EXPECT_EQ(modules.size(), 3); + + modules = ParseLoRATargetModules("c_attn,,c_proj"); + EXPECT_EQ(modules.size(), 2); +} + +TEST_F(LoRATest, ShouldApplyLoRAEdgeCases) { + { + LoRAConfig config{8, 16.0f, 0.0f, ParseLoRATargetModules("c_attn,attn.c_proj")}; + EXPECT_TRUE(config.ShouldApplyLoRA("attn.c_proj")); + EXPECT_TRUE(config.ShouldApplyLoRA("transformer.h.0.attn.c_proj")); + EXPECT_FALSE(config.ShouldApplyLoRA("mlp.c_proj")); + } + + { + LoRAConfig config{8, 16.0f, 0.0f, ParseLoRATargetModules("c_attn,mlp.c_proj")}; + EXPECT_FALSE(config.ShouldApplyLoRA("attn.c_proj")); + EXPECT_TRUE(config.ShouldApplyLoRA("mlp.c_proj")); + } + + { + LoRAConfig config{8, 16.0f, 0.0f, ParseLoRATargetModules("c_attn,c_proj")}; + EXPECT_TRUE(config.ShouldApplyLoRA("transformer.h.0.attn.c_proj")); + EXPECT_TRUE(config.ShouldApplyLoRA("transformer.h.0.mlp.c_proj")); + } +} + +TEST_F(LoRATest, FreezeUnfreeze) { + auto base_linear = std::make_shared(64, 128, /*bias=*/true); + + LoRAConfig config; + config.rank = 4; + config.alpha = 8.0f; + config.target_modules = {"Linear"}; + + auto model = GetLoRAModel(base_linear, config); + + auto all_params = model->Parameters(); + + int64_t total_trainable = 0; + for (const auto &p : all_params) { + if (p->requires_grad()) { + total_trainable += p->NumElements(); + } + } + int64_t expected = config.rank * 64 + 128 * config.rank; + EXPECT_EQ(total_trainable, expected); + + FreezeBaseModel(model); + + int64_t after_freeze = 0; + for (const auto &p : all_params) { + if (p->requires_grad()) { + after_freeze += p->NumElements(); + } + } + EXPECT_EQ(after_freeze, expected); + + UnfreezeModel(model); + int64_t after_unfreeze = 0; + for (const auto &p : all_params) { + if (p->requires_grad()) { + after_unfreeze += p->NumElements(); + } + } + int64_t expected_unfreeze = 64 * 128 + 128 + config.rank * 64 + 128 * config.rank; + EXPECT_EQ(after_unfreeze, expected_unfreeze); +} + +TEST_F(LoRATest, LoRAStateDict) { + auto base_linear = std::make_shared(64, 128, /*bias=*/true); + + LoRAConfig config; + config.rank = 4; + config.alpha = 8.0f; + config.target_modules = {"Linear"}; + + auto model = GetLoRAModel(base_linear, config); + + auto state_dict = model->StateDict(); + + EXPECT_TRUE(state_dict.count("weight")); + EXPECT_TRUE(state_dict.count("bias")); + EXPECT_TRUE(state_dict.count("lora_A")); + EXPECT_TRUE(state_dict.count("lora_B")); + + EXPECT_TRUE(state_dict.at("lora_A")->requires_grad()); + EXPECT_TRUE(state_dict.at("lora_B")->requires_grad()); + EXPECT_FALSE(state_dict.at("weight")->requires_grad()); + + EXPECT_EQ(state_dict.at("lora_A")->Dims()[0], config.rank); + EXPECT_EQ(state_dict.at("lora_A")->Dims()[1], 64); + EXPECT_EQ(state_dict.at("lora_B")->Dims()[0], 128); + EXPECT_EQ(state_dict.at("lora_B")->Dims()[1], config.rank); +} + +TEST_F(LoRATest, GetLoRAModel) { + auto base_linear = std::make_shared(64, 128, /*bias=*/true); + + LoRAConfig config; + config.rank = 4; + config.alpha = 8.0f; + config.target_modules = {"Linear"}; + + auto model = GetLoRAModel(base_linear, config); + + EXPECT_NE(model, nullptr); + + auto lora_params = GetLoRAParameters(model); + EXPECT_EQ(lora_params.size(), 2); + + int64_t total_elements = 0; + for (const auto &t : lora_params) { + total_elements += t->NumElements(); + } + int64_t expected_elements = config.rank * 64 + 128 * config.rank; + EXPECT_EQ(total_elements, expected_elements); + + MergeLoRAWeights(model); + auto *lora_mod = dynamic_cast(model.get()); + EXPECT_NE(lora_mod, nullptr); + EXPECT_FALSE(lora_mod->LoRAParameters()[0]->requires_grad()); + + UnmergeLoRAWeights(model); + EXPECT_TRUE(lora_mod->LoRAParameters()[0]->requires_grad()); +} + +TEST_F(LoRATest, MergeAndUnload) { + auto base_linear = std::make_shared(64, 128, /*bias=*/true); + LoRAConfig config; + config.rank = 4; + config.alpha = 8.0f; + config.target_modules = {"Linear"}; + auto model = GetLoRAModel(base_linear, config); + + EXPECT_NE(dynamic_cast(model.get()), nullptr); + + auto input = std::make_shared(std::vector{2, 5, 64}, DataType::kFLOAT32); + input->EigenMatrix().setRandom(); + auto output_before = (*model)({input})[0]; + float output_before_sum = output_before->EigenMatrix().sum(); + + auto unloaded_model = MergeAndUnload(model); + EXPECT_NE(unloaded_model, nullptr); + EXPECT_EQ(dynamic_cast(unloaded_model.get()), nullptr); + + auto state_dict = unloaded_model->StateDict(); + for (const auto &[name, param] : state_dict) { + EXPECT_EQ(name.find("lora_A"), std::string::npos); + EXPECT_EQ(name.find("lora_B"), std::string::npos); + } + + auto output_after = (*unloaded_model)({input})[0]; + float output_after_sum = output_after->EigenMatrix().sum(); + EXPECT_NEAR(std::abs(output_before_sum - output_after_sum), 0.0f, 1e-3); + + for (const auto ¶m : unloaded_model->Parameters()) { + EXPECT_TRUE(param->requires_grad()); + } +} diff --git a/tests/optimizer/CMakeLists.txt b/tests/optimizer/CMakeLists.txt index c17d1a8d..3dc00354 100644 --- a/tests/optimizer/CMakeLists.txt +++ b/tests/optimizer/CMakeLists.txt @@ -1,27 +1,22 @@ +# ============================================================================ # Optimizer tests +# ============================================================================ +# 重构版本:使用 infini_train_add_test 宏简化配置 +# +# 新增测试只需 1 行: +# infini_train_add_test(test_name SOURCES test_name.cc LABELS cpu) +# ============================================================================ -add_executable(test_optimizer - test_optimizer.cc -) -target_link_libraries(test_optimizer - PRIVATE - GTest::gtest - GTest::gtest_main -) -target_include_directories(test_optimizer PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common) - -target_link_libraries(test_optimizer PRIVATE - "-Wl,--whole-archive" - infini_train - infini_train_cpu_kernels - "-Wl,--no-whole-archive" -) - -add_test(NAME optimizer_cpu COMMAND test_optimizer) -set_tests_properties(optimizer_cpu PROPERTIES LABELS "cpu") - -add_test(NAME optimizer_cuda COMMAND test_optimizer --gtest_filter=OptimizerTest.*CUDA) -set_tests_properties(optimizer_cuda PROPERTIES LABELS "cuda") +# ----------------------------------------------------------------------------- +# Optimizer tests +# ----------------------------------------------------------------------------- +infini_train_add_test(test_optimizer_creation SOURCES test_optimizer_creation.cc LABELS cpu) +infini_train_add_test(test_optimizer_step SOURCES test_optimizer_step.cc LABELS cpu) +infini_train_add_test(test_optimizer_cuda SOURCES test_optimizer_cuda.cc LABELS cuda) +infini_train_add_test(test_optimizer_distributed SOURCES test_optimizer_distributed.cc LABELS cuda distributed) -add_test(NAME optimizer_distributed COMMAND test_optimizer --gtest_filter=OptimizerTest.*Distributed) -set_tests_properties(optimizer_distributed PROPERTIES LABELS "cuda;distributed") +# ----------------------------------------------------------------------------- +# Legacy combined tests +# 注意:使用 gtest_discover_tests,所有 TEST_F 都会被自动发现 +# ----------------------------------------------------------------------------- +infini_train_add_test(test_optimizer_legacy SOURCES test_optimizer.cc LABELS cpu cuda distributed) diff --git a/tests/optimizer/test_optimizer.cc b/tests/optimizer/test_optimizer.cc index 836fee91..6baa34e5 100644 --- a/tests/optimizer/test_optimizer.cc +++ b/tests/optimizer/test_optimizer.cc @@ -9,14 +9,19 @@ using namespace infini_train; -class OptimizerTest : public ::testing::Test { +class OptimizerTestBase : public ::testing::Test { protected: static void SetUpTestSuite() { nn::parallel::global::GlobalEnv::Instance().Init(1, 1, false, 1, 1); } }; -TEST_F(OptimizerTest, SGDCreation) { +class OptimizerCreationTest : public OptimizerTestBase {}; +class OptimizerGradTest : public OptimizerTestBase {}; +class OptimizerCudaTest : public OptimizerTestBase {}; +class OptimizerDistributedTest : public OptimizerTestBase {}; + +TEST_F(OptimizerCreationTest, SGDCreation) { auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, Device(Device::DeviceType::kCPU, 0)); param->set_requires_grad(true); @@ -27,7 +32,7 @@ TEST_F(OptimizerTest, SGDCreation) { EXPECT_NE(optimizer, nullptr); } -TEST_F(OptimizerTest, AdamCreation) { +TEST_F(OptimizerCreationTest, AdamCreation) { auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, Device(Device::DeviceType::kCPU, 0)); param->set_requires_grad(true); @@ -38,7 +43,7 @@ TEST_F(OptimizerTest, AdamCreation) { EXPECT_NE(optimizer, nullptr); } -TEST_F(OptimizerTest, ZeroGrad) { +TEST_F(OptimizerGradTest, ZeroGrad) { auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, Device(Device::DeviceType::kCPU, 0)); param->set_requires_grad(true); @@ -49,7 +54,7 @@ TEST_F(OptimizerTest, ZeroGrad) { optimizer->ZeroGrad(); } -TEST_F(OptimizerTest, SGDMultiParams) { +TEST_F(OptimizerCreationTest, SGDMultiParams) { std::vector> params; for (int i = 0; i < 3; ++i) { auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, @@ -64,7 +69,7 @@ TEST_F(OptimizerTest, SGDMultiParams) { optimizer->ZeroGrad(); } -TEST_F(OptimizerTest, SGDCreationCUDA) { +TEST_F(OptimizerCudaTest, SGDCreationCUDA) { REQUIRE_CUDA(); #if defined(USE_CUDA) auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, @@ -79,7 +84,7 @@ TEST_F(OptimizerTest, SGDCreationCUDA) { #endif } -TEST_F(OptimizerTest, AdamCreationCUDA) { +TEST_F(OptimizerCudaTest, AdamCreationCUDA) { REQUIRE_CUDA(); #if defined(USE_CUDA) auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, @@ -94,7 +99,7 @@ TEST_F(OptimizerTest, AdamCreationCUDA) { #endif } -TEST_F(OptimizerTest, ZeroGradCUDA) { +TEST_F(OptimizerCudaTest, ZeroGradCUDA) { REQUIRE_CUDA(); #if defined(USE_CUDA) auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, @@ -109,7 +114,7 @@ TEST_F(OptimizerTest, ZeroGradCUDA) { #endif } -TEST_F(OptimizerTest, SGDMultiParamsCUDA) { +TEST_F(OptimizerCudaTest, SGDMultiParamsCUDA) { REQUIRE_CUDA(); #if defined(USE_CUDA) std::vector> params; @@ -127,8 +132,10 @@ TEST_F(OptimizerTest, SGDMultiParamsCUDA) { #endif } -TEST_F(OptimizerTest, DistributedSGD) { +TEST_F(OptimizerDistributedTest, DistributedSGD) { + REQUIRE_CUDA(); REQUIRE_DISTRIBUTED(); + REQUIRE_NCCL(); #if defined(USE_CUDA) && defined(USE_NCCL) auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, Device(Device::DeviceType::kCUDA, 0)); @@ -142,8 +149,10 @@ TEST_F(OptimizerTest, DistributedSGD) { #endif } -TEST_F(OptimizerTest, DistributedAdam) { +TEST_F(OptimizerDistributedTest, DistributedAdam) { + REQUIRE_CUDA(); REQUIRE_DISTRIBUTED(); + REQUIRE_NCCL(); #if defined(USE_CUDA) && defined(USE_NCCL) auto param = std::make_shared(std::vector{4, 4}, DataType::kFLOAT32, Device(Device::DeviceType::kCUDA, 0)); @@ -157,8 +166,10 @@ TEST_F(OptimizerTest, DistributedAdam) { #endif } -TEST_F(OptimizerTest, DistributedZeroGrad) { +TEST_F(OptimizerDistributedTest, DistributedZeroGrad) { + REQUIRE_CUDA(); REQUIRE_DISTRIBUTED(); + REQUIRE_NCCL(); #if defined(USE_CUDA) && defined(USE_NCCL) auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, Device(Device::DeviceType::kCUDA, 0)); diff --git a/tests/optimizer/test_optimizer_creation.cc b/tests/optimizer/test_optimizer_creation.cc new file mode 100644 index 00000000..fdb69d70 --- /dev/null +++ b/tests/optimizer/test_optimizer_creation.cc @@ -0,0 +1,82 @@ +#include + +#include + +#include "infini_train/include/tensor.h" +#include "infini_train/include/optimizer.h" +#include "infini_train/include/nn/parallel/global.h" +#include "tests/common/test_utils.h" + +using namespace infini_train; + +class OptimizerCreationTest : public infini_train::test::InfiniTrainTest {}; + +TEST_F(OptimizerCreationTest, SGDCreation) { + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.01); + + EXPECT_NE(optimizer, nullptr); +} + +TEST_F(OptimizerCreationTest, AdamCreation) { + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.001); + + EXPECT_NE(optimizer, nullptr); +} + +TEST_F(OptimizerCreationTest, SGDWithMomentum) { + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.01); + + EXPECT_NE(optimizer, nullptr); +} + +TEST_F(OptimizerCreationTest, SGDMultiParams) { + std::vector> params; + for (int i = 0; i < 3; ++i) { + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + param->set_requires_grad(true); + params.push_back(param); + } + + auto optimizer = std::make_shared(params, 0.01); + EXPECT_NE(optimizer, nullptr); +} + +TEST_F(OptimizerCreationTest, AdamMultiParams) { + std::vector> params; + for (int i = 0; i < 3; ++i) { + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + param->set_requires_grad(true); + params.push_back(param); + } + + auto optimizer = std::make_shared(params, 0.001); + EXPECT_NE(optimizer, nullptr); +} + +TEST_F(OptimizerCreationTest, SGDWithWeightDecay) { + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.01); + + EXPECT_NE(optimizer, nullptr); +} diff --git a/tests/optimizer/test_optimizer_cuda.cc b/tests/optimizer/test_optimizer_cuda.cc new file mode 100644 index 00000000..b56ace2e --- /dev/null +++ b/tests/optimizer/test_optimizer_cuda.cc @@ -0,0 +1,93 @@ +#include + +#include + +#include "infini_train/include/tensor.h" +#include "infini_train/include/optimizer.h" +#include "infini_train/include/nn/parallel/global.h" +#include "tests/common/test_utils.h" + +using namespace infini_train; + +class OptimizerCUDATest : public infini_train::test::InfiniTrainTest {}; + +TEST_F(OptimizerCUDATest, SGDCreationCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.01); + + EXPECT_NE(optimizer, nullptr); + EXPECT_TRUE(param->IsCUDA()); +#endif +} + +TEST_F(OptimizerCUDATest, AdamCreationCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.001); + + EXPECT_NE(optimizer, nullptr); + EXPECT_TRUE(param->IsCUDA()); +#endif +} + +TEST_F(OptimizerCUDATest, ZeroGradCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.01); + + optimizer->ZeroGrad(); + EXPECT_TRUE(param->IsCUDA()); +#endif +} + +TEST_F(OptimizerCUDATest, SGDMultiParamsCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + std::vector> params; + for (int i = 0; i < 3; ++i) { + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + param->set_requires_grad(true); + params.push_back(param); + } + + auto optimizer = std::make_shared(params, 0.01); + EXPECT_NE(optimizer, nullptr); + + optimizer->ZeroGrad(); +#endif +} + +TEST_F(OptimizerCUDATest, AdamStepCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + param->set_requires_grad(true); + auto* data = static_cast(param->DataPtr()); + for (int i = 0; i < 6; ++i) data[i] = 1.0f; + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.001); + + optimizer->ZeroGrad(); + optimizer->Step(); + EXPECT_TRUE(param->IsCUDA()); +#endif +} diff --git a/tests/optimizer/test_optimizer_distributed.cc b/tests/optimizer/test_optimizer_distributed.cc new file mode 100644 index 00000000..f4f092bc --- /dev/null +++ b/tests/optimizer/test_optimizer_distributed.cc @@ -0,0 +1,75 @@ +#include + +#include + +#include "infini_train/include/tensor.h" +#include "infini_train/include/optimizer.h" +#include "infini_train/include/nn/parallel/global.h" +#include "tests/common/test_utils.h" + +using namespace infini_train; + +class OptimizerDistributedTest : public infini_train::test::InfiniTrainTest {}; + +TEST_F(OptimizerDistributedTest, DistributedSGD) { + REQUIRE_DISTRIBUTED(); +#if defined(USE_CUDA) && defined(USE_NCCL) + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.01); + + EXPECT_NE(optimizer, nullptr); + EXPECT_TRUE(param->IsCUDA()); +#endif +} + +TEST_F(OptimizerDistributedTest, DistributedAdam) { + REQUIRE_DISTRIBUTED(); +#if defined(USE_CUDA) && defined(USE_NCCL) + auto param = std::make_shared(std::vector{4, 4}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.001); + + EXPECT_NE(optimizer, nullptr); + EXPECT_TRUE(param->IsCUDA()); +#endif +} + +TEST_F(OptimizerDistributedTest, DistributedZeroGrad) { + REQUIRE_DISTRIBUTED(); +#if defined(USE_CUDA) && defined(USE_NCCL) + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.01); + + optimizer->ZeroGrad(); +#endif +} + +TEST_F(OptimizerDistributedTest, DistributedMultiParams) { + REQUIRE_DISTRIBUTED(); +#if defined(USE_CUDA) && defined(USE_NCCL) + std::vector> params; + for (int i = 0; i < 2; ++i) { + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + param->set_requires_grad(true); + params.push_back(param); + } + + auto optimizer = std::make_shared(params, 0.01); + EXPECT_NE(optimizer, nullptr); + + optimizer->ZeroGrad(); + optimizer->Step(); +#endif +} diff --git a/tests/optimizer/test_optimizer_step.cc b/tests/optimizer/test_optimizer_step.cc new file mode 100644 index 00000000..d5407998 --- /dev/null +++ b/tests/optimizer/test_optimizer_step.cc @@ -0,0 +1,62 @@ +#include + +#include + +#include "infini_train/include/tensor.h" +#include "infini_train/include/optimizer.h" +#include "infini_train/include/nn/parallel/global.h" +#include "tests/common/test_utils.h" + +using namespace infini_train; + +class OptimizerStepTest : public infini_train::test::InfiniTrainTest {}; + +TEST_F(OptimizerStepTest, SGDStep) { + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + param->set_requires_grad(true); + auto* data = static_cast(param->DataPtr()); + for (int i = 0; i < 6; ++i) data[i] = 1.0f; + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.01); + + optimizer->ZeroGrad(); + optimizer->Step(); +} + +TEST_F(OptimizerStepTest, AdamStep) { + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + param->set_requires_grad(true); + auto* data = static_cast(param->DataPtr()); + for (int i = 0; i < 6; ++i) data[i] = 1.0f; + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.001); + + optimizer->ZeroGrad(); + optimizer->Step(); +} + +TEST_F(OptimizerStepTest, ZeroGrad) { + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.01); + + optimizer->ZeroGrad(); +} + +TEST_F(OptimizerStepTest, ZeroGradWithNone) { + auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + param->set_requires_grad(true); + + std::vector> params = {param}; + auto optimizer = std::make_shared(params, 0.01); + + optimizer->ZeroGrad(false); +} diff --git a/tests/slow/CMakeLists.txt b/tests/slow/CMakeLists.txt index 22ef9c5e..dd76f187 100644 --- a/tests/slow/CMakeLists.txt +++ b/tests/slow/CMakeLists.txt @@ -1,27 +1,13 @@ -# Slow label smoke tests - -add_executable(test_slow - test_slow.cc -) -target_link_libraries(test_slow - PRIVATE - GTest::gtest - GTest::gtest_main -) -target_include_directories(test_slow PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common) - -target_link_libraries(test_slow PRIVATE - "-Wl,--whole-archive" - infini_train - infini_train_cpu_kernels - "-Wl,--no-whole-archive" -) - -add_test(NAME slow_cpu COMMAND test_slow --gtest_filter=SlowTest.Cpu) -set_tests_properties(slow_cpu PROPERTIES LABELS "slow;cpu") - -add_test(NAME slow_cuda COMMAND test_slow --gtest_filter=SlowTest.Cuda) -set_tests_properties(slow_cuda PROPERTIES LABELS "slow;cuda") - -add_test(NAME slow_distributed COMMAND test_slow --gtest_filter=SlowTest.Distributed) -set_tests_properties(slow_distributed PROPERTIES LABELS "slow;cuda;distributed") +# ============================================================================ +# Slow tests +# ============================================================================ +# 重构版本:使用 infini_train_add_test 宏简化配置 +# +# 新增测试只需 1 行: +# infini_train_add_test(test_name SOURCES test_name.cc LABELS slow cpu) +# ============================================================================ + +# ----------------------------------------------------------------------------- +# Slow tests (标签包含 slow) +# ----------------------------------------------------------------------------- +infini_train_add_test(test_slow SOURCES test_slow.cc LABELS slow cpu cuda distributed) diff --git a/tests/tensor/CMakeLists.txt b/tests/tensor/CMakeLists.txt index bd074f6f..3df1708e 100644 --- a/tests/tensor/CMakeLists.txt +++ b/tests/tensor/CMakeLists.txt @@ -1,31 +1,126 @@ # Tensor tests -add_executable(test_tensor - test_tensor.cc +# test_tensor_create +add_executable(test_tensor_create + test_tensor_create.cc ) -target_compile_options(test_tensor PRIVATE -Wno-error) -target_link_libraries(test_tensor +target_compile_options(test_tensor_create PRIVATE -Wno-error) +target_link_libraries(test_tensor_create PRIVATE GTest::gtest GTest::gtest_main ) -target_include_directories(test_tensor PRIVATE +target_include_directories(test_tensor_create PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common ${glog_SOURCE_DIR}/src ) +target_link_libraries(test_tensor_create PRIVATE + "-Wl,--whole-archive" + infini_train + infini_train_cpu_kernels + "-Wl,--no-whole-archive" +) +add_test(NAME tensor_create COMMAND test_tensor_create) +set_tests_properties(tensor_create PROPERTIES LABELS "cpu;cuda") -target_link_libraries(test_tensor PRIVATE +# test_tensor_copy +add_executable(test_tensor_copy + test_tensor_copy.cc +) +target_compile_options(test_tensor_copy PRIVATE -Wno-error) +target_link_libraries(test_tensor_copy + PRIVATE + GTest::gtest + GTest::gtest_main +) +target_include_directories(test_tensor_copy PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/../common + ${glog_SOURCE_DIR}/src +) +target_link_libraries(test_tensor_copy PRIVATE "-Wl,--whole-archive" infini_train infini_train_cpu_kernels "-Wl,--no-whole-archive" ) +add_test(NAME tensor_copy COMMAND test_tensor_copy) +set_tests_properties(tensor_copy PROPERTIES LABELS "cpu;cuda") -add_test(NAME tensor_cpu COMMAND test_tensor) -set_tests_properties(tensor_cpu PROPERTIES LABELS "cpu") +# test_tensor_delete +add_executable(test_tensor_delete + test_tensor_delete.cc +) +target_compile_options(test_tensor_delete PRIVATE -Wno-error) +target_link_libraries(test_tensor_delete + PRIVATE + GTest::gtest + GTest::gtest_main +) +target_include_directories(test_tensor_delete PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/../common + ${glog_SOURCE_DIR}/src +) +target_link_libraries(test_tensor_delete PRIVATE + "-Wl,--whole-archive" + infini_train + infini_train_cpu_kernels + "-Wl,--no-whole-archive" +) +add_test(NAME tensor_delete COMMAND test_tensor_delete) +set_tests_properties(tensor_delete PROPERTIES LABELS "cpu") -add_test(NAME tensor_cuda COMMAND test_tensor --gtest_filter=TensorTest.*CUDA) -set_tests_properties(tensor_cuda PROPERTIES LABELS "cuda") +# test_tensor_op (keep original for backward compatibility) +add_executable(test_tensor_op + test_tensor.cc +) +target_compile_options(test_tensor_op PRIVATE -Wno-error) +target_link_libraries(test_tensor_op + PRIVATE + GTest::gtest + GTest::gtest_main +) +target_include_directories(test_tensor_op PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/../common + ${glog_SOURCE_DIR}/src +) +target_link_libraries(test_tensor_op PRIVATE + "-Wl,--whole-archive" + infini_train + infini_train_cpu_kernels + "-Wl,--no-whole-archive" +) +add_test(NAME tensor_ops COMMAND test_tensor_op --gtest_filter=TensorOpTest.*) +set_tests_properties(tensor_ops PROPERTIES LABELS "cpu;cuda") -add_test(NAME tensor_distributed COMMAND test_tensor --gtest_filter=TensorTest.*Distributed) +# test_tensor_distributed (keep original for backward compatibility) +add_executable(test_tensor_dist + test_tensor.cc +) +target_compile_options(test_tensor_dist PRIVATE -Wno-error) +target_link_libraries(test_tensor_dist + PRIVATE + GTest::gtest + GTest::gtest_main +) +target_include_directories(test_tensor_dist PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/../common + ${glog_SOURCE_DIR}/src +) +target_link_libraries(test_tensor_dist PRIVATE + "-Wl,--whole-archive" + infini_train + infini_train_cpu_kernels + "-Wl,--no-whole-archive" +) +add_test(NAME tensor_distributed COMMAND test_tensor_dist --gtest_filter=TensorDistributedTest.*) set_tests_properties(tensor_distributed PROPERTIES LABELS "cuda;distributed") + +# Convenience aggregate target so `cmake --build ... --target test_tensor` works +add_custom_target(test_tensor + DEPENDS + test_tensor_create + test_tensor_copy + test_tensor_delete + test_tensor_op + test_tensor_dist +) diff --git a/tests/tensor/test_tensor.cc b/tests/tensor/test_tensor.cc index b2c40a57..23eb940c 100644 --- a/tests/tensor/test_tensor.cc +++ b/tests/tensor/test_tensor.cc @@ -1,6 +1,7 @@ #include #include +#include #include "infini_train/include/tensor.h" #include "infini_train/include/nn/parallel/global.h" @@ -8,14 +9,36 @@ using namespace infini_train; -class TensorTest : public ::testing::Test { +class TensorTestBase : public ::testing::Test { protected: static void SetUpTestSuite() { nn::parallel::global::GlobalEnv::Instance().Init(1, 1, false, 1, 1); } + + static size_t Numel(const std::shared_ptr& tensor) { + size_t n = 1; + for (auto dim : tensor->Dims()) { + n *= static_cast(dim); + } + return n; + } + + static void FillSequential(const std::shared_ptr& tensor, float start = 0.0f) { + auto* data = static_cast(tensor->DataPtr()); + auto n = Numel(tensor); + for (size_t i = 0; i < n; ++i) { + data[i] = start + static_cast(i); + } + } }; -TEST_F(TensorTest, CreateAndDestroy) { +class TensorCreateTest : public TensorTestBase {}; +class TensorCopyTest : public TensorTestBase {}; +class TensorDeleteTest : public TensorTestBase {}; +class TensorOpTest : public TensorTestBase {}; +class TensorDistributedTest : public TensorTestBase {}; + +TEST_F(TensorCreateTest, CreatesCpuTensorWithShapeAndType) { auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, Device(Device::DeviceType::kCPU, 0)); EXPECT_NE(tensor, nullptr); @@ -23,7 +46,7 @@ TEST_F(TensorTest, CreateAndDestroy) { EXPECT_EQ(tensor->Dtype(), DataType::kFLOAT32); } -TEST_F(TensorTest, RequiresGrad) { +TEST_F(TensorCreateTest, TracksRequiresGrad) { auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, Device(Device::DeviceType::kCPU, 0)); EXPECT_FALSE(tensor->requires_grad()); @@ -31,13 +54,13 @@ TEST_F(TensorTest, RequiresGrad) { EXPECT_TRUE(tensor->requires_grad()); } -TEST_F(TensorTest, DataPointer) { +TEST_F(TensorCreateTest, ProvidesDataPointer) { auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, Device(Device::DeviceType::kCPU, 0)); EXPECT_NE(tensor->DataPtr(), nullptr); } -TEST_F(TensorTest, DifferentShapes) { +TEST_F(TensorCreateTest, SupportsMultipleShapes) { std::vector> shapes = { {2, 3}, {4, 5, 6}, @@ -52,7 +75,7 @@ TEST_F(TensorTest, DifferentShapes) { } } -TEST_F(TensorTest, DifferentDataTypes) { +TEST_F(TensorCreateTest, SupportsMultipleDtypes) { std::vector dtypes = { DataType::kFLOAT32, DataType::kBFLOAT16, @@ -65,19 +88,19 @@ TEST_F(TensorTest, DifferentDataTypes) { } } -TEST_F(TensorTest, CreateCUDA) { +TEST_F(TensorCreateTest, CreatesTensorOnCUDA) { REQUIRE_CUDA(); #if defined(USE_CUDA) auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, Device(Device::DeviceType::kCUDA, 0)); EXPECT_NE(tensor, nullptr); + EXPECT_TRUE(tensor->IsCUDA()); EXPECT_EQ(tensor->Dims(), (std::vector{2, 3})); EXPECT_EQ(tensor->Dtype(), DataType::kFLOAT32); - EXPECT_TRUE(tensor->IsCUDA()); #endif } -TEST_F(TensorTest, RequiresGradCUDA) { +TEST_F(TensorCreateTest, TracksRequiresGradOnCUDA) { REQUIRE_CUDA(); #if defined(USE_CUDA) auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, @@ -88,7 +111,7 @@ TEST_F(TensorTest, RequiresGradCUDA) { #endif } -TEST_F(TensorTest, DataPointerCUDA) { +TEST_F(TensorCreateTest, ProvidesDataPointerOnCUDA) { REQUIRE_CUDA(); #if defined(USE_CUDA) auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, @@ -97,24 +120,52 @@ TEST_F(TensorTest, DataPointerCUDA) { #endif } -TEST_F(TensorTest, TensorCopyCUDA) { +TEST_F(TensorCopyTest, CopiesCPUToCPU) { + auto source = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + auto target = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + FillSequential(source, 1.0f); + + target->CopyFrom(source); + + auto* target_data = static_cast(target->DataPtr()); + for (int i = 0; i < 6; ++i) { + EXPECT_FLOAT_EQ(target_data[i], 1.0f + static_cast(i)); + } +} + +TEST_F(TensorCopyTest, CopiesCPUToCUDA) { REQUIRE_CUDA(); #if defined(USE_CUDA) auto cpu_tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, Device(Device::DeviceType::kCPU, 0)); auto cuda_tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, - Device(Device::DeviceType::kCUDA, 0)); - - auto* cpu_data = static_cast(cpu_tensor->DataPtr()); - for (int i = 0; i < 6; ++i) cpu_data[i] = static_cast(i); - - cuda_tensor->CopyDataFrom(cpu_tensor.get()); - + Device(Device::DeviceType::kCUDA, 0)); + + FillSequential(cpu_tensor, 0.0f); + cuda_tensor->CopyFrom(cpu_tensor); + EXPECT_TRUE(cuda_tensor->IsCUDA()); #endif } -TEST_F(TensorTest, MatmulCUDA) { +TEST_F(TensorCopyTest, CopiesCUDAtoCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto source = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + auto target = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + FillSequential(source, 2.0f); + + target->CopyFrom(source); + + EXPECT_TRUE(target->IsCUDA()); +#endif +} + +TEST_F(TensorOpTest, MatmulCUDAAllocatesOutputs) { REQUIRE_CUDA(); #if defined(USE_CUDA) auto a = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, @@ -132,40 +183,72 @@ TEST_F(TensorTest, MatmulCUDA) { #endif } -TEST_F(TensorTest, DistributedAllReduce) { +TEST_F(TensorDeleteTest, ReleasesResourcesOnReset) { + std::weak_ptr weak_tensor; + { + auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + tensor->set_requires_grad(true); + weak_tensor = tensor; + } + EXPECT_TRUE(weak_tensor.expired()); +} + +TEST_F(TensorDeleteTest, MoveTransferKeepsData) { + auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + FillSequential(tensor, 5.0f); + + auto moved = std::move(tensor); + EXPECT_EQ(tensor, nullptr); + ASSERT_NE(moved, nullptr); + + auto* data = static_cast(moved->DataPtr()); + for (int i = 0; i < 6; ++i) { + EXPECT_FLOAT_EQ(data[i], 5.0f + static_cast(i)); + } +} + +TEST_F(TensorDistributedTest, AllReduce) { + REQUIRE_CUDA(); REQUIRE_DISTRIBUTED(); + REQUIRE_NCCL(); #if defined(USE_CUDA) && defined(USE_NCCL) auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, Device(Device::DeviceType::kCUDA, 0)); tensor->set_requires_grad(true); - + auto* data = static_cast(tensor->DataPtr()); for (int i = 0; i < 6; ++i) data[i] = 1.0f; - + EXPECT_TRUE(tensor->IsCUDA()); EXPECT_TRUE(tensor->requires_grad()); #endif } -TEST_F(TensorTest, DistributedAllGather) { +TEST_F(TensorDistributedTest, AllGather) { + REQUIRE_CUDA(); REQUIRE_DISTRIBUTED(); + REQUIRE_NCCL(); #if defined(USE_CUDA) && defined(USE_NCCL) auto tensor = std::make_shared(std::vector{4, 4}, DataType::kFLOAT32, Device(Device::DeviceType::kCUDA, 0)); tensor->set_requires_grad(true); - + EXPECT_TRUE(tensor->IsCUDA()); EXPECT_EQ(tensor->Dims(), (std::vector{4, 4})); #endif } -TEST_F(TensorTest, DistributedReduceScatter) { +TEST_F(TensorDistributedTest, ReduceScatter) { + REQUIRE_CUDA(); REQUIRE_DISTRIBUTED(); + REQUIRE_NCCL(); #if defined(USE_CUDA) && defined(USE_NCCL) auto tensor = std::make_shared(std::vector{2, 8}, DataType::kFLOAT32, Device(Device::DeviceType::kCUDA, 0)); tensor->set_requires_grad(true); - + EXPECT_TRUE(tensor->IsCUDA()); EXPECT_EQ(tensor->Dims(), (std::vector{2, 8})); #endif diff --git a/tests/tensor/test_tensor_copy.cc b/tests/tensor/test_tensor_copy.cc new file mode 100644 index 00000000..452062c4 --- /dev/null +++ b/tests/tensor/test_tensor_copy.cc @@ -0,0 +1,123 @@ +#include + +#include +#include + +#include "infini_train/include/tensor.h" +#include "infini_train/include/nn/parallel/global.h" +#include "test_utils.h" + +using namespace infini_train; + +class TensorCopyTest : public infini_train::test::TensorTestBase {}; + +static void FillSequential(const std::shared_ptr& tensor, float start = 0.0f) { + auto* data = static_cast(tensor->DataPtr()); + size_t n = 1; + for (auto dim : tensor->Dims()) { + n *= static_cast(dim); + } + for (size_t i = 0; i < n; ++i) { + data[i] = start + static_cast(i); + } +} + +TEST_F(TensorCopyTest, CopiesCPUToCPU) { + auto source = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + auto target = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + FillSequential(source, 1.0f); + + target->CopyFrom(source); + + auto* target_data = static_cast(target->DataPtr()); + for (int i = 0; i < 6; ++i) { + EXPECT_FLOAT_EQ(target_data[i], 1.0f + static_cast(i)); + } +} + +TEST_F(TensorCopyTest, CopiesCPUToCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto cpu_tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + auto cuda_tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + + FillSequential(cpu_tensor, 0.0f); + cuda_tensor->CopyFrom(cpu_tensor); + + EXPECT_TRUE(cuda_tensor->IsCUDA()); +#endif +} + +TEST_F(TensorCopyTest, CopiesCUDAtoCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto source = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + auto target = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + FillSequential(source, 2.0f); + + target->CopyFrom(source); + + EXPECT_TRUE(target->IsCUDA()); +#endif +} + +TEST_F(TensorCopyTest, CopiesCUDAtoCPU) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto cuda_tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + auto cpu_tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + + FillSequential(cuda_tensor, 1.0f); + cpu_tensor->CopyFrom(cuda_tensor); + + EXPECT_FALSE(cpu_tensor->IsCUDA()); + EXPECT_TRUE(cpu_tensor->IsCPU()); +#endif +} + +TEST_F(TensorCopyTest, CopiesBetweenSameShape) { + auto source = std::make_shared(std::vector{4, 5, 6}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + auto target = std::make_shared(std::vector{4, 5, 6}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + FillSequential(source, 0.0f); + + target->CopyFrom(source); + + EXPECT_EQ(source->Dims(), target->Dims()); +} + +TEST_F(TensorCopyTest, CopiesPreservesDataType) { + auto source = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + auto target = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + + EXPECT_EQ(source->Dtype(), target->Dtype()); + target->CopyFrom(source); + EXPECT_EQ(target->Dtype(), DataType::kFLOAT32); +} + +TEST_F(TensorCopyTest, CopiesWithDifferentDeviceId) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto source = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + auto target = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 1)); + FillSequential(source, 5.0f); + + target->CopyFrom(source); + + EXPECT_EQ(source->GetDevice().index(), 0); + EXPECT_EQ(target->GetDevice().index(), 1); +#endif +} diff --git a/tests/tensor/test_tensor_create.cc b/tests/tensor/test_tensor_create.cc new file mode 100644 index 00000000..c39246e3 --- /dev/null +++ b/tests/tensor/test_tensor_create.cc @@ -0,0 +1,94 @@ +#include + +#include +#include + +#include "infini_train/include/tensor.h" +#include "infini_train/include/nn/parallel/global.h" +#include "test_utils.h" + +using namespace infini_train; + +class TensorCreateTest : public infini_train::test::TensorTestBase {}; + +TEST_F(TensorCreateTest, CreatesCpuTensorWithShapeAndType) { + auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + EXPECT_NE(tensor, nullptr); + EXPECT_EQ(tensor->Dims(), (std::vector{2, 3})); + EXPECT_EQ(tensor->Dtype(), DataType::kFLOAT32); +} + +TEST_F(TensorCreateTest, TracksRequiresGrad) { + auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + EXPECT_FALSE(tensor->requires_grad()); + tensor->set_requires_grad(true); + EXPECT_TRUE(tensor->requires_grad()); +} + +TEST_F(TensorCreateTest, ProvidesDataPointer) { + auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + EXPECT_NE(tensor->DataPtr(), nullptr); +} + +TEST_F(TensorCreateTest, SupportsMultipleShapes) { + std::vector> shapes = { + {2, 3}, + {4, 5, 6}, + {10}, + {1, 1, 1, 1} + }; + + for (const auto& shape : shapes) { + auto tensor = std::make_shared(shape, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + EXPECT_EQ(tensor->Dims(), shape); + } +} + +TEST_F(TensorCreateTest, SupportsMultipleDtypes) { + std::vector dtypes = { + DataType::kFLOAT32, + DataType::kBFLOAT16, + }; + + for (const auto& dtype : dtypes) { + auto tensor = std::make_shared(std::vector{2, 3}, dtype, + Device(Device::DeviceType::kCPU, 0)); + EXPECT_EQ(tensor->Dtype(), dtype); + } +} + +TEST_F(TensorCreateTest, CreatesTensorOnCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + EXPECT_NE(tensor, nullptr); + EXPECT_TRUE(tensor->IsCUDA()); + EXPECT_EQ(tensor->Dims(), (std::vector{2, 3})); + EXPECT_EQ(tensor->Dtype(), DataType::kFLOAT32); +#endif +} + +TEST_F(TensorCreateTest, TracksRequiresGradOnCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + EXPECT_FALSE(tensor->requires_grad()); + tensor->set_requires_grad(true); + EXPECT_TRUE(tensor->requires_grad()); +#endif +} + +TEST_F(TensorCreateTest, ProvidesDataPointerOnCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + EXPECT_NE(tensor->DataPtr(), nullptr); +#endif +} diff --git a/tests/tensor/test_tensor_delete.cc b/tests/tensor/test_tensor_delete.cc new file mode 100644 index 00000000..e959b807 --- /dev/null +++ b/tests/tensor/test_tensor_delete.cc @@ -0,0 +1,104 @@ +#include + +#include +#include + +#include "infini_train/include/tensor.h" +#include "infini_train/include/nn/parallel/global.h" +#include "test_utils.h" + +using namespace infini_train; + +class TensorDeleteTest : public infini_train::test::TensorTestBase {}; + +static void FillSequential(const std::shared_ptr& tensor, float start = 0.0f) { + auto* data = static_cast(tensor->DataPtr()); + size_t n = 1; + for (auto dim : tensor->Dims()) { + n *= static_cast(dim); + } + for (size_t i = 0; i < n; ++i) { + data[i] = start + static_cast(i); + } +} + +TEST_F(TensorDeleteTest, ReleasesResourcesOnReset) { + std::weak_ptr weak_tensor; + { + auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + tensor->set_requires_grad(true); + weak_tensor = tensor; + } + EXPECT_TRUE(weak_tensor.expired()); +} + +TEST_F(TensorDeleteTest, MoveTransferKeepsData) { + auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + FillSequential(tensor, 5.0f); + + auto moved = std::move(tensor); + EXPECT_EQ(tensor, nullptr); + ASSERT_NE(moved, nullptr); + + auto* data = static_cast(moved->DataPtr()); + for (int i = 0; i < 6; ++i) { + EXPECT_FLOAT_EQ(data[i], 5.0f + static_cast(i)); + } +} + +TEST_F(TensorDeleteTest, NullifiesPointerOnMove) { + auto tensor = std::make_shared(std::vector{3, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + EXPECT_NE(tensor, nullptr); + + auto moved_tensor = std::move(tensor); + EXPECT_EQ(tensor, nullptr); + EXPECT_NE(moved_tensor, nullptr); +} + +TEST_F(TensorDeleteTest, SharedPtrRefCountOnCopy) { + auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + FillSequential(tensor, 1.0f); + + auto copy1 = tensor; + auto copy2 = tensor; + + EXPECT_EQ(tensor.use_count(), 3); + EXPECT_EQ(copy1.use_count(), 3); + EXPECT_EQ(copy2.use_count(), 3); + + copy1.reset(); + EXPECT_EQ(tensor.use_count(), 2); + + copy2.reset(); + EXPECT_EQ(tensor.use_count(), 1); + + EXPECT_NE(tensor, nullptr); +} + +TEST_F(TensorDeleteTest, TensorDestroyedAfterScope) { + bool destroyed = false; + { + auto tensor = std::make_shared(std::vector{2, 2}, DataType::kFLOAT32, + Device(Device::DeviceType::kCPU, 0)); + EXPECT_NE(tensor, nullptr); + } +} + +TEST_F(TensorDeleteTest, ReleaseMemoryOnCUDA) { + REQUIRE_CUDA(); +#if defined(USE_CUDA) + std::weak_ptr weak_tensor; + { + auto tensor = std::make_shared(std::vector{100, 100}, DataType::kFLOAT32, + Device(Device::DeviceType::kCUDA, 0)); + tensor->set_requires_grad(true); + EXPECT_TRUE(tensor->IsCUDA()); + weak_tensor = tensor; + } + EXPECT_TRUE(weak_tensor.expired()); +#endif +} From bf9aebe1b41b788ae5482fa44686f23aa56a3467 Mon Sep 17 00:00:00 2001 From: luoyueyuguang Date: Sat, 28 Mar 2026 00:05:07 +0800 Subject: [PATCH 3/4] fix: make distributed labels selectable --- tests/autograd/CMakeLists.txt | 98 ++++++------ tests/autograd/test_autograd.cc | 52 +++---- tests/common/test_macros.cmake | 9 +- tests/common/test_utils.h | 60 ++++++-- tests/hook/CMakeLists.txt | 47 ++++-- tests/hook/test_hook.cc | 4 +- tests/lora/CMakeLists.txt | 28 +++- tests/optimizer/CMakeLists.txt | 57 ++++--- tests/optimizer/test_optimizer.cc | 10 +- tests/optimizer/test_optimizer_cuda.cc | 11 +- tests/optimizer/test_optimizer_distributed.cc | 4 +- tests/slow/CMakeLists.txt | 45 ++++-- tests/tensor/CMakeLists.txt | 142 +++--------------- tests/tensor/test_tensor.cc | 27 ++-- tests/tensor/test_tensor_copy.cc | 18 +-- tests/tensor/test_tensor_create.cc | 2 +- tests/tensor/test_tensor_delete.cc | 2 +- 17 files changed, 304 insertions(+), 312 deletions(-) diff --git a/tests/autograd/CMakeLists.txt b/tests/autograd/CMakeLists.txt index 18a16fa2..08aa1a96 100644 --- a/tests/autograd/CMakeLists.txt +++ b/tests/autograd/CMakeLists.txt @@ -1,56 +1,54 @@ # ============================================================================ # Autograd tests # ============================================================================ -# 重构版本:使用 infini_train_add_test 宏简化配置 -# -# 新增测试只需 1 行: -# infini_train_add_test(test_name SOURCES test_name.cc LABELS cpu) +# 重构版本:使用单一 test binary 聚合测试源码,减少 target 膨胀 # ============================================================================ # ----------------------------------------------------------------------------- -# Elementwise tests -# ----------------------------------------------------------------------------- -infini_train_add_test(test_autograd_elementwise_forward SOURCES test_autograd_elementwise_forward.cc LABELS cpu) -infini_train_add_test(test_autograd_elementwise_backward SOURCES test_autograd_elementwise_backward.cc LABELS cpu) - -# ----------------------------------------------------------------------------- -# Matmul tests -# ----------------------------------------------------------------------------- -infini_train_add_test(test_autograd_matmul_forward SOURCES test_autograd_matmul_forward.cc LABELS cpu) -infini_train_add_test(test_autograd_matmul_backward SOURCES test_autograd_matmul_backward.cc LABELS cpu) - -# ----------------------------------------------------------------------------- -# Reduction tests -# ----------------------------------------------------------------------------- -infini_train_add_test(test_autograd_reduction_forward SOURCES test_autograd_reduction_forward.cc LABELS cpu) -infini_train_add_test(test_autograd_reduction_backward SOURCES test_autograd_reduction_backward.cc LABELS cpu) - -# ----------------------------------------------------------------------------- -# Linear tests -# ----------------------------------------------------------------------------- -infini_train_add_test(test_autograd_linear_forward SOURCES test_autograd_linear_forward.cc LABELS cpu) -infini_train_add_test(test_autograd_linear_backward SOURCES test_autograd_linear_backward.cc LABELS cpu) - -# ----------------------------------------------------------------------------- -# Softmax tests -# ----------------------------------------------------------------------------- -infini_train_add_test(test_autograd_softmax_forward SOURCES test_autograd_softmax_forward.cc LABELS cpu) -infini_train_add_test(test_autograd_softmax_backward SOURCES test_autograd_softmax_backward.cc LABELS cpu) - -# ----------------------------------------------------------------------------- -# Transform tests -# ----------------------------------------------------------------------------- -infini_train_add_test(test_autograd_transform_forward SOURCES test_autograd_transform_forward.cc LABELS cpu) -infini_train_add_test(test_autograd_transform_backward SOURCES test_autograd_transform_backward.cc LABELS cpu) - -# ----------------------------------------------------------------------------- -# Normalization tests -# ----------------------------------------------------------------------------- -infini_train_add_test(test_autograd_normalization_forward SOURCES test_autograd_normalization_forward.cc LABELS cpu) -infini_train_add_test(test_autograd_normalization_backward SOURCES test_autograd_normalization_backward.cc LABELS cpu) - -# ----------------------------------------------------------------------------- -# Legacy combined tests -# 注意:使用 gtest_discover_tests,所有 TEST_F 都会被自动发现 -# ----------------------------------------------------------------------------- -infini_train_add_test(test_autograd_legacy SOURCES test_autograd.cc LABELS cpu cuda distributed) +# Autograd tests (single binary) +# ----------------------------------------------------------------------------- +set(AUTOGRAD_TEST_DIR "${CMAKE_CURRENT_SOURCE_DIR}") +set(AUTOGRAD_TEST_SOURCES + ${AUTOGRAD_TEST_DIR}/test_autograd_elementwise_forward.cc + ${AUTOGRAD_TEST_DIR}/test_autograd_elementwise_backward.cc + ${AUTOGRAD_TEST_DIR}/test_autograd_matmul_forward.cc + ${AUTOGRAD_TEST_DIR}/test_autograd_matmul_backward.cc + ${AUTOGRAD_TEST_DIR}/test_autograd_reduction_forward.cc + ${AUTOGRAD_TEST_DIR}/test_autograd_reduction_backward.cc + ${AUTOGRAD_TEST_DIR}/test_autograd_linear_forward.cc + ${AUTOGRAD_TEST_DIR}/test_autograd_linear_backward.cc + ${AUTOGRAD_TEST_DIR}/test_autograd_softmax_forward.cc + ${AUTOGRAD_TEST_DIR}/test_autograd_softmax_backward.cc + ${AUTOGRAD_TEST_DIR}/test_autograd_transform_forward.cc + ${AUTOGRAD_TEST_DIR}/test_autograd_transform_backward.cc + ${AUTOGRAD_TEST_DIR}/test_autograd_normalization_forward.cc + ${AUTOGRAD_TEST_DIR}/test_autograd_normalization_backward.cc + ${AUTOGRAD_TEST_DIR}/test_autograd.cc +) + +add_executable(test_autograd ${AUTOGRAD_TEST_SOURCES}) +target_compile_options(test_autograd PRIVATE -Wno-error) +link_infini_train_exe(test_autograd) +target_link_libraries(test_autograd PRIVATE GTest::gtest GTest::gtest_main) +target_include_directories(test_autograd + PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/../common + ${glog_SOURCE_DIR}/src +) + +include(GoogleTest) +gtest_discover_tests(test_autograd + EXTRA_ARGS --gtest_output=xml:%T.xml + TEST_FILTER "-AutogradCudaTest.*:AutogradDistributedTest.*" + PROPERTIES LABELS "cpu" +) +gtest_discover_tests(test_autograd + EXTRA_ARGS --gtest_output=xml:%T.xml + TEST_FILTER "AutogradCudaTest.*" + PROPERTIES LABELS "cuda" +) +gtest_discover_tests(test_autograd + EXTRA_ARGS --gtest_output=xml:%T.xml + TEST_FILTER "AutogradDistributedTest.*" + PROPERTIES LABELS "distributed" +) diff --git a/tests/autograd/test_autograd.cc b/tests/autograd/test_autograd.cc index b46358da..5d189599 100644 --- a/tests/autograd/test_autograd.cc +++ b/tests/autograd/test_autograd.cc @@ -15,6 +15,7 @@ #include "infini_train/include/autograd/linear.h" #include "infini_train/include/autograd/outer.h" #include "infini_train/include/autograd/misc.h" +#include "test_utils.h" using namespace infini_train; @@ -380,17 +381,16 @@ TEST_F(AutogradForwardTest, NoOpForward) { #ifdef USE_CUDA TEST_F(AutogradCudaTest, AddForwardCUDA) { + REQUIRE_CUDA(); auto a = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, Device(Device::DeviceType::kCUDA, 0)); a->set_requires_grad(true); - auto a_data = static_cast(a->DataPtr()); - for (int i = 0; i < 6; ++i) a_data[i] = 1.0f; + infini_train::test::FillConstantTensor(a, 1.0f); auto b = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, Device(Device::DeviceType::kCUDA, 0)); b->set_requires_grad(true); - auto b_data = static_cast(b->DataPtr()); - for (int i = 0; i < 6; ++i) b_data[i] = 2.0f; + infini_train::test::FillConstantTensor(b, 2.0f); auto add_fn = std::make_shared(); auto result = add_fn->Apply({a, b}); @@ -399,17 +399,16 @@ TEST_F(AutogradCudaTest, AddForwardCUDA) { } TEST_F(AutogradCudaTest, MatmulForwardCUDA) { + REQUIRE_CUDA(); auto a = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, Device(Device::DeviceType::kCUDA, 0)); a->set_requires_grad(true); - auto a_data = static_cast(a->DataPtr()); - for (int i = 0; i < 6; ++i) a_data[i] = 1.0f; + infini_train::test::FillConstantTensor(a, 1.0f); auto b = std::make_shared(std::vector{3, 4}, DataType::kFLOAT32, Device(Device::DeviceType::kCUDA, 0)); b->set_requires_grad(true); - auto b_data = static_cast(b->DataPtr()); - for (int i = 0; i < 12; ++i) b_data[i] = 1.0f; + infini_train::test::FillConstantTensor(b, 1.0f); auto matmul_fn = std::make_shared(); auto result = matmul_fn->Apply({a, b}); @@ -418,11 +417,11 @@ TEST_F(AutogradCudaTest, MatmulForwardCUDA) { } TEST_F(AutogradCudaTest, SumForwardCUDA) { + REQUIRE_CUDA(); auto a = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, Device(Device::DeviceType::kCUDA, 0)); a->set_requires_grad(true); - auto a_data = static_cast(a->DataPtr()); - for (int i = 0; i < 6; ++i) a_data[i] = 1.0f; + infini_train::test::FillConstantTensor(a, 1.0f); auto sum_fn = std::make_shared(1, false); auto result = sum_fn->Apply({a}); @@ -430,11 +429,11 @@ TEST_F(AutogradCudaTest, SumForwardCUDA) { } TEST_F(AutogradCudaTest, SoftmaxForwardCUDA) { + REQUIRE_CUDA(); auto a = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, Device(Device::DeviceType::kCUDA, 0)); a->set_requires_grad(true); - auto a_data = static_cast(a->DataPtr()); - for (int i = 0; i < 6; ++i) a_data[i] = 1.0f; + infini_train::test::FillConstantTensor(a, 1.0f); auto softmax_fn = std::make_shared(1); auto result = softmax_fn->Apply({a}); @@ -443,23 +442,21 @@ TEST_F(AutogradCudaTest, SoftmaxForwardCUDA) { } TEST_F(AutogradCudaTest, LinearForwardCUDA) { + REQUIRE_CUDA(); auto input = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, Device(Device::DeviceType::kCUDA, 0)); input->set_requires_grad(true); - auto input_data = static_cast(input->DataPtr()); - for (int i = 0; i < 6; ++i) input_data[i] = 1.0f; + infini_train::test::FillConstantTensor(input, 1.0f); auto weight = std::make_shared(std::vector{4, 3}, DataType::kFLOAT32, Device(Device::DeviceType::kCUDA, 0)); weight->set_requires_grad(true); - auto weight_data = static_cast(weight->DataPtr()); - for (int i = 0; i < 12; ++i) weight_data[i] = 1.0f; + infini_train::test::FillConstantTensor(weight, 1.0f); auto bias = std::make_shared(std::vector{4}, DataType::kFLOAT32, Device(Device::DeviceType::kCUDA, 0)); bias->set_requires_grad(true); - auto bias_data = static_cast(bias->DataPtr()); - for (int i = 0; i < 4; ++i) bias_data[i] = 0.0f; + infini_train::test::FillConstantTensor(bias, 0.0f); auto linear_fn = std::make_shared(); auto result = linear_fn->Apply({input, weight, bias}); @@ -480,10 +477,9 @@ TEST_F(AutogradDistributedTest, AllReduceDistributed) { auto a = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, Device(Device::DeviceType::kCUDA, 0)); a->set_requires_grad(true); - auto a_data = static_cast(a->DataPtr()); - for (int i = 0; i < 6; ++i) a_data[i] = 1.0f; + infini_train::test::FillConstantTensor(a, 1.0f); - EXPECT_TRUE(a->IsCUDA()); + EXPECT_TRUE(a->GetDevice().IsCUDA()); EXPECT_TRUE(a->requires_grad()); } @@ -494,10 +490,9 @@ TEST_F(AutogradDistributedTest, AllGatherDistributed) { auto a = std::make_shared(std::vector{4, 4}, DataType::kFLOAT32, Device(Device::DeviceType::kCUDA, 0)); a->set_requires_grad(true); - auto a_data = static_cast(a->DataPtr()); - for (int i = 0; i < 16; ++i) a_data[i] = 1.0f; + infini_train::test::FillConstantTensor(a, 1.0f); - EXPECT_TRUE(a->IsCUDA()); + EXPECT_TRUE(a->GetDevice().IsCUDA()); EXPECT_EQ(a->Dims(), (std::vector{4, 4})); } @@ -508,10 +503,9 @@ TEST_F(AutogradDistributedTest, ReduceScatterDistributed) { auto a = std::make_shared(std::vector{2, 8}, DataType::kFLOAT32, Device(Device::DeviceType::kCUDA, 0)); a->set_requires_grad(true); - auto a_data = static_cast(a->DataPtr()); - for (int i = 0; i < 16; ++i) a_data[i] = 1.0f; + infini_train::test::FillConstantTensor(a, 1.0f); - EXPECT_TRUE(a->IsCUDA()); + EXPECT_TRUE(a->GetDevice().IsCUDA()); EXPECT_EQ(a->Dims(), (std::vector{2, 8})); } @@ -530,7 +524,7 @@ TEST_F(AutogradDistributedTest, DistributedMatmul) { auto result = matmul_fn->Apply({a, b}); EXPECT_EQ(result.size(), 1); - EXPECT_TRUE(result[0]->IsCUDA()); + EXPECT_TRUE(result[0]->GetDevice().IsCUDA()); } TEST_F(AutogradDistributedTest, DistributedLinear) { @@ -552,6 +546,6 @@ TEST_F(AutogradDistributedTest, DistributedLinear) { EXPECT_EQ(result.size(), 1); EXPECT_EQ(result[0]->Dims(), (std::vector{2, 4})); - EXPECT_TRUE(result[0]->IsCUDA()); + EXPECT_TRUE(result[0]->GetDevice().IsCUDA()); } #endif // USE_NCCL diff --git a/tests/common/test_macros.cmake b/tests/common/test_macros.cmake index 184083c0..8f5fac59 100644 --- a/tests/common/test_macros.cmake +++ b/tests/common/test_macros.cmake @@ -74,13 +74,8 @@ macro(infini_train_add_test) ${glog_SOURCE_DIR}/src ) - # 5. 链接项目库(whole-archive 方式解决静态库符号依赖) - target_link_libraries(${ARG_TEST_NAME} PRIVATE - "-Wl,--whole-archive" - infini_train - infini_train_cpu_kernels - "-Wl,--no-whole-archive" - ) + # 5. 链接项目库(复用框架链接策略,包含 CUDA/静态库依赖处理) + link_infini_train_exe(${ARG_TEST_NAME}) # 6. 使用 gtest_discover_tests 自动发现测试用例 # 这会自动为每个 TEST_F() 创建一个 ctest 测试 diff --git a/tests/common/test_utils.h b/tests/common/test_utils.h index 409b720e..630f3539 100644 --- a/tests/common/test_utils.h +++ b/tests/common/test_utils.h @@ -55,6 +55,52 @@ inline bool HasDistributedSupport() { return HasCudaRuntime() && HasNCCL() && GetCudaDeviceCount() >= 2; } +inline void FillSequentialTensor(const std::shared_ptr& tensor, float start = 0.0f) { + size_t size = 1; + for (auto dim : tensor->Dims()) { + size *= static_cast(dim); + } + + if (tensor->GetDevice().IsCUDA()) { + auto cpu_tensor = std::make_shared(tensor->Dims(), tensor->Dtype(), + Device(Device::DeviceType::kCPU, 0)); + auto* cpu_data = static_cast(cpu_tensor->DataPtr()); + for (size_t i = 0; i < size; ++i) { + cpu_data[i] = start + static_cast(i); + } + tensor->CopyFrom(cpu_tensor); + return; + } + + auto* data = static_cast(tensor->DataPtr()); + for (size_t i = 0; i < size; ++i) { + data[i] = start + static_cast(i); + } +} + +inline void FillConstantTensor(const std::shared_ptr& tensor, float value) { + size_t size = 1; + for (auto dim : tensor->Dims()) { + size *= static_cast(dim); + } + + if (tensor->GetDevice().IsCUDA()) { + auto cpu_tensor = std::make_shared(tensor->Dims(), tensor->Dtype(), + Device(Device::DeviceType::kCPU, 0)); + auto* cpu_data = static_cast(cpu_tensor->DataPtr()); + for (size_t i = 0; i < size; ++i) { + cpu_data[i] = value; + } + tensor->CopyFrom(cpu_tensor); + return; + } + + auto* data = static_cast(tensor->DataPtr()); + for (size_t i = 0; i < size; ++i) { + data[i] = value; + } +} + #define REQUIRE_CUDA() \ do { \ if (!infini_train::test::HasCudaRuntime()) { \ @@ -106,12 +152,7 @@ class TensorTestBase : public InfiniTrainTest { } void fillTensor(std::shared_ptr tensor, float value) { - auto data = static_cast(tensor->DataPtr()); - size_t size = 1; - for (auto dim : tensor->Dims()) size *= dim; - for (size_t i = 0; i < size; ++i) { - data[i] = value + static_cast(i); - } + FillSequentialTensor(tensor, value); } }; @@ -140,12 +181,7 @@ class AutogradTestBase : public InfiniTrainTest { auto tensor = std::make_shared(shape, DataType::kFLOAT32, Device(device, device_id)); tensor->set_requires_grad(true); - auto data = static_cast(tensor->DataPtr()); - size_t size = 1; - for (auto dim : shape) size *= dim; - for (size_t i = 0; i < size; ++i) { - data[i] = value + static_cast(i); - } + FillSequentialTensor(tensor, value); return tensor; } }; diff --git a/tests/hook/CMakeLists.txt b/tests/hook/CMakeLists.txt index 51c77ae0..2c7ed893 100644 --- a/tests/hook/CMakeLists.txt +++ b/tests/hook/CMakeLists.txt @@ -1,14 +1,35 @@ -# ============================================================================ -# Hook tests -# ============================================================================ -# 重构版本:使用 infini_train_add_test 宏简化配置 -# -# 新增测试只需 1 行: -# infini_train_add_test(test_name SOURCES test_name.cc LABELS cpu) -# ============================================================================ +# ========================================================================== +# Hook tests (single binary) +# ========================================================================== -# ----------------------------------------------------------------------------- -# Hook tests -# ----------------------------------------------------------------------------- -infini_train_add_test(test_hook SOURCES test_hook.cc LABELS cpu cuda distributed) -infini_train_add_test(test_precision_check SOURCES test_precision_check.cc LABELS cpu) +set(HOOK_TEST_DIR "${CMAKE_CURRENT_SOURCE_DIR}") +set(HOOK_TEST_SOURCES + ${HOOK_TEST_DIR}/test_hook.cc + ${HOOK_TEST_DIR}/test_precision_check.cc +) + +add_executable(test_hook ${HOOK_TEST_SOURCES}) +target_compile_options(test_hook PRIVATE -Wno-error) +link_infini_train_exe(test_hook) +target_link_libraries(test_hook PRIVATE GTest::gtest GTest::gtest_main) +target_include_directories(test_hook PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/../common + ${glog_SOURCE_DIR}/src +) + +include(GoogleTest) +gtest_discover_tests(test_hook + EXTRA_ARGS --gtest_output=xml:%T.xml + TEST_FILTER "-*CUDA*:*Distributed*" + PROPERTIES LABELS "cpu" +) +gtest_discover_tests(test_hook + EXTRA_ARGS --gtest_output=xml:%T.xml + TEST_FILTER "*CUDA*" + PROPERTIES LABELS "cuda" +) +gtest_discover_tests(test_hook + EXTRA_ARGS --gtest_output=xml:%T.xml + TEST_FILTER "*Distributed*" + PROPERTIES LABELS "distributed" +) diff --git a/tests/hook/test_hook.cc b/tests/hook/test_hook.cc index 7f876c5e..ea84d68d 100644 --- a/tests/hook/test_hook.cc +++ b/tests/hook/test_hook.cc @@ -121,7 +121,7 @@ TEST_F(HookTest, BasicModuleHooksCUDA) { auto outputs = (*module)(inputs); EXPECT_EQ(outputs.size(), 1); - EXPECT_TRUE(outputs[0]->IsCUDA()); + EXPECT_TRUE(outputs[0]->GetDevice().IsCUDA()); #endif } @@ -166,7 +166,7 @@ TEST_F(HookTest, DistributedModuleHooks) { auto outputs = (*module)(inputs); EXPECT_EQ(outputs.size(), 1); - EXPECT_TRUE(outputs[0]->IsCUDA()); + EXPECT_TRUE(outputs[0]->GetDevice().IsCUDA()); #endif } diff --git a/tests/lora/CMakeLists.txt b/tests/lora/CMakeLists.txt index 27b47c64..2868e238 100644 --- a/tests/lora/CMakeLists.txt +++ b/tests/lora/CMakeLists.txt @@ -1,7 +1,23 @@ -# ============================================================================ -# LoRA tests -# ============================================================================ -# 重构版本:使用 infini_train_add_test 宏简化配置 -# ============================================================================ +# ========================================================================== +# LoRA tests (single binary) +# ========================================================================== -infini_train_add_test(test_lora SOURCES test_lora.cc LABELS cpu) +set(LORA_TEST_DIR "${CMAKE_CURRENT_SOURCE_DIR}") +set(LORA_TEST_SOURCES + ${LORA_TEST_DIR}/test_lora.cc +) + +add_executable(test_lora ${LORA_TEST_SOURCES}) +target_compile_options(test_lora PRIVATE -Wno-error) +link_infini_train_exe(test_lora) +target_link_libraries(test_lora PRIVATE GTest::gtest GTest::gtest_main) +target_include_directories(test_lora PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/../common + ${glog_SOURCE_DIR}/src +) + +include(GoogleTest) +gtest_discover_tests(test_lora + EXTRA_ARGS --gtest_output=xml:%T.xml + PROPERTIES LABELS "cpu" +) diff --git a/tests/optimizer/CMakeLists.txt b/tests/optimizer/CMakeLists.txt index 3dc00354..b3e6e917 100644 --- a/tests/optimizer/CMakeLists.txt +++ b/tests/optimizer/CMakeLists.txt @@ -1,22 +1,39 @@ -# ============================================================================ -# Optimizer tests -# ============================================================================ -# 重构版本:使用 infini_train_add_test 宏简化配置 -# -# 新增测试只需 1 行: -# infini_train_add_test(test_name SOURCES test_name.cc LABELS cpu) -# ============================================================================ +# ========================================================================== +# Optimizer tests (single binary) +# ========================================================================== -# ----------------------------------------------------------------------------- -# Optimizer tests -# ----------------------------------------------------------------------------- -infini_train_add_test(test_optimizer_creation SOURCES test_optimizer_creation.cc LABELS cpu) -infini_train_add_test(test_optimizer_step SOURCES test_optimizer_step.cc LABELS cpu) -infini_train_add_test(test_optimizer_cuda SOURCES test_optimizer_cuda.cc LABELS cuda) -infini_train_add_test(test_optimizer_distributed SOURCES test_optimizer_distributed.cc LABELS cuda distributed) +# 注意:test_optimizer.cc 为 legacy 组合测试,内容与拆分文件重复,故未纳入此二进制 -# ----------------------------------------------------------------------------- -# Legacy combined tests -# 注意:使用 gtest_discover_tests,所有 TEST_F 都会被自动发现 -# ----------------------------------------------------------------------------- -infini_train_add_test(test_optimizer_legacy SOURCES test_optimizer.cc LABELS cpu cuda distributed) +set(OPTIMIZER_TEST_DIR "${CMAKE_CURRENT_SOURCE_DIR}") +set(OPTIMIZER_TEST_SOURCES + ${OPTIMIZER_TEST_DIR}/test_optimizer_creation.cc + ${OPTIMIZER_TEST_DIR}/test_optimizer_step.cc + ${OPTIMIZER_TEST_DIR}/test_optimizer_cuda.cc + ${OPTIMIZER_TEST_DIR}/test_optimizer_distributed.cc +) + +add_executable(test_optimizer ${OPTIMIZER_TEST_SOURCES}) +target_compile_options(test_optimizer PRIVATE -Wno-error) +link_infini_train_exe(test_optimizer) +target_link_libraries(test_optimizer PRIVATE GTest::gtest GTest::gtest_main) +target_include_directories(test_optimizer PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/../common + ${glog_SOURCE_DIR}/src +) + +include(GoogleTest) +gtest_discover_tests(test_optimizer + EXTRA_ARGS --gtest_output=xml:%T.xml + TEST_FILTER "-OptimizerCUDATest.*:OptimizerDistributedTest.*" + PROPERTIES LABELS "cpu" +) +gtest_discover_tests(test_optimizer + EXTRA_ARGS --gtest_output=xml:%T.xml + TEST_FILTER "OptimizerCUDATest.*" + PROPERTIES LABELS "cuda" +) +gtest_discover_tests(test_optimizer + EXTRA_ARGS --gtest_output=xml:%T.xml + TEST_FILTER "OptimizerDistributedTest.*" + PROPERTIES LABELS "distributed" +) diff --git a/tests/optimizer/test_optimizer.cc b/tests/optimizer/test_optimizer.cc index 6baa34e5..aaac4e77 100644 --- a/tests/optimizer/test_optimizer.cc +++ b/tests/optimizer/test_optimizer.cc @@ -80,7 +80,7 @@ TEST_F(OptimizerCudaTest, SGDCreationCUDA) { auto optimizer = std::make_shared(params, 0.01); EXPECT_NE(optimizer, nullptr); - EXPECT_TRUE(param->IsCUDA()); + EXPECT_TRUE(param->GetDevice().IsCUDA()); #endif } @@ -95,7 +95,7 @@ TEST_F(OptimizerCudaTest, AdamCreationCUDA) { auto optimizer = std::make_shared(params, 0.001); EXPECT_NE(optimizer, nullptr); - EXPECT_TRUE(param->IsCUDA()); + EXPECT_TRUE(param->GetDevice().IsCUDA()); #endif } @@ -110,7 +110,7 @@ TEST_F(OptimizerCudaTest, ZeroGradCUDA) { auto optimizer = std::make_shared(params, 0.01); optimizer->ZeroGrad(); - EXPECT_TRUE(param->IsCUDA()); + EXPECT_TRUE(param->GetDevice().IsCUDA()); #endif } @@ -145,7 +145,7 @@ TEST_F(OptimizerDistributedTest, DistributedSGD) { auto optimizer = std::make_shared(params, 0.01); EXPECT_NE(optimizer, nullptr); - EXPECT_TRUE(param->IsCUDA()); + EXPECT_TRUE(param->GetDevice().IsCUDA()); #endif } @@ -162,7 +162,7 @@ TEST_F(OptimizerDistributedTest, DistributedAdam) { auto optimizer = std::make_shared(params, 0.001); EXPECT_NE(optimizer, nullptr); - EXPECT_TRUE(param->IsCUDA()); + EXPECT_TRUE(param->GetDevice().IsCUDA()); #endif } diff --git a/tests/optimizer/test_optimizer_cuda.cc b/tests/optimizer/test_optimizer_cuda.cc index b56ace2e..d97c8089 100644 --- a/tests/optimizer/test_optimizer_cuda.cc +++ b/tests/optimizer/test_optimizer_cuda.cc @@ -22,7 +22,7 @@ TEST_F(OptimizerCUDATest, SGDCreationCUDA) { auto optimizer = std::make_shared(params, 0.01); EXPECT_NE(optimizer, nullptr); - EXPECT_TRUE(param->IsCUDA()); + EXPECT_TRUE(param->GetDevice().IsCUDA()); #endif } @@ -37,7 +37,7 @@ TEST_F(OptimizerCUDATest, AdamCreationCUDA) { auto optimizer = std::make_shared(params, 0.001); EXPECT_NE(optimizer, nullptr); - EXPECT_TRUE(param->IsCUDA()); + EXPECT_TRUE(param->GetDevice().IsCUDA()); #endif } @@ -52,7 +52,7 @@ TEST_F(OptimizerCUDATest, ZeroGradCUDA) { auto optimizer = std::make_shared(params, 0.01); optimizer->ZeroGrad(); - EXPECT_TRUE(param->IsCUDA()); + EXPECT_TRUE(param->GetDevice().IsCUDA()); #endif } @@ -80,14 +80,13 @@ TEST_F(OptimizerCUDATest, AdamStepCUDA) { auto param = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, Device(Device::DeviceType::kCUDA, 0)); param->set_requires_grad(true); - auto* data = static_cast(param->DataPtr()); - for (int i = 0; i < 6; ++i) data[i] = 1.0f; + infini_train::test::FillConstantTensor(param, 1.0f); std::vector> params = {param}; auto optimizer = std::make_shared(params, 0.001); optimizer->ZeroGrad(); optimizer->Step(); - EXPECT_TRUE(param->IsCUDA()); + EXPECT_TRUE(param->GetDevice().IsCUDA()); #endif } diff --git a/tests/optimizer/test_optimizer_distributed.cc b/tests/optimizer/test_optimizer_distributed.cc index f4f092bc..23f8c889 100644 --- a/tests/optimizer/test_optimizer_distributed.cc +++ b/tests/optimizer/test_optimizer_distributed.cc @@ -22,7 +22,7 @@ TEST_F(OptimizerDistributedTest, DistributedSGD) { auto optimizer = std::make_shared(params, 0.01); EXPECT_NE(optimizer, nullptr); - EXPECT_TRUE(param->IsCUDA()); + EXPECT_TRUE(param->GetDevice().IsCUDA()); #endif } @@ -37,7 +37,7 @@ TEST_F(OptimizerDistributedTest, DistributedAdam) { auto optimizer = std::make_shared(params, 0.001); EXPECT_NE(optimizer, nullptr); - EXPECT_TRUE(param->IsCUDA()); + EXPECT_TRUE(param->GetDevice().IsCUDA()); #endif } diff --git a/tests/slow/CMakeLists.txt b/tests/slow/CMakeLists.txt index dd76f187..7240ce4d 100644 --- a/tests/slow/CMakeLists.txt +++ b/tests/slow/CMakeLists.txt @@ -1,13 +1,34 @@ -# ============================================================================ -# Slow tests -# ============================================================================ -# 重构版本:使用 infini_train_add_test 宏简化配置 -# -# 新增测试只需 1 行: -# infini_train_add_test(test_name SOURCES test_name.cc LABELS slow cpu) -# ============================================================================ +# ========================================================================== +# Slow tests (single binary) +# ========================================================================== -# ----------------------------------------------------------------------------- -# Slow tests (标签包含 slow) -# ----------------------------------------------------------------------------- -infini_train_add_test(test_slow SOURCES test_slow.cc LABELS slow cpu cuda distributed) +set(SLOW_TEST_DIR "${CMAKE_CURRENT_SOURCE_DIR}") +set(SLOW_TEST_SOURCES + ${SLOW_TEST_DIR}/test_slow.cc +) + +add_executable(test_slow ${SLOW_TEST_SOURCES}) +target_compile_options(test_slow PRIVATE -Wno-error) +link_infini_train_exe(test_slow) +target_link_libraries(test_slow PRIVATE GTest::gtest GTest::gtest_main) +target_include_directories(test_slow PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/../common + ${glog_SOURCE_DIR}/src +) + +include(GoogleTest) +gtest_discover_tests(test_slow + EXTRA_ARGS --gtest_output=xml:%T.xml + TEST_FILTER "SlowTest.Cpu" + PROPERTIES LABELS "slow;cpu" +) +gtest_discover_tests(test_slow + EXTRA_ARGS --gtest_output=xml:%T.xml + TEST_FILTER "SlowTest.Cuda" + PROPERTIES LABELS "slow;cuda" +) +gtest_discover_tests(test_slow + EXTRA_ARGS --gtest_output=xml:%T.xml + TEST_FILTER "SlowTest.Distributed" + PROPERTIES LABELS "slow;distributed" +) diff --git a/tests/tensor/CMakeLists.txt b/tests/tensor/CMakeLists.txt index 3df1708e..4e90609c 100644 --- a/tests/tensor/CMakeLists.txt +++ b/tests/tensor/CMakeLists.txt @@ -1,126 +1,32 @@ -# Tensor tests +# Tensor tests (single binary) -# test_tensor_create -add_executable(test_tensor_create - test_tensor_create.cc +set(TENSOR_TEST_DIR "${CMAKE_CURRENT_SOURCE_DIR}") +set(TENSOR_TEST_SOURCES + ${TENSOR_TEST_DIR}/test_tensor.cc ) -target_compile_options(test_tensor_create PRIVATE -Wno-error) -target_link_libraries(test_tensor_create - PRIVATE - GTest::gtest - GTest::gtest_main -) -target_include_directories(test_tensor_create PRIVATE - ${CMAKE_CURRENT_SOURCE_DIR}/../common - ${glog_SOURCE_DIR}/src -) -target_link_libraries(test_tensor_create PRIVATE - "-Wl,--whole-archive" - infini_train - infini_train_cpu_kernels - "-Wl,--no-whole-archive" -) -add_test(NAME tensor_create COMMAND test_tensor_create) -set_tests_properties(tensor_create PROPERTIES LABELS "cpu;cuda") -# test_tensor_copy -add_executable(test_tensor_copy - test_tensor_copy.cc -) -target_compile_options(test_tensor_copy PRIVATE -Wno-error) -target_link_libraries(test_tensor_copy - PRIVATE - GTest::gtest - GTest::gtest_main -) -target_include_directories(test_tensor_copy PRIVATE +add_executable(test_tensor ${TENSOR_TEST_SOURCES}) +target_compile_options(test_tensor PRIVATE -Wno-error) +link_infini_train_exe(test_tensor) +target_link_libraries(test_tensor PRIVATE GTest::gtest GTest::gtest_main) +target_include_directories(test_tensor PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common ${glog_SOURCE_DIR}/src ) -target_link_libraries(test_tensor_copy PRIVATE - "-Wl,--whole-archive" - infini_train - infini_train_cpu_kernels - "-Wl,--no-whole-archive" -) -add_test(NAME tensor_copy COMMAND test_tensor_copy) -set_tests_properties(tensor_copy PROPERTIES LABELS "cpu;cuda") - -# test_tensor_delete -add_executable(test_tensor_delete - test_tensor_delete.cc -) -target_compile_options(test_tensor_delete PRIVATE -Wno-error) -target_link_libraries(test_tensor_delete - PRIVATE - GTest::gtest - GTest::gtest_main -) -target_include_directories(test_tensor_delete PRIVATE - ${CMAKE_CURRENT_SOURCE_DIR}/../common - ${glog_SOURCE_DIR}/src -) -target_link_libraries(test_tensor_delete PRIVATE - "-Wl,--whole-archive" - infini_train - infini_train_cpu_kernels - "-Wl,--no-whole-archive" -) -add_test(NAME tensor_delete COMMAND test_tensor_delete) -set_tests_properties(tensor_delete PROPERTIES LABELS "cpu") - -# test_tensor_op (keep original for backward compatibility) -add_executable(test_tensor_op - test_tensor.cc -) -target_compile_options(test_tensor_op PRIVATE -Wno-error) -target_link_libraries(test_tensor_op - PRIVATE - GTest::gtest - GTest::gtest_main -) -target_include_directories(test_tensor_op PRIVATE - ${CMAKE_CURRENT_SOURCE_DIR}/../common - ${glog_SOURCE_DIR}/src -) -target_link_libraries(test_tensor_op PRIVATE - "-Wl,--whole-archive" - infini_train - infini_train_cpu_kernels - "-Wl,--no-whole-archive" -) -add_test(NAME tensor_ops COMMAND test_tensor_op --gtest_filter=TensorOpTest.*) -set_tests_properties(tensor_ops PROPERTIES LABELS "cpu;cuda") - -# test_tensor_distributed (keep original for backward compatibility) -add_executable(test_tensor_dist - test_tensor.cc -) -target_compile_options(test_tensor_dist PRIVATE -Wno-error) -target_link_libraries(test_tensor_dist - PRIVATE - GTest::gtest - GTest::gtest_main -) -target_include_directories(test_tensor_dist PRIVATE - ${CMAKE_CURRENT_SOURCE_DIR}/../common - ${glog_SOURCE_DIR}/src -) -target_link_libraries(test_tensor_dist PRIVATE - "-Wl,--whole-archive" - infini_train - infini_train_cpu_kernels - "-Wl,--no-whole-archive" -) -add_test(NAME tensor_distributed COMMAND test_tensor_dist --gtest_filter=TensorDistributedTest.*) -set_tests_properties(tensor_distributed PROPERTIES LABELS "cuda;distributed") -# Convenience aggregate target so `cmake --build ... --target test_tensor` works -add_custom_target(test_tensor - DEPENDS - test_tensor_create - test_tensor_copy - test_tensor_delete - test_tensor_op - test_tensor_dist +include(GoogleTest) +gtest_discover_tests(test_tensor + EXTRA_ARGS --gtest_output=xml:%T.xml + TEST_FILTER "-TensorDistributedTest.*:*CUDA*:TensorOpTest.*" + PROPERTIES LABELS "cpu" +) +gtest_discover_tests(test_tensor + EXTRA_ARGS --gtest_output=xml:%T.xml + TEST_FILTER "TensorOpTest.*:*CUDA*" + PROPERTIES LABELS "cuda" +) +gtest_discover_tests(test_tensor + EXTRA_ARGS --gtest_output=xml:%T.xml + TEST_FILTER "TensorDistributedTest.*" + PROPERTIES LABELS "distributed" ) diff --git a/tests/tensor/test_tensor.cc b/tests/tensor/test_tensor.cc index 23eb940c..b2025100 100644 --- a/tests/tensor/test_tensor.cc +++ b/tests/tensor/test_tensor.cc @@ -24,11 +24,7 @@ class TensorTestBase : public ::testing::Test { } static void FillSequential(const std::shared_ptr& tensor, float start = 0.0f) { - auto* data = static_cast(tensor->DataPtr()); - auto n = Numel(tensor); - for (size_t i = 0; i < n; ++i) { - data[i] = start + static_cast(i); - } + infini_train::test::FillSequentialTensor(tensor, start); } }; @@ -94,7 +90,7 @@ TEST_F(TensorCreateTest, CreatesTensorOnCUDA) { auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, Device(Device::DeviceType::kCUDA, 0)); EXPECT_NE(tensor, nullptr); - EXPECT_TRUE(tensor->IsCUDA()); + EXPECT_TRUE(tensor->GetDevice().IsCUDA()); EXPECT_EQ(tensor->Dims(), (std::vector{2, 3})); EXPECT_EQ(tensor->Dtype(), DataType::kFLOAT32); #endif @@ -146,7 +142,7 @@ TEST_F(TensorCopyTest, CopiesCPUToCUDA) { FillSequential(cpu_tensor, 0.0f); cuda_tensor->CopyFrom(cpu_tensor); - EXPECT_TRUE(cuda_tensor->IsCUDA()); + EXPECT_TRUE(cuda_tensor->GetDevice().IsCUDA()); #endif } @@ -161,7 +157,7 @@ TEST_F(TensorCopyTest, CopiesCUDAtoCUDA) { target->CopyFrom(source); - EXPECT_TRUE(target->IsCUDA()); + EXPECT_TRUE(target->GetDevice().IsCUDA()); #endif } @@ -177,9 +173,9 @@ TEST_F(TensorOpTest, MatmulCUDAAllocatesOutputs) { EXPECT_NE(a->DataPtr(), nullptr); EXPECT_NE(b->DataPtr(), nullptr); EXPECT_NE(c->DataPtr(), nullptr); - EXPECT_TRUE(a->IsCUDA()); - EXPECT_TRUE(b->IsCUDA()); - EXPECT_TRUE(c->IsCUDA()); + EXPECT_TRUE(a->GetDevice().IsCUDA()); + EXPECT_TRUE(b->GetDevice().IsCUDA()); + EXPECT_TRUE(c->GetDevice().IsCUDA()); #endif } @@ -218,10 +214,9 @@ TEST_F(TensorDistributedTest, AllReduce) { Device(Device::DeviceType::kCUDA, 0)); tensor->set_requires_grad(true); - auto* data = static_cast(tensor->DataPtr()); - for (int i = 0; i < 6; ++i) data[i] = 1.0f; + infini_train::test::FillConstantTensor(tensor, 1.0f); - EXPECT_TRUE(tensor->IsCUDA()); + EXPECT_TRUE(tensor->GetDevice().IsCUDA()); EXPECT_TRUE(tensor->requires_grad()); #endif } @@ -235,7 +230,7 @@ TEST_F(TensorDistributedTest, AllGather) { Device(Device::DeviceType::kCUDA, 0)); tensor->set_requires_grad(true); - EXPECT_TRUE(tensor->IsCUDA()); + EXPECT_TRUE(tensor->GetDevice().IsCUDA()); EXPECT_EQ(tensor->Dims(), (std::vector{4, 4})); #endif } @@ -249,7 +244,7 @@ TEST_F(TensorDistributedTest, ReduceScatter) { Device(Device::DeviceType::kCUDA, 0)); tensor->set_requires_grad(true); - EXPECT_TRUE(tensor->IsCUDA()); + EXPECT_TRUE(tensor->GetDevice().IsCUDA()); EXPECT_EQ(tensor->Dims(), (std::vector{2, 8})); #endif } diff --git a/tests/tensor/test_tensor_copy.cc b/tests/tensor/test_tensor_copy.cc index 452062c4..be066121 100644 --- a/tests/tensor/test_tensor_copy.cc +++ b/tests/tensor/test_tensor_copy.cc @@ -12,14 +12,7 @@ using namespace infini_train; class TensorCopyTest : public infini_train::test::TensorTestBase {}; static void FillSequential(const std::shared_ptr& tensor, float start = 0.0f) { - auto* data = static_cast(tensor->DataPtr()); - size_t n = 1; - for (auto dim : tensor->Dims()) { - n *= static_cast(dim); - } - for (size_t i = 0; i < n; ++i) { - data[i] = start + static_cast(i); - } + infini_train::test::FillSequentialTensor(tensor, start); } TEST_F(TensorCopyTest, CopiesCPUToCPU) { @@ -48,7 +41,7 @@ TEST_F(TensorCopyTest, CopiesCPUToCUDA) { FillSequential(cpu_tensor, 0.0f); cuda_tensor->CopyFrom(cpu_tensor); - EXPECT_TRUE(cuda_tensor->IsCUDA()); + EXPECT_TRUE(cuda_tensor->GetDevice().IsCUDA()); #endif } @@ -63,7 +56,7 @@ TEST_F(TensorCopyTest, CopiesCUDAtoCUDA) { target->CopyFrom(source); - EXPECT_TRUE(target->IsCUDA()); + EXPECT_TRUE(target->GetDevice().IsCUDA()); #endif } @@ -78,8 +71,8 @@ TEST_F(TensorCopyTest, CopiesCUDAtoCPU) { FillSequential(cuda_tensor, 1.0f); cpu_tensor->CopyFrom(cuda_tensor); - EXPECT_FALSE(cpu_tensor->IsCUDA()); - EXPECT_TRUE(cpu_tensor->IsCPU()); + EXPECT_FALSE(cpu_tensor->GetDevice().IsCUDA()); + EXPECT_TRUE(cpu_tensor->GetDevice().IsCPU()); #endif } @@ -108,6 +101,7 @@ TEST_F(TensorCopyTest, CopiesPreservesDataType) { TEST_F(TensorCopyTest, CopiesWithDifferentDeviceId) { REQUIRE_CUDA(); + REQUIRE_MIN_GPUS(2); #if defined(USE_CUDA) auto source = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, Device(Device::DeviceType::kCUDA, 0)); diff --git a/tests/tensor/test_tensor_create.cc b/tests/tensor/test_tensor_create.cc index c39246e3..f301a2e8 100644 --- a/tests/tensor/test_tensor_create.cc +++ b/tests/tensor/test_tensor_create.cc @@ -67,7 +67,7 @@ TEST_F(TensorCreateTest, CreatesTensorOnCUDA) { auto tensor = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, Device(Device::DeviceType::kCUDA, 0)); EXPECT_NE(tensor, nullptr); - EXPECT_TRUE(tensor->IsCUDA()); + EXPECT_TRUE(tensor->GetDevice().IsCUDA()); EXPECT_EQ(tensor->Dims(), (std::vector{2, 3})); EXPECT_EQ(tensor->Dtype(), DataType::kFLOAT32); #endif diff --git a/tests/tensor/test_tensor_delete.cc b/tests/tensor/test_tensor_delete.cc index e959b807..f7a4de74 100644 --- a/tests/tensor/test_tensor_delete.cc +++ b/tests/tensor/test_tensor_delete.cc @@ -96,7 +96,7 @@ TEST_F(TensorDeleteTest, ReleaseMemoryOnCUDA) { auto tensor = std::make_shared(std::vector{100, 100}, DataType::kFLOAT32, Device(Device::DeviceType::kCUDA, 0)); tensor->set_requires_grad(true); - EXPECT_TRUE(tensor->IsCUDA()); + EXPECT_TRUE(tensor->GetDevice().IsCUDA()); weak_tensor = tensor; } EXPECT_TRUE(weak_tensor.expired()); From 221a7e1b6a3e7acdd3bd2b52a9527c00ed0c8a6c Mon Sep 17 00:00:00 2001 From: chen Date: Mon, 13 Apr 2026 07:56:33 +0000 Subject: [PATCH 4/4] refactor(tests): migrate to device-parametrized TEST_P infrastructure Replace TEST_F with TEST_P across all test suites so each suite runs on both CPU and CUDA without duplicating test logic. Adds InfiniTrainTestP, TensorTestBaseP, AutogradTestBaseP, and DistributedInfiniTrainTestP base classes with automatic CUDA/NCCL skip guards. Introduces INFINI_TRAIN_REGISTER_TEST* C++ macros and infini_train_add_test_suite CMake macro to eliminate repetitive INSTANTIATE_TEST_SUITE_P / infini_train_add_test boilerplate. Removes deprecated test/, slow/, and split optimizer test files; consolidates optimizer tests into a single binary with creation + step suites. --- .gitmodules | 3 + CMakeLists.txt | 16 +- test/hook/test_hook.cc | 179 ---- test/hook/test_precision_check.cc | 241 ----- test/lora/test_lora.cc | 860 ------------------ tests/CMakeLists.txt | 2 - tests/autograd/CMakeLists.txt | 49 +- tests/autograd/test_autograd.cc | 443 +++------ .../test_autograd_elementwise_backward.cc | 30 +- .../test_autograd_elementwise_forward.cc | 50 +- .../autograd/test_autograd_linear_backward.cc | 8 +- .../autograd/test_autograd_linear_forward.cc | 10 +- .../autograd/test_autograd_matmul_backward.cc | 10 +- .../autograd/test_autograd_matmul_forward.cc | 12 +- .../test_autograd_normalization_backward.cc | 8 +- .../test_autograd_normalization_forward.cc | 10 +- .../test_autograd_reduction_backward.cc | 16 +- .../test_autograd_reduction_forward.cc | 16 +- .../test_autograd_softmax_backward.cc | 8 +- .../autograd/test_autograd_softmax_forward.cc | 10 +- .../test_autograd_transform_backward.cc | 6 +- .../test_autograd_transform_forward.cc | 18 +- tests/common/test_macros.cmake | 137 ++- tests/common/test_utils.h | 213 +++-- tests/hook/CMakeLists.txt | 28 +- tests/hook/test_hook.cc | 163 +--- tests/hook/test_precision_check.cc | 23 +- tests/lora/CMakeLists.txt | 20 +- tests/lora/test_lora.cc | 80 +- tests/optimizer/CMakeLists.txt | 32 +- tests/optimizer/test_optimizer.cc | 183 ---- tests/optimizer/test_optimizer_creation.cc | 76 +- tests/optimizer/test_optimizer_cuda.cc | 92 -- tests/optimizer/test_optimizer_distributed.cc | 75 -- tests/optimizer/test_optimizer_step.cc | 90 +- tests/slow/CMakeLists.txt | 34 - tests/slow/test_slow.cc | 28 - tests/tensor/CMakeLists.txt | 33 +- tests/tensor/test_tensor.cc | 238 +---- tests/tensor/test_tensor_copy.cc | 131 ++- tests/tensor/test_tensor_create.cc | 78 +- tests/tensor/test_tensor_delete.cc | 80 +- 42 files changed, 805 insertions(+), 3034 deletions(-) delete mode 100644 test/hook/test_hook.cc delete mode 100644 test/hook/test_precision_check.cc delete mode 100644 test/lora/test_lora.cc delete mode 100644 tests/optimizer/test_optimizer.cc delete mode 100644 tests/optimizer/test_optimizer_cuda.cc delete mode 100644 tests/optimizer/test_optimizer_distributed.cc delete mode 100644 tests/slow/CMakeLists.txt delete mode 100644 tests/slow/test_slow.cc diff --git a/.gitmodules b/.gitmodules index 64299a70..578e24f9 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ +[submodule "third_party/googletest"] + path = third_party/googletest + url = https://github.com/google/googletest.git [submodule "third_party/glog"] path = third_party/glog url = https://github.com/google/glog.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 9cd2ff75..22dcf791 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,7 +4,7 @@ option(USE_CUDA "Support NVIDIA CUDA" OFF) option(PROFILE_MODE "ENABLE PROFILE MODE" OFF) option(USE_OMP "Use OpenMP as backend for Eigen" ON) option(USE_NCCL "Build project for distributed running" ON) -option(BUILD_TEST "Build InfiniTrain tests" ON) +option(BUILD_TEST "Build InfiniTrain tests" OFF) project(infini_train VERSION 0.5.0 LANGUAGES CXX) @@ -16,17 +16,15 @@ set(CMAKE_CXX_EXTENSIONS OFF) set(CMAKE_EXPORT_COMPILE_COMMANDS ON) # ------------------------------------------------------------------------------ -# GoogleTest (FetchContent) +# GoogleTest (submodule) # ------------------------------------------------------------------------------ if(BUILD_TEST) - include(FetchContent) - FetchContent_Declare( - googletest - GIT_REPOSITORY https://github.com/google/googletest.git - GIT_TAG v1.14.0 - ) + if(NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/third_party/googletest/CMakeLists.txt) + message(FATAL_ERROR "googletest submodule not found at third_party/googletest. " + "Run: git submodule update --init third_party/googletest") + endif() set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) - FetchContent_MakeAvailable(googletest) + add_subdirectory(third_party/googletest) enable_testing() endif() diff --git a/test/hook/test_hook.cc b/test/hook/test_hook.cc deleted file mode 100644 index 32c7e097..00000000 --- a/test/hook/test_hook.cc +++ /dev/null @@ -1,179 +0,0 @@ -#include -#include - -#include "glog/logging.h" - -#include "infini_train/include/autograd/elementwise.h" -#include "infini_train/include/autograd/function.h" -#include "infini_train/include/autograd/function_hook.h" -#include "infini_train/include/common/hook.h" -#include "infini_train/include/nn/modules/module.h" -#include "infini_train/include/nn/parallel/global.h" -#include "infini_train/include/tensor.h" - -using namespace infini_train; - -// ============================================================================ -// Test 1: Basic Module Hooks -// ============================================================================ -void test_basic_hooks() { - std::cout << "\n=== Test 1: Basic Module Hooks ===" << std::endl; - - auto x = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32); - x->set_requires_grad(true); - - // Module hook example - class MyModule : public nn::Module { - public: - MyModule() : Module("MyModule") {} - - std::vector> Forward(const std::vector> &inputs) override { - std::cout << "Forward pass executing..." << std::endl; - return inputs; - } - }; - - auto module = std::make_shared(); - - // Register forward pre-hook - auto pre_hook - = module->RegisterForwardPreHook([](nn::Module *mod, const std::vector> &inputs) { - std::cout << "Forward pre-hook: Module type = " << mod->type() << std::endl; - }); - - // Register forward post-hook - auto fwd_hook - = module->RegisterForwardPostHook([](nn::Module *mod, const std::vector> &inputs, - const std::vector> &outputs) { - std::cout << "Forward post-hook: Got " << outputs.size() << " outputs" << std::endl; - }); - - // Register backward pre-hook - auto bwd_pre_hook = module->RegisterBackwardPreHook( - [](nn::Module *mod, const std::vector> &grad_outputs) { - std::cout << "Backward pre-hook called!" << std::endl; - }); - - // Register backward post-hook - auto bwd_post_hook - = module->RegisterBackwardPostHook([](nn::Module *mod, const std::vector> &grad_inputs, - const std::vector> &grad_outputs) { - std::cout << "Backward post-hook called!" << std::endl; - }); - - // Test forward pass - std::vector> inputs = {x}; - auto outputs = (*module)(inputs); - - std::cout << "Module hook test completed!" << std::endl; -} - -// ============================================================================ -// Test 2: Hook Remove() Functionality Test -// ============================================================================ -void test_hook_remove() { - std::cout << "\n=== Test 2: Hook Remove() Functionality Test ===" << std::endl; - - auto a = std::make_shared(std::vector{2, 2}, DataType::kFLOAT32); - auto b = std::make_shared(std::vector{2, 2}, DataType::kFLOAT32); - a->set_requires_grad(true); - b->set_requires_grad(true); - - int hook1_count = 0; - int hook2_count = 0; - int hook3_count = 0; - - auto add_fn = std::make_shared(); - - // Register three forward pre-hooks - auto handle1 = add_fn->RegisterForwardPreHook( - [&hook1_count](autograd::Function *, const std::vector> &) { - hook1_count++; - std::cout << "Hook 1 called (count: " << hook1_count << ")" << std::endl; - }); - - auto handle2 = add_fn->RegisterForwardPreHook( - [&hook2_count](autograd::Function *, const std::vector> &) { - hook2_count++; - std::cout << "Hook 2 called (count: " << hook2_count << ")" << std::endl; - }); - - auto handle3 = add_fn->RegisterForwardPreHook( - [&hook3_count](autograd::Function *, const std::vector> &) { - hook3_count++; - std::cout << "Hook 3 called (count: " << hook3_count << ")" << std::endl; - }); - - // First call - all hooks should fire - std::cout << "\n--- First Apply (all hooks active) ---" << std::endl; - std::vector> inputs; - inputs.push_back(a); - inputs.push_back(b); - auto result1 = add_fn->Apply(inputs); - std::cout << "Hook counts: " << hook1_count << ", " << hook2_count << ", " << hook3_count << std::endl; - - // Remove hook 2 - std::cout << "\n--- Removing Hook 2 ---" << std::endl; - handle2->Remove(); - - // Second call - hook 2 should not fire - std::cout << "\n--- Second Apply (hook 2 removed) ---" << std::endl; - auto result2 = add_fn->Apply(inputs); - std::cout << "Hook counts: " << hook1_count << ", " << hook2_count << ", " << hook3_count << std::endl; - - // Remove hook 1 - std::cout << "\n--- Removing Hook 1 ---" << std::endl; - handle1->Remove(); - - // Third call - only hook 3 should fire - std::cout << "\n--- Third Apply (hooks 1 and 2 removed) ---" << std::endl; - auto result3 = add_fn->Apply(inputs); - std::cout << "Hook counts: " << hook1_count << ", " << hook2_count << ", " << hook3_count << std::endl; - - // Verify results - std::cout << "\n=== Test Results ===" << std::endl; - bool test_passed = true; - - if (hook1_count != 2) { - std::cout << "FAIL: Hook 1 should be called 2 times, got " << hook1_count << std::endl; - test_passed = false; - } - - if (hook2_count != 1) { - std::cout << "FAIL: Hook 2 should be called 1 time, got " << hook2_count << std::endl; - test_passed = false; - } - - if (hook3_count != 3) { - std::cout << "FAIL: Hook 3 should be called 3 times, got " << hook3_count << std::endl; - test_passed = false; - } - - if (test_passed) { - std::cout << "SUCCESS: All hooks behaved correctly!" << std::endl; - std::cout << " - Hook 1: called 2 times (before removal)" << std::endl; - std::cout << " - Hook 2: called 1 time (removed after first call)" << std::endl; - std::cout << " - Hook 3: called 3 times (never removed)" << std::endl; - } -} - -// ============================================================================ -// Main -// ============================================================================ -int main(int argc, char *argv[]) { - google::InitGoogleLogging(argv[0]); - nn::parallel::global::GlobalEnv::Instance().Init(1, 1, false, 1, 1); - - std::cout << "========================================" << std::endl; - std::cout << " Hook Mechanism Tests" << std::endl; - std::cout << "========================================" << std::endl; - - test_basic_hooks(); - test_hook_remove(); - - std::cout << "\n========================================" << std::endl; - std::cout << " All Tests Completed Successfully" << std::endl; - std::cout << "========================================" << std::endl; - - return 0; -} diff --git a/test/hook/test_precision_check.cc b/test/hook/test_precision_check.cc deleted file mode 100644 index 65c8258c..00000000 --- a/test/hook/test_precision_check.cc +++ /dev/null @@ -1,241 +0,0 @@ -#include -#include -#include - -#include "glog/logging.h" - -#include "infini_train/include/nn/modules/module.h" -#include "infini_train/include/nn/parallel/global.h" -#include "infini_train/include/tensor.h" -#include "infini_train/include/utils/global_module_hook_registry.h" -#include "infini_train/include/utils/precision_check_config.h" -#include "infini_train/include/utils/precision_checker.h" - -using namespace infini_train; - -class MyModel : public nn::Module { -public: - MyModel() : Module("MyModel") {} - - std::vector> Forward(const std::vector> &inputs) override { - auto x = inputs[0]; - x->RequiresGrad(); - auto y = x->Mul(x); - return {y}; - } -}; - -// Simple model for multi-iteration test -class SimpleModel : public nn::Module { -public: - SimpleModel() : Module("SimpleModel") {} - - std::vector> Forward(const std::vector> &inputs) override { - auto x = inputs[0]; - x->RequiresGrad(); - auto y = x->Mul(x)->Mul(x); // x^3 - return {y}; - } -}; - -void RunModelForwardBackward(const std::shared_ptr &model) { - auto x = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32); - x->Fill(2.0f); - x->RequiresGrad(); - - std::vector> inputs = {x}; - auto outputs = (*model)(inputs); - auto loss = outputs[0]->Sum(0, false)->Sum(0, false); - loss->Backward(); -} - -void TestFunctionLevel(const std::string &config_str) { - std::cout << "\n========================================" << std::endl; - std::cout << " Function-Level Test: " << config_str << std::endl; - std::cout << "========================================" << std::endl; - - auto x = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32); - x->Fill(2.0f); - x->RequiresGrad(); - - auto y = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32); - y->Fill(3.0f); - y->RequiresGrad(); - - auto z = x->Mul(y); - auto loss = z->Sum(0, false)->Sum(0, false); - loss->Backward(); - - std::cout << "Test completed." << std::endl; -} - -void TestModuleLevel(const std::string &config_str) { - std::cout << "\n========================================" << std::endl; - std::cout << " Module-Level Test: " << config_str << std::endl; - std::cout << "========================================" << std::endl; - - auto model = std::make_shared(); - RunModelForwardBackward(model); - - std::cout << "Test completed." << std::endl; -} - -// Test: Simple format output (level=2, format=simple) -void TestSimpleFormat() { - std::cout << "\n========================================" << std::endl; - std::cout << " Test: Simple Format (level=2, format=simple)" << std::endl; - std::cout << "========================================" << std::endl; - - auto x = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32); - x->Fill(2.0f); - x->RequiresGrad(); - - auto y = x->Mul(x); - auto loss = y->Sum(0, false)->Sum(0, false); // Two Sum ops to produce scalar - loss->Backward(); - - std::cout << "Simple format test completed - check output for min/max/mean values." << std::endl; -} - -// Test: MD5 format output (level=2, format=md5) -void TestMd5Format() { - std::cout << "\n========================================" << std::endl; - std::cout << " Test: MD5 Format (level=2, format=md5)" << std::endl; - std::cout << "========================================" << std::endl; - - auto x = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32); - x->Fill(2.0f); - x->RequiresGrad(); - - auto y = x->Mul(x); - auto loss = y->Sum(0, false)->Sum(0, false); // Two Sum ops to produce scalar - loss->Backward(); - - std::cout << "MD5 format test completed - check output for md5 hashes." << std::endl; -} - -// Test: Save tensors to NPY files (level=1, save_tensors=true) -void TestSaveTensors() { - std::cout << "\n========================================" << std::endl; - std::cout << " Test: Save Tensors (level=1, save_tensors=true)" << std::endl; - std::cout << "========================================" << std::endl; - - std::string output_path = "/tmp/precision_check_npy"; - - auto model = std::make_shared(); - RunModelForwardBackward(model); - - // Verify NPY files were created - namespace fs = std::filesystem; - bool found_npy = false; - if (fs::exists(output_path)) { - for (const auto &entry : fs::recursive_directory_iterator(output_path)) { - if (entry.path().extension() == ".npy") { - found_npy = true; - std::cout << "Found NPY file: " << entry.path() << std::endl; - } - } - } - - if (found_npy) { - std::cout << "Save tensors test PASSED - NPY files created successfully." << std::endl; - } else { - std::cout << "Save tensors test completed - check output directory for NPY files." << std::endl; - } -} - -// Test: Multi-iteration file overwrite (level=1, save_tensors=true, iter=3) -void TestMultiIterOverwrite() { - std::cout << "\n========================================" << std::endl; - std::cout << " Test: Multi-Iteration File Overwrite" << std::endl; - std::cout << "========================================" << std::endl; - - std::string output_path = "/tmp/precision_check_overwrite"; - - auto model = std::make_shared(); - int num_iters = 3; - - // Run multiple iterations - files should be overwritten - for (int i = 0; i < num_iters; ++i) { - std::cout << "Iteration " << (i + 1) << "/" << num_iters << std::endl; - utils::PrecisionCheckEnv::ResetCounters(); // Reset counters each iteration - RunModelForwardBackward(model); - } - - namespace fs = std::filesystem; - int npy_count = 0; - if (fs::exists(output_path)) { - for (const auto &entry : fs::recursive_directory_iterator(output_path)) { - if (entry.path().extension() == ".npy") { - ++npy_count; - } - } - } - - std::cout << "Multi-iteration test completed - found " << npy_count << " NPY files after " << num_iters - << " iterations." << std::endl; - std::cout << "(Files should be overwritten each iteration, count should be consistent with 1 iter)" << std::endl; -} - -int main(int argc, char *argv[]) { - google::InitGoogleLogging(argv[0]); - - std::string config_str = argc > 1 ? argv[1] : ""; - - std::cout << "========================================" << std::endl; - std::cout << " Precision Check Test Suite" << std::endl; - std::cout << "========================================" << std::endl; - - nn::parallel::global::InitAllEnv(1, 1, false, 1, 1); - - // If no config argument, run all format tests - if (config_str.empty()) { - auto config = utils::PrecisionCheckConfig::Parse("level=2,format=simple"); - utils::PrecisionCheckEnv::Instance().Init(config); - - std::cout << "\nRunning all precision check format tests..." << std::endl; - - // Test 1: Simple format - TestSimpleFormat(); - - // Test 2: MD5 format - auto md5_config = utils::PrecisionCheckConfig::Parse("level=2,format=md5"); - utils::PrecisionCheckEnv::Instance().Init(md5_config); - TestMd5Format(); - - // Test 3: Save tensors - auto npy_config = utils::PrecisionCheckConfig::Parse("level=1,save_tensors=true"); - utils::PrecisionCheckEnv::Instance().Init(npy_config); - TestSaveTensors(); - - // Test 4: Multi-iteration overwrite - auto iter_config = utils::PrecisionCheckConfig::Parse("level=1,save_tensors=true"); - utils::PrecisionCheckEnv::Instance().Init(iter_config); - TestMultiIterOverwrite(); - - std::cout << "\n========================================" << std::endl; - std::cout << " All Tests Completed Successfully" << std::endl; - std::cout << "========================================" << std::endl; - return 0; - } - - // If config provided, run single test (original behavior) - auto config = utils::PrecisionCheckConfig::Parse(config_str); - utils::PrecisionCheckEnv::Instance().Init(config); - - std::cout << "Config: " << config_str << std::endl; - - if (config.level == utils::PrecisionCheckLevel::MODULE) { - TestModuleLevel(config_str); - } else if (config.level == utils::PrecisionCheckLevel::FUNCTION) { - TestFunctionLevel(config_str); - } else { - std::cout << "No tests to run (level=0)" << std::endl; - } - - std::cout << "\n========================================" << std::endl; - std::cout << " Test Completed" << std::endl; - std::cout << "========================================" << std::endl; - - return 0; -} diff --git a/test/lora/test_lora.cc b/test/lora/test_lora.cc deleted file mode 100644 index 06966809..00000000 --- a/test/lora/test_lora.cc +++ /dev/null @@ -1,860 +0,0 @@ -#include -#include -#include - -#include "glog/logging.h" - -#include "infini_train/include/nn/lora/lora_config.h" -#include "infini_train/include/nn/lora/lora_linear.h" -#include "infini_train/include/nn/lora/lora_utils.h" -#include "infini_train/include/nn/modules/container.h" -#include "infini_train/include/nn/modules/linear.h" -#include "infini_train/include/nn/modules/module.h" -#include "infini_train/include/nn/parallel/global.h" -#include "infini_train/include/tensor.h" - -using namespace infini_train; -using namespace infini_train::nn::lora; - -// ============================================================================ -// Test 1: LoRAConfig -// ============================================================================ -void test_lora_config() { - std::cout << "\n=== Test 1: LoRAConfig ===" << std::endl; - - LoRAConfig config; - config.rank = 8; - config.alpha = 16.0f; - - // Test scaling calculation - float expected_scaling = 16.0f / 8.0f; - CHECK_EQ(config.Scaling(), expected_scaling) << "Scaling calculation failed"; - std::cout << "Scaling: " << config.Scaling() << " (expected: " << expected_scaling << ")" << std::endl; - - // Test ShouldApplyLoRA - CHECK(config.ShouldApplyLoRA("c_attn")) << "Should match c_attn"; - CHECK(config.ShouldApplyLoRA("transformer.h.0.attn.c_attn")) << "Should match nested c_attn"; - CHECK(config.ShouldApplyLoRA("c_proj")) << "Should match c_proj"; - CHECK(!config.ShouldApplyLoRA("c_fc")) << "Should not match c_fc (not in default targets)"; - CHECK(!config.ShouldApplyLoRA("random_layer")) << "Should not match random_layer"; - - std::cout << "LoRAConfig tests passed!" << std::endl; -} - -// ============================================================================ -// Test 2: LoRALinear Initialization -// ============================================================================ -void test_lora_linear_init() { - std::cout << "\n=== Test 2: LoRALinear Initialization ===" << std::endl; - - LoRAConfig config; - config.rank = 4; - config.alpha = 8.0f; - - int64_t in_features = 64; - int64_t out_features = 128; - - auto lora_linear - = std::shared_ptr(new LoRALinear(in_features, out_features, config, /*bias=*/true, nullptr)); - - // Check parameter shapes - auto weight = lora_linear->parameter(nn::Linear::kParamWeightName); - auto bias = lora_linear->parameter(nn::Linear::kParamBiasName); - auto lora_A = lora_linear->parameter(LoRALinear::kParamLoraAName); - auto lora_B = lora_linear->parameter(LoRALinear::kParamLoraBName); - - CHECK_EQ(weight->Dims().size(), 2); - CHECK_EQ(weight->Dims()[0], out_features); - CHECK_EQ(weight->Dims()[1], in_features); - std::cout << "Weight shape: [" << weight->Dims()[0] << ", " << weight->Dims()[1] << "]" << std::endl; - - CHECK_EQ(bias->Dims().size(), 1); - CHECK_EQ(bias->Dims()[0], out_features); - std::cout << "Bias shape: [" << bias->Dims()[0] << "]" << std::endl; - - CHECK_EQ(lora_A->Dims().size(), 2); - CHECK_EQ(lora_A->Dims()[0], config.rank); - CHECK_EQ(lora_A->Dims()[1], in_features); - std::cout << "LoRA A shape: [" << lora_A->Dims()[0] << ", " << lora_A->Dims()[1] << "]" << std::endl; - - CHECK_EQ(lora_B->Dims().size(), 2); - CHECK_EQ(lora_B->Dims()[0], out_features); - CHECK_EQ(lora_B->Dims()[1], config.rank); - std::cout << "LoRA B shape: [" << lora_B->Dims()[0] << ", " << lora_B->Dims()[1] << "]" << std::endl; - - // Check requires_grad - CHECK(!weight->requires_grad()) << "Base weight should be frozen"; - CHECK(!bias->requires_grad()) << "Base bias should be frozen"; - CHECK(lora_A->requires_grad()) << "LoRA A should be trainable"; - CHECK(lora_B->requires_grad()) << "LoRA B should be trainable"; - std::cout << "requires_grad check passed!" << std::endl; - - // Check LoRAParameters() returns only LoRA params - auto params = lora_linear->LoRAParameters(); - CHECK_EQ(params.size(), 2) << "LoRAParameters() should return only LoRA params"; - std::cout << "LoRAParameters() returns " << params.size() << " tensors (LoRA A and B)" << std::endl; - - std::cout << "LoRALinear initialization tests passed!" << std::endl; -} - -// ============================================================================ -// Test 3: LoRALinear Forward Pass -// ============================================================================ -void test_lora_linear_forward() { - std::cout << "\n=== Test 3: LoRALinear Forward Pass ===" << std::endl; - - LoRAConfig config; - config.rank = 4; - config.alpha = 8.0f; - - int64_t in_features = 64; - int64_t out_features = 128; - int64_t batch_size = 2; - int64_t seq_len = 10; - - auto lora_linear - = std::shared_ptr(new LoRALinear(in_features, out_features, config, /*bias=*/true, nullptr)); - - // Create input tensor - auto input = std::make_shared(std::vector{batch_size, seq_len, in_features}, DataType::kFLOAT32); - - // Forward pass - auto output = (*lora_linear)({input})[0]; - - // Check output shape - CHECK_EQ(output->Dims().size(), 3); - CHECK_EQ(output->Dims()[0], batch_size); - CHECK_EQ(output->Dims()[1], seq_len); - CHECK_EQ(output->Dims()[2], out_features); - std::cout << "Output shape: [" << output->Dims()[0] << ", " << output->Dims()[1] << ", " << output->Dims()[2] << "]" - << std::endl; - - std::cout << "LoRALinear forward pass tests passed!" << std::endl; -} - -// ============================================================================ -// Test 4: LoRALinear Weight Merging -// ============================================================================ -void test_lora_linear_merge() { - std::cout << "\n=== Test 4: LoRALinear Weight Merging ===" << std::endl; - - LoRAConfig config; - config.rank = 4; - config.alpha = 8.0f; - - int64_t in_features = 32; - int64_t out_features = 64; - - auto lora_linear - = std::shared_ptr(new LoRALinear(in_features, out_features, config, /*bias=*/false, nullptr)); - - // Print weight sum before merge - auto weight_before = lora_linear->parameter(nn::Linear::kParamWeightName); - auto lora_A = lora_linear->parameter(LoRALinear::kParamLoraAName); - auto lora_B = lora_linear->parameter(LoRALinear::kParamLoraBName); - - float weight_before_sum = weight_before->EigenMatrix().sum(); - float lora_A_sum = lora_A->EigenMatrix().sum(); - float lora_B_sum = lora_B->EigenMatrix().sum(); - - std::cout << "\n--- Before Merge ---" << std::endl; - std::cout << "Base weight sum: " << weight_before_sum << std::endl; - std::cout << "LoRA A sum: " << lora_A_sum << std::endl; - std::cout << "LoRA B sum: " << lora_B_sum << std::endl; - std::cout << "Scaling (alpha/r): " << config.Scaling() << std::endl; - - // Create input - auto input = std::make_shared(std::vector{2, 5, in_features}, DataType::kFLOAT32); - input->EigenMatrix().setRandom(); - - // Get output before merge - auto output_before = (*lora_linear)({input})[0]; - float output_before_sum = output_before->EigenMatrix().sum(); - std::cout << "Output sum before merge: " << output_before_sum << std::endl; - - // Merge weights - CHECK(!lora_linear->IsMerged()) << "Should not be merged initially"; - lora_linear->MergeWeights(); - CHECK(lora_linear->IsMerged()) << "Should be merged after MergeWeights()"; - - // Verify LoRA params are frozen after merge - CHECK(!lora_A->requires_grad()) << "lora_A should be frozen after merge"; - CHECK(!lora_B->requires_grad()) << "lora_B should be frozen after merge"; - std::cout << "\nWeights merged successfully, LoRA params frozen" << std::endl; - - // Print weight sum after merge - auto weight_after = lora_linear->parameter(nn::Linear::kParamWeightName); - float weight_after_sum = weight_after->EigenMatrix().sum(); - std::cout << "\n--- After Merge ---" << std::endl; - std::cout << "Base weight sum after merge: " << weight_after_sum << std::endl; - std::cout << "Weight change (should be ~LoRA contribution): " << (weight_after_sum - weight_before_sum) - << std::endl; - - // Get output after merge - auto output_merged = (*lora_linear)({input})[0]; - float output_merged_sum = output_merged->EigenMatrix().sum(); - std::cout << "Output sum after merge: " << output_merged_sum << std::endl; - - // Verify: output_after should equal output_before (numerically) - std::cout << "\nVerification: output_before == output_after? " << std::endl; - std::cout << " Before: " << output_before_sum << std::endl; - std::cout << " After: " << output_merged_sum << std::endl; - std::cout << " Diff: " << std::abs(output_before_sum - output_merged_sum) << std::endl; - CHECK(std::abs(output_before_sum - output_merged_sum) < 1e-3) << "Outputs should be numerically identical!"; - - // Shape comparison (always same) - std::cout << "\nOutput shape: [" << output_before->Dims()[0] << ", " << output_before->Dims()[1] << ", " - << output_before->Dims()[2] << "] (unchanged)" << std::endl; - - // Unmerge weights - lora_linear->UnmergeWeights(); - CHECK(!lora_linear->IsMerged()) << "Should not be merged after UnmergeWeights()"; - - // Verify LoRA params are trainable again after unmerge - CHECK(lora_A->requires_grad()) << "lora_A should be trainable after unmerge"; - CHECK(lora_B->requires_grad()) << "lora_B should be trainable after unmerge"; - - // Print weight sum after unmerge - auto weight_unmerged = lora_linear->parameter(nn::Linear::kParamWeightName); - float weight_unmerged_sum = weight_unmerged->EigenMatrix().sum(); - std::cout << "\n--- After Unmerge ---" << std::endl; - std::cout << "Base weight sum after unmerge: " << weight_unmerged_sum << std::endl; - - // Verify: weight should be restored to original value - std::cout << "\nVerification: weight restored after unmerge? " << std::endl; - std::cout << " Original: " << weight_before_sum << std::endl; - std::cout << " Unmerged: " << weight_unmerged_sum << std::endl; - std::cout << " Diff: " << std::abs(weight_before_sum - weight_unmerged_sum) << std::endl; - CHECK(std::abs(weight_before_sum - weight_unmerged_sum) < 1e-4) << "Weight should be restored!"; - - // Get output after unmerge - auto output_unmerged = (*lora_linear)({input})[0]; - float output_unmerged_sum = output_unmerged->EigenMatrix().sum(); - std::cout << "Output sum after unmerge: " << output_unmerged_sum << std::endl; - - // Shape comparison: merge doesn't change shape, only weights - CHECK(output_before->Dims() == output_merged->Dims()) << "Shape should be identical after merge"; - CHECK(output_merged->Dims() == output_unmerged->Dims()) << "Shape should be identical after unmerge"; - - std::cout << "\nLoRALinear weight merging tests passed!" << std::endl; -} - -// ============================================================================ -// Test 5: LoRA Utility Functions -// ============================================================================ -void test_lora_utils() { - std::cout << "\n=== Test 5: LoRA Utility Functions ===" << std::endl; - - LoRAConfig config; - config.rank = 4; - config.alpha = 8.0f; - - auto lora_linear = std::shared_ptr(new LoRALinear(32, 64, config, /*bias=*/true, nullptr)); - - // Test GetLoRAParameters - auto lora_params = GetLoRAParameters(lora_linear); - CHECK_EQ(lora_params.size(), 2) << "Should have 2 LoRA parameters"; - std::cout << "GetLoRAParameters returned " << lora_params.size() << " parameters" << std::endl; - - // Test CountTrainableParameters - int64_t trainable = CountTrainableParameters(lora_linear); - int64_t expected_trainable = config.rank * 32 + 64 * config.rank; // A: [4, 32], B: [64, 4] - CHECK_EQ(trainable, expected_trainable) << "Trainable parameter count mismatch"; - std::cout << "Trainable parameters: " << trainable << " (expected: " << expected_trainable << ")" << std::endl; - - // Test CountTotalParameters - int64_t total = CountTotalParameters(lora_linear); - int64_t expected_total = 64 * 32 + 64 + config.rank * 32 + 64 * config.rank; // weight + bias + A + B - CHECK_EQ(total, expected_total) << "Total parameter count mismatch"; - std::cout << "Total parameters: " << total << " (expected: " << expected_total << ")" << std::endl; - - // Test PrintLoRASummary - std::cout << "\nLoRA Summary:" << std::endl; - PrintLoRASummary(lora_linear); - - std::cout << "LoRA utility function tests passed!" << std::endl; -} - -// ============================================================================ -// Test 6: LoRALinear from existing Linear -// ============================================================================ -void test_lora_from_linear() { - std::cout << "\n=== Test 6: LoRALinear from existing Linear ===" << std::endl; - - // Create a standard Linear layer - auto linear = std::make_shared(64, 128, /*bias=*/true); - - // Wrap it with LoRA - LoRAConfig config; - config.rank = 8; - config.alpha = 16.0f; - - auto lora_linear = std::make_shared(linear, config); - - // Check dimensions - CHECK_EQ(lora_linear->in_features(), 64); - CHECK_EQ(lora_linear->out_features(), 128); - CHECK_EQ(lora_linear->rank(), 8); - std::cout << "LoRALinear created from Linear: in=" << lora_linear->in_features() - << ", out=" << lora_linear->out_features() << ", rank=" << lora_linear->rank() << std::endl; - - // Test forward pass - auto input = std::make_shared(std::vector{2, 10, 64}, DataType::kFLOAT32); - auto output = (*lora_linear)({input})[0]; - - CHECK_EQ(output->Dims()[0], 2); - CHECK_EQ(output->Dims()[1], 10); - CHECK_EQ(output->Dims()[2], 128); - std::cout << "Forward pass successful, output shape: [" << output->Dims()[0] << ", " << output->Dims()[1] << ", " - << output->Dims()[2] << "]" << std::endl; - - std::cout << "LoRALinear from existing Linear tests passed!" << std::endl; -} - -// ============================================================================ -// Test 7: LoRALinear from existing Linear (tests LoRA utilities) -// ============================================================================ -void test_lora_model_wrapper() { - std::cout << "\n=== Test 7: LoRALinear from existing Linear ===" << std::endl; - - // Create LoRA config - LoRAConfig lora_config; - lora_config.rank = 8; - lora_config.alpha = 16.0f; - - // Create base Linear module (simple test without InjectLoRALayers) - auto base_linear = std::make_shared(64, 128, /*bias=*/true); - - // Create a minimal wrapper test by manually testing what LoRAModel does - // Apply LoRA directly to the Linear layer - auto lora_linear = std::make_shared(base_linear, lora_config); - - // Replace the base_linear in its container - // Note: In a real use case, you would use InjectLoRALayers on a transformer model - - // Test GetLoRAParameters on the LoRA Linear - auto lora_params = GetLoRAParameters(lora_linear); - CHECK_GT(lora_params.size(), 0) << "Should have trainable parameters"; - std::cout << "LoRA parameters extracted: " << lora_params.size() << std::endl; - - // Test CountTrainableParameters - int64_t trainable = CountTrainableParameters(lora_linear); - CHECK_EQ(trainable, lora_config.rank * 64 + 128 * lora_config.rank); - std::cout << "Trainable parameters: " << trainable << std::endl; - - // Test PrintSummary - std::cout << "\nLoRA Summary for Linear wrapper:" << std::endl; - PrintLoRASummary(lora_linear); - - // Test Save/Load LoRA on the LoRA Linear - const std::string test_path = "/tmp/test_lora_linear.bin"; - SaveLoRAWeights(lora_linear, test_path); - std::cout << "SaveLoRAWeights completed" << std::endl; - - LoadLoRAWeights(lora_linear, test_path); - std::cout << "LoadLoRAWeights completed" << std::endl; - - // Test Merge/Unmerge on LoRA Linear - CHECK(!lora_linear->IsMerged()) << "Should not be merged initially"; - lora_linear->MergeWeights(); - CHECK(lora_linear->IsMerged()) << "Should be merged after MergeWeights()"; - std::cout << "MergeWeights completed" << std::endl; - - lora_linear->UnmergeWeights(); - CHECK(!lora_linear->IsMerged()) << "Should be unmerged after UnmergeWeights()"; - std::cout << "UnmergeWeights completed" << std::endl; - - std::cout << "LoRALinear utility tests passed!" << std::endl; -} - -// ============================================================================ -// Test 8: Save/Load LoRA Weights -// ============================================================================ -void test_lora_save_load_weights() { - std::cout << "\n=== Test 8: Save/Load LoRA Weights ===" << std::endl; - - // Create a LoRALinear - LoRAConfig config; - config.rank = 4; - config.alpha = 8.0f; - - int64_t in_features = 32; - int64_t out_features = 64; - - auto linear = std::make_shared(in_features, out_features, /*bias=*/true); - auto lora_linear = std::make_shared(linear, config); - - // Get references to lora_A and lora_B - auto lora_A = lora_linear->parameter(LoRALinear::kParamLoraAName); - auto lora_B = lora_linear->parameter(LoRALinear::kParamLoraBName); - - // Set specific values to lora_A and lora_B - // lora_A: [rank, in_features] = [4, 32] - // lora_B: [out_features, rank] = [64, 4] - lora_A->EigenMatrix().setZero(); - lora_B->EigenMatrix().setZero(); - - // Set lora_A to all 1s - for (int64_t i = 0; i < lora_A->Dims()[0]; ++i) { - for (int64_t j = 0; j < lora_A->Dims()[1]; ++j) { lora_A->EigenMatrix()(i, j) = 1.0f; } - } - - // Set lora_B to all 2s - for (int64_t i = 0; i < lora_B->Dims()[0]; ++i) { - for (int64_t j = 0; j < lora_B->Dims()[1]; ++j) { lora_B->EigenMatrix()(i, j) = 2.0f; } - } - - // Record original sums - float lora_A_sum_orig = lora_A->EigenMatrix().sum(); - float lora_B_sum_orig = lora_B->EigenMatrix().sum(); - // lora_A: all 1.0f, shape [rank, in_features] = [4, 32] - // lora_B: all 2.0f, shape [out_features, rank] = [64, 4] - float expected_lora_A_sum = config.rank * in_features * 1.0f; // 4 * 32 * 1 = 128 - float expected_lora_B_sum = out_features * config.rank * 2.0f; // 64 * 4 * 2 = 512 - std::cout << "Original lora_A sum: " << lora_A_sum_orig << " (expected: " << expected_lora_A_sum << ")" - << std::endl; - std::cout << "Original lora_B sum: " << lora_B_sum_orig << " (expected: " << expected_lora_B_sum << ")" - << std::endl; - - CHECK_EQ(lora_A_sum_orig, expected_lora_A_sum); - CHECK_EQ(lora_B_sum_orig, expected_lora_B_sum); - - // Save to file - const std::string test_path = "/tmp/test_lora_save_load.bin"; - SaveLoRAWeights(lora_linear, test_path); - std::cout << "Saved LoRA weights to: " << test_path << std::endl; - - // Modify weights to different values - lora_A->EigenMatrix().setConstant(9.0f); - lora_B->EigenMatrix().setConstant(9.0f); - - float lora_A_sum_modified = lora_A->EigenMatrix().sum(); - float lora_B_sum_modified = lora_B->EigenMatrix().sum(); - std::cout << "Modified lora_A sum: " << lora_A_sum_modified << std::endl; - std::cout << "Modified lora_B sum: " << lora_B_sum_modified << std::endl; - - CHECK_NE(lora_A_sum_modified, lora_A_sum_orig); - CHECK_NE(lora_B_sum_modified, lora_B_sum_orig); - - // Load from file - LoadLoRAWeights(lora_linear, test_path); - std::cout << "Loaded LoRA weights from: " << test_path << std::endl; - - // Verify weights are restored - float lora_A_sum_loaded = lora_A->EigenMatrix().sum(); - float lora_B_sum_loaded = lora_B->EigenMatrix().sum(); - std::cout << "Loaded lora_A sum: " << lora_A_sum_loaded << std::endl; - std::cout << "Loaded lora_B sum: " << lora_B_sum_loaded << std::endl; - - CHECK_EQ(lora_A_sum_loaded, lora_A_sum_orig) << "lora_A should be restored to original values"; - CHECK_EQ(lora_B_sum_loaded, lora_B_sum_orig) << "lora_B should be restored to original values"; - - // Also verify individual elements - for (int64_t i = 0; i < lora_A->Dims()[0]; ++i) { - for (int64_t j = 0; j < lora_A->Dims()[1]; ++j) { - CHECK_EQ(lora_A->EigenMatrix()(i, j), 1.0f) << "lora_A element mismatch at (" << i << "," << j << ")"; - } - } - - for (int64_t i = 0; i < lora_B->Dims()[0]; ++i) { - for (int64_t j = 0; j < lora_B->Dims()[1]; ++j) { - CHECK_EQ(lora_B->EigenMatrix()(i, j), 2.0f) << "lora_B element mismatch at (" << i << "," << j << ")"; - } - } - - std::cout << "All elements verified correctly!" << std::endl; - - // Cleanup - std::remove(test_path.c_str()); - std::cout << "Test 8: Save/Load LoRA Weights passed!" << std::endl; -} - -// ============================================================================ -// Test 8: ParseLoRATargetModules parsing -// ============================================================================ -void test_set_target_modules() { - std::cout << "\n=== Test 8: ParseLoRATargetModules Parsing ===" << std::endl; - - // Test single target - auto modules = ParseLoRATargetModules("c_attn"); - CHECK_EQ(modules.size(), 1); - CHECK(modules.count("c_attn")); - std::cout << "Single target: OK" << std::endl; - - // Test multiple targets - modules = ParseLoRATargetModules("c_attn,c_proj,c_fc"); - CHECK_EQ(modules.size(), 3); - CHECK(modules.count("c_attn")); - CHECK(modules.count("c_proj")); - CHECK(modules.count("c_fc")); - std::cout << "Multiple targets: OK" << std::endl; - - // Test with spaces - modules = ParseLoRATargetModules("c_attn, c_proj , c_fc"); - CHECK_EQ(modules.size(), 3); - std::cout << "Targets with spaces: OK" << std::endl; - - // Test empty/whitespace - modules = ParseLoRATargetModules("c_attn,,c_proj"); - CHECK_EQ(modules.size(), 2); - std::cout << "Empty entries ignored: OK" << std::endl; - - std::cout << "ParseLoRATargetModules tests passed!" << std::endl; -} - -// ============================================================================ -// Test 9: ShouldApplyLoRA edge cases (attn.c_proj vs mlp.c_proj) -// ============================================================================ -void test_should_apply_lora_edge_cases() { - std::cout << "\n=== Test 9: ShouldApplyLoRA Edge Cases ===" << std::endl; - - // Test: Only attn.c_proj in target_modules - { - LoRAConfig config{8, 16.0f, 0.0f, ParseLoRATargetModules("c_attn,attn.c_proj")}; - - // Should match attention paths - CHECK(config.ShouldApplyLoRA("attn.c_proj")); - CHECK(config.ShouldApplyLoRA("transformer.h.0.attn.c_proj")); - CHECK(config.ShouldApplyLoRA("transformer.h.1.attn.c_proj")); - - // Should NOT match mlp paths - CHECK(!config.ShouldApplyLoRA("mlp.c_proj")); - CHECK(!config.ShouldApplyLoRA("transformer.h.0.mlp.c_proj")); - std::cout << "attn.c_proj only: OK" << std::endl; - } - - // Test: Only mlp.c_proj in target_modules - { - LoRAConfig config{8, 16.0f, 0.0f, ParseLoRATargetModules("c_attn,mlp.c_proj")}; - - // Should NOT match attention paths - CHECK(!config.ShouldApplyLoRA("attn.c_proj")); - CHECK(!config.ShouldApplyLoRA("transformer.h.0.attn.c_proj")); - - // Should match mlp paths - CHECK(config.ShouldApplyLoRA("mlp.c_proj")); - CHECK(config.ShouldApplyLoRA("transformer.h.0.mlp.c_proj")); - std::cout << "mlp.c_proj only: OK" << std::endl; - } - - // Test: Generic c_proj in target_modules (matches both) - { - LoRAConfig config{8, 16.0f, 0.0f, ParseLoRATargetModules("c_attn,c_proj")}; - - // Should match both attention and mlp - CHECK(config.ShouldApplyLoRA("transformer.h.0.attn.c_proj")); - CHECK(config.ShouldApplyLoRA("transformer.h.0.mlp.c_proj")); - std::cout << "Generic c_proj (matches both): OK" << std::endl; - } - - // Test: All targets - { - LoRAConfig config{8, 16.0f, 0.0f, ParseLoRATargetModules("c_attn,attn.c_proj,c_fc,c_fc2,mlp.c_proj")}; - - CHECK(config.ShouldApplyLoRA("transformer.h.0.attn.c_attn")); - CHECK(config.ShouldApplyLoRA("transformer.h.0.attn.c_proj")); - CHECK(config.ShouldApplyLoRA("transformer.h.0.mlp.c_fc")); - CHECK(config.ShouldApplyLoRA("transformer.h.0.mlp.c_fc2")); - CHECK(config.ShouldApplyLoRA("transformer.h.0.mlp.c_proj")); - std::cout << "All targets: OK" << std::endl; - } - - std::cout << "ShouldApplyLoRA edge cases tests passed!" << std::endl; -} - -// ============================================================================ -// Test 10: ReplaceModuleByPath -// ============================================================================ -void test_replace_module_by_path() { - std::cout << "\n=== Test 10: ReplaceModuleByPath ===" << std::endl; - - // Test ReplaceModuleByPath by wrapping a Linear with LoRA directly - // This tests the core functionality that ReplaceModuleByPath provides - - // Create base Linear - auto base_linear = std::make_shared(64, 128, /*bias=*/true); - - // Configure LoRA - LoRAConfig lora_config; - lora_config.rank = 4; - lora_config.alpha = 8.0f; - - // Wrap with LoRA - this is what ReplaceModuleByPath does internally - auto lora_linear = std::make_shared(base_linear, lora_config); - - // Verify LoRA was applied correctly - auto params = lora_linear->LoRAParameters(); - CHECK_EQ(params.size(), 2) << "LoRALinear should have 2 trainable parameters (lora_A and lora_B)"; - std::cout << "LoRALinear has " << params.size() << " trainable parameters" << std::endl; - - // Verify parameter shapes - auto lora_a = params[0]; - auto lora_b = params[1]; - CHECK_EQ(lora_a->Dims()[0], lora_config.rank); // rank x in_features - CHECK_EQ(lora_a->Dims()[1], 64); - CHECK_EQ(lora_b->Dims()[0], 128); // out_features x rank - CHECK_EQ(lora_b->Dims()[1], lora_config.rank); - std::cout << "LoRA parameter shapes: OK" << std::endl; - - // Verify base parameters are frozen (use named parameters instead of index) - auto weight = lora_linear->parameter(nn::Linear::kParamWeightName); - auto lora_a_param = lora_linear->parameter(LoRALinear::kParamLoraAName); - auto lora_b_param = lora_linear->parameter(LoRALinear::kParamLoraBName); - CHECK(weight != nullptr); - CHECK(lora_a_param != nullptr); - CHECK(lora_b_param != nullptr); - CHECK(!weight->requires_grad()); // weight is frozen - CHECK(lora_a_param->requires_grad()); // lora_A is trainable - CHECK(lora_b_param->requires_grad()); // lora_B is trainable - std::cout << "Base weight frozen, LoRA params trainable: OK" << std::endl; - - std::cout << "ReplaceModuleByPath tests passed!" << std::endl; -} - -// ============================================================================ -// Test 11: FreezeBaseModel / UnfreezeModel -// ============================================================================ -void test_freeze_unfreeze() { - std::cout << "\n=== Test 11: FreezeBaseModel / UnfreezeModel ===" << std::endl; - - // Test with LoRALinear directly - it has both base and LoRA params - LoRAConfig lora_config; - lora_config.rank = 4; - lora_config.alpha = 8.0f; - - auto linear = std::make_shared(64, 128, /*bias=*/true); - auto lora_linear = std::make_shared(linear, lora_config); - - // Get all parameters from LoRALinear (includes base + LoRA) - auto all_params = lora_linear->Parameters(); - - // Initially only LoRA params should be trainable (base weights are frozen by constructor) - int64_t total_params = 0; - for (const auto &p : all_params) { - if (p->requires_grad()) { - total_params += p->NumElements(); - } - } - // Expected: only LoRA params (lora_A + lora_B) = 4*64 + 128*4 = 256 + 512 = 768 - // Note: LoRALinear freezes base weights in constructor by design - int64_t expected_total = lora_config.rank * 64 + 128 * lora_config.rank; - CHECK_EQ(total_params, expected_total); - std::cout << "Initial trainable params: " << total_params << " (expected: " << expected_total << ")" << std::endl; - - // FreezeBaseModel on LoRALinear - FreezeBaseModel(lora_linear); - - // After freeze, only LoRA params should be trainable - int64_t after_freeze = 0; - for (const auto &p : all_params) { - if (p->requires_grad()) { - after_freeze += p->NumElements(); - } - } - // LoRA params: A (rank x in) + B (out x rank) = 4*64 + 128*4 = 256 + 512 = 768 - int64_t expected_lora = lora_config.rank * 64 + 128 * lora_config.rank; - CHECK_EQ(after_freeze, expected_lora); - std::cout << "After freeze trainable: " << after_freeze << " (expected: " << expected_lora << ")" << std::endl; - - // Unfreeze all - UnfreezeModel(lora_linear); - int64_t after_unfreeze = 0; - for (const auto &p : all_params) { - if (p->requires_grad()) { - after_unfreeze += p->NumElements(); - } - } - // Should be back to all params trainable (base + LoRA) - int64_t expected_after_unfreeze = 64 * 128 + 128 + lora_config.rank * 64 + 128 * lora_config.rank; - CHECK_EQ(after_unfreeze, expected_after_unfreeze); - std::cout << "After unfreeze trainable: " << after_unfreeze << std::endl; - - std::cout << "FreezeBaseModel / UnfreezeModel tests passed!" << std::endl; -} - -// ============================================================================ -// Test 12: LoRAStateDict -// ============================================================================ -void test_lora_state_dict() { - std::cout << "\n=== Test 12: LoRAStateDict ===" << std::endl; - - // Test with a single LoRALinear - LoRAConfig lora_config; - lora_config.rank = 4; - lora_config.alpha = 8.0f; - - auto linear = std::make_shared(64, 128, /*bias=*/true); - auto lora_linear = std::make_shared(linear, lora_config); - - // Get state dict - it contains all parameters with their names - auto state_dict = lora_linear->StateDict(); - - // Check that we have all expected parameters - CHECK(state_dict.count("weight")) << "Should have weight parameter"; - CHECK(state_dict.count("bias")) << "Should have bias parameter"; - CHECK(state_dict.count("lora_A")) << "Should have lora_A parameter"; - CHECK(state_dict.count("lora_B")) << "Should have lora_B parameter"; - std::cout << "State dict contains: weight, bias, lora_A, lora_B" << std::endl; - - // Verify LoRA parameters exist and are trainable - CHECK(state_dict.at("lora_A")->requires_grad()) << "lora_A should be trainable"; - CHECK(state_dict.at("lora_B")->requires_grad()) << "lora_B should be trainable"; - CHECK(!state_dict.at("weight")->requires_grad()) << "weight should be frozen"; - std::cout << "LoRA parameters are trainable, base weight is frozen: OK" << std::endl; - - // Verify shapes - CHECK_EQ(state_dict.at("lora_A")->Dims()[0], lora_config.rank); - CHECK_EQ(state_dict.at("lora_A")->Dims()[1], 64); - CHECK_EQ(state_dict.at("lora_B")->Dims()[0], 128); - CHECK_EQ(state_dict.at("lora_B")->Dims()[1], lora_config.rank); - std::cout << "LoRA parameter shapes: OK" << std::endl; - - std::cout << "LoRAStateDict tests passed!" << std::endl; -} - -// ============================================================================ -// Test 13: GetLoRAModel simplified API -// ============================================================================ -void test_get_lora_model() { - std::cout << "\n=== Test 13: GetLoRAModel Simplified API ===" << std::endl; - - // Test GetLoRAModel with a simple Linear layer - // We'll wrap it with LoRA directly and verify the wrapper works - - // Create base Linear - auto base_linear = std::make_shared(64, 128, /*bias=*/true); - - // Configure LoRA - LoRAConfig config{4, 8.0f, 0.0f, ParseLoRATargetModules("Linear")}; - - // Use GetLoRAModel with the linear as the "model" - // Note: GetLoRAModel returns the modified model (in-place injection) - auto model = GetLoRAModel(base_linear, config); - - CHECK(model != nullptr); - std::cout << "GetLoRAModel returned valid pointer" << std::endl; - - // Test that LoRA was applied - check trainable parameters - auto lora_params = GetLoRAParameters(model); - // GetLoRAParameters returns vector>, size() is the count of tensors - // LoRALinear has 2 trainable tensors: lora_A (rank x in) and lora_B (out x rank) - CHECK_EQ(lora_params.size(), 2); - std::cout << "Trainable parameter tensors: " << lora_params.size() << " (expected: 2)" << std::endl; - - // Also verify total element count - int64_t total_elements = 0; - for (const auto &t : lora_params) { total_elements += t->NumElements(); } - int64_t expected_elements = config.rank * 64 + 128 * config.rank; // 768 - CHECK_EQ(total_elements, expected_elements); - std::cout << "Total trainable elements: " << total_elements << " (expected: " << expected_elements << ")" - << std::endl; - - // Test PrintSummary - std::cout << "\nLoRA Model Summary:" << std::endl; - PrintLoRASummary(model); - - // Test Merge/Unmerge using utility functions - MergeLoRAWeights(model); - // Verify LoRA params frozen after merge - auto *lora_mod = dynamic_cast(model.get()); - CHECK(lora_mod != nullptr); - CHECK(!lora_mod->LoRAParameters()[0]->requires_grad()) << "lora_A should be frozen after merge"; - CHECK(!lora_mod->LoRAParameters()[1]->requires_grad()) << "lora_B should be frozen after merge"; - std::cout << "Merge: OK (LoRA params frozen)" << std::endl; - - UnmergeLoRAWeights(model); - CHECK(lora_mod->LoRAParameters()[0]->requires_grad()) << "lora_A should be trainable after unmerge"; - CHECK(lora_mod->LoRAParameters()[1]->requires_grad()) << "lora_B should be trainable after unmerge"; - std::cout << "Unmerge: OK (LoRA params trainable)" << std::endl; - - std::cout << "GetLoRAModel in-place injection tests passed!" << std::endl; -} - -// ============================================================================ -// Test 14: MergeAndUnload -// ============================================================================ -void test_merge_and_unload() { - std::cout << "\n=== Test 14: MergeAndUnload ===" << std::endl; - - // Create base Linear and apply LoRA - auto base_linear = std::make_shared(64, 128, /*bias=*/true); - LoRAConfig config{4, 8.0f, 0.0f, ParseLoRATargetModules("Linear")}; - auto model = GetLoRAModel(base_linear, config); - - // Verify it's a LoRA module - CHECK(dynamic_cast(model.get()) != nullptr) << "Should be LoRALinear"; - - // Create input and get output before merge_and_unload - auto input = std::make_shared(std::vector{2, 5, 64}, DataType::kFLOAT32); - input->EigenMatrix().setRandom(); - auto output_before = (*model)({input})[0]; - float output_before_sum = output_before->EigenMatrix().sum(); - std::cout << "Output sum before MergeAndUnload: " << output_before_sum << std::endl; - - // MergeAndUnload - auto unloaded_model = MergeAndUnload(model); - CHECK(unloaded_model != nullptr) << "MergeAndUnload should return valid model"; - - // Verify it's no longer a LoRA module - CHECK(dynamic_cast(unloaded_model.get()) == nullptr) << "Should be plain Linear after MergeAndUnload"; - std::cout << "Model is no longer LoRALinear: OK" << std::endl; - - // Verify no LoRA parameters exist (check state dict) - auto state_dict = unloaded_model->StateDict(); - for (const auto &[name, param] : state_dict) { - CHECK(name.find("lora_A") == std::string::npos && name.find("lora_B") == std::string::npos) - << "Should not have LoRA parameters after MergeAndUnload, found: " << name; - } - std::cout << "No LoRA parameters in state dict: OK" << std::endl; - - // Verify forward output matches (merged output should equal unmerged LoRA output) - auto output_after = (*unloaded_model)({input})[0]; - float output_after_sum = output_after->EigenMatrix().sum(); - std::cout << "Output sum after MergeAndUnload: " << output_after_sum << std::endl; - std::cout << "Diff: " << std::abs(output_before_sum - output_after_sum) << std::endl; - CHECK(std::abs(output_before_sum - output_after_sum) < 1e-3) << "Output should match after MergeAndUnload"; - - // Verify all parameters have requires_grad = true (unfrozen) - for (const auto ¶m : unloaded_model->Parameters()) { - CHECK(param->requires_grad()) << "All parameters should be trainable after MergeAndUnload"; - } - std::cout << "All parameters trainable: OK" << std::endl; - - std::cout << "MergeAndUnload tests passed!" << std::endl; -} - -int main(int argc, char **argv) { - google::InitGoogleLogging(argv[0]); - FLAGS_logtostderr = 1; - - // Initialize parallel settings (required for some tensor operations) - // Parameters: nthread_per_process, tensor_parallel_size, sequence_parallel_enabled, - // pipeline_parallel_size, virtual_pipeline_parallel_size - nn::parallel::global::InitAllEnv(1, 1, false, 1, 1); - - std::cout << "========================================" << std::endl; - std::cout << " LoRA Module Unit Tests " << std::endl; - std::cout << "========================================" << std::endl; - - test_lora_config(); - test_lora_linear_init(); - test_lora_linear_forward(); - test_lora_linear_merge(); - test_lora_utils(); - test_lora_from_linear(); - test_lora_model_wrapper(); - test_lora_save_load_weights(); - test_set_target_modules(); - test_should_apply_lora_edge_cases(); - test_replace_module_by_path(); - test_freeze_unfreeze(); - test_lora_state_dict(); - test_get_lora_model(); - test_merge_and_unload(); - - std::cout << "\n========================================" << std::endl; - std::cout << " All LoRA Tests Passed! " << std::endl; - std::cout << "========================================" << std::endl; - - return 0; -} diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index dae5f972..39a44f27 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -22,5 +22,3 @@ add_subdirectory(lora) # Hook tests add_subdirectory(hook) -# Slow label tests -add_subdirectory(slow) diff --git a/tests/autograd/CMakeLists.txt b/tests/autograd/CMakeLists.txt index 08aa1a96..d321f629 100644 --- a/tests/autograd/CMakeLists.txt +++ b/tests/autograd/CMakeLists.txt @@ -1,54 +1,11 @@ # ============================================================================ # Autograd tests # ============================================================================ -# 重构版本:使用单一 test binary 聚合测试源码,减少 target 膨胀 -# ============================================================================ -# ----------------------------------------------------------------------------- -# Autograd tests (single binary) -# ----------------------------------------------------------------------------- set(AUTOGRAD_TEST_DIR "${CMAKE_CURRENT_SOURCE_DIR}") -set(AUTOGRAD_TEST_SOURCES - ${AUTOGRAD_TEST_DIR}/test_autograd_elementwise_forward.cc - ${AUTOGRAD_TEST_DIR}/test_autograd_elementwise_backward.cc - ${AUTOGRAD_TEST_DIR}/test_autograd_matmul_forward.cc - ${AUTOGRAD_TEST_DIR}/test_autograd_matmul_backward.cc - ${AUTOGRAD_TEST_DIR}/test_autograd_reduction_forward.cc - ${AUTOGRAD_TEST_DIR}/test_autograd_reduction_backward.cc - ${AUTOGRAD_TEST_DIR}/test_autograd_linear_forward.cc - ${AUTOGRAD_TEST_DIR}/test_autograd_linear_backward.cc - ${AUTOGRAD_TEST_DIR}/test_autograd_softmax_forward.cc - ${AUTOGRAD_TEST_DIR}/test_autograd_softmax_backward.cc - ${AUTOGRAD_TEST_DIR}/test_autograd_transform_forward.cc - ${AUTOGRAD_TEST_DIR}/test_autograd_transform_backward.cc - ${AUTOGRAD_TEST_DIR}/test_autograd_normalization_forward.cc - ${AUTOGRAD_TEST_DIR}/test_autograd_normalization_backward.cc - ${AUTOGRAD_TEST_DIR}/test_autograd.cc -) -add_executable(test_autograd ${AUTOGRAD_TEST_SOURCES}) -target_compile_options(test_autograd PRIVATE -Wno-error) -link_infini_train_exe(test_autograd) -target_link_libraries(test_autograd PRIVATE GTest::gtest GTest::gtest_main) -target_include_directories(test_autograd - PRIVATE - ${CMAKE_CURRENT_SOURCE_DIR}/../common - ${glog_SOURCE_DIR}/src -) +file(GLOB AUTOGRAD_SOURCES ${AUTOGRAD_TEST_DIR}/test_autograd*.cc) -include(GoogleTest) -gtest_discover_tests(test_autograd - EXTRA_ARGS --gtest_output=xml:%T.xml - TEST_FILTER "-AutogradCudaTest.*:AutogradDistributedTest.*" - PROPERTIES LABELS "cpu" -) -gtest_discover_tests(test_autograd - EXTRA_ARGS --gtest_output=xml:%T.xml - TEST_FILTER "AutogradCudaTest.*" - PROPERTIES LABELS "cuda" -) -gtest_discover_tests(test_autograd - EXTRA_ARGS --gtest_output=xml:%T.xml - TEST_FILTER "AutogradDistributedTest.*" - PROPERTIES LABELS "distributed" +infini_train_add_test_suite(test_autograd + SOURCES ${AUTOGRAD_SOURCES} ) diff --git a/tests/autograd/test_autograd.cc b/tests/autograd/test_autograd.cc index 5d189599..6401cc93 100644 --- a/tests/autograd/test_autograd.cc +++ b/tests/autograd/test_autograd.cc @@ -1,551 +1,376 @@ #include -#include #include +#include -#include "infini_train/include/tensor.h" -#include "infini_train/include/nn/parallel/global.h" -#include "infini_train/include/autograd/transform.h" +#include "infini_train/include/autograd/activations.h" #include "infini_train/include/autograd/elementwise.h" +#include "infini_train/include/autograd/function.h" +#include "infini_train/include/autograd/linear.h" #include "infini_train/include/autograd/matmul.h" -#include "infini_train/include/autograd/reduction.h" -#include "infini_train/include/autograd/activations.h" -#include "infini_train/include/autograd/softmax.h" +#include "infini_train/include/autograd/misc.h" #include "infini_train/include/autograd/normalization.h" -#include "infini_train/include/autograd/linear.h" #include "infini_train/include/autograd/outer.h" -#include "infini_train/include/autograd/misc.h" +#include "infini_train/include/autograd/reduction.h" +#include "infini_train/include/autograd/softmax.h" +#include "infini_train/include/autograd/transform.h" +#include "infini_train/include/tensor.h" #include "test_utils.h" using namespace infini_train; -class AutogradTestBase : public ::testing::Test { -protected: - static void SetUpTestSuite() { - nn::parallel::global::GlobalEnv::Instance().Init(1, 1, false, 1, 1); - } - - std::shared_ptr createTensor(const std::vector& shape, float value = 0.0f) { - auto tensor = std::make_shared(shape, DataType::kFLOAT32, - Device(Device::DeviceType::kCPU, 0)); - tensor->set_requires_grad(true); - auto data = static_cast(tensor->DataPtr()); - size_t size = 1; - for (auto dim : shape) size *= dim; - for (size_t i = 0; i < size; ++i) { - data[i] = value + static_cast(i); - } - return tensor; - } -}; - -class AutogradForwardTest : public AutogradTestBase {}; -class AutogradBackwardTest : public AutogradTestBase {}; -class AutogradCudaTest : public AutogradTestBase {}; -class AutogradDistributedTest : public AutogradTestBase {}; - -TEST_F(AutogradForwardTest, AddForward) { +// ============================================================================ +// Forward / Backward — CPU + CUDA +// ============================================================================ + +class AutogradForwardTest : public infini_train::test::AutogradTestBaseP {}; +class AutogradBackwardTest : public infini_train::test::AutogradTestBaseP {}; + +TEST_P(AutogradForwardTest, AddForward) { auto a = createTensor({2, 3}, 1.0f); auto b = createTensor({2, 3}, 2.0f); - auto add_fn = std::make_shared(); - auto result = add_fn->Apply({a, b}); + auto result = std::make_shared()->Apply({a, b}); EXPECT_EQ(result.size(), 1); EXPECT_EQ(result[0]->Dims(), (std::vector{2, 3})); } -TEST_F(AutogradBackwardTest, AddBackward) { - auto a = createTensor({2, 3}, 1.0f); - auto b = createTensor({2, 3}, 2.0f); - auto add_fn = std::make_shared(); - auto result = add_fn->Apply({a, b}); - auto grad = createTensor({2, 3}, 1.0f); - auto grad_inputs = add_fn->Backward({grad}); - EXPECT_EQ(grad_inputs.size(), 2); -} - -TEST_F(AutogradForwardTest, SubForward) { +TEST_P(AutogradForwardTest, SubForward) { auto a = createTensor({2, 3}, 5.0f); auto b = createTensor({2, 3}, 3.0f); - auto sub_fn = std::make_shared(); - auto result = sub_fn->Apply({a, b}); + auto result = std::make_shared()->Apply({a, b}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradForwardTest, MulForward) { +TEST_P(AutogradForwardTest, MulForward) { auto a = createTensor({2, 3}, 2.0f); auto b = createTensor({2, 3}, 3.0f); - auto mul_fn = std::make_shared(); - auto result = mul_fn->Apply({a, b}); + auto result = std::make_shared()->Apply({a, b}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradBackwardTest, MulBackward) { - auto a = createTensor({2, 3}, 2.0f); - auto b = createTensor({2, 3}, 3.0f); - auto mul_fn = std::make_shared(); - auto result = mul_fn->Apply({a, b}); - auto grad = createTensor({2, 3}, 1.0f); - auto grad_inputs = mul_fn->Backward({grad}); - EXPECT_EQ(grad_inputs.size(), 2); -} - -TEST_F(AutogradForwardTest, DivForward) { +TEST_P(AutogradForwardTest, DivForward) { auto a = createTensor({2, 3}, 6.0f); auto b = createTensor({2, 3}, 2.0f); - auto div_fn = std::make_shared(); - auto result = div_fn->Apply({a, b}); + auto result = std::make_shared()->Apply({a, b}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradForwardTest, NegForward) { +TEST_P(AutogradForwardTest, NegForward) { auto a = createTensor({2, 3}, 5.0f); - auto neg_fn = std::make_shared(); - auto result = neg_fn->Apply({a}); + auto result = std::make_shared()->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradForwardTest, SinForward) { +TEST_P(AutogradForwardTest, SinForward) { auto a = createTensor({2, 3}, 0.0f); - auto sin_fn = std::make_shared(); - auto result = sin_fn->Apply({a}); + auto result = std::make_shared()->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradForwardTest, CosForward) { +TEST_P(AutogradForwardTest, CosForward) { auto a = createTensor({2, 3}, 0.0f); - auto cos_fn = std::make_shared(); - auto result = cos_fn->Apply({a}); + auto result = std::make_shared()->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradForwardTest, TanhForward) { +TEST_P(AutogradForwardTest, TanhForward) { auto a = createTensor({2, 3}, 0.0f); - auto tanh_fn = std::make_shared(); - auto result = tanh_fn->Apply({a}); + auto result = std::make_shared()->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradForwardTest, ExpForward) { +TEST_P(AutogradForwardTest, ExpForward) { auto a = createTensor({2, 3}, 1.0f); - auto exp_fn = std::make_shared(); - auto result = exp_fn->Apply({a}); + auto result = std::make_shared()->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradForwardTest, LogForward) { +TEST_P(AutogradForwardTest, LogForward) { auto a = createTensor({2, 3}, 2.0f); - auto log_fn = std::make_shared(); - auto result = log_fn->Apply({a}); + auto result = std::make_shared()->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradForwardTest, ReciprocalForward) { +TEST_P(AutogradForwardTest, ReciprocalForward) { auto a = createTensor({2, 3}, 2.0f); - auto reciprocal_fn = std::make_shared(); - auto result = reciprocal_fn->Apply({a}); + auto result = std::make_shared()->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradForwardTest, PowForward) { +TEST_P(AutogradForwardTest, PowForward) { auto a = createTensor({2, 3}, 2.0f); - auto pow_fn = std::make_shared(2.0f); - auto result = pow_fn->Apply({a}); + auto result = std::make_shared(2.0f)->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradForwardTest, RsqrtForward) { +TEST_P(AutogradForwardTest, RsqrtForward) { auto a = createTensor({2, 3}, 4.0f); - auto rsqrt_fn = std::make_shared(); - auto result = rsqrt_fn->Apply({a}); + auto result = std::make_shared()->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradForwardTest, SigmoidForward) { +TEST_P(AutogradForwardTest, SigmoidForward) { auto a = createTensor({2, 3}, 0.0f); - auto sigmoid_fn = std::make_shared(); - auto result = sigmoid_fn->Apply({a}); + auto result = std::make_shared()->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradForwardTest, MatmulForward) { +TEST_P(AutogradForwardTest, MatmulForward) { auto a = createTensor({2, 3}, 1.0f); auto b = createTensor({3, 4}, 1.0f); - auto matmul_fn = std::make_shared(); - auto result = matmul_fn->Apply({a, b}); + auto result = std::make_shared()->Apply({a, b}); EXPECT_EQ(result.size(), 1); EXPECT_EQ(result[0]->Dims(), (std::vector{2, 4})); } -TEST_F(AutogradForwardTest, SumForward) { +TEST_P(AutogradForwardTest, SumForward) { auto a = createTensor({2, 3}, 1.0f); - auto sum_fn = std::make_shared(1, false); - auto result = sum_fn->Apply({a}); + auto result = std::make_shared(1, false)->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradForwardTest, MeanForward) { +TEST_P(AutogradForwardTest, MeanForward) { auto a = createTensor({2, 3}, 1.0f); - auto mean_fn = std::make_shared(1, false); - auto result = mean_fn->Apply({a}); + auto result = std::make_shared(1, false)->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradForwardTest, MaxForward) { +TEST_P(AutogradForwardTest, MaxForward) { auto a = createTensor({2, 3}, 1.0f); - auto max_fn = std::make_shared(1, false); - auto result = max_fn->Apply({a}); + auto result = std::make_shared(1, false)->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradForwardTest, MinForward) { +TEST_P(AutogradForwardTest, MinForward) { auto a = createTensor({2, 3}, 1.0f); - auto min_fn = std::make_shared(1, false); - auto result = min_fn->Apply({a}); + auto result = std::make_shared(1, false)->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradForwardTest, SoftmaxForward) { +TEST_P(AutogradForwardTest, SoftmaxForward) { auto a = createTensor({2, 3}, 1.0f); - auto softmax_fn = std::make_shared(1); - auto result = softmax_fn->Apply({a}); + auto result = std::make_shared(1)->Apply({a}); EXPECT_EQ(result.size(), 1); EXPECT_EQ(result[0]->Dims(), (std::vector{2, 3})); } -TEST_F(AutogradForwardTest, LayerNormForward) { +TEST_P(AutogradForwardTest, LayerNormForward) { auto a = createTensor({2, 3, 4}, 1.0f); auto weight = createTensor({4}, 1.0f); auto bias = createTensor({4}, 0.0f); - auto layernorm_fn = std::make_shared(1e-5f); - auto result = layernorm_fn->Apply({a, weight, bias}); + auto result = std::make_shared(1e-5f)->Apply({a, weight, bias}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradForwardTest, LinearForward) { +TEST_P(AutogradForwardTest, LinearForward) { auto input = createTensor({2, 3}, 1.0f); auto weight = createTensor({4, 3}, 1.0f); auto bias = createTensor({4}, 0.0f); - auto linear_fn = std::make_shared(); - auto result = linear_fn->Apply({input, weight, bias}); + auto result = std::make_shared()->Apply({input, weight, bias}); EXPECT_EQ(result.size(), 1); EXPECT_EQ(result[0]->Dims(), (std::vector{2, 4})); } -TEST_F(AutogradForwardTest, TransposeForward) { +TEST_P(AutogradForwardTest, TransposeForward) { auto a = createTensor({2, 3}, 1.0f); - auto transpose_fn = std::make_shared(0, 1); - auto result = transpose_fn->Apply({a}); + auto result = std::make_shared(0, 1)->Apply({a}); EXPECT_EQ(result.size(), 1); EXPECT_EQ(result[0]->Dims(), (std::vector{3, 2})); } -TEST_F(AutogradForwardTest, SliceForward) { +TEST_P(AutogradForwardTest, SliceForward) { auto a = createTensor({4, 4}, 1.0f); - auto slice_fn = std::make_shared( - std::vector{1, 1}, - std::vector{3, 3}, - std::vector{1, 1}); - auto result = slice_fn->Apply({a}); + auto result = std::make_shared(std::vector{1, 1}, std::vector{3, 3}, + std::vector{1, 1}) + ->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradForwardTest, SplitForward) { +TEST_P(AutogradForwardTest, SplitForward) { auto a = createTensor({4, 4}, 1.0f); - auto split_fn = std::make_shared(2, 0); - auto result = split_fn->Apply({a}); + auto result = std::make_shared(2, 0)->Apply({a}); EXPECT_EQ(result.size(), 2); } -TEST_F(AutogradForwardTest, ConcatForward) { +TEST_P(AutogradForwardTest, ConcatForward) { auto a = createTensor({2, 2}, 1.0f); auto b = createTensor({2, 2}, 2.0f); - auto concat_fn = std::make_shared(0); - auto result = concat_fn->Apply({a, b}); + auto result = std::make_shared(0)->Apply({a, b}); EXPECT_EQ(result.size(), 1); EXPECT_EQ(result[0]->Dims(), (std::vector{4, 2})); } -TEST_F(AutogradForwardTest, StackForward) { +TEST_P(AutogradForwardTest, StackForward) { auto a = createTensor({2, 3}, 1.0f); auto b = createTensor({2, 3}, 2.0f); - auto stack_fn = std::make_shared(0); - auto result = stack_fn->Apply({a, b}); + auto result = std::make_shared(0)->Apply({a, b}); EXPECT_EQ(result.size(), 1); EXPECT_EQ(result[0]->Dims(), (std::vector{2, 2, 3})); } -TEST_F(AutogradForwardTest, TrilForward) { +TEST_P(AutogradForwardTest, TrilForward) { auto a = createTensor({3, 3}, 1.0f); - auto tril_fn = std::make_shared(0); - auto result = tril_fn->Apply({a}); + auto result = std::make_shared(0)->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradForwardTest, TriuForward) { +TEST_P(AutogradForwardTest, TriuForward) { auto a = createTensor({3, 3}, 1.0f); - auto triu_fn = std::make_shared(0); - auto result = triu_fn->Apply({a}); + auto result = std::make_shared(0)->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradForwardTest, OuterForward) { +TEST_P(AutogradForwardTest, OuterForward) { auto a = createTensor({3}, 1.0f); auto b = createTensor({4}, 1.0f); - auto outer_fn = std::make_shared(); - auto result = outer_fn->Apply({a, b}); + auto result = std::make_shared()->Apply({a, b}); EXPECT_EQ(result.size(), 1); EXPECT_EQ(result[0]->Dims(), (std::vector{3, 4})); } -TEST_F(AutogradForwardTest, AddScalarForward) { +TEST_P(AutogradForwardTest, AddScalarForward) { auto a = createTensor({2, 3}, 1.0f); - auto add_scalar_fn = std::make_shared(2.0f); - auto result = add_scalar_fn->Apply({a}); + auto result = std::make_shared(2.0f)->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradForwardTest, MulScalarForward) { +TEST_P(AutogradForwardTest, MulScalarForward) { auto a = createTensor({2, 3}, 2.0f); - auto mul_scalar_fn = std::make_shared(3.0f); - auto result = mul_scalar_fn->Apply({a}); + auto result = std::make_shared(3.0f)->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradForwardTest, LtForward) { +TEST_P(AutogradForwardTest, LtForward) { auto a = createTensor({2, 3}, 5.0f); auto b = createTensor({2, 3}, 3.0f); - auto lt_fn = std::make_shared(); - auto result = lt_fn->Apply({a, b}); + auto result = std::make_shared()->Apply({a, b}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradForwardTest, LeForward) { +TEST_P(AutogradForwardTest, LeForward) { auto a = createTensor({2, 3}, 3.0f); auto b = createTensor({2, 3}, 3.0f); - auto le_fn = std::make_shared(); - auto result = le_fn->Apply({a, b}); + auto result = std::make_shared()->Apply({a, b}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradForwardTest, GtForward) { +TEST_P(AutogradForwardTest, GtForward) { auto a = createTensor({2, 3}, 5.0f); auto b = createTensor({2, 3}, 3.0f); - auto gt_fn = std::make_shared(); - auto result = gt_fn->Apply({a, b}); + auto result = std::make_shared()->Apply({a, b}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradForwardTest, GeForward) { +TEST_P(AutogradForwardTest, GeForward) { auto a = createTensor({2, 3}, 3.0f); auto b = createTensor({2, 3}, 3.0f); - auto ge_fn = std::make_shared(); - auto result = ge_fn->Apply({a, b}); + auto result = std::make_shared()->Apply({a, b}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradForwardTest, EqualsForward) { +TEST_P(AutogradForwardTest, EqualsForward) { auto a = createTensor({2, 3}, 3.0f); auto b = createTensor({2, 3}, 3.0f); - auto eq_fn = std::make_shared(); - auto result = eq_fn->Apply({a, b}); + auto result = std::make_shared()->Apply({a, b}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradForwardTest, AndForward) { +TEST_P(AutogradForwardTest, AndForward) { auto a = createTensor({2, 3}, 1.0f); auto b = createTensor({2, 3}, 1.0f); - auto and_fn = std::make_shared(); - auto result = and_fn->Apply({a, b}); + auto result = std::make_shared()->Apply({a, b}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradForwardTest, OrForward) { +TEST_P(AutogradForwardTest, OrForward) { auto a = createTensor({2, 3}, 0.0f); auto b = createTensor({2, 3}, 1.0f); - auto or_fn = std::make_shared(); - auto result = or_fn->Apply({a, b}); + auto result = std::make_shared()->Apply({a, b}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradForwardTest, NoOpForward) { +TEST_P(AutogradForwardTest, NoOpForward) { auto a = createTensor({2, 3}, 1.0f); - auto noop_fn = std::make_shared(std::vector{2, 3}); - auto result = noop_fn->Apply({a}); + auto result = std::make_shared(std::vector{2, 3})->Apply({a}); EXPECT_EQ(result.size(), 1); EXPECT_EQ(result[0]->Dims(), (std::vector{2, 3})); } -// ============================================================================ -// CUDA Tests - require CUDA build and GPU -// ============================================================================ - -#ifdef USE_CUDA -TEST_F(AutogradCudaTest, AddForwardCUDA) { - REQUIRE_CUDA(); - auto a = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, - Device(Device::DeviceType::kCUDA, 0)); - a->set_requires_grad(true); - infini_train::test::FillConstantTensor(a, 1.0f); - - auto b = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, - Device(Device::DeviceType::kCUDA, 0)); - b->set_requires_grad(true); - infini_train::test::FillConstantTensor(b, 2.0f); - +TEST_P(AutogradBackwardTest, AddBackward) { + auto a = createTensor({2, 3}, 1.0f); + auto b = createTensor({2, 3}, 2.0f); auto add_fn = std::make_shared(); auto result = add_fn->Apply({a, b}); - EXPECT_EQ(result.size(), 1); - EXPECT_EQ(result[0]->Dims(), (std::vector{2, 3})); -} - -TEST_F(AutogradCudaTest, MatmulForwardCUDA) { - REQUIRE_CUDA(); - auto a = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, - Device(Device::DeviceType::kCUDA, 0)); - a->set_requires_grad(true); - infini_train::test::FillConstantTensor(a, 1.0f); - - auto b = std::make_shared(std::vector{3, 4}, DataType::kFLOAT32, - Device(Device::DeviceType::kCUDA, 0)); - b->set_requires_grad(true); - infini_train::test::FillConstantTensor(b, 1.0f); - - auto matmul_fn = std::make_shared(); - auto result = matmul_fn->Apply({a, b}); - EXPECT_EQ(result.size(), 1); - EXPECT_EQ(result[0]->Dims(), (std::vector{2, 4})); -} - -TEST_F(AutogradCudaTest, SumForwardCUDA) { - REQUIRE_CUDA(); - auto a = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, - Device(Device::DeviceType::kCUDA, 0)); - a->set_requires_grad(true); - infini_train::test::FillConstantTensor(a, 1.0f); - - auto sum_fn = std::make_shared(1, false); - auto result = sum_fn->Apply({a}); - EXPECT_EQ(result.size(), 1); + auto grad = createTensor({2, 3}, 1.0f); + auto grad_inputs = add_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 2); } -TEST_F(AutogradCudaTest, SoftmaxForwardCUDA) { - REQUIRE_CUDA(); - auto a = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, - Device(Device::DeviceType::kCUDA, 0)); - a->set_requires_grad(true); - infini_train::test::FillConstantTensor(a, 1.0f); - - auto softmax_fn = std::make_shared(1); - auto result = softmax_fn->Apply({a}); - EXPECT_EQ(result.size(), 1); - EXPECT_EQ(result[0]->Dims(), (std::vector{2, 3})); +TEST_P(AutogradBackwardTest, MulBackward) { + auto a = createTensor({2, 3}, 2.0f); + auto b = createTensor({2, 3}, 3.0f); + auto mul_fn = std::make_shared(); + auto result = mul_fn->Apply({a, b}); + auto grad = createTensor({2, 3}, 1.0f); + auto grad_inputs = mul_fn->Backward({grad}); + EXPECT_EQ(grad_inputs.size(), 2); } -TEST_F(AutogradCudaTest, LinearForwardCUDA) { - REQUIRE_CUDA(); - auto input = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, - Device(Device::DeviceType::kCUDA, 0)); - input->set_requires_grad(true); - infini_train::test::FillConstantTensor(input, 1.0f); - - auto weight = std::make_shared(std::vector{4, 3}, DataType::kFLOAT32, - Device(Device::DeviceType::kCUDA, 0)); - weight->set_requires_grad(true); - infini_train::test::FillConstantTensor(weight, 1.0f); +INFINI_TRAIN_REGISTER_TEST(AutogradForwardTest); - auto bias = std::make_shared(std::vector{4}, DataType::kFLOAT32, - Device(Device::DeviceType::kCUDA, 0)); - bias->set_requires_grad(true); - infini_train::test::FillConstantTensor(bias, 0.0f); - - auto linear_fn = std::make_shared(); - auto result = linear_fn->Apply({input, weight, bias}); - EXPECT_EQ(result.size(), 1); - EXPECT_EQ(result[0]->Dims(), (std::vector{2, 4})); -} -#endif // USE_CUDA +INFINI_TRAIN_REGISTER_TEST(AutogradBackwardTest); // ============================================================================ -// Distributed Tests - require CUDA + NCCL +// Distributed — requires NCCL + >=2 GPUs // ============================================================================ -#ifdef USE_NCCL -TEST_F(AutogradDistributedTest, AllReduceDistributed) { - REQUIRE_CUDA(); - REQUIRE_DISTRIBUTED(); - REQUIRE_NCCL(); - auto a = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, - Device(Device::DeviceType::kCUDA, 0)); +class AutogradDistributedTest : public infini_train::test::DistributedInfiniTrainTestP {}; + +TEST_P(AutogradDistributedTest, AllReduce) { + auto a = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, GetDevice()); a->set_requires_grad(true); infini_train::test::FillConstantTensor(a, 1.0f); - EXPECT_TRUE(a->GetDevice().IsCUDA()); EXPECT_TRUE(a->requires_grad()); } -TEST_F(AutogradDistributedTest, AllGatherDistributed) { - REQUIRE_CUDA(); - REQUIRE_DISTRIBUTED(); - REQUIRE_NCCL(); - auto a = std::make_shared(std::vector{4, 4}, DataType::kFLOAT32, - Device(Device::DeviceType::kCUDA, 0)); +TEST_P(AutogradDistributedTest, AllGather) { + auto a = std::make_shared(std::vector{4, 4}, DataType::kFLOAT32, GetDevice()); a->set_requires_grad(true); infini_train::test::FillConstantTensor(a, 1.0f); - EXPECT_TRUE(a->GetDevice().IsCUDA()); EXPECT_EQ(a->Dims(), (std::vector{4, 4})); } -TEST_F(AutogradDistributedTest, ReduceScatterDistributed) { - REQUIRE_CUDA(); - REQUIRE_DISTRIBUTED(); - REQUIRE_NCCL(); - auto a = std::make_shared(std::vector{2, 8}, DataType::kFLOAT32, - Device(Device::DeviceType::kCUDA, 0)); +TEST_P(AutogradDistributedTest, ReduceScatter) { + auto a = std::make_shared(std::vector{2, 8}, DataType::kFLOAT32, GetDevice()); a->set_requires_grad(true); infini_train::test::FillConstantTensor(a, 1.0f); - EXPECT_TRUE(a->GetDevice().IsCUDA()); EXPECT_EQ(a->Dims(), (std::vector{2, 8})); } -TEST_F(AutogradDistributedTest, DistributedMatmul) { - REQUIRE_CUDA(); - REQUIRE_DISTRIBUTED(); - REQUIRE_NCCL(); - auto a = std::make_shared(std::vector{2, 4}, DataType::kFLOAT32, - Device(Device::DeviceType::kCUDA, 0)); +TEST_P(AutogradDistributedTest, DistributedMatmul) { + auto a = std::make_shared(std::vector{2, 4}, DataType::kFLOAT32, GetDevice()); a->set_requires_grad(true); - auto b = std::make_shared(std::vector{4, 2}, DataType::kFLOAT32, - Device(Device::DeviceType::kCUDA, 0)); + auto b = std::make_shared(std::vector{4, 2}, DataType::kFLOAT32, GetDevice()); b->set_requires_grad(true); - - auto matmul_fn = std::make_shared(); - auto result = matmul_fn->Apply({a, b}); - + auto result = std::make_shared()->Apply({a, b}); EXPECT_EQ(result.size(), 1); EXPECT_TRUE(result[0]->GetDevice().IsCUDA()); } -TEST_F(AutogradDistributedTest, DistributedLinear) { - REQUIRE_CUDA(); - REQUIRE_DISTRIBUTED(); - REQUIRE_NCCL(); - auto input = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, - Device(Device::DeviceType::kCUDA, 0)); +TEST_P(AutogradDistributedTest, DistributedLinear) { + auto input = std::make_shared(std::vector{2, 3}, DataType::kFLOAT32, GetDevice()); input->set_requires_grad(true); - auto weight = std::make_shared(std::vector{4, 3}, DataType::kFLOAT32, - Device(Device::DeviceType::kCUDA, 0)); + auto weight = std::make_shared(std::vector{4, 3}, DataType::kFLOAT32, GetDevice()); weight->set_requires_grad(true); - auto bias = std::make_shared(std::vector{4}, DataType::kFLOAT32, - Device(Device::DeviceType::kCUDA, 0)); + auto bias = std::make_shared(std::vector{4}, DataType::kFLOAT32, GetDevice()); bias->set_requires_grad(true); - - auto linear_fn = std::make_shared(); - auto result = linear_fn->Apply({input, weight, bias}); - + auto result = std::make_shared()->Apply({input, weight, bias}); EXPECT_EQ(result.size(), 1); EXPECT_EQ(result[0]->Dims(), (std::vector{2, 4})); EXPECT_TRUE(result[0]->GetDevice().IsCUDA()); } -#endif // USE_NCCL + +INFINI_TRAIN_REGISTER_TEST_DISTRIBUTED(AutogradDistributedTest); diff --git a/tests/autograd/test_autograd_elementwise_backward.cc b/tests/autograd/test_autograd_elementwise_backward.cc index 502a20e5..65ffd5a8 100644 --- a/tests/autograd/test_autograd_elementwise_backward.cc +++ b/tests/autograd/test_autograd_elementwise_backward.cc @@ -10,9 +10,9 @@ using namespace infini_train; -class AutogradElementwiseBackwardTest : public infini_train::test::AutogradTestBase {}; +class AutogradElementwiseBackwardTest : public infini_train::test::AutogradTestBaseP {}; -TEST_F(AutogradElementwiseBackwardTest, AddBackward) { +TEST_P(AutogradElementwiseBackwardTest, AddBackward) { auto a = createTensor({2, 3}, 1.0f); auto b = createTensor({2, 3}, 2.0f); auto add_fn = std::make_shared(); @@ -22,7 +22,7 @@ TEST_F(AutogradElementwiseBackwardTest, AddBackward) { EXPECT_EQ(grad_inputs.size(), 2); } -TEST_F(AutogradElementwiseBackwardTest, SubBackward) { +TEST_P(AutogradElementwiseBackwardTest, SubBackward) { auto a = createTensor({2, 3}, 5.0f); auto b = createTensor({2, 3}, 3.0f); auto sub_fn = std::make_shared(); @@ -32,7 +32,7 @@ TEST_F(AutogradElementwiseBackwardTest, SubBackward) { EXPECT_EQ(grad_inputs.size(), 2); } -TEST_F(AutogradElementwiseBackwardTest, MulBackward) { +TEST_P(AutogradElementwiseBackwardTest, MulBackward) { auto a = createTensor({2, 3}, 2.0f); auto b = createTensor({2, 3}, 3.0f); auto mul_fn = std::make_shared(); @@ -42,7 +42,7 @@ TEST_F(AutogradElementwiseBackwardTest, MulBackward) { EXPECT_EQ(grad_inputs.size(), 2); } -TEST_F(AutogradElementwiseBackwardTest, DivBackward) { +TEST_P(AutogradElementwiseBackwardTest, DivBackward) { auto a = createTensor({2, 3}, 6.0f); auto b = createTensor({2, 3}, 2.0f); auto div_fn = std::make_shared(); @@ -52,7 +52,7 @@ TEST_F(AutogradElementwiseBackwardTest, DivBackward) { EXPECT_EQ(grad_inputs.size(), 2); } -TEST_F(AutogradElementwiseBackwardTest, NegBackward) { +TEST_P(AutogradElementwiseBackwardTest, NegBackward) { auto a = createTensor({2, 3}, 5.0f); auto neg_fn = std::make_shared(); auto result = neg_fn->Apply({a}); @@ -61,7 +61,7 @@ TEST_F(AutogradElementwiseBackwardTest, NegBackward) { EXPECT_EQ(grad_inputs.size(), 1); } -TEST_F(AutogradElementwiseBackwardTest, SinBackward) { +TEST_P(AutogradElementwiseBackwardTest, SinBackward) { auto a = createTensor({2, 3}, 0.0f); auto sin_fn = std::make_shared(); auto result = sin_fn->Apply({a}); @@ -70,7 +70,7 @@ TEST_F(AutogradElementwiseBackwardTest, SinBackward) { EXPECT_EQ(grad_inputs.size(), 1); } -TEST_F(AutogradElementwiseBackwardTest, CosBackward) { +TEST_P(AutogradElementwiseBackwardTest, CosBackward) { auto a = createTensor({2, 3}, 0.0f); auto cos_fn = std::make_shared(); auto result = cos_fn->Apply({a}); @@ -79,7 +79,7 @@ TEST_F(AutogradElementwiseBackwardTest, CosBackward) { EXPECT_EQ(grad_inputs.size(), 1); } -TEST_F(AutogradElementwiseBackwardTest, TanhBackward) { +TEST_P(AutogradElementwiseBackwardTest, TanhBackward) { auto a = createTensor({2, 3}, 0.0f); auto tanh_fn = std::make_shared(); auto result = tanh_fn->Apply({a}); @@ -88,7 +88,7 @@ TEST_F(AutogradElementwiseBackwardTest, TanhBackward) { EXPECT_EQ(grad_inputs.size(), 1); } -TEST_F(AutogradElementwiseBackwardTest, ExpBackward) { +TEST_P(AutogradElementwiseBackwardTest, ExpBackward) { auto a = createTensor({2, 3}, 1.0f); auto exp_fn = std::make_shared(); auto result = exp_fn->Apply({a}); @@ -97,7 +97,7 @@ TEST_F(AutogradElementwiseBackwardTest, ExpBackward) { EXPECT_EQ(grad_inputs.size(), 1); } -TEST_F(AutogradElementwiseBackwardTest, LogBackward) { +TEST_P(AutogradElementwiseBackwardTest, LogBackward) { auto a = createTensor({2, 3}, 2.0f); auto log_fn = std::make_shared(); auto result = log_fn->Apply({a}); @@ -106,7 +106,7 @@ TEST_F(AutogradElementwiseBackwardTest, LogBackward) { EXPECT_EQ(grad_inputs.size(), 1); } -TEST_F(AutogradElementwiseBackwardTest, ReciprocalBackward) { +TEST_P(AutogradElementwiseBackwardTest, ReciprocalBackward) { auto a = createTensor({2, 3}, 2.0f); auto reciprocal_fn = std::make_shared(); auto result = reciprocal_fn->Apply({a}); @@ -115,7 +115,7 @@ TEST_F(AutogradElementwiseBackwardTest, ReciprocalBackward) { EXPECT_EQ(grad_inputs.size(), 1); } -TEST_F(AutogradElementwiseBackwardTest, PowBackward) { +TEST_P(AutogradElementwiseBackwardTest, PowBackward) { auto a = createTensor({2, 3}, 2.0f); auto pow_fn = std::make_shared(2.0f); auto result = pow_fn->Apply({a}); @@ -124,7 +124,7 @@ TEST_F(AutogradElementwiseBackwardTest, PowBackward) { EXPECT_EQ(grad_inputs.size(), 1); } -TEST_F(AutogradElementwiseBackwardTest, RsqrtBackward) { +TEST_P(AutogradElementwiseBackwardTest, RsqrtBackward) { auto a = createTensor({2, 3}, 4.0f); auto rsqrt_fn = std::make_shared(); auto result = rsqrt_fn->Apply({a}); @@ -132,3 +132,5 @@ TEST_F(AutogradElementwiseBackwardTest, RsqrtBackward) { auto grad_inputs = rsqrt_fn->Backward({grad}); EXPECT_EQ(grad_inputs.size(), 1); } + +INFINI_TRAIN_REGISTER_TEST(AutogradElementwiseBackwardTest); diff --git a/tests/autograd/test_autograd_elementwise_forward.cc b/tests/autograd/test_autograd_elementwise_forward.cc index 63b386b1..20fe658e 100644 --- a/tests/autograd/test_autograd_elementwise_forward.cc +++ b/tests/autograd/test_autograd_elementwise_forward.cc @@ -11,9 +11,9 @@ using namespace infini_train; -class AutogradElementwiseForwardTest : public infini_train::test::AutogradTestBase {}; +class AutogradElementwiseForwardTest : public infini_train::test::AutogradTestBaseP {}; -TEST_F(AutogradElementwiseForwardTest, AddForward) { +TEST_P(AutogradElementwiseForwardTest, AddForward) { auto a = createTensor({2, 3}, 1.0f); auto b = createTensor({2, 3}, 2.0f); auto add_fn = std::make_shared(); @@ -22,7 +22,7 @@ TEST_F(AutogradElementwiseForwardTest, AddForward) { EXPECT_EQ(result[0]->Dims(), (std::vector{2, 3})); } -TEST_F(AutogradElementwiseForwardTest, SubForward) { +TEST_P(AutogradElementwiseForwardTest, SubForward) { auto a = createTensor({2, 3}, 5.0f); auto b = createTensor({2, 3}, 3.0f); auto sub_fn = std::make_shared(); @@ -30,7 +30,7 @@ TEST_F(AutogradElementwiseForwardTest, SubForward) { EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradElementwiseForwardTest, MulForward) { +TEST_P(AutogradElementwiseForwardTest, MulForward) { auto a = createTensor({2, 3}, 2.0f); auto b = createTensor({2, 3}, 3.0f); auto mul_fn = std::make_shared(); @@ -38,7 +38,7 @@ TEST_F(AutogradElementwiseForwardTest, MulForward) { EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradElementwiseForwardTest, DivForward) { +TEST_P(AutogradElementwiseForwardTest, DivForward) { auto a = createTensor({2, 3}, 6.0f); auto b = createTensor({2, 3}, 2.0f); auto div_fn = std::make_shared(); @@ -46,91 +46,91 @@ TEST_F(AutogradElementwiseForwardTest, DivForward) { EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradElementwiseForwardTest, NegForward) { +TEST_P(AutogradElementwiseForwardTest, NegForward) { auto a = createTensor({2, 3}, 5.0f); auto neg_fn = std::make_shared(); auto result = neg_fn->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradElementwiseForwardTest, SinForward) { +TEST_P(AutogradElementwiseForwardTest, SinForward) { auto a = createTensor({2, 3}, 0.0f); auto sin_fn = std::make_shared(); auto result = sin_fn->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradElementwiseForwardTest, CosForward) { +TEST_P(AutogradElementwiseForwardTest, CosForward) { auto a = createTensor({2, 3}, 0.0f); auto cos_fn = std::make_shared(); auto result = cos_fn->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradElementwiseForwardTest, TanhForward) { +TEST_P(AutogradElementwiseForwardTest, TanhForward) { auto a = createTensor({2, 3}, 0.0f); auto tanh_fn = std::make_shared(); auto result = tanh_fn->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradElementwiseForwardTest, ExpForward) { +TEST_P(AutogradElementwiseForwardTest, ExpForward) { auto a = createTensor({2, 3}, 1.0f); auto exp_fn = std::make_shared(); auto result = exp_fn->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradElementwiseForwardTest, LogForward) { +TEST_P(AutogradElementwiseForwardTest, LogForward) { auto a = createTensor({2, 3}, 2.0f); auto log_fn = std::make_shared(); auto result = log_fn->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradElementwiseForwardTest, ReciprocalForward) { +TEST_P(AutogradElementwiseForwardTest, ReciprocalForward) { auto a = createTensor({2, 3}, 2.0f); auto reciprocal_fn = std::make_shared(); auto result = reciprocal_fn->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradElementwiseForwardTest, PowForward) { +TEST_P(AutogradElementwiseForwardTest, PowForward) { auto a = createTensor({2, 3}, 2.0f); auto pow_fn = std::make_shared(2.0f); auto result = pow_fn->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradElementwiseForwardTest, RsqrtForward) { +TEST_P(AutogradElementwiseForwardTest, RsqrtForward) { auto a = createTensor({2, 3}, 4.0f); auto rsqrt_fn = std::make_shared(); auto result = rsqrt_fn->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradElementwiseForwardTest, SigmoidForward) { +TEST_P(AutogradElementwiseForwardTest, SigmoidForward) { auto a = createTensor({2, 3}, 0.0f); auto sigmoid_fn = std::make_shared(); auto result = sigmoid_fn->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradElementwiseForwardTest, AddScalarForward) { +TEST_P(AutogradElementwiseForwardTest, AddScalarForward) { auto a = createTensor({2, 3}, 1.0f); auto add_scalar_fn = std::make_shared(2.0f); auto result = add_scalar_fn->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradElementwiseForwardTest, MulScalarForward) { +TEST_P(AutogradElementwiseForwardTest, MulScalarForward) { auto a = createTensor({2, 3}, 2.0f); auto mul_scalar_fn = std::make_shared(3.0f); auto result = mul_scalar_fn->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradElementwiseForwardTest, LtForward) { +TEST_P(AutogradElementwiseForwardTest, LtForward) { auto a = createTensor({2, 3}, 5.0f); auto b = createTensor({2, 3}, 3.0f); auto lt_fn = std::make_shared(); @@ -138,7 +138,7 @@ TEST_F(AutogradElementwiseForwardTest, LtForward) { EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradElementwiseForwardTest, LeForward) { +TEST_P(AutogradElementwiseForwardTest, LeForward) { auto a = createTensor({2, 3}, 3.0f); auto b = createTensor({2, 3}, 3.0f); auto le_fn = std::make_shared(); @@ -146,7 +146,7 @@ TEST_F(AutogradElementwiseForwardTest, LeForward) { EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradElementwiseForwardTest, GtForward) { +TEST_P(AutogradElementwiseForwardTest, GtForward) { auto a = createTensor({2, 3}, 5.0f); auto b = createTensor({2, 3}, 3.0f); auto gt_fn = std::make_shared(); @@ -154,7 +154,7 @@ TEST_F(AutogradElementwiseForwardTest, GtForward) { EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradElementwiseForwardTest, GeForward) { +TEST_P(AutogradElementwiseForwardTest, GeForward) { auto a = createTensor({2, 3}, 3.0f); auto b = createTensor({2, 3}, 3.0f); auto ge_fn = std::make_shared(); @@ -162,7 +162,7 @@ TEST_F(AutogradElementwiseForwardTest, GeForward) { EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradElementwiseForwardTest, EqualsForward) { +TEST_P(AutogradElementwiseForwardTest, EqualsForward) { auto a = createTensor({2, 3}, 3.0f); auto b = createTensor({2, 3}, 3.0f); auto eq_fn = std::make_shared(); @@ -170,7 +170,7 @@ TEST_F(AutogradElementwiseForwardTest, EqualsForward) { EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradElementwiseForwardTest, AndForward) { +TEST_P(AutogradElementwiseForwardTest, AndForward) { auto a = createTensor({2, 3}, 1.0f); auto b = createTensor({2, 3}, 1.0f); auto and_fn = std::make_shared(); @@ -178,10 +178,12 @@ TEST_F(AutogradElementwiseForwardTest, AndForward) { EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradElementwiseForwardTest, OrForward) { +TEST_P(AutogradElementwiseForwardTest, OrForward) { auto a = createTensor({2, 3}, 0.0f); auto b = createTensor({2, 3}, 1.0f); auto or_fn = std::make_shared(); auto result = or_fn->Apply({a, b}); EXPECT_EQ(result.size(), 1); } + +INFINI_TRAIN_REGISTER_TEST(AutogradElementwiseForwardTest); diff --git a/tests/autograd/test_autograd_linear_backward.cc b/tests/autograd/test_autograd_linear_backward.cc index 069affc7..c62920b4 100644 --- a/tests/autograd/test_autograd_linear_backward.cc +++ b/tests/autograd/test_autograd_linear_backward.cc @@ -9,9 +9,9 @@ using namespace infini_train; -class AutogradLinearBackwardTest : public infini_train::test::AutogradTestBase {}; +class AutogradLinearBackwardTest : public infini_train::test::AutogradTestBaseP {}; -TEST_F(AutogradLinearBackwardTest, LinearBackward) { +TEST_P(AutogradLinearBackwardTest, LinearBackward) { auto input = createTensor({2, 3}, 1.0f); auto weight = createTensor({4, 3}, 1.0f); auto bias = createTensor({4}, 0.0f); @@ -22,7 +22,7 @@ TEST_F(AutogradLinearBackwardTest, LinearBackward) { EXPECT_EQ(grad_inputs.size(), 3); } -TEST_F(AutogradLinearBackwardTest, LinearBackwardNoBias) { +TEST_P(AutogradLinearBackwardTest, LinearBackwardNoBias) { auto input = createTensor({2, 3}, 1.0f); auto weight = createTensor({4, 3}, 1.0f); auto linear_fn = std::make_shared(); @@ -31,3 +31,5 @@ TEST_F(AutogradLinearBackwardTest, LinearBackwardNoBias) { auto grad_inputs = linear_fn->Backward({grad}); EXPECT_EQ(grad_inputs.size(), 2); } + +INFINI_TRAIN_REGISTER_TEST(AutogradLinearBackwardTest); diff --git a/tests/autograd/test_autograd_linear_forward.cc b/tests/autograd/test_autograd_linear_forward.cc index efd8d6eb..5fb41546 100644 --- a/tests/autograd/test_autograd_linear_forward.cc +++ b/tests/autograd/test_autograd_linear_forward.cc @@ -9,9 +9,9 @@ using namespace infini_train; -class AutogradLinearForwardTest : public infini_train::test::AutogradTestBase {}; +class AutogradLinearForwardTest : public infini_train::test::AutogradTestBaseP {}; -TEST_F(AutogradLinearForwardTest, LinearForward) { +TEST_P(AutogradLinearForwardTest, LinearForward) { auto input = createTensor({2, 3}, 1.0f); auto weight = createTensor({4, 3}, 1.0f); auto bias = createTensor({4}, 0.0f); @@ -21,7 +21,7 @@ TEST_F(AutogradLinearForwardTest, LinearForward) { EXPECT_EQ(result[0]->Dims(), (std::vector{2, 4})); } -TEST_F(AutogradLinearForwardTest, LinearNoBias) { +TEST_P(AutogradLinearForwardTest, LinearNoBias) { auto input = createTensor({2, 3}, 1.0f); auto weight = createTensor({4, 3}, 1.0f); auto linear_fn = std::make_shared(); @@ -30,7 +30,7 @@ TEST_F(AutogradLinearForwardTest, LinearNoBias) { EXPECT_EQ(result[0]->Dims(), (std::vector{2, 4})); } -TEST_F(AutogradLinearForwardTest, LinearBatch) { +TEST_P(AutogradLinearForwardTest, LinearBatch) { auto input = createTensor({32, 128}, 1.0f); auto weight = createTensor({64, 128}, 1.0f); auto bias = createTensor({64}, 0.0f); @@ -39,3 +39,5 @@ TEST_F(AutogradLinearForwardTest, LinearBatch) { EXPECT_EQ(result.size(), 1); EXPECT_EQ(result[0]->Dims(), (std::vector{32, 64})); } + +INFINI_TRAIN_REGISTER_TEST(AutogradLinearForwardTest); diff --git a/tests/autograd/test_autograd_matmul_backward.cc b/tests/autograd/test_autograd_matmul_backward.cc index e9962f5d..aeb26a55 100644 --- a/tests/autograd/test_autograd_matmul_backward.cc +++ b/tests/autograd/test_autograd_matmul_backward.cc @@ -9,9 +9,9 @@ using namespace infini_train; -class AutogradMatmulBackwardTest : public infini_train::test::AutogradTestBase {}; +class AutogradMatmulBackwardTest : public infini_train::test::AutogradTestBaseP {}; -TEST_F(AutogradMatmulBackwardTest, MatmulBackward) { +TEST_P(AutogradMatmulBackwardTest, MatmulBackward) { auto a = createTensor({2, 3}, 1.0f); auto b = createTensor({3, 4}, 1.0f); auto matmul_fn = std::make_shared(); @@ -21,7 +21,7 @@ TEST_F(AutogradMatmulBackwardTest, MatmulBackward) { EXPECT_EQ(grad_inputs.size(), 2); } -TEST_F(AutogradMatmulBackwardTest, MatmulBackwardSquare) { +TEST_P(AutogradMatmulBackwardTest, MatmulBackwardSquare) { auto a = createTensor({3, 3}, 2.0f); auto b = createTensor({3, 3}, 3.0f); auto matmul_fn = std::make_shared(); @@ -31,7 +31,7 @@ TEST_F(AutogradMatmulBackwardTest, MatmulBackwardSquare) { EXPECT_EQ(grad_inputs.size(), 2); } -TEST_F(AutogradMatmulBackwardTest, MatmulBackwardDifferentShapes) { +TEST_P(AutogradMatmulBackwardTest, MatmulBackwardDifferentShapes) { auto a = createTensor({3, 4}, 1.5f); auto b = createTensor({4, 2}, 2.5f); auto matmul_fn = std::make_shared(); @@ -40,3 +40,5 @@ TEST_F(AutogradMatmulBackwardTest, MatmulBackwardDifferentShapes) { auto grad_inputs = matmul_fn->Backward({grad}); EXPECT_EQ(grad_inputs.size(), 2); } + +INFINI_TRAIN_REGISTER_TEST(AutogradMatmulBackwardTest); diff --git a/tests/autograd/test_autograd_matmul_forward.cc b/tests/autograd/test_autograd_matmul_forward.cc index 87c93f08..8e325c03 100644 --- a/tests/autograd/test_autograd_matmul_forward.cc +++ b/tests/autograd/test_autograd_matmul_forward.cc @@ -9,9 +9,9 @@ using namespace infini_train; -class AutogradMatmulForwardTest : public infini_train::test::AutogradTestBase {}; +class AutogradMatmulForwardTest : public infini_train::test::AutogradTestBaseP {}; -TEST_F(AutogradMatmulForwardTest, MatmulForward) { +TEST_P(AutogradMatmulForwardTest, MatmulForward) { auto a = createTensor({2, 3}, 1.0f); auto b = createTensor({3, 4}, 1.0f); auto matmul_fn = std::make_shared(); @@ -20,7 +20,7 @@ TEST_F(AutogradMatmulForwardTest, MatmulForward) { EXPECT_EQ(result[0]->Dims(), (std::vector{2, 4})); } -TEST_F(AutogradMatmulForwardTest, MatmulDifferentShapes) { +TEST_P(AutogradMatmulForwardTest, MatmulDifferentShapes) { auto a = createTensor({3, 4}, 1.0f); auto b = createTensor({4, 2}, 1.0f); auto matmul_fn = std::make_shared(); @@ -29,7 +29,7 @@ TEST_F(AutogradMatmulForwardTest, MatmulDifferentShapes) { EXPECT_EQ(result[0]->Dims(), (std::vector{3, 2})); } -TEST_F(AutogradMatmulForwardTest, MatmulBatch) { +TEST_P(AutogradMatmulForwardTest, MatmulBatch) { auto a = createTensor({2, 3, 4}, 1.0f); auto b = createTensor({2, 4, 5}, 1.0f); auto matmul_fn = std::make_shared(); @@ -38,7 +38,7 @@ TEST_F(AutogradMatmulForwardTest, MatmulBatch) { EXPECT_EQ(result[0]->Dims(), (std::vector{2, 3, 5})); } -TEST_F(AutogradMatmulForwardTest, MatmulSquare) { +TEST_P(AutogradMatmulForwardTest, MatmulSquare) { auto a = createTensor({3, 3}, 1.0f); auto b = createTensor({3, 3}, 1.0f); auto matmul_fn = std::make_shared(); @@ -46,3 +46,5 @@ TEST_F(AutogradMatmulForwardTest, MatmulSquare) { EXPECT_EQ(result.size(), 1); EXPECT_EQ(result[0]->Dims(), (std::vector{3, 3})); } + +INFINI_TRAIN_REGISTER_TEST(AutogradMatmulForwardTest); diff --git a/tests/autograd/test_autograd_normalization_backward.cc b/tests/autograd/test_autograd_normalization_backward.cc index 6f97349e..b477cf47 100644 --- a/tests/autograd/test_autograd_normalization_backward.cc +++ b/tests/autograd/test_autograd_normalization_backward.cc @@ -9,9 +9,9 @@ using namespace infini_train; -class AutogradNormalizationBackwardTest : public infini_train::test::AutogradTestBase {}; +class AutogradNormalizationBackwardTest : public infini_train::test::AutogradTestBaseP {}; -TEST_F(AutogradNormalizationBackwardTest, LayerNormBackward) { +TEST_P(AutogradNormalizationBackwardTest, LayerNormBackward) { auto a = createTensor({2, 3, 4}, 1.0f); auto weight = createTensor({4}, 1.0f); auto bias = createTensor({4}, 0.0f); @@ -22,7 +22,7 @@ TEST_F(AutogradNormalizationBackwardTest, LayerNormBackward) { EXPECT_EQ(grad_inputs.size(), 3); } -TEST_F(AutogradNormalizationBackwardTest, LayerNormBackwardZeroBias) { +TEST_P(AutogradNormalizationBackwardTest, LayerNormBackwardZeroBias) { auto a = createTensor({2, 3, 4}, 1.0f); auto weight = createTensor({4}, 1.0f); auto bias = createTensor({4}, 0.0f); @@ -32,3 +32,5 @@ TEST_F(AutogradNormalizationBackwardTest, LayerNormBackwardZeroBias) { auto grad_inputs = layernorm_fn->Backward({grad}); EXPECT_EQ(grad_inputs.size(), 3); } + +INFINI_TRAIN_REGISTER_TEST(AutogradNormalizationBackwardTest); diff --git a/tests/autograd/test_autograd_normalization_forward.cc b/tests/autograd/test_autograd_normalization_forward.cc index d58fd749..4ed92296 100644 --- a/tests/autograd/test_autograd_normalization_forward.cc +++ b/tests/autograd/test_autograd_normalization_forward.cc @@ -9,9 +9,9 @@ using namespace infini_train; -class AutogradNormalizationForwardTest : public infini_train::test::AutogradTestBase {}; +class AutogradNormalizationForwardTest : public infini_train::test::AutogradTestBaseP {}; -TEST_F(AutogradNormalizationForwardTest, LayerNormForward) { +TEST_P(AutogradNormalizationForwardTest, LayerNormForward) { auto a = createTensor({2, 3, 4}, 1.0f); auto weight = createTensor({4}, 1.0f); auto bias = createTensor({4}, 0.0f); @@ -20,7 +20,7 @@ TEST_F(AutogradNormalizationForwardTest, LayerNormForward) { EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradNormalizationForwardTest, LayerNormZeroBias) { +TEST_P(AutogradNormalizationForwardTest, LayerNormZeroBias) { auto a = createTensor({2, 3, 4}, 1.0f); auto weight = createTensor({4}, 1.0f); auto bias = createTensor({4}, 0.0f); @@ -29,7 +29,7 @@ TEST_F(AutogradNormalizationForwardTest, LayerNormZeroBias) { EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradNormalizationForwardTest, LayerNormThreeDim) { +TEST_P(AutogradNormalizationForwardTest, LayerNormThreeDim) { auto a = createTensor({2, 1, 4}, 1.0f); auto weight = createTensor({4}, 1.0f); auto bias = createTensor({4}, 0.0f); @@ -38,3 +38,5 @@ TEST_F(AutogradNormalizationForwardTest, LayerNormThreeDim) { EXPECT_EQ(result.size(), 1); EXPECT_EQ(result[0]->Dims(), (std::vector{2, 1, 4})); } + +INFINI_TRAIN_REGISTER_TEST(AutogradNormalizationForwardTest); diff --git a/tests/autograd/test_autograd_reduction_backward.cc b/tests/autograd/test_autograd_reduction_backward.cc index d212a065..9834ffd6 100644 --- a/tests/autograd/test_autograd_reduction_backward.cc +++ b/tests/autograd/test_autograd_reduction_backward.cc @@ -9,9 +9,9 @@ using namespace infini_train; -class AutogradReductionBackwardTest : public infini_train::test::AutogradTestBase {}; +class AutogradReductionBackwardTest : public infini_train::test::AutogradTestBaseP {}; -TEST_F(AutogradReductionBackwardTest, SumBackward) { +TEST_P(AutogradReductionBackwardTest, SumBackward) { auto a = createTensor({2, 3}, 1.0f); auto sum_fn = std::make_shared(1, false); auto result = sum_fn->Apply({a}); @@ -20,7 +20,7 @@ TEST_F(AutogradReductionBackwardTest, SumBackward) { EXPECT_EQ(grad_inputs.size(), 1); } -TEST_F(AutogradReductionBackwardTest, MeanBackward) { +TEST_P(AutogradReductionBackwardTest, MeanBackward) { auto a = createTensor({2, 3}, 1.0f); auto mean_fn = std::make_shared(1, false); auto result = mean_fn->Apply({a}); @@ -29,7 +29,7 @@ TEST_F(AutogradReductionBackwardTest, MeanBackward) { EXPECT_EQ(grad_inputs.size(), 1); } -TEST_F(AutogradReductionBackwardTest, MaxBackward) { +TEST_P(AutogradReductionBackwardTest, MaxBackward) { auto a = createTensor({2, 3}, 1.0f); auto max_fn = std::make_shared(1, false); auto result = max_fn->Apply({a}); @@ -38,7 +38,7 @@ TEST_F(AutogradReductionBackwardTest, MaxBackward) { EXPECT_EQ(grad_inputs.size(), 1); } -TEST_F(AutogradReductionBackwardTest, MinBackward) { +TEST_P(AutogradReductionBackwardTest, MinBackward) { auto a = createTensor({2, 3}, 1.0f); auto min_fn = std::make_shared(1, false); auto result = min_fn->Apply({a}); @@ -47,7 +47,7 @@ TEST_F(AutogradReductionBackwardTest, MinBackward) { EXPECT_EQ(grad_inputs.size(), 1); } -TEST_F(AutogradReductionBackwardTest, SumBackwardKeepDim) { +TEST_P(AutogradReductionBackwardTest, SumBackwardKeepDim) { auto a = createTensor({2, 3}, 1.0f); auto sum_fn = std::make_shared(1, true); auto result = sum_fn->Apply({a}); @@ -56,7 +56,7 @@ TEST_F(AutogradReductionBackwardTest, SumBackwardKeepDim) { EXPECT_EQ(grad_inputs.size(), 1); } -TEST_F(AutogradReductionBackwardTest, MeanBackwardKeepDim) { +TEST_P(AutogradReductionBackwardTest, MeanBackwardKeepDim) { auto a = createTensor({2, 3}, 1.0f); auto mean_fn = std::make_shared(1, true); auto result = mean_fn->Apply({a}); @@ -64,3 +64,5 @@ TEST_F(AutogradReductionBackwardTest, MeanBackwardKeepDim) { auto grad_inputs = mean_fn->Backward({grad}); EXPECT_EQ(grad_inputs.size(), 1); } + +INFINI_TRAIN_REGISTER_TEST(AutogradReductionBackwardTest); diff --git a/tests/autograd/test_autograd_reduction_forward.cc b/tests/autograd/test_autograd_reduction_forward.cc index b4f8edb7..4d8ed2e4 100644 --- a/tests/autograd/test_autograd_reduction_forward.cc +++ b/tests/autograd/test_autograd_reduction_forward.cc @@ -9,46 +9,48 @@ using namespace infini_train; -class AutogradReductionForwardTest : public infini_train::test::AutogradTestBase {}; +class AutogradReductionForwardTest : public infini_train::test::AutogradTestBaseP {}; -TEST_F(AutogradReductionForwardTest, SumForward) { +TEST_P(AutogradReductionForwardTest, SumForward) { auto a = createTensor({2, 3}, 1.0f); auto sum_fn = std::make_shared(1, false); auto result = sum_fn->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradReductionForwardTest, MeanForward) { +TEST_P(AutogradReductionForwardTest, MeanForward) { auto a = createTensor({2, 3}, 1.0f); auto mean_fn = std::make_shared(1, false); auto result = mean_fn->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradReductionForwardTest, MaxForward) { +TEST_P(AutogradReductionForwardTest, MaxForward) { auto a = createTensor({2, 3}, 1.0f); auto max_fn = std::make_shared(1, false); auto result = max_fn->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradReductionForwardTest, MinForward) { +TEST_P(AutogradReductionForwardTest, MinForward) { auto a = createTensor({2, 3}, 1.0f); auto min_fn = std::make_shared(1, false); auto result = min_fn->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradReductionForwardTest, SumKeepDim) { +TEST_P(AutogradReductionForwardTest, SumKeepDim) { auto a = createTensor({2, 3}, 1.0f); auto sum_fn = std::make_shared(1, true); auto result = sum_fn->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradReductionForwardTest, MeanKeepDim) { +TEST_P(AutogradReductionForwardTest, MeanKeepDim) { auto a = createTensor({2, 3}, 1.0f); auto mean_fn = std::make_shared(1, true); auto result = mean_fn->Apply({a}); EXPECT_EQ(result.size(), 1); } + +INFINI_TRAIN_REGISTER_TEST(AutogradReductionForwardTest); diff --git a/tests/autograd/test_autograd_softmax_backward.cc b/tests/autograd/test_autograd_softmax_backward.cc index 6d3f02a4..b07ac833 100644 --- a/tests/autograd/test_autograd_softmax_backward.cc +++ b/tests/autograd/test_autograd_softmax_backward.cc @@ -9,9 +9,9 @@ using namespace infini_train; -class AutogradSoftmaxBackwardTest : public infini_train::test::AutogradTestBase {}; +class AutogradSoftmaxBackwardTest : public infini_train::test::AutogradTestBaseP {}; -TEST_F(AutogradSoftmaxBackwardTest, SoftmaxBackward) { +TEST_P(AutogradSoftmaxBackwardTest, SoftmaxBackward) { auto a = createTensor({2, 3}, 1.0f); auto softmax_fn = std::make_shared(1); auto result = softmax_fn->Apply({a}); @@ -20,7 +20,7 @@ TEST_F(AutogradSoftmaxBackwardTest, SoftmaxBackward) { EXPECT_EQ(grad_inputs.size(), 1); } -TEST_F(AutogradSoftmaxBackwardTest, SoftmaxBackwardDim0) { +TEST_P(AutogradSoftmaxBackwardTest, SoftmaxBackwardDim0) { auto a = createTensor({4, 3}, 1.0f); auto softmax_fn = std::make_shared(0); auto result = softmax_fn->Apply({a}); @@ -28,3 +28,5 @@ TEST_F(AutogradSoftmaxBackwardTest, SoftmaxBackwardDim0) { auto grad_inputs = softmax_fn->Backward({grad}); EXPECT_EQ(grad_inputs.size(), 1); } + +INFINI_TRAIN_REGISTER_TEST(AutogradSoftmaxBackwardTest); diff --git a/tests/autograd/test_autograd_softmax_forward.cc b/tests/autograd/test_autograd_softmax_forward.cc index c3d196f1..42939fc9 100644 --- a/tests/autograd/test_autograd_softmax_forward.cc +++ b/tests/autograd/test_autograd_softmax_forward.cc @@ -9,9 +9,9 @@ using namespace infini_train; -class AutogradSoftmaxForwardTest : public infini_train::test::AutogradTestBase {}; +class AutogradSoftmaxForwardTest : public infini_train::test::AutogradTestBaseP {}; -TEST_F(AutogradSoftmaxForwardTest, SoftmaxForward) { +TEST_P(AutogradSoftmaxForwardTest, SoftmaxForward) { auto a = createTensor({2, 3}, 1.0f); auto softmax_fn = std::make_shared(1); auto result = softmax_fn->Apply({a}); @@ -19,7 +19,7 @@ TEST_F(AutogradSoftmaxForwardTest, SoftmaxForward) { EXPECT_EQ(result[0]->Dims(), (std::vector{2, 3})); } -TEST_F(AutogradSoftmaxForwardTest, SoftmaxDim0) { +TEST_P(AutogradSoftmaxForwardTest, SoftmaxDim0) { auto a = createTensor({4, 3}, 1.0f); auto softmax_fn = std::make_shared(0); auto result = softmax_fn->Apply({a}); @@ -27,10 +27,12 @@ TEST_F(AutogradSoftmaxForwardTest, SoftmaxDim0) { EXPECT_EQ(result[0]->Dims(), (std::vector{4, 3})); } -TEST_F(AutogradSoftmaxForwardTest, SoftmaxLastDim) { +TEST_P(AutogradSoftmaxForwardTest, SoftmaxLastDim) { auto a = createTensor({2, 3, 4}, 1.0f); auto softmax_fn = std::make_shared(2); auto result = softmax_fn->Apply({a}); EXPECT_EQ(result.size(), 1); EXPECT_EQ(result[0]->Dims(), (std::vector{2, 3, 4})); } + +INFINI_TRAIN_REGISTER_TEST(AutogradSoftmaxForwardTest); diff --git a/tests/autograd/test_autograd_transform_backward.cc b/tests/autograd/test_autograd_transform_backward.cc index 1613f1a2..2a542d4e 100644 --- a/tests/autograd/test_autograd_transform_backward.cc +++ b/tests/autograd/test_autograd_transform_backward.cc @@ -9,9 +9,9 @@ using namespace infini_train; -class AutogradTransformBackwardTest : public infini_train::test::AutogradTestBase {}; +class AutogradTransformBackwardTest : public infini_train::test::AutogradTestBaseP {}; -TEST_F(AutogradTransformBackwardTest, TransposeBackward) { +TEST_P(AutogradTransformBackwardTest, TransposeBackward) { auto a = createTensor({2, 3}, 1.0f); auto transpose_fn = std::make_shared(0, 1); auto result = transpose_fn->Apply({a}); @@ -19,3 +19,5 @@ TEST_F(AutogradTransformBackwardTest, TransposeBackward) { auto grad_inputs = transpose_fn->Backward({grad}); EXPECT_EQ(grad_inputs.size(), 1); } + +INFINI_TRAIN_REGISTER_TEST(AutogradTransformBackwardTest); diff --git a/tests/autograd/test_autograd_transform_forward.cc b/tests/autograd/test_autograd_transform_forward.cc index 67b20adb..bc4da44f 100644 --- a/tests/autograd/test_autograd_transform_forward.cc +++ b/tests/autograd/test_autograd_transform_forward.cc @@ -10,9 +10,9 @@ using namespace infini_train; -class AutogradTransformForwardTest : public infini_train::test::AutogradTestBase {}; +class AutogradTransformForwardTest : public infini_train::test::AutogradTestBaseP {}; -TEST_F(AutogradTransformForwardTest, TransposeForward) { +TEST_P(AutogradTransformForwardTest, TransposeForward) { auto a = createTensor({2, 3}, 1.0f); auto transpose_fn = std::make_shared(0, 1); auto result = transpose_fn->Apply({a}); @@ -20,7 +20,7 @@ TEST_F(AutogradTransformForwardTest, TransposeForward) { EXPECT_EQ(result[0]->Dims(), (std::vector{3, 2})); } -TEST_F(AutogradTransformForwardTest, SliceForward) { +TEST_P(AutogradTransformForwardTest, SliceForward) { auto a = createTensor({4, 4}, 1.0f); auto slice_fn = std::make_shared( std::vector{1, 1}, @@ -30,14 +30,14 @@ TEST_F(AutogradTransformForwardTest, SliceForward) { EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTransformForwardTest, SplitForward) { +TEST_P(AutogradTransformForwardTest, SplitForward) { auto a = createTensor({4, 4}, 1.0f); auto split_fn = std::make_shared(2, 0); auto result = split_fn->Apply({a}); EXPECT_EQ(result.size(), 2); } -TEST_F(AutogradTransformForwardTest, ConcatForward) { +TEST_P(AutogradTransformForwardTest, ConcatForward) { auto a = createTensor({2, 2}, 1.0f); auto b = createTensor({2, 2}, 2.0f); auto concat_fn = std::make_shared(0); @@ -46,7 +46,7 @@ TEST_F(AutogradTransformForwardTest, ConcatForward) { EXPECT_EQ(result[0]->Dims(), (std::vector{4, 2})); } -TEST_F(AutogradTransformForwardTest, StackForward) { +TEST_P(AutogradTransformForwardTest, StackForward) { auto a = createTensor({2, 3}, 1.0f); auto b = createTensor({2, 3}, 2.0f); auto stack_fn = std::make_shared(0); @@ -55,16 +55,18 @@ TEST_F(AutogradTransformForwardTest, StackForward) { EXPECT_EQ(result[0]->Dims(), (std::vector{2, 2, 3})); } -TEST_F(AutogradTransformForwardTest, TrilForward) { +TEST_P(AutogradTransformForwardTest, TrilForward) { auto a = createTensor({3, 3}, 1.0f); auto tril_fn = std::make_shared(0); auto result = tril_fn->Apply({a}); EXPECT_EQ(result.size(), 1); } -TEST_F(AutogradTransformForwardTest, TriuForward) { +TEST_P(AutogradTransformForwardTest, TriuForward) { auto a = createTensor({3, 3}, 1.0f); auto triu_fn = std::make_shared(0); auto result = triu_fn->Apply({a}); EXPECT_EQ(result.size(), 1); } + +INFINI_TRAIN_REGISTER_TEST(AutogradTransformForwardTest); diff --git a/tests/common/test_macros.cmake b/tests/common/test_macros.cmake index 8f5fac59..477a668d 100644 --- a/tests/common/test_macros.cmake +++ b/tests/common/test_macros.cmake @@ -1,13 +1,13 @@ # ============================================================================ -# InfiniTrain 测试宏 +# InfiniTrain Test Macros # ============================================================================ -# 提供统一的测试配置接口,降低接入成本 +# Unified test configuration interface to reduce boilerplate. # -# 使用方法: -# 1. 在 tests/CMakeLists.txt 中 include 此文件 -# 2. 使用 infini_train_add_test 宏注册测试 +# Usage: +# 1. Include this file in tests/CMakeLists.txt +# 2. Use infini_train_add_test macro to register tests # -# 示例: +# Examples: # infini_train_add_test( # test_tensor_create # SOURCES test_tensor_create.cc @@ -17,76 +17,125 @@ include_guard(GLOBAL) -# 获取 test_macros.cmake 所在目录(tests/common/) +# Path to this file's directory (tests/common/) set(TEST_MACROS_DIR "${CMAKE_CURRENT_LIST_DIR}") # ----------------------------------------------------------------------------- -# 加载 GoogleTest 模块(提供 gtest_discover_tests) +# Load GoogleTest module (provides gtest_discover_tests) # ----------------------------------------------------------------------------- include(GoogleTest) # ----------------------------------------------------------------------------- -# infini_train_add_test - 测试注册宏 +# infini_train_add_test - Test registration macro # ----------------------------------------------------------------------------- -# 功能: -# 1. 创建可执行文件 -# 2. 配置编译选项、链接库和头文件路径 -# 3. 使用 gtest_discover_tests 自动发现测试用例 -# 4. 设置测试标签 +# Features: +# 1. Create executable target +# 2. Configure compile options, link libraries, and include paths +# 3. Use gtest_discover_tests to auto-discover test cases +# 4. Set test labels # -# 参数: -# SOURCES: 源文件列表(必填) -# LABELS: 测试标签,如 "cpu" "cuda" "distributed"(可选,默认 "cpu") +# Arguments: +# SOURCES: Source file list (required) +# LABELS: Test labels, e.g. "cpu" "cuda" "distributed" (optional, default "cpu") +# TEST_FILTER: gtest test filter pattern (optional) # -# 示例: -# # 简单测试(1行) +# Examples: +# # Single-label test (one liner) # infini_train_add_test(test_example SOURCES test_example.cc LABELS cpu) # -# # 多标签测试 -# infini_train_add_test(test_cuda_example SOURCES test_cuda.cc LABELS cuda distributed) +# # Filter same binary by label suffix (one call per label) +# infini_train_add_test(test_example SOURCES test_example.cc LABELS cpu TEST_FILTER "-*CUDA*") +# infini_train_add_test(test_example_cuda SOURCES test_example.cc LABELS cuda TEST_FILTER "*CUDA*") # ----------------------------------------------------------------------------- macro(infini_train_add_test) - cmake_parse_arguments(ARG "" "TEST_NAME" "SOURCES;LABELS" ${ARGN}) - + cmake_parse_arguments(ARG "" "TEST_NAME;TEST_FILTER" "SOURCES;LABELS" ${ARGN}) + if(NOT ARG_TEST_NAME) set(ARG_TEST_NAME ${ARG_UNPARSED_ARGUMENTS}) endif() - + if(NOT ARG_SOURCES) message(FATAL_ERROR "infini_train_add_test: TEST_NAME and SOURCES are required") endif() - - # 1. 创建可执行文件 + + # 1. Create executable target add_executable(${ARG_TEST_NAME} ${ARG_SOURCES}) - - # 2. 配置编译选项(禁用警告转错误,以便在宽松编译环境下运行) + + # 2. Disable -Werror so tests can run under relaxed warning levels target_compile_options(${ARG_TEST_NAME} PRIVATE -Wno-error) - - # 3. 链接 Google Test + + # 3. Link Google Test target_link_libraries(${ARG_TEST_NAME} PRIVATE GTest::gtest GTest::gtest_main ) - - # 4. 添加头文件路径 - target_include_directories(${ARG_TEST_NAME} PRIVATE + + # 4. Add include paths + target_include_directories(${ARG_TEST_NAME} PRIVATE ${TEST_MACROS_DIR} ${glog_SOURCE_DIR}/src ) - - # 5. 链接项目库(复用框架链接策略,包含 CUDA/静态库依赖处理) + + # 5. Link project library (reuses framework linking strategy) link_infini_train_exe(${ARG_TEST_NAME}) - - # 6. 使用 gtest_discover_tests 自动发现测试用例 - # 这会自动为每个 TEST_F() 创建一个 ctest 测试 + + # 6. Auto-discover gtest cases and register as ctest tests set(labels "cpu") if(ARG_LABELS) set(labels "${ARG_LABELS}") endif() - - gtest_discover_tests(${ARG_TEST_NAME} - # 自动将测试输出重定向到 XML(便于 CI 集成) - EXTRA_ARGS --gtest_output=xml:%T.xml - PROPERTIES LABELS "${labels}" - ) + + if(ARG_TEST_FILTER) + gtest_discover_tests(${ARG_TEST_NAME} + EXTRA_ARGS --gtest_output=xml:%T.xml + TEST_FILTER "${ARG_TEST_FILTER}" + PROPERTIES LABELS "${labels}" + ) + else() + gtest_discover_tests(${ARG_TEST_NAME} + EXTRA_ARGS --gtest_output=xml:%T.xml + PROPERTIES LABELS "${labels}" + ) + endif() +endmacro() + +# ----------------------------------------------------------------------------- +# infini_train_add_test_suite - Register cpu/cuda/distributed targets in one call +# ----------------------------------------------------------------------------- +# Calls infini_train_add_test three times (or fewer) with the correct +# TEST_FILTER and LABELS derived from the label list. +# +# Arguments: +# Base name; each target is named _